/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 915 - (show annotations)
Tue Feb 14 13:05:39 2012 UTC (7 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 222249 byte(s)
Improved \X and back reference partial matching
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Allocate memory on the stack. Fast, but limited size.
NOTE(review): LOCAL_SPACE_SIZE and STACK_GROWTH_RATE are not referenced in
this part of the file; they look like byte counts - confirm at the use sites. */
69 #define LOCAL_SPACE_SIZE 32768
70
71 #define STACK_GROWTH_RATE 8192
72
73 /* Debug aid, only defined when SLJIT_DEBUG is non-zero. When it is defined,
allocate_stack() overwrites TMP1, TMP3, RETURN_ADDR, LOCALS0 and LOCALS1 with
a dummy value, so code that wrongly relies on them surviving a stack
allocation fails visibly. */
74 #if defined SLJIT_DEBUG && SLJIT_DEBUG
75 #define DESTROY_REGISTERS 1
76 #endif
77
78 /*
79 Short summary about the backtracking mechanism employed by the jit code generator:
80
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct fallback_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called the hot path (expected path), and the other is called
93 the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the hot path.
95
96 Greedy star operator (*) :
97 Hot path: match happens.
98 Fallback path: match failed.
99 Non-greedy star operator (*?) :
100 Hot path: no need to perform a match.
101 Fallback path: match is required.
102
103 The following example shows how the code is generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A hot path
112 '(' hot path (pushing arguments to the stack)
113 B hot path
114 ')' hot path (pushing arguments to the stack)
115 D hot path
116 return with successful match
117
118 D fallback path
119 ')' fallback path (If we arrived from "C" jump to the fallback of "C")
120 B fallback path
121 C expected path
122 jump to D hot path
123 C fallback path
124 A fallback path
125
126 Notice that the order of the fallback code paths is the opposite of the fast
127 code paths. In this way the topmost value on the stack always belongs
128 to the current fallback code path. The fallback code path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the hot path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the fallback code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when the fallback mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
/* Argument block for the compiled code: jit_function receives a pointer to
one of these. NOTE(review): this part of the file only shows `begin` being
read (in reset_ovector); the meaning of the other fields is inferred from
their names and should be confirmed where the structure is filled in. */
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *ptr;
156 /* Everything else after. */
157 int offsetcount;
158 int calllimit;
159 pcre_uint8 notbol;
160 pcre_uint8 noteol;
161 pcre_uint8 notempty;
162 pcre_uint8 notempty_atstart;
163 } jit_arguments;
164
/* Result record of the JIT compiler. Both arrays have one slot per compile
mode (JIT_NUMBER_OF_COMPILE_MODES entries each). NOTE(review): presumably
executable_sizes[i] is the size of the code stored in executable_funcs[i],
and callback / userdata belong to the PUBL(jit_callback) mechanism - confirm. */
165 typedef struct executable_functions {
166 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
167 PUBL(jit_callback) callback;
168 void *userdata;
169 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
170 } executable_functions;
171
/* Singly linked list of pending jumps. add_jump() pushes a new entry at the
head of a list; set_jumps() binds every jump of a list to one label. */
172 typedef struct jump_list {
173 struct sljit_jump *jump;
174 struct jump_list *next;
175 } jump_list;
176
/* Kinds of out-of-line stubs; stack_alloc is the only one. */
177 enum stub_types { stack_alloc };
178
/* One deferred out-of-line code fragment. Entries are created by add_stub()
and their code is emitted later by flush_stubs(). */
179 typedef struct stub_list {
180 enum stub_types type;
181 int data; /* Caller supplied value; allocate_stack() passes 0. */
182 struct sljit_jump *start; /* Jump which enters the stub. */
183 struct sljit_label *leave; /* Label the stub jumps back to. */
184 struct stub_list *next;
185 } stub_list;
186
/* Signature of the generated entry point (SLJIT_CALL calling convention):
it takes a pointer to the argument block and returns an int. */
187 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
188
189 /* The following structure is the key data type for the recursive
190 code generator. It is allocated by compile_hotpath, and contains
191 the arguments for compile_fallbackpath. Must be the first member
192 of its descendants. */
193 typedef struct fallback_common {
194 /* Concatenation stack. */
195 struct fallback_common *prev;
196 jump_list *nextfallbacks;
197 /* Internal stack (for component operators). */
198 struct fallback_common *top;
199 jump_list *topfallbacks;
200 /* Opcode pointer. */
201 pcre_uchar *cc;
202 } fallback_common;
203
/* Fallback record of an assertion. It starts with a fallback_common member,
as required for every descendant of that structure. */
204 typedef struct assert_fallback {
205 fallback_common common;
206 jump_list *condfailed;
207 /* Less than 0 (-1) if a frame is not needed. */
208 int framesize;
209 /* Points to our private memory word on the stack. */
210 int localptr;
211 /* For iterators. */
212 struct sljit_label *hotpath;
213 } assert_fallback;
214
/* Fallback record of a bracket. It starts with a fallback_common member,
as required for every descendant of that structure. */
215 typedef struct bracket_fallback {
216 fallback_common common;
217 /* Where to continue if an alternative is successfully matched. */
218 struct sljit_label *althotpath;
219 /* For rmin and rmax iterators. */
220 struct sljit_label *recursivehotpath;
221 /* For greedy ? operator. */
222 struct sljit_label *zerohotpath;
223 /* Contains the branches of a failed condition. The members of the union
share storage, so only the one matching the bracket type is meaningful. */
224 union {
225 /* Both for OP_COND, OP_SCOND. */
226 jump_list *condfailed;
227 assert_fallback *assert;
228 /* For OP_ONCE. -1 if not needed. */
229 int framesize;
230 } u;
231 /* Points to our private memory word on the stack. */
232 int localptr;
233 } bracket_fallback;
234
/* Fallback record of a possessive bracket (NOTE(review): inferred from the
"pos" in the name - confirm at the use sites). Starts with fallback_common. */
235 typedef struct bracketpos_fallback {
236 fallback_common common;
237 /* Points to our private memory word on the stack. */
238 int localptr;
239 /* Reverting stack is needed. */
240 int framesize;
241 /* Allocated stack size. */
242 int stacksize;
243 } bracketpos_fallback;
244
/* Fallback record carrying one extra label besides the common part.
NOTE(review): presumably used for OP_BRAMINZERO - confirm at the use sites. */
245 typedef struct braminzero_fallback {
246 fallback_common common;
247 struct sljit_label *hotpath;
248 } braminzero_fallback;
249
/* Fallback record of an iterator: the common part plus the label of the
next iteration. */
250 typedef struct iterator_fallback {
251 fallback_common common;
252 /* Next iteration. */
253 struct sljit_label *hotpath;
254 } iterator_fallback;
255
/* Linked list node describing one recursion target. compiler_common keeps
the list in `entries` and the node being processed in `currententry`. */
256 typedef struct recurse_entry {
257 struct recurse_entry *next;
258 /* Contains the function entry. */
259 struct sljit_label *entry;
260 /* Collects the calls until the function is created. */
261 jump_list *calls;
262 /* Points to the starting opcode. */
263 int start;
264 } recurse_entry;
265
/* Fallback record for recursion; it needs nothing beyond the common part. */
266 typedef struct recurse_fallback {
267 fallback_common common;
268 } recurse_fallback;
269
/* Shared state of one JIT compilation; a pointer to it is passed to nearly
every helper in this file. Only members whose role is visible in this part
of the file are annotated. NOTE(review): the remaining members are not used
in the visible code - document them at their use sites. */
270 typedef struct compiler_common {
271 struct sljit_compiler *compiler; /* Picked up by DEFINE_COMPILER. */
272 pcre_uchar *start; /* First opcode; base for PRIV_DATA() and set_localptrs(). */
273 int localsize;
274 int *localptrs; /* Indexed by opcode offset from `start`; filled by set_localptrs(). */
275 const pcre_uint8 *fcc;
276 sljit_w lcc;
277 int cbraptr; /* Base offset used by OVECTOR_PRIV(). */
278 int mode;
279 int nltype;
280 int newline;
281 int bsr_nltype;
282 int endonly;
283 sljit_w ctypes;
284 sljit_uw name_table;
285 sljit_w name_count;
286 sljit_w name_entry_size;
287 struct sljit_label *partialmatchlabel;
288 struct sljit_label *acceptlabel;
289 stub_list *stubs; /* Pending stubs: add_stub() adds, flush_stubs() emits and clears. */
290 recurse_entry *entries;
291 recurse_entry *currententry;
292 jump_list *partialmatch;
293 jump_list *accept;
294 jump_list *calllimit; /* Jumps added by decrease_call_count(). */
295 jump_list *stackalloc; /* Fast calls added by flush_stubs(). */
296 jump_list *revertframes;
297 jump_list *wordboundary;
298 jump_list *anynewline;
299 jump_list *hspace;
300 jump_list *vspace;
301 jump_list *casefulcmp;
302 jump_list *caselesscmp;
303 BOOL jscript_compat;
304 #ifdef SUPPORT_UTF
305 BOOL utf; /* Tested by next_opcode() when sizing character opcodes. */
306 #ifdef SUPPORT_UCP
307 BOOL use_ucp;
308 #endif
309 jump_list *utfreadchar;
310 #ifdef COMPILE_PCRE8
311 jump_list *utfreadtype8;
312 #endif
313 #endif /* SUPPORT_UTF */
314 #ifdef SUPPORT_UCP
315 jump_list *getucd;
316 #endif
317 } compiler_common;
318
319 /* For byte_sequence_compare. The ucharptr member and the two unions exist
only when SLJIT_UNALIGNED is defined and non-zero. Each union overlays an
int, an unsigned short and an array of character units: four bytes in the
8 bit library, two 16 bit units in the 16 bit library. */
320
321 typedef struct compare_context {
322 int length;
323 int sourcereg;
324 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
325 int ucharptr;
326 union {
327 sljit_i asint;
328 sljit_uh asushort;
329 #ifdef COMPILE_PCRE8
330 sljit_ub asbyte;
331 sljit_ub asuchars[4];
332 #else
333 #ifdef COMPILE_PCRE16
334 sljit_uh asuchars[2];
335 #endif
336 #endif
337 } c;
338 union {
339 sljit_i asint;
340 sljit_uh asushort;
341 #ifdef COMPILE_PCRE8
342 sljit_ub asbyte;
343 sljit_ub asuchars[4];
344 #else
345 #ifdef COMPILE_PCRE16
346 sljit_uh asuchars[2];
347 #endif
348 #endif
349 } oc;
350 #endif
351 } compare_context;
352
/* Marker values stored into saved stack frames by init_frame():
frame_setstrbegin precedes the saved value of OVECTOR(0), and frame_end
closes the frame. */
353 enum {
354 frame_end = 0,
355 frame_setstrbegin = -1
356 };
357
358 /* Undefine sljit macros. CMP is redefined below as an emitter shortcut. */
359 #undef CMP
360
361 /* Used for accessing the elements of the stack. Yields a byte offset:
STACK(0) is -sizeof(sljit_w) and every higher index lies one word lower. */
362 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
363
/* Symbolic names for the sljit registers used by the generated code. */
364 #define TMP1 SLJIT_TEMPORARY_REG1
365 #define TMP2 SLJIT_TEMPORARY_REG3
366 #define TMP3 SLJIT_TEMPORARY_EREG2
367 #define STR_PTR SLJIT_SAVED_REG1
368 #define STR_END SLJIT_SAVED_REG2
369 #define STACK_TOP SLJIT_TEMPORARY_REG2
370 #define STACK_LIMIT SLJIT_SAVED_REG3
371 #define ARGUMENTS SLJIT_SAVED_EREG1
372 #define CALL_COUNT SLJIT_SAVED_EREG2
373 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
374
375 /* Locals layout. Every value below is a byte offset, expressed in units of
sizeof(sljit_w). NOTE(review): slot 7 has no name in this list. */
376 /* These two locals can be used by the current opcode. */
377 #define LOCALS0 (0 * sizeof(sljit_w))
378 #define LOCALS1 (1 * sizeof(sljit_w))
379 /* Two local variables for possessive quantifiers (char1 cannot use them). */
380 #define POSSESSIVE0 (2 * sizeof(sljit_w))
381 #define POSSESSIVE1 (3 * sizeof(sljit_w))
382 /* Head of the last recursion. */
383 #define RECURSIVE_HEAD (4 * sizeof(sljit_w))
384 /* Max limit of recursions. */
385 #define CALL_LIMIT (5 * sizeof(sljit_w))
386 /* Last known position of the requested byte.
387 Same as START_USED_PTR. (Partial matching and req_char are exclusive) */
388 #define REQ_CHAR_PTR (6 * sizeof(sljit_w))
389 /* First inspected character for partial matching.
390 Same as REQ_CHAR_PTR. (Partial matching and req_char are exclusive) */
391 #define START_USED_PTR (6 * sizeof(sljit_w))
392 /* Starting pointer for partial soft matches. */
393 #define HIT_START (8 * sizeof(sljit_w))
394 /* End pointer of the first line. */
395 #define FIRSTLINE_END (9 * sizeof(sljit_w))
396 /* The output vector is stored on the stack, and contains pointers
397 to characters. The vector data is divided into two groups: the first
398 group contains the start / end character pointers, and the second is
399 the start pointers when the end of the capturing group has not yet been reached.
OVECTOR_PRIV() and PRIV_DATA() expand to expressions which use a variable
named `common`, so they only work where such a variable is in scope.
PRIV_DATA(cc) is the localptrs entry selected by the distance of cc from
common->start. */
400 #define OVECTOR_START (10 * sizeof(sljit_w))
401 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
402 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
403 #define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])
404
/* Move opcodes for one character unit: the SLJIT_MOV_UB / SLJIT_MOVU_UB pair
in the 8 bit library and the SLJIT_MOV_UH / SLJIT_MOVU_UH pair in the 16 bit
library. Any other configuration is rejected at compile time. */
405 #ifdef COMPILE_PCRE8
406 #define MOV_UCHAR SLJIT_MOV_UB
407 #define MOVU_UCHAR SLJIT_MOVU_UB
408 #else
409 #ifdef COMPILE_PCRE16
410 #define MOV_UCHAR SLJIT_MOV_UH
411 #define MOVU_UCHAR SLJIT_MOVU_UH
412 #else
413 #error Unsupported compiling mode
414 #endif
415 #endif
416
417 /* Shortcuts. DEFINE_COMPILER declares a local `compiler` taken from
common->compiler; all the other macros expand to sljit calls on that local,
so they can only be used after DEFINE_COMPILER. JUMPTO and CMPTO emit the
jump and bind it to an existing label; JUMPHERE binds an earlier jump to a
label emitted at the current position. */
418 #define DEFINE_COMPILER \
419 struct sljit_compiler *compiler = common->compiler
420 #define OP1(op, dst, dstw, src, srcw) \
421 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
422 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
423 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
424 #define LABEL() \
425 sljit_emit_label(compiler)
426 #define JUMP(type) \
427 sljit_emit_jump(compiler, (type))
428 #define JUMPTO(type, label) \
429 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
430 #define JUMPHERE(jump) \
431 sljit_set_label((jump), sljit_emit_label(compiler))
432 #define CMP(type, src1, src1w, src2, src2w) \
433 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
434 #define CMPTO(type, src1, src1w, src2, src2w, label) \
435 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
436 #define COND_VALUE(op, dst, dstw, type) \
437 sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
438
439 static pcre_uchar* bracketend(pcre_uchar* cc)
440 {
441 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
442 do cc += GET(cc, 1); while (*cc == OP_ALT);
443 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
444 cc += 1 + LINK_SIZE;
445 return cc;
446 }
447
448 /* Functions which might need modification for all new supported opcodes:
449 next_opcode
450 get_localspace
451 set_localptrs
452 get_framesize
453 init_frame
454 get_localsize
455 copy_locals
456 compile_hotpath
457 compile_fallbackpath
458 */
459
460 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
461 {
462 SLJIT_UNUSED_ARG(common);
463 switch(*cc)
464 {
465 case OP_SOD:
466 case OP_SOM:
467 case OP_SET_SOM:
468 case OP_NOT_WORD_BOUNDARY:
469 case OP_WORD_BOUNDARY:
470 case OP_NOT_DIGIT:
471 case OP_DIGIT:
472 case OP_NOT_WHITESPACE:
473 case OP_WHITESPACE:
474 case OP_NOT_WORDCHAR:
475 case OP_WORDCHAR:
476 case OP_ANY:
477 case OP_ALLANY:
478 case OP_ANYNL:
479 case OP_NOT_HSPACE:
480 case OP_HSPACE:
481 case OP_NOT_VSPACE:
482 case OP_VSPACE:
483 case OP_EXTUNI:
484 case OP_EODN:
485 case OP_EOD:
486 case OP_CIRC:
487 case OP_CIRCM:
488 case OP_DOLL:
489 case OP_DOLLM:
490 case OP_TYPESTAR:
491 case OP_TYPEMINSTAR:
492 case OP_TYPEPLUS:
493 case OP_TYPEMINPLUS:
494 case OP_TYPEQUERY:
495 case OP_TYPEMINQUERY:
496 case OP_TYPEPOSSTAR:
497 case OP_TYPEPOSPLUS:
498 case OP_TYPEPOSQUERY:
499 case OP_CRSTAR:
500 case OP_CRMINSTAR:
501 case OP_CRPLUS:
502 case OP_CRMINPLUS:
503 case OP_CRQUERY:
504 case OP_CRMINQUERY:
505 case OP_DEF:
506 case OP_BRAZERO:
507 case OP_BRAMINZERO:
508 case OP_BRAPOSZERO:
509 case OP_FAIL:
510 case OP_ACCEPT:
511 case OP_ASSERT_ACCEPT:
512 case OP_SKIPZERO:
513 return cc + 1;
514
515 case OP_ANYBYTE:
516 #ifdef SUPPORT_UTF
517 if (common->utf) return NULL;
518 #endif
519 return cc + 1;
520
521 case OP_CHAR:
522 case OP_CHARI:
523 case OP_NOT:
524 case OP_NOTI:
525 case OP_STAR:
526 case OP_MINSTAR:
527 case OP_PLUS:
528 case OP_MINPLUS:
529 case OP_QUERY:
530 case OP_MINQUERY:
531 case OP_POSSTAR:
532 case OP_POSPLUS:
533 case OP_POSQUERY:
534 case OP_STARI:
535 case OP_MINSTARI:
536 case OP_PLUSI:
537 case OP_MINPLUSI:
538 case OP_QUERYI:
539 case OP_MINQUERYI:
540 case OP_POSSTARI:
541 case OP_POSPLUSI:
542 case OP_POSQUERYI:
543 case OP_NOTSTAR:
544 case OP_NOTMINSTAR:
545 case OP_NOTPLUS:
546 case OP_NOTMINPLUS:
547 case OP_NOTQUERY:
548 case OP_NOTMINQUERY:
549 case OP_NOTPOSSTAR:
550 case OP_NOTPOSPLUS:
551 case OP_NOTPOSQUERY:
552 case OP_NOTSTARI:
553 case OP_NOTMINSTARI:
554 case OP_NOTPLUSI:
555 case OP_NOTMINPLUSI:
556 case OP_NOTQUERYI:
557 case OP_NOTMINQUERYI:
558 case OP_NOTPOSSTARI:
559 case OP_NOTPOSPLUSI:
560 case OP_NOTPOSQUERYI:
561 cc += 2;
562 #ifdef SUPPORT_UTF
563 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
564 #endif
565 return cc;
566
567 case OP_UPTO:
568 case OP_MINUPTO:
569 case OP_EXACT:
570 case OP_POSUPTO:
571 case OP_UPTOI:
572 case OP_MINUPTOI:
573 case OP_EXACTI:
574 case OP_POSUPTOI:
575 case OP_NOTUPTO:
576 case OP_NOTMINUPTO:
577 case OP_NOTEXACT:
578 case OP_NOTPOSUPTO:
579 case OP_NOTUPTOI:
580 case OP_NOTMINUPTOI:
581 case OP_NOTEXACTI:
582 case OP_NOTPOSUPTOI:
583 cc += 2 + IMM2_SIZE;
584 #ifdef SUPPORT_UTF
585 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
586 #endif
587 return cc;
588
589 case OP_NOTPROP:
590 case OP_PROP:
591 return cc + 1 + 2;
592
593 case OP_TYPEUPTO:
594 case OP_TYPEMINUPTO:
595 case OP_TYPEEXACT:
596 case OP_TYPEPOSUPTO:
597 case OP_REF:
598 case OP_REFI:
599 case OP_CREF:
600 case OP_NCREF:
601 case OP_RREF:
602 case OP_NRREF:
603 case OP_CLOSE:
604 cc += 1 + IMM2_SIZE;
605 return cc;
606
607 case OP_CRRANGE:
608 case OP_CRMINRANGE:
609 return cc + 1 + 2 * IMM2_SIZE;
610
611 case OP_CLASS:
612 case OP_NCLASS:
613 return cc + 1 + 32 / sizeof(pcre_uchar);
614
615 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
616 case OP_XCLASS:
617 return cc + GET(cc, 1);
618 #endif
619
620 case OP_RECURSE:
621 case OP_ASSERT:
622 case OP_ASSERT_NOT:
623 case OP_ASSERTBACK:
624 case OP_ASSERTBACK_NOT:
625 case OP_REVERSE:
626 case OP_ONCE:
627 case OP_ONCE_NC:
628 case OP_BRA:
629 case OP_BRAPOS:
630 case OP_COND:
631 case OP_SBRA:
632 case OP_SBRAPOS:
633 case OP_SCOND:
634 case OP_ALT:
635 case OP_KET:
636 case OP_KETRMAX:
637 case OP_KETRMIN:
638 case OP_KETRPOS:
639 return cc + 1 + LINK_SIZE;
640
641 case OP_CBRA:
642 case OP_CBRAPOS:
643 case OP_SCBRA:
644 case OP_SCBRAPOS:
645 return cc + 1 + LINK_SIZE + IMM2_SIZE;
646
647 default:
648 return NULL;
649 }
650 }
651
652 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
653 {
654 int localspace = 0;
655 pcre_uchar *alternative;
656 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
657 while (cc < ccend)
658 {
659 switch(*cc)
660 {
661 case OP_ASSERT:
662 case OP_ASSERT_NOT:
663 case OP_ASSERTBACK:
664 case OP_ASSERTBACK_NOT:
665 case OP_ONCE:
666 case OP_ONCE_NC:
667 case OP_BRAPOS:
668 case OP_SBRA:
669 case OP_SBRAPOS:
670 case OP_SCOND:
671 localspace += sizeof(sljit_w);
672 cc += 1 + LINK_SIZE;
673 break;
674
675 case OP_CBRAPOS:
676 case OP_SCBRAPOS:
677 localspace += sizeof(sljit_w);
678 cc += 1 + LINK_SIZE + IMM2_SIZE;
679 break;
680
681 case OP_COND:
682 /* Might be a hidden SCOND. */
683 alternative = cc + GET(cc, 1);
684 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
685 localspace += sizeof(sljit_w);
686 cc += 1 + LINK_SIZE;
687 break;
688
689 default:
690 cc = next_opcode(common, cc);
691 if (cc == NULL)
692 return -1;
693 break;
694 }
695 }
696 return localspace;
697 }
698
699 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
700 {
701 pcre_uchar *cc = common->start;
702 pcre_uchar *alternative;
703 while (cc < ccend)
704 {
705 switch(*cc)
706 {
707 case OP_ASSERT:
708 case OP_ASSERT_NOT:
709 case OP_ASSERTBACK:
710 case OP_ASSERTBACK_NOT:
711 case OP_ONCE:
712 case OP_ONCE_NC:
713 case OP_BRAPOS:
714 case OP_SBRA:
715 case OP_SBRAPOS:
716 case OP_SCOND:
717 common->localptrs[cc - common->start] = localptr;
718 localptr += sizeof(sljit_w);
719 cc += 1 + LINK_SIZE;
720 break;
721
722 case OP_CBRAPOS:
723 case OP_SCBRAPOS:
724 common->localptrs[cc - common->start] = localptr;
725 localptr += sizeof(sljit_w);
726 cc += 1 + LINK_SIZE + IMM2_SIZE;
727 break;
728
729 case OP_COND:
730 /* Might be a hidden SCOND. */
731 alternative = cc + GET(cc, 1);
732 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
733 {
734 common->localptrs[cc - common->start] = localptr;
735 localptr += sizeof(sljit_w);
736 }
737 cc += 1 + LINK_SIZE;
738 break;
739
740 default:
741 cc = next_opcode(common, cc);
742 SLJIT_ASSERT(cc != NULL);
743 break;
744 }
745 }
746 }
747
748 /* Returns with -1 if no need for frame. */
749 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
750 {
751 pcre_uchar *ccend = bracketend(cc);
752 int length = 0;
753 BOOL possessive = FALSE;
754 BOOL setsom_found = FALSE;
755
756 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
757 {
758 length = 3;
759 possessive = TRUE;
760 }
761
762 cc = next_opcode(common, cc);
763 SLJIT_ASSERT(cc != NULL);
764 while (cc < ccend)
765 switch(*cc)
766 {
767 case OP_SET_SOM:
768 case OP_RECURSE:
769 if (!setsom_found)
770 {
771 length += 2;
772 setsom_found = TRUE;
773 }
774 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
775 break;
776
777 case OP_CBRA:
778 case OP_CBRAPOS:
779 case OP_SCBRA:
780 case OP_SCBRAPOS:
781 length += 3;
782 cc += 1 + LINK_SIZE + IMM2_SIZE;
783 break;
784
785 default:
786 cc = next_opcode(common, cc);
787 SLJIT_ASSERT(cc != NULL);
788 break;
789 }
790
791 /* Possessive quantifiers can use a special case. */
792 if (SLJIT_UNLIKELY(possessive) && length == 3)
793 return -1;
794
795 if (length > 0)
796 return length + 1;
797 return -1;
798 }
799
800 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
801 {
802 DEFINE_COMPILER;
803 pcre_uchar *ccend = bracketend(cc);
804 BOOL setsom_found = FALSE;
805 int offset;
806
807 /* >= 1 + shortest item size (2) */
808 SLJIT_UNUSED_ARG(stacktop);
809 SLJIT_ASSERT(stackpos >= stacktop + 2);
810
811 stackpos = STACK(stackpos);
812 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
813 cc = next_opcode(common, cc);
814 SLJIT_ASSERT(cc != NULL);
815 while (cc < ccend)
816 switch(*cc)
817 {
818 case OP_SET_SOM:
819 case OP_RECURSE:
820 if (!setsom_found)
821 {
822 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
823 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
824 stackpos += (int)sizeof(sljit_w);
825 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
826 stackpos += (int)sizeof(sljit_w);
827 setsom_found = TRUE;
828 }
829 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
830 break;
831
832 case OP_CBRA:
833 case OP_CBRAPOS:
834 case OP_SCBRA:
835 case OP_SCBRAPOS:
836 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
837 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
838 stackpos += (int)sizeof(sljit_w);
839 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
840 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
841 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
842 stackpos += (int)sizeof(sljit_w);
843 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
844 stackpos += (int)sizeof(sljit_w);
845
846 cc += 1 + LINK_SIZE + IMM2_SIZE;
847 break;
848
849 default:
850 cc = next_opcode(common, cc);
851 SLJIT_ASSERT(cc != NULL);
852 break;
853 }
854
855 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
856 SLJIT_ASSERT(stackpos == STACK(stacktop));
857 }
858
859 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
860 {
861 int localsize = 2;
862 pcre_uchar *alternative;
863 /* Calculate the sum of the local variables. */
864 while (cc < ccend)
865 {
866 switch(*cc)
867 {
868 case OP_ASSERT:
869 case OP_ASSERT_NOT:
870 case OP_ASSERTBACK:
871 case OP_ASSERTBACK_NOT:
872 case OP_ONCE:
873 case OP_ONCE_NC:
874 case OP_BRAPOS:
875 case OP_SBRA:
876 case OP_SBRAPOS:
877 case OP_SCOND:
878 localsize++;
879 cc += 1 + LINK_SIZE;
880 break;
881
882 case OP_CBRA:
883 case OP_SCBRA:
884 localsize++;
885 cc += 1 + LINK_SIZE + IMM2_SIZE;
886 break;
887
888 case OP_CBRAPOS:
889 case OP_SCBRAPOS:
890 localsize += 2;
891 cc += 1 + LINK_SIZE + IMM2_SIZE;
892 break;
893
894 case OP_COND:
895 /* Might be a hidden SCOND. */
896 alternative = cc + GET(cc, 1);
897 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
898 localsize++;
899 cc += 1 + LINK_SIZE;
900 break;
901
902 default:
903 cc = next_opcode(common, cc);
904 SLJIT_ASSERT(cc != NULL);
905 break;
906 }
907 }
908 SLJIT_ASSERT(cc == ccend);
909 return localsize;
910 }
911
/* Emits the code which copies local variables between the locals area
(SLJIT_LOCALS_REG relative) and the stack (STACK_TOP relative).

save == TRUE:  locals are read and stored on the stack. The first value
               saved is RECURSIVE_HEAD, followed by the locals owned by the
               opcodes in [cc, ccend).
save == FALSE: the values are read back from the stack into the same
               locals. The first stack word is stepped over, and
               RECURSIVE_HEAD is not written by this function.

stackptr and stacktop are stack indexes on entry and are converted to byte
offsets with STACK(). The values travel through TMP1 and TMP2 in turn, so
every load into one register is followed by the store of the other one. */
912 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
913 BOOL save, int stackptr, int stacktop)
914 {
915 DEFINE_COMPILER;
916 int srcw[2]; /* Offsets of the locals selected by the current opcode. */
917 int count; /* Number of valid entries in srcw. */
918 BOOL tmp1next = TRUE; /* TRUE when TMP1 is the next register to use. */
919 BOOL tmp1empty = TRUE; /* TRUE when TMP1 holds no pending value. */
920 BOOL tmp2empty = TRUE; /* TRUE when TMP2 holds no pending value. */
921 pcre_uchar *alternative;
922 enum {
923 start,
924 loop,
925 end
926 } status;
927
928 status = save ? start : loop;
929 stackptr = STACK(stackptr - 2);
930 stacktop = STACK(stacktop - 1);
931
932 if (!save)
933 {
/* Step over the first stack word, then preload up to two values so that
the loop below always finds the next value in a register. */
934 stackptr += sizeof(sljit_w);
935 if (stackptr < stacktop)
936 {
937 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
938 stackptr += sizeof(sljit_w);
939 tmp1empty = FALSE;
940 }
941 if (stackptr < stacktop)
942 {
943 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
944 stackptr += sizeof(sljit_w);
945 tmp2empty = FALSE;
946 }
947 /* The tmp1next must be TRUE in either way. */
948 }
949
/* Each round selects zero, one or two locals (srcw / count) and the inner
loop below transfers them. */
950 while (status != end)
951 {
952 count = 0;
953 switch(status)
954 {
955 case start:
/* Only reached when saving: RECURSIVE_HEAD goes first. */
956 SLJIT_ASSERT(save);
957 count = 1;
958 srcw[0] = RECURSIVE_HEAD;
959 status = loop;
960 break;
961
962 case loop:
963 if (cc >= ccend)
964 {
965 status = end;
966 break;
967 }
968
969 switch(*cc)
970 {
971 case OP_ASSERT:
972 case OP_ASSERT_NOT:
973 case OP_ASSERTBACK:
974 case OP_ASSERTBACK_NOT:
975 case OP_ONCE:
976 case OP_ONCE_NC:
977 case OP_BRAPOS:
978 case OP_SBRA:
979 case OP_SBRAPOS:
980 case OP_SCOND:
981 count = 1;
982 srcw[0] = PRIV_DATA(cc);
983 SLJIT_ASSERT(srcw[0] != 0);
984 cc += 1 + LINK_SIZE;
985 break;
986
987 case OP_CBRA:
988 case OP_SCBRA:
989 count = 1;
990 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
991 cc += 1 + LINK_SIZE + IMM2_SIZE;
992 break;
993
994 case OP_CBRAPOS:
995 case OP_SCBRAPOS:
/* Two locals. The transfer loop takes srcw[1] before srcw[0]. */
996 count = 2;
997 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
998 srcw[0] = PRIV_DATA(cc);
999 SLJIT_ASSERT(srcw[0] != 0);
1000 cc += 1 + LINK_SIZE + IMM2_SIZE;
1001 break;
1002
1003 case OP_COND:
1004 /* Might be a hidden SCOND. */
1005 alternative = cc + GET(cc, 1);
1006 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1007 {
1008 count = 1;
1009 srcw[0] = PRIV_DATA(cc);
1010 SLJIT_ASSERT(srcw[0] != 0);
1011 }
1012 cc += 1 + LINK_SIZE;
1013 break;
1014
1015 default:
1016 cc = next_opcode(common, cc);
1017 SLJIT_ASSERT(cc != NULL);
1018 break;
1019 }
1020 break;
1021
1022 case end:
1023 SLJIT_ASSERT_STOP();
1024 break;
1025 }
1026
/* Transfer the selected locals, from the highest srcw index downwards. */
1027 while (count > 0)
1028 {
1029 count--;
1030 if (save)
1031 {
/* Saving: write out the value pending in the chosen register (if any),
then load the next local into it. */
1032 if (tmp1next)
1033 {
1034 if (!tmp1empty)
1035 {
1036 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1037 stackptr += sizeof(sljit_w);
1038 }
1039 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1040 tmp1empty = FALSE;
1041 tmp1next = FALSE;
1042 }
1043 else
1044 {
1045 if (!tmp2empty)
1046 {
1047 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1048 stackptr += sizeof(sljit_w);
1049 }
1050 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1051 tmp2empty = FALSE;
1052 tmp1next = TRUE;
1053 }
1054 }
1055 else
1056 {
/* Restoring: store the preloaded register into the local, then refill the
register from the stack while saved values remain. */
1057 if (tmp1next)
1058 {
1059 SLJIT_ASSERT(!tmp1empty);
1060 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1061 tmp1empty = stackptr >= stacktop;
1062 if (!tmp1empty)
1063 {
1064 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1065 stackptr += sizeof(sljit_w);
1066 }
1067 tmp1next = FALSE;
1068 }
1069 else
1070 {
1071 SLJIT_ASSERT(!tmp2empty);
1072 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1073 tmp2empty = stackptr >= stacktop;
1074 if (!tmp2empty)
1075 {
1076 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1077 stackptr += sizeof(sljit_w);
1078 }
1079 tmp1next = TRUE;
1080 }
1081 }
1082 }
1083 }
1084
/* Saving: up to two values are still pending in the registers. They are
written out in the order they were loaded, i.e. the register which would
be reused next goes first. */
1085 if (save)
1086 {
1087 if (tmp1next)
1088 {
1089 if (!tmp1empty)
1090 {
1091 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1092 stackptr += sizeof(sljit_w);
1093 }
1094 if (!tmp2empty)
1095 {
1096 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1097 stackptr += sizeof(sljit_w);
1098 }
1099 }
1100 else
1101 {
1102 if (!tmp2empty)
1103 {
1104 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1105 stackptr += sizeof(sljit_w);
1106 }
1107 if (!tmp1empty)
1108 {
1109 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1110 stackptr += sizeof(sljit_w);
1111 }
1112 }
1113 }
/* All opcodes and all stack words must have been consumed; after a restore
neither register may hold an unused value. */
1114 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1115 }
1116
1117 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1118 {
1119 return (value & (value - 1)) == 0;
1120 }
1121
1122 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1123 {
1124 while (list)
1125 {
1126 /* sljit_set_label is clever enough to do nothing
1127 if either the jump or the label is NULL */
1128 sljit_set_label(list->jump, label);
1129 list = list->next;
1130 }
1131 }
1132
1133 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1134 {
1135 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1136 if (list_item)
1137 {
1138 list_item->next = *list;
1139 list_item->jump = jump;
1140 *list = list_item;
1141 }
1142 }
1143
1144 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1145 {
1146 DEFINE_COMPILER;
1147 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1148
1149 if (list_item)
1150 {
1151 list_item->type = type;
1152 list_item->data = data;
1153 list_item->start = start;
1154 list_item->leave = LABEL();
1155 list_item->next = common->stubs;
1156 common->stubs = list_item;
1157 }
1158 }
1159
1160 static void flush_stubs(compiler_common *common)
1161 {
1162 DEFINE_COMPILER;
1163 stub_list* list_item = common->stubs;
1164
1165 while (list_item)
1166 {
1167 JUMPHERE(list_item->start);
1168 switch(list_item->type)
1169 {
1170 case stack_alloc:
1171 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1172 break;
1173 }
1174 JUMPTO(SLJIT_JUMP, list_item->leave);
1175 list_item = list_item->next;
1176 }
1177 common->stubs = NULL;
1178 }
1179
1180 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1181 {
1182 DEFINE_COMPILER;
1183
1184 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1185 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1186 }
1187
1188 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1189 {
1190 /* May destroy all locals and registers except TMP2. */
1191 DEFINE_COMPILER;
1192
1193 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1194 #ifdef DESTROY_REGISTERS
1195 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1196 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1197 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1198 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1199 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1200 #endif
1201 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1202 }
1203
1204 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1205 {
1206 DEFINE_COMPILER;
1207 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1208 }
1209
1210 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1211 {
1212 DEFINE_COMPILER;
1213 struct sljit_label *loop;
1214 int i;
1215 /* At this point we can freely use all temporary registers. */
1216 /* TMP1 returns with begin - 1. */
1217 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1218 if (length < 8)
1219 {
1220 for (i = 0; i < length; i++)
1221 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1222 }
1223 else
1224 {
1225 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
1226 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1227 loop = LABEL();
1228 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1229 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1230 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1231 }
1232 }
1233
/* Emits the code that finishes a successful match. It stores STR_PTR in
   OVECTOR(1), converts jit_arguments.offsetcount ovector entries from pointers
   into offsets relative to jit_arguments.begin, writes them as ints starting
   at jit_arguments.offsets, and finally loads the value to be returned into
   SLJIT_RETURN_REG. topbracket selects how the return value is computed:
   a constant 1 when it is not greater than 1, a run-time scan otherwise. */
1234 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1235 {
1236 DEFINE_COMPILER;
1237 struct sljit_label *loop;
1238 struct sljit_jump *earlyexit;
1239
1240 /* At this point we can freely use all registers. */
/* The previous OVECTOR(1) value is kept in SLJIT_SAVED_REG3; the return value
   scan at the end compares ovector entries against it. */
1241 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1242 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1243
/* Register roles for the copy loop:
   SLJIT_TEMPORARY_REG2 = offsetcount (loop counter),
   SLJIT_TEMPORARY_REG3 = offsets - sizeof(int) (destination),
   SLJIT_TEMPORARY_REG1 = begin,
   SLJIT_SAVED_REG1     = address of the first ovector slot (source). */
1244 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1245 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1246 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1247 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1248 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
1249 /* Unlikely, but possible */
/* A zero offsetcount skips the copy loop entirely. */
1250 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1251 loop = LABEL();
1252 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1253 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1254 /* Copy the integer value to the output buffer */
1255 #ifdef COMPILE_PCRE16
/* The pointer difference is halved in the 16 bit build before it is stored. */
1256 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1257 #endif
/* NOTE(review): the destination starts one int before offsets and is always
   addressed at +sizeof(int); this presumably relies on SLJIT_MOVU_SI updating
   the base register - confirm against the sljit documentation. */
1258 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1259 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1260 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1261 JUMPHERE(earlyexit);
1262
1263 /* Calculate the return value, which is the maximum ovector value. */
1264 if (topbracket > 1)
1265 {
/* Start with topbracket + 1 and walk downwards two slots at a time; the
   counter is decremented once per visited pair and the walk stops at the
   first entry that differs from the value saved in SLJIT_SAVED_REG3. */
1266 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1267 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1268
1269 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1270 loop = LABEL();
1271 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1272 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1273 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1274 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1275 }
1276 else
1277 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1278 }
1279
/* Emits the code that reports a partial match. SLJIT_RETURN_REG is set to
   PCRE_ERROR_PARTIAL. When jit_arguments.offsetcount is at least 2, the first
   two ints of jit_arguments.offsets are filled in as well: offsets[0] gets the
   start value (START_USED_PTR in hard partial mode, HIT_START otherwise) and
   offsets[1] gets STR_END, both relative to jit_arguments.begin. Control then
   continues at the leave label. */
1280 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave)
1281 {
1282 DEFINE_COMPILER;
1283
/* SLJIT_SAVED_REG2 is overwritten below while STR_END is read in the same
   instruction, so the two must be the same register. */
1284 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1285
1286 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1287 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1288 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
/* No room for a start/end pair: return without storing anything. */
1289 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave);
1290
1291 /* Store match begin and end. */
1292 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1293 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1294 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? START_USED_PTR : HIT_START);
/* offsets[1] = STR_END - begin (halved in the 16 bit build). */
1295 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1296 #ifdef COMPILE_PCRE16
1297 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1298 #endif
1299 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1300
/* offsets[0] = start value - begin (halved in the 16 bit build). */
1301 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1302 #ifdef COMPILE_PCRE16
1303 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1304 #endif
1305 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1306
1307 JUMPTO(SLJIT_JUMP, leave);
1308 }
1309
1310 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1311 {
1312 /* May destroy TMP1. */
1313 DEFINE_COMPILER;
1314 struct sljit_jump *jump;
1315
1316 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1317 {
1318 /* The value of -1 must be kept for START_USED_PTR! */
1319 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, SLJIT_IMM, 1);
1320 /* Jumps if START_USED_PTR < STR_PTR, or START_USED_PTR == -1. Although overwriting
1321 is not necessary if START_USED_PTR == STR_PTR, it does not hurt as well. */
1322 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1323 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0);
1324 JUMPHERE(jump);
1325 }
1326 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1327 {
1328 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0);
1329 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0);
1330 JUMPHERE(jump);
1331 }
1332 }
1333
1334 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1335 {
1336 /* Detects if the character has an othercase. */
1337 unsigned int c;
1338
1339 #ifdef SUPPORT_UTF
1340 if (common->utf)
1341 {
1342 GETCHAR(c, cc);
1343 if (c > 127)
1344 {
1345 #ifdef SUPPORT_UCP
1346 return c != UCD_OTHERCASE(c);
1347 #else
1348 return FALSE;
1349 #endif
1350 }
1351 #ifndef COMPILE_PCRE8
1352 return common->fcc[c] != c;
1353 #endif
1354 }
1355 else
1356 #endif
1357 c = *cc;
1358 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1359 }
1360
1361 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1362 {
1363 /* Returns with the othercase. */
1364 #ifdef SUPPORT_UTF
1365 if (common->utf && c > 127)
1366 {
1367 #ifdef SUPPORT_UCP
1368 return UCD_OTHERCASE(c);
1369 #else
1370 return c;
1371 #endif
1372 }
1373 #endif
1374 return TABLE_GET(c, common->fcc, c);
1375 }
1376
/* Examines the character at cc and its other case. When the two differ in
   exactly one bit, the result packs two values: the low 8 bits hold that bit
   moved into a single byte, and the bits from bit 8 upwards hold an index
   computed below (presumably the position of the byte that contains the
   differing bit inside the encoded character - confirm at the callers).
   When they differ in more than one bit the result is 0. The character must
   have an other case (asserted). */
1377 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1378 {
1379 /* Detects if the character and its othercase has only 1 bit difference. */
1380 unsigned int c, oc, bit;
1381 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1382 int n;
1383 #endif
1384
/* Fetch the character (c) and its other case (oc); the lookup mirrors
   char_othercase. */
1385 #ifdef SUPPORT_UTF
1386 if (common->utf)
1387 {
1388 GETCHAR(c, cc);
1389 if (c <= 127)
1390 oc = common->fcc[c];
1391 else
1392 {
1393 #ifdef SUPPORT_UCP
1394 oc = UCD_OTHERCASE(c);
1395 #else
1396 oc = c;
1397 #endif
1398 }
1399 }
1400 else
1401 {
1402 c = *cc;
1403 oc = TABLE_GET(c, common->fcc, c);
1404 }
1405 #else
1406 c = *cc;
1407 oc = TABLE_GET(c, common->fcc, c);
1408 #endif
1409
1410 SLJIT_ASSERT(c != oc);
1411
1412 bit = c ^ oc;
1413 /* Optimized for English alphabet. */
1414 if (c <= 127 && bit == 0x20)
1415 return (0 << 8) | 0x20;
1416
1417 /* Since c != oc, they must have at least 1 bit difference. */
1418 if (!ispowerof2(bit))
1419 return 0;
1420
1421 #ifdef COMPILE_PCRE8
1422
1423 #ifdef SUPPORT_UTF
1424 if (common->utf && c > 127)
1425 {
/* n starts at GET_EXTRALEN(*cc) and drops by one for every 6 bits the
   differing bit is shifted down, so on exit bit fits in the 6 bit payload of
   one byte and n identifies which one. The loop terminates because bit is
   non-zero here. */
1426 n = GET_EXTRALEN(*cc);
1427 while ((bit & 0x3f) == 0)
1428 {
1429 n--;
1430 bit >>= 6;
1431 }
1432 return (n << 8) | bit;
1433 }
1434 #endif /* SUPPORT_UTF */
1435 return (0 << 8) | bit;
1436
1437 #else /* COMPILE_PCRE8 */
1438
1439 #ifdef COMPILE_PCRE16
1440 #ifdef SUPPORT_UTF
1441 if (common->utf && c > 65535)
1442 {
/* Characters above 65535: a difference in bit 10 or higher is shifted down
   by 10 and then handled by the common return below (indexes 0 and 1);
   a difference in the low 10 bits is reported with indexes 2 and 3. */
1443 if (bit >= (1 << 10))
1444 bit >>= 10;
1445 else
1446 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1447 }
1448 #endif /* SUPPORT_UTF */
/* Index 0 when the bit is in the low byte of the 16 bit unit, index 1 (with
   the bit shifted down by 8) when it is in the high byte. */
1449 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1450 #endif /* COMPILE_PCRE16 */
1451
1452 #endif /* COMPILE_PCRE8 */
1453 }
1454
1455 static void check_partial(compiler_common *common)
1456 {
1457 DEFINE_COMPILER;
1458 struct sljit_jump *jump;
1459
1460 if (common->mode == JIT_COMPILE)
1461 return;
1462
1463 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0);
1464 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1465 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), HIT_START, SLJIT_IMM, -1);
1466 else
1467 {
1468 if (common->partialmatchlabel != NULL)
1469 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1470 else
1471 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1472 }
1473 JUMPHERE(jump);
1474 }
1475
1476 static struct sljit_jump *check_str_end(compiler_common *common)
1477 {
1478 /* Does not affect registers. Usually used in a tight spot. */
1479 DEFINE_COMPILER;
1480 struct sljit_jump *jump;
1481 struct sljit_jump *nohit;
1482 struct sljit_jump *return_value;
1483
1484 if (common->mode == JIT_COMPILE)
1485 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1486
1487 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1488 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1489 {
1490 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0);
1491 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), HIT_START, SLJIT_IMM, -1);
1492 JUMPHERE(nohit);
1493 return_value = JUMP(SLJIT_JUMP);
1494 }
1495 else
1496 {
1497 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0);
1498 if (common->partialmatchlabel != NULL)
1499 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1500 else
1501 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1502 }
1503 JUMPHERE(jump);
1504 return return_value;
1505 }
1506
1507 static void fallback_at_str_end(compiler_common *common, jump_list **fallbacks)
1508 {
1509 DEFINE_COMPILER;
1510 struct sljit_jump *jump;
1511
1512 if (common->mode == JIT_COMPILE)
1513 {
1514 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1515 return;
1516 }
1517
1518 /* Partial matching mode. */
1519 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1520 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0));
1521 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1522 {
1523 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), HIT_START, SLJIT_IMM, -1);
1524 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
1525 }
1526 else
1527 {
1528 if (common->partialmatchlabel != NULL)
1529 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1530 else
1531 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1532 }
1533 JUMPHERE(jump);
1534 }
1535
1536 static void read_char(compiler_common *common)
1537 {
1538 /* Reads the character into TMP1, updates STR_PTR.
1539 Does not check STR_END. TMP2 Destroyed. */
1540 DEFINE_COMPILER;
1541 #ifdef SUPPORT_UTF
1542 struct sljit_jump *jump;
1543 #endif
1544
1545 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1546 #ifdef SUPPORT_UTF
1547 if (common->utf)
1548 {
1549 #ifdef COMPILE_PCRE8
1550 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1551 #else
1552 #ifdef COMPILE_PCRE16
1553 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1554 #endif
1555 #endif /* COMPILE_PCRE8 */
1556 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1557 JUMPHERE(jump);
1558 }
1559 #endif
1560 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1561 }
1562
1563 static void peek_char(compiler_common *common)
1564 {
1565 /* Reads the character into TMP1, keeps STR_PTR.
1566 Does not check STR_END. TMP2 Destroyed. */
1567 DEFINE_COMPILER;
1568 #ifdef SUPPORT_UTF
1569 struct sljit_jump *jump;
1570 #endif
1571
1572 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1573 #ifdef SUPPORT_UTF
1574 if (common->utf)
1575 {
1576 #ifdef COMPILE_PCRE8
1577 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1578 #else
1579 #ifdef COMPILE_PCRE16
1580 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1581 #endif
1582 #endif /* COMPILE_PCRE8 */
1583 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1584 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1585 JUMPHERE(jump);
1586 }
1587 #endif
1588 }
1589
/* Emits code that loads the ctypes table entry of the character at STR_PTR
   into TMP1 and moves STR_PTR past the character. The raw character unit is
   loaded into TMP2 on the way. Characters that the ctypes table does not cover
   (above 255) yield type 0. */
1590 static void read_char8_type(compiler_common *common)
1591 {
1592 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1593 DEFINE_COMPILER;
1594 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1595 struct sljit_jump *jump;
1596 #endif
1597
1598 #ifdef SUPPORT_UTF
1599 if (common->utf)
1600 {
1601 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1602 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1603 #ifdef COMPILE_PCRE8
1604 /* This can be an extra read in some situations, but hopefully
1605 it is needed in most cases. */
1606 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* Lead bytes of 0xc0 and above are handed to the shared utfreadtype8
   routine; STR_PTR has already been moved past the lead byte. */
1607 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1608 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
1609 JUMPHERE(jump);
1610 #else
1611 #ifdef COMPILE_PCRE16
/* Type 0 is preloaded and only replaced by a table entry when the unit
   is not greater than 255. */
1612 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1613 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1614 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1615 JUMPHERE(jump);
1616 /* Skip low surrogate if necessary. */
/* TMP2 becomes 1 when (unit & 0xfc00) == 0xd800 and 0 otherwise; shifted
   left by one it is the extra byte count added to STR_PTR. */
1617 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
1618 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1619 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1620 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1621 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1622 #endif
1623 #endif /* COMPILE_PCRE8 */
1624 return;
1625 }
1626 #endif
/* Non-UTF path: one unit is one character. */
1627 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1628 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1629 #ifdef COMPILE_PCRE16
1630 /* The ctypes array contains only 256 values. */
1631 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1632 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1633 #endif
1634 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1635 #ifdef COMPILE_PCRE16
1636 JUMPHERE(jump);
1637 #endif
1638 }
1639
1640 static void skip_char_back(compiler_common *common)
1641 {
1642 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
1643 DEFINE_COMPILER;
1644 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1645 struct sljit_label *label;
1646
1647 if (common->utf)
1648 {
1649 label = LABEL();
1650 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1651 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1652 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1653 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1654 return;
1655 }
1656 #endif
1657 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1658 if (common->utf)
1659 {
1660 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1661 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1662 /* Skip low surrogate if necessary. */
1663 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1665 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1666 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1667 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1668 return;
1669 }
1670 #endif
1671 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1672 }
1673
1674 static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1675 {
1676 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1677 DEFINE_COMPILER;
1678
1679 if (nltype == NLTYPE_ANY)
1680 {
1681 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1682 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1683 }
1684 else if (nltype == NLTYPE_ANYCRLF)
1685 {
1686 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1687 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1688 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1689 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1690 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1691 }
1692 else
1693 {
1694 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
1695 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1696 }
1697 }
1698
1699 #ifdef SUPPORT_UTF
1700
1701 #ifdef COMPILE_PCRE8
/* Emits the shared UTF-8 decoding routine that read_char and peek_char reach
   through the common->utfreadchar jump list. On entry TMP1 holds the lead byte
   and STR_PTR still points at it. Every branch ends by moving STR_PTR forward
   by the number of continuation bytes and returning that number in TMP2.
   The continuation bytes are only masked with 0x3f; they are not validated. */
1702 static void do_utfreadchar(compiler_common *common)
1703 {
1704 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1705 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1706 DEFINE_COMPILER;
1707 struct sljit_jump *jump;
1708
1709 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1710 /* Searching for the first zero. */
/* Bit 0x20 clear in the lead byte selects the two byte form. */
1711 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1712 jump = JUMP(SLJIT_C_NOT_ZERO);
1713 /* Two byte sequence. */
/* Result: ((lead & 0x1f) << 6) | (byte1 & 0x3f). */
1714 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1715 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1716 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1717 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1718 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1719 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1720 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1721 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1722 JUMPHERE(jump);
1723
/* Bit 0x10 clear in the lead byte selects the three byte form. */
1724 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1725 jump = JUMP(SLJIT_C_NOT_ZERO);
1726 /* Three byte sequence. */
/* Result: ((lead & 0x0f) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f). */
1727 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1728 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1729 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1730 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1731 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1732 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1733 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1734 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1735 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1736 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1737 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1738 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1739 JUMPHERE(jump);
1740
1741 /* Four byte sequence. */
/* Everything else is decoded as four bytes:
   ((lead & 0x07) << 18) | ((byte1 & 0x3f) << 12) | ((byte2 & 0x3f) << 6)
   | (byte3 & 0x3f). */
1742 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1743 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1744 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1745 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1746 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1747 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1748 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1749 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1750 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1751 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1752 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
1753 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1754 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1755 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1756 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
1757 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1758 }
1759
/* Emits the shared routine that read_char8_type reaches through the
   common->utfreadtype8 jump list. On entry TMP2 holds the lead byte and
   STR_PTR already points past it. The routine moves STR_PTR past the rest of
   the character and returns its ctypes entry in TMP1, or 0 when the character
   is above 255. */
1760 static void do_utfreadtype8(compiler_common *common)
1761 {
1762 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
1763 of the character (>= 0xc0). Return value in TMP1. */
1764 DEFINE_COMPILER;
1765 struct sljit_jump *jump;
1766 struct sljit_jump *compare;
1767
1768 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1769
/* Only a two byte sequence can encode a value that fits the ctypes table,
   so that is the only form that is actually decoded. */
1770 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1771 jump = JUMP(SLJIT_C_NOT_ZERO);
1772 /* Two byte sequence. */
1773 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1774 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1775 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1776 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1777 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1778 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
1779 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1780 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1781 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1782
/* Decoded value above 255: no table entry, the type is 0. */
1783 JUMPHERE(compare);
1784 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1785 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1786 JUMPHERE(jump);
1787
1788 /* We only have types for characters less than 256. */
/* Longer sequences are skipped without decoding: the byte found at
   utf8_table4[lead - 0xc0] is added to STR_PTR and the type is 0. */
1789 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1790 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1791 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1792 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1793 }
1794
1795 #else /* COMPILE_PCRE8 */
1796
1797 #ifdef COMPILE_PCRE16
/* Emits the shared UTF-16 decoding routine that read_char and peek_char reach
   through the common->utfreadchar jump list. On entry TMP1 holds the first
   16 bit unit and STR_PTR still points at it. On the path that combines two
   units, STR_PTR is moved forward by one unit and that amount is returned in
   TMP2. */
1798 static void do_utfreadchar(compiler_common *common)
1799 {
1800 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1801 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
1802 DEFINE_COMPILER;
1803 struct sljit_jump *jump;
1804
1805 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
/* Units below 0xdc00 continue at the combining code; all other units
   return immediately with TMP1 unchanged. */
1806 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1807 /* Do nothing, only return. */
/* NOTE(review): TMP2 is not written on this path although the header comment
   promises length - 1 in TMP2, and peek_char subtracts TMP2 from STR_PTR after
   the call - confirm what TMP2 holds here. */
1808 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1809
1810 JUMPHERE(jump);
1811 /* Combine two 16 bit characters. */
/* Result: (((first & 0x3ff) << 10) | (second & 0x3ff)) + 0x10000. */
1812 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1813 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1814 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1815 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1816 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1817 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1818 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1819 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1820 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1821 }
1822 #endif /* COMPILE_PCRE16 */
1823
1824 #endif /* COMPILE_PCRE8 */
1825
1826 #endif /* SUPPORT_UTF */
1827
1828 #ifdef SUPPORT_UCP
1829
1830 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
1831 #define UCD_BLOCK_MASK 127
1832 #define UCD_BLOCK_SHIFT 7
1833
/* Emits the shared routine that looks up the Unicode character type of the
   character in TMP1 by walking the ucd_stage1, ucd_stage2 and ucd_records
   tables. */
1834 static void do_getucd(compiler_common *common)
1835 {
1836 /* Searches the UCD record for the character that comes in TMP1.
1837 Returns chartype in TMP1 and UCD offset in TMP2. */
1838 DEFINE_COMPILER;
1839
/* The shifts and masks below hard-code a block size of 128 and a record
   size of 8 bytes. */
1840 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1841
1842 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] */
1843 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1844 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
/* TMP1 = (TMP1 & UCD_BLOCK_MASK) + (TMP2 << UCD_BLOCK_SHIFT) */
1845 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1846 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1847 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = 16 bit entry of ucd_stage2 selected by TMP1. NOTE(review): the
   trailing 1 is presumably the left shift applied to the index register of
   SLJIT_MEM2 - confirm against the sljit documentation. */
1848 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
1849 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = chartype byte of the record selected by TMP2; the trailing 3
   presumably scales the index by the 8 byte record size asserted above. */
1850 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1851 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1852 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1853 }
1854 #endif
1855
/* Emits the entry sequence of the main matching loop and returns the label of
   the code that advances STR_PTR by one character. Execution that falls into
   this sequence from above jumps over the advancing code (the start jump), so
   the first attempt is made at the original STR_PTR; jumping to the returned
   label moves on to the next start position.
   hascrorlf and firstline only take part in deciding whether the two unit
   newline check (newlinecheck) is generated; firstline additionally makes the
   sequence compute FIRSTLINE_END first. */
1856 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1857 {
1858 DEFINE_COMPILER;
1859 struct sljit_label *mainloop;
1860 struct sljit_label *newlinelabel = NULL;
1861 struct sljit_jump *start;
1862 struct sljit_jump *end = NULL;
1863 struct sljit_jump *nl = NULL;
1864 #ifdef SUPPORT_UTF
1865 struct sljit_jump *singlechar;
1866 #endif
1867 jump_list *newline = NULL;
1868 BOOL newlinecheck = FALSE;
1869 BOOL readuchar = FALSE;
1870
1871 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1872 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1873 newlinecheck = TRUE;
1874
1875 if (firstline)
1876 {
1877 /* Search for the end of the first line. */
/* STR_PTR is parked in LOCALS0 during the scan and restored afterwards;
   FIRSTLINE_END defaults to STR_END when no newline is found. */
1878 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1879 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0);
1880
1881 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1882 {
/* Fixed two unit newline: compare the previous and the current unit with
   the high and low byte of common->newline; on a hit FIRSTLINE_END is set
   to the position of the first of the two units. */
1883 mainloop = LABEL();
1884 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1885 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1886 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1887 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1888 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
1889 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
1890 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1891 }
1892 else
1893 {
1894 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1895 mainloop = LABEL();
1896 /* Continual stores do not cause data dependency. */
1897 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1898 read_char(common);
1899 check_newlinechar(common, common->nltype, &newline, TRUE);
1900 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
1901 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1902 set_jumps(newline, LABEL());
1903 }
1904
1905 JUMPHERE(end);
1906 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1907 }
1908
/* First entry: skip everything that advances STR_PTR. */
1909 start = JUMP(SLJIT_JUMP);
1910
1911 if (newlinecheck)
1912 {
/* Reached from the main loop when the current unit equals the high byte of
   common->newline: step over it, and step over the following unit as well
   when that one equals the low byte. */
1913 newlinelabel = LABEL();
1914 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1915 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1916 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1917 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
1918 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1919 #ifdef COMPILE_PCRE16
1920 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1921 #endif
1922 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1923 nl = JUMP(SLJIT_JUMP);
1924 }
1925
/* This is the label handed back to the caller. */
1926 mainloop = LABEL();
1927
1928 /* Increasing the STR_PTR here requires one less jump in the most common case. */
1929 #ifdef SUPPORT_UTF
1930 if (common->utf) readuchar = TRUE;
1931 #endif
1932 if (newlinecheck) readuchar = TRUE;
1933
/* The current unit is only loaded when one of the checks below needs it. */
1934 if (readuchar)
1935 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1936
1937 if (newlinecheck)
1938 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
1939
1940 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1941 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1942 if (common->utf)
1943 {
/* Lead bytes of 0xc0 and above: add the byte found at
   utf8_table4[lead - 0xc0] to STR_PTR to pass the whole character. */
1944 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1945 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1946 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1947 JUMPHERE(singlechar);
1948 }
1949 #endif
1950 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1951 if (common->utf)
1952 {
/* Skip one more unit when (unit & 0xfc00) == 0xd800. */
1953 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1954 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1955 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1956 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1957 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1958 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1959 JUMPHERE(singlechar);
1960 }
1961 #endif
/* The first-entry jump and both exits of the newline check land here. */
1962 JUMPHERE(start);
1963
1964 if (newlinecheck)
1965 {
1966 JUMPHERE(end);
1967 JUMPHERE(nl);
1968 }
1969
1970 return mainloop;
1971 }
1972
/* Emits a scan loop that moves STR_PTR forward until the unit at STR_PTR
   equals first_char (or, when caseless is set, its other case) or until
   STR_PTR reaches STR_END. When firstline is set, FIRSTLINE_END is used as
   the end of the scan and the real STR_END is restored afterwards. */
1973 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
1974 {
1975 DEFINE_COMPILER;
1976 struct sljit_label *start;
1977 struct sljit_jump *leave;
1978 struct sljit_jump *found;
1979 pcre_uchar oc, bit;
1980
1981 if (firstline)
1982 {
/* Park the real STR_END in POSSESSIVE0 while the scan is limited to the
   first line. */
1983 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1984 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1985 }
1986
1987 start = LABEL();
1988 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1989 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1990
/* oc is the other case of first_char, or first_char itself when the match
   is case sensitive. */
1991 oc = first_char;
1992 if (caseless)
1993 {
1994 oc = TABLE_GET(first_char, common->fcc, first_char);
1995 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
1996 if (first_char > 127 && common->utf)
1997 oc = UCD_OTHERCASE(first_char);
1998 #endif
1999 }
2000 if (first_char == oc)
2001 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2002 else
2003 {
2004 bit = first_char ^ oc;
2005 if (ispowerof2(bit))
2006 {
/* The two cases differ in a single bit: force that bit on and compare
   once. */
2007 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2008 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2009 }
2010 else
2011 {
/* Otherwise test both values: TMP2 = (TMP1 == first_char) | (TMP1 == oc). */
2012 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2013 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2014 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2015 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2016 found = JUMP(SLJIT_C_NOT_ZERO);
2017 }
2018 }
2019
/* No match at this position: step over the whole character and retry. */
2020 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2021 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2022 if (common->utf)
2023 {
2024 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2025 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2026 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2027 }
2028 #endif
2029 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2030 if (common->utf)
2031 {
2032 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2033 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2034 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2035 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2036 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2037 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2038 }
2039 #endif
2040 JUMPTO(SLJIT_JUMP, start);
/* Both exits leave STR_PTR untouched: at the matching unit, or at/after
   STR_END. */
2041 JUMPHERE(found);
2042 JUMPHERE(leave);
2043
2044 if (firstline)
2045 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2046 }
2047
/* Emits code that moves STR_PTR forward to a position that follows a newline
sequence, as selected by common->nltype and common->newline. Nothing is done
when STR_PTR is at or before the "str" field of jit_arguments. When firstline
is set, STR_END is temporarily replaced by the FIRSTLINE_END local and is
restored at the end.

Two code shapes are generated:
  - a dedicated loop for a fixed two-unit newline (common->newline > 255),
    which compares the two code units that precede STR_PTR;
  - a generic loop built on read_char() and check_newlinechar().

TMP1 and TMP2 are overwritten by the generated code. */
2048 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2049 {
2050 DEFINE_COMPILER;
2051 struct sljit_label *loop;
2052 struct sljit_jump *lastchar;
2053 struct sljit_jump *firstchar;
2054 struct sljit_jump *leave;
2055 struct sljit_jump *foundcr = NULL;
2056 struct sljit_jump *notfoundnl;
2057 jump_list *newline = NULL;
2058
2059 if (firstline)
2060 {
/* Keep the real STR_END in the POSSESSIVE0 slot while scanning up to FIRSTLINE_END. */
2061 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2062 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
2063 }
2064
2065 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2066 {
/* Fixed two-unit newline: the high byte of common->newline is the first unit, the low byte the second. */
2067 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2068 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2069 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2070 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2071 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2072
/* TMP2 = 1 if STR_PTR >= begin + 2 units, else 0; STR_PTR is moved back by that
many code units (the shift converts units to bytes in the 16-bit build). */
2073 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2074 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2075 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2076 #ifdef COMPILE_PCRE16
2077 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2078 #endif
2079 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2080
/* Advance one unit at a time until the two units just before STR_PTR form the newline, or STR_END is reached. */
2081 loop = LABEL();
2082 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2083 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2084 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2085 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2086 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2087 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2088
2089 JUMPHERE(leave);
2090 JUMPHERE(firstchar);
2091 JUMPHERE(lastchar);
2092
2093 if (firstline)
2094 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2095 return;
2096 }
2097
/* Generic case. Skip everything if STR_PTR <= arguments->str, otherwise step
back one character so the loop starts with the character before STR_PTR. */
2098 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2099 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2100 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2101 skip_char_back(common);
2102
/* NOTE(review): read_char() presumably loads the next character into TMP1 and
advances STR_PTR; the comparisons on TMP1 below rely on that - confirm. */
2103 loop = LABEL();
2104 read_char(common);
2105 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2106 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2107 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* NOTE(review): the jumps collected by check_newlinechar(..., FALSE) are bound
to "loop", so they are presumably taken when TMP1 is not a newline - confirm
against check_newlinechar(). */
2108 check_newlinechar(common, common->nltype, &newline, FALSE);
2109 set_jumps(newline, loop);
2110
2111 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2112 {
/* After a CR: if the next code unit exists and is NL, step over it as well. */
2113 leave = JUMP(SLJIT_JUMP);
2114 JUMPHERE(foundcr);
2115 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2116 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2117 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2118 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2119 #ifdef COMPILE_PCRE16
2120 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2121 #endif
2122 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2123 JUMPHERE(notfoundnl);
2124 JUMPHERE(leave);
2125 }
2126 JUMPHERE(lastchar);
2127 JUMPHERE(firstchar);
2128
2129 if (firstline)
2130 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2131 }
2132
/* Emits a scan loop that moves STR_PTR forward until the character it points
to has its bit set in the bitmap located at address start_bits, or until
STR_PTR reaches STR_END. The bitmap is indexed as byte (c >> 3), bit (c & 7).
When firstline is set, STR_END is temporarily replaced by the FIRSTLINE_END
local and is restored at the end.

Arguments:
  common      compiler state
  start_bits  address of the bitmap, used as an absolute memory offset
  firstline   TRUE if the scan is limited to FIRSTLINE_END

TMP1 and TMP2 (and TMP3 in UTF mode) are overwritten by the generated code. */
2133 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2134 {
2135 DEFINE_COMPILER;
2136 struct sljit_label *start;
2137 struct sljit_jump *leave;
2138 struct sljit_jump *found;
2139 #ifndef COMPILE_PCRE8
2140 struct sljit_jump *jump;
2141 #endif
2142
2143 if (firstline)
2144 {
/* Keep the real STR_END in the POSSESSIVE0 slot while scanning up to FIRSTLINE_END. */
2145 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2146 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
2147 }
2148
2149 start = LABEL();
2150 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2151 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2152 #ifdef SUPPORT_UTF
/* The bitmap lookup destroys TMP1; keep the code unit in TMP3 for the UTF skip below. */
2153 if (common->utf)
2154 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2155 #endif
2156 #ifndef COMPILE_PCRE8
/* Code units of 255 and above are all tested against bit 255 of the bitmap. */
2157 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2158 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2159 JUMPHERE(jump);
2160 #endif
/* TMP2 = 1 << (c & 7), TMP1 = bitmap byte at index (c >> 3); found if the bit is set. */
2161 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2162 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2163 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2164 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2165 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2166 found = JUMP(SLJIT_C_NOT_ZERO);
2167
2168 #ifdef SUPPORT_UTF
2169 if (common->utf)
2170 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2171 #endif
/* No match at this position: step over one code unit. */
2172 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2173 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2174 if (common->utf)
2175 {
/* Bytes below 0xc0 need no further skipping. For the others, the number of
additional bytes is read from utf8_table4, indexed by (byte - 0xc0). */
2176 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2177 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2178 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2179 }
2180 #endif
2181 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2182 if (common->utf)
2183 {
/* Units below 0xd800 need no further skipping. If (unit & 0xfc00) == 0xd800
(a UTF-16 high surrogate), one more unit (two bytes after the shift) is skipped. */
2184 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2185 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2186 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2187 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2188 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2189 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2190 }
2191 #endif
2192 JUMPTO(SLJIT_JUMP, start);
2193 JUMPHERE(found);
2194 JUMPHERE(leave);
2195
2196 if (firstline)
2197 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2198 }
2199
/* Emits code that looks ahead in the subject for req_char (or, when caseless
is set, its other-case form) without moving STR_PTR. The position where the
character was found is cached in the REQ_CHAR_PTR local.

The search is skipped when STR_PTR + REQ_BYTE_MAX < STR_END, and also when
STR_PTR is still below the cached REQ_CHAR_PTR value.

Arguments:
  common        compiler state (provides the fcc table and the utf flag)
  req_char      the character that has to occur in the subject
  caseless      TRUE if the other case of req_char is accepted as well
  has_firstchar TRUE to start the search one code unit after STR_PTR

Returns: the jump that is taken when STR_END is reached without finding the
         character; it is not bound here, so the caller has to bind it.

TMP1 and TMP2 are overwritten by the generated code. */
2200 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2201 {
2202 DEFINE_COMPILER;
2203 struct sljit_label *loop;
2204 struct sljit_jump *toolong;
2205 struct sljit_jump *alreadyfound;
2206 struct sljit_jump *found;
2207 struct sljit_jump *foundoc = NULL;
2208 struct sljit_jump *notfound;
2209 pcre_uchar oc, bit;
2210
/* TMP2 = cached position of the previous hit, TMP1 = STR_PTR + REQ_BYTE_MAX. */
2211 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR);
2212 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2213 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2214 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2215
/* TMP1 becomes the search cursor. */
2216 if (has_firstchar)
2217 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2218 else
2219 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2220
2221 loop = LABEL();
2222 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2223
2224 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other-case form of req_char; it stays equal to req_char for caseful matching. */
2225 oc = req_char;
2226 if (caseless)
2227 {
2228 oc = TABLE_GET(req_char, common->fcc, req_char);
2229 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2230 if (req_char > 127 && common->utf)
2231 oc = UCD_OTHERCASE(req_char);
2232 #endif
2233 }
2234 if (req_char == oc)
2235 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2236 else
2237 {
2238 bit = req_char ^ oc;
2239 if (ispowerof2(bit))
2240 {
/* The two forms differ in a single bit: force that bit on and compare once. */
2241 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2242 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2243 }
2244 else
2245 {
/* Otherwise use one compare-and-jump per form. */
2246 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2247 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2248 }
2249 }
2250 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2251 JUMPTO(SLJIT_JUMP, loop);
2252
/* Found: remember the position so that later calls can skip the search. */
2253 JUMPHERE(found);
2254 if (foundoc)
2255 JUMPHERE(foundoc);
2256 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, TMP1, 0);
2257 JUMPHERE(alreadyfound);
2258 JUMPHERE(toolong);
2259 return notfound;
2260 }
2261
/* Emits the shared "revert frames" routine (entered with sljit_emit_fast_enter,
left with sljit_emit_fast_return). Starting at STACK_TOP it walks the entries
stored on the stack, using TMP1 as the cursor:

  - first word > frame_end (signed compare): the word is an offset relative to
    SLJIT_LOCALS_REG; the following two words are copied to that location and
    the cursor advances by three words;
  - first word == frame_end: the routine returns;
  - first word == frame_setstrbegin: the following word is stored into
    OVECTOR(0) and the cursor advances by two words;
  - any other value: the cursor advances by two words.

TMP1 and TMP2 are overwritten; STACK_TOP itself is only read. */
2262 static void do_revertframes(compiler_common *common)
2263 {
2264 DEFINE_COMPILER;
2265 struct sljit_jump *jump;
2266 struct sljit_label *mainloop;
2267
2268 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2269 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2270
2271 /* Drop frames until we reach STACK_TOP. */
2272 mainloop = LABEL();
2273 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2274 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Plain entry: restore two saved words to locals + offset. */
2275 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
2276 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2277 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2278 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2279 JUMPTO(SLJIT_JUMP, mainloop);
2280
2281 JUMPHERE(jump);
2282 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2283 /* End of dropping frames. */
2284 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2285
2286 JUMPHERE(jump);
2287 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2288 /* Set string begin. */
2289 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2290 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2291 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2292 JUMPTO(SLJIT_JUMP, mainloop);
2293
2294 JUMPHERE(jump);
2295 /* Unknown command. */
2296 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2297 JUMPTO(SLJIT_JUMP, mainloop);
2298 }
2299
/* Emits the shared word boundary test routine. The return address is kept in
the LOCALS0 slot. The routine computes a 0/1 "is word character" value for the
character before STR_PTR (stored in LOCALS1; 0 when STR_PTR is at or before
arguments->begin) and for the character at STR_PTR (kept in TMP2; 0 when
check_str_end() jumps). The final XOR of the two values sets the flags, so the
result is non-zero exactly when the two values differ.

Classification of a character:
  - with common->use_ucp: underscore, or a UCD type (obtained through the
    getucd routine) in the range ucp_Ll..ucp_Lu or ucp_Nd..ucp_No;
  - otherwise: the ctype_word bit (0x10) of common->ctypes[c]; characters
    above 255 are skipped and keep the value 0.

TMP1 and TMP2 are overwritten by the generated code. */
2300 static void check_wordboundary(compiler_common *common)
2301 {
2302 DEFINE_COMPILER;
2303 struct sljit_jump *skipread;
2304 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
2305 struct sljit_jump *jump;
2306 #endif
2307
/* The shift by 4 used below depends on this value. */
2308 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2309
2310 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
2311 /* Get type of the previous char, and put it to LOCALS1. */
2312 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2313 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2314 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2315 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* NOTE(review): skip_char_back() followed by read_char() presumably leaves
STR_PTR unchanged with the previous character in TMP1 - confirm. */
2316 skip_char_back(common);
2317 check_start_used_ptr(common);
2318 read_char(common);
2319
2320 /* Testing char type. */
2321 #ifdef SUPPORT_UCP
2322 if (common->use_ucp)
2323 {
/* TMP2 starts as 1 so that the underscore shortcut yields "word". */
2324 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2325 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2326 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two unsigned range checks on the type value: [ucp_Ll, ucp_Lu] and [ucp_Nd, ucp_No]. */
2327 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2328 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2329 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2330 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2331 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2332 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2333 JUMPHERE(jump);
2334 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2335 }
2336 else
2337 #endif
2338 {
2339 #ifndef COMPILE_PCRE8
2340 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2341 #elif defined SUPPORT_UTF
2342 /* Here LOCALS1 has already been zeroed. */
2343 jump = NULL;
2344 if (common->utf)
2345 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2346 #endif /* COMPILE_PCRE8 */
/* LOCALS1 = (ctypes[c] >> 4) & 1, i.e. the ctype_word bit. */
2347 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2348 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2349 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2350 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2351 #ifndef COMPILE_PCRE8
2352 JUMPHERE(jump);
2353 #elif defined SUPPORT_UTF
2354 if (jump != NULL)
2355 JUMPHERE(jump);
2356 #endif /* COMPILE_PCRE8 */
2357 }
2358 JUMPHERE(skipread);
2359
/* Now the character at STR_PTR; TMP2 stays 0 if check_str_end() jumps. */
2360 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2361 skipread = check_str_end(common);
2362 peek_char(common);
2363
2364 /* Testing char type. This is a code duplication. */
2365 #ifdef SUPPORT_UCP
2366 if (common->use_ucp)
2367 {
2368 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2369 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2370 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2371 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2372 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2373 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2374 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2375 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2376 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2377 JUMPHERE(jump);
2378 }
2379 else
2380 #endif
2381 {
2382 #ifndef COMPILE_PCRE8
2383 /* TMP2 may be destroyed by peek_char. */
2384 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2385 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2386 #elif defined SUPPORT_UTF
2387 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2388 jump = NULL;
2389 if (common->utf)
2390 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2391 #endif
/* TMP2 = (ctypes[c] >> 4) & 1, i.e. the ctype_word bit. */
2392 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2393 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2394 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2395 #ifndef COMPILE_PCRE8
2396 JUMPHERE(jump);
2397 #elif defined SUPPORT_UTF
2398 if (jump != NULL)
2399 JUMPHERE(jump);
2400 #endif /* COMPILE_PCRE8 */
2401 }
2402 JUMPHERE(skipread);
2403
/* Flags become non-zero when exactly one of the two characters is a word character. */
2404 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2405 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2406 }
2407
/* Emits the shared routine that tests TMP1 against the "any newline" set:
0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in 16-bit builds or in UTF mode of
the 8-bit build. The result is accumulated in TMP2 (non-zero on a match), and
the final OR also sets the flags for the caller. */
2408 static void check_anynewline(compiler_common *common)
2409 {
2410 /* Check whether TMP1 contains a newline character. TMP1 is modified, TMP2 destroyed. */
2411 DEFINE_COMPILER;
2412
2413 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2414
/* After subtracting 0x0a, one unsigned compare covers the range 0x0a..0x0d. */
2415 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2416 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2417 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2418 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2419 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2420 #ifdef COMPILE_PCRE8
2421 if (common->utf)
2422 {
2423 #endif
/* Setting bit 0 maps both 0x2028 and 0x2029 (minus 0x0a) to the same value, so one compare tests both. */
2424 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2425 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2426 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2427 #ifdef COMPILE_PCRE8
2428 }
2429 #endif
2430 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the last comparison and leave the flags describing TMP2. */
2431 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2432 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2433 }
2434
/* Emits the shared routine that tests TMP1 against the horizontal space set:
0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f and
0x3000 in 16-bit builds or in UTF mode of the 8-bit build. The result is
accumulated in TMP2 (non-zero on a match), and the final OR also sets the
flags for the caller. */
2435 static void check_hspace(compiler_common *common)
2436 {
2437 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed; TMP1 is modified on the wide character path. */
2438 DEFINE_COMPILER;
2439
2440 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2441
2442 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2443 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2444 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2445 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2446 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2447 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2448 #ifdef COMPILE_PCRE8
2449 if (common->utf)
2450 {
2451 #endif
2452 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2453 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2454 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2455 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2456 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 holds (c - 0x2000): one unsigned compare covers 0x2000..0x200a,
and the remaining constants are written relative to 0x2000. */
2457 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2458 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2459 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2460 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2461 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2462 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2463 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2464 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2465 #ifdef COMPILE_PCRE8
2466 }
2467 #endif
2468 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the last comparison and leave the flags describing TMP2. */
2469 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2470
2471 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2472 }
2473
/* Emits the shared routine that tests TMP1 against the vertical space set:
0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in 16-bit builds or in UTF mode of
the 8-bit build. The result is accumulated in TMP2 (non-zero on a match), and
the final OR also sets the flags for the caller. */
2474 static void check_vspace(compiler_common *common)
2475 {
2476 /* Check whether TMP1 contains a vertical space character. TMP1 is modified, TMP2 destroyed. */
2477 DEFINE_COMPILER;
2478
2479 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2480
/* After subtracting 0x0a, one unsigned compare covers the range 0x0a..0x0d. */
2481 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2482 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2483 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2484 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2485 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2486 #ifdef COMPILE_PCRE8
2487 if (common->utf)
2488 {
2489 #endif
/* NOTE(review): the flags set by this OR are overwritten by the SUB two lines
below, so SLJIT_SET_E looks redundant here (check_anynewline omits it at the
same step) - confirm before changing.
Setting bit 0 maps both 0x2028 and 0x2029 (minus 0x0a) to the same value, so one compare tests both. */
2490 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2491 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2492 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2493 #ifdef COMPILE_PCRE8
2494 }
2495 #endif
2496 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the last comparison and leave the flags describing TMP2. */
2497 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2498
2499 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2500 }
2501
2502 #define CHAR1 STR_END
2503 #define CHAR2 STACK_TOP
2504
/* Emits the shared caseful compare routine. On entry TMP1 points to one
sequence and TMP2 holds the length to compare in bytes; STR_PTR is first moved
back by TMP2 and then points to the other sequence. Code units are compared
pairwise until one pair differs or TMP2 has been counted down to zero, so TMP2
is zero on exit exactly when every pair matched.

CHAR1 and CHAR2 are aliases of STR_END and STACK_TOP; their values are saved
in TMP3 and the LOCALS0 slot and restored before returning. */
2505 static void do_casefulcmp(compiler_common *common)
2506 {
2507 DEFINE_COMPILER;
2508 struct sljit_jump *jump;
2509 struct sljit_label *label;
2510
2511 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2512 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2513 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2514 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are biased by one code unit because the loads below address [ptr + 1 unit].
NOTE(review): MOVU_UCHAR is presumably a load that also updates the base register - confirm. */
2515 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2516 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2517
2518 label = LABEL();
2519 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2520 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2521 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2522 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2523 JUMPTO(SLJIT_C_NOT_ZERO, label);
2524
/* Common exit: undo the one unit bias of STR_PTR and restore the borrowed registers. */
2525 JUMPHERE(jump);
2526 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2527 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2528 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2529 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2530 }
2531
2532 #define LCC_TABLE STACK_LIMIT
2533
/* Emits the shared caseless compare routine. It has the same structure as
do_casefulcmp(): TMP1 points to one sequence, TMP2 holds the length in bytes,
and STR_PTR is first moved back by TMP2. Before each comparison both code
units are mapped through the common->lcc table; in builds other than the 8-bit
one, code units above 255 are compared unmapped. TMP2 is zero on exit exactly
when every pair matched.

LCC_TABLE, CHAR1 and CHAR2 are aliases of STACK_LIMIT, STR_END and STACK_TOP;
their values are saved in TMP3, LOCALS0 and LOCALS1 and restored before
returning. */
2534 static void do_caselesscmp(compiler_common *common)
2535 {
2536 DEFINE_COMPILER;
2537 struct sljit_jump *jump;
2538 struct sljit_label *label;
2539
2540 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2541 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2542
2543 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2544 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2545 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2546 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are biased by one code unit because the loads below address [ptr + 1 unit]. */
2547 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2548 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2549
2550 label = LABEL();
2551 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2552 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Map each code unit through the table; values above 255 skip the lookup. */
2553 #ifndef COMPILE_PCRE8
2554 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
2555 #endif
2556 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2557 #ifndef COMPILE_PCRE8
2558 JUMPHERE(jump);
2559 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
2560 #endif
2561 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2562 #ifndef COMPILE_PCRE8
2563 JUMPHERE(jump);
2564 #endif
2565 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2566 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2567 JUMPTO(SLJIT_C_NOT_ZERO, label);
2568
/* Common exit: undo the one unit bias of STR_PTR and restore the borrowed registers. */
2569 JUMPHERE(jump);
2570 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2571 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2572 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2573 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2574 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2575 }
2576
2577 #undef LCC_TABLE
2578 #undef CHAR1
2579 #undef CHAR2
2580
2581 #if defined SUPPORT_UTF && defined SUPPORT_UCP
2582
/* Plain C helper that compares the character sequence [src1, end1) caselessly
with the sequence starting at args->ptr and bounded by args->end.

Arguments:
  src1  start of the first sequence
  args  supplies the second sequence: args->ptr (start) and args->end (limit)
  end1  end of the first sequence

Returns:
  NULL               a character pair differs, even after taking the other
                     case of the second character
  (pcre_uchar *)1    the second sequence reached args->end before the first
                     one was exhausted
  otherwise          the position in the second sequence just after the
                     matched characters */
2583 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
2584 {
2585 /* Doing this at JIT level would be inefficient, so it is a C function. */
2586 int c1, c2;
2587 const pcre_uchar *src2 = args->ptr;
2588 const pcre_uchar *end2 = args->end;
2589
2590 while (src1 < end1)
2591 {
2592 if (src2 >= end2)
2593 return (pcre_uchar*)1;
/* GETCHARINC decodes one character and advances the pointer. */
2594 GETCHARINC(c1, src1);
2595 GETCHARINC(c2, src2);
2596 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
2597 }
2598 return src2;
2599 }
2600
2601 #endif /* SUPPORT_UTF && SUPPORT_UCP */
2602
/* Emits the compare code for one pattern character at cc (several code units
when common->utf is set and the first unit announces extra ones). The subject
is addressed relative to STR_PTR at offset -context->length, and
context->length is decreased by one code unit for every unit handled. Every
mismatch jump is added to the fallbacks list.

Arguments:
  common     compiler state
  caseless   TRUE for caseless matching
  cc         the pattern character to compare
  context    state carried across consecutive calls: remaining length,
             the register that receives the next load (sourcereg, -1 before
             the first call) and, for unaligned reads, the collected
             character (c) and other-case mask (oc) plus the fill index
             (ucharptr)
  fallbacks  receives the mismatch jumps

Returns: cc advanced past the code units that were consumed.

Caseless characters are handled by ORing the differing bit into the subject
data and comparing with (character | bit). TMP1 and TMP2 are used alternately
as the load target. */
2603 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2604 compare_context* context, jump_list **fallbacks)
2605 {
2606 DEFINE_COMPILER;
2607 unsigned int othercasebit = 0;
2608 pcre_uchar *othercasechar = NULL;
2609 #ifdef SUPPORT_UTF
2610 int utflength;
2611 #endif
2612
2613 if (caseless && char_has_othercase(common, cc))
2614 {
2615 othercasebit = char_get_othercase_bit(common, cc);
2616 SLJIT_ASSERT(othercasebit);
2617 /* Extracting bit difference info. */
2618 #ifdef COMPILE_PCRE8
/* Upper part: index of the code unit that differs; low 8 bits: the differing bit. */
2619 othercasechar = cc + (othercasebit >> 8);
2620 othercasebit &= 0xff;
2621 #else
2622 #ifdef COMPILE_PCRE16
/* Bits from 9 up: index of the code unit that differs; bit 0x100 tells that the
differing bit is in the upper byte of that 16-bit unit. */
2623 othercasechar = cc + (othercasebit >> 9);
2624 if ((othercasebit & 0x100) != 0)
2625 othercasebit = (othercasebit & 0xff) << 8;
2626 else
2627 othercasebit &= 0xff;
2628 #endif
2629 #endif
2630 }
2631
/* First call of a sequence: emit the initial load into TMP1, using the widest
access allowed for the remaining length; the next load will go to TMP2. */
2632 if (context->sourcereg == -1)
2633 {
2634 #ifdef COMPILE_PCRE8
2635 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2636 if (context->length >= 4)
2637 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2638 else if (context->length >= 2)
2639 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2640 else
2641 #endif
2642 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2643 #else
2644 #ifdef COMPILE_PCRE16
2645 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2646 if (context->length >= 4)
2647 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2648 else
2649 #endif
2650 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2651 #endif
2652 #endif /* COMPILE_PCRE8 */
2653 context->sourcereg = TMP2;
2654 }
2655
2656 #ifdef SUPPORT_UTF
/* Number of code units that make up this character. */
2657 utflength = 1;
2658 if (common->utf && HAS_EXTRALEN(*cc))
2659 utflength += GET_EXTRALEN(*cc);
2660
2661 do
2662 {
2663 #endif
2664
2665 context->length -= IN_UCHARS(1);
2666 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2667
2668 /* Unaligned read is supported. */
/* Collect the expected unit (with the case bit forced on) and its OR mask. */
2669 if (othercasebit != 0 && othercasechar == cc)
2670 {
2671 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2672 context->oc.asuchars[context->ucharptr] = othercasebit;
2673 }
2674 else
2675 {
2676 context->c.asuchars[context->ucharptr] = *cc;
2677 context->oc.asuchars[context->ucharptr] = 0;
2678 }
2679 context->ucharptr++;
2680
/* Emit a compare once a full group has been collected or nothing is left. */
2681 #ifdef COMPILE_PCRE8
2682 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2683 #else
2684 if (context->ucharptr >= 2 || context->length == 0)
2685 #endif
2686 {
/* Load the next group into the idle register first, then switch sourcereg to
the register that holds the data loaded earlier and compare that. */
2687 if (context->length >= 4)
2688 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2689 #ifdef COMPILE_PCRE8
2690 else if (context->length >= 2)
2691 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2692 else if (context->length >= 1)
2693 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2694 #else
2695 else if (context->length >= 2)
2696 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2697 #endif
2698 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2699
/* The case labels are counts of code units: 4 or 2 bytes worth, or a single byte. */
2700 switch(context->ucharptr)
2701 {
2702 case 4 / sizeof(pcre_uchar):
2703 if (context->oc.asint != 0)
2704 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2705 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2706 break;
2707
2708 case 2 / sizeof(pcre_uchar):
2709 if (context->oc.asushort != 0)
2710 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
2711 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
2712 break;
2713
2714 #ifdef COMPILE_PCRE8
2715 case 1:
2716 if (context->oc.asbyte != 0)
2717 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2718 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2719 break;
2720 #endif
2721
2722 default:
2723 SLJIT_ASSERT_STOP();
2724 break;
2725 }
2726 context->ucharptr = 0;
2727 }
2728
2729 #else
2730
2731 /* Unaligned read is unsupported. */
/* One code unit at a time: load the next unit into the idle register, then
compare the unit that was loaded earlier. */
2732 #ifdef COMPILE_PCRE8
2733 if (context->length > 0)
2734 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2735 #else
2736 if (context->length > 0)
2737 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2738 #endif
2739 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2740
2741 if (othercasebit != 0 && othercasechar == cc)
2742 {
2743 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2744 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2745 }
2746 else
2747 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2748
2749 #endif
2750
2751 cc++;
2752 #ifdef SUPPORT_UTF
2753 utflength--;
2754 }
2755 while (utflength > 0);
2756 #endif
2757
2758 return cc;
2759 }
2760
2761 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2762
/* SET_TYPE_OFFSET(value) and SET_CHAR_OFFSET(value) emit the SUB or ADD that
rebases a register (typereg, respectively TMP1) from its current bias
(typeoffset, respectively charoffset) to the new bias "value", and then record
the new bias. Nothing is emitted when the bias is unchanged.

The macros rely on the local variables typereg, typeoffset and charoffset and
on the compiler pointer of the function that uses them. Each macro expands to
an if statement followed by an assignment and is not wrapped in
do { } while (0), so it must be used as a standalone statement and never as
the unbraced body of an if, else or loop. */
2763 #define SET_TYPE_OFFSET(value) \
2764 if ((value) != typeoffset) \
2765 { \
2766 if ((value) > typeoffset) \
2767 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
2768 else \
2769 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
2770 } \
2771 typeoffset = (value);
2772
2773 #define SET_CHAR_OFFSET(value) \
2774 if ((value) != charoffset) \
2775 { \
2776 if ((value) > charoffset) \
2777 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
2778 else \
2779 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
2780 } \
2781 charoffset = (value);
2782
2783 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2784 {
2785 DEFINE_COMPILER;
2786 jump_list *found = NULL;
2787 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2788 unsigned int c;
2789 int compares;
2790 struct sljit_jump *jump = NULL;
2791 pcre_uchar *ccbegin;
2792 #ifdef SUPPORT_UCP
2793 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2794 BOOL charsaved = FALSE;
2795 int typereg = TMP1, scriptreg = TMP1;
2796 unsigned int typeoffset;
2797 #endif
2798 int invertcmp, numberofcmps;
2799 unsigned int charoffset;
2800
2801 /* Although SUPPORT_UTF must be defined, we are not necessary in utf mode. */
2802 fallback_at_str_end(common, fallbacks);
2803 read_char(common);
2804
2805 if ((*cc++ & XCL_MAP) != 0)
2806 {
2807 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2808 #ifndef COMPILE_PCRE8
2809 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2810 #elif defined SUPPORT_UTF
2811 if (common->utf)
2812 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2813 #endif
2814
2815 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2816 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2817 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2818 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2819 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2820 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2821
2822 #ifndef COMPILE_PCRE8
2823 JUMPHERE(jump);
2824 #elif defined SUPPORT_UTF
2825 if (common->utf)
2826 JUMPHERE(jump);
2827 #endif
2828 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2829 #ifdef SUPPORT_UCP
2830 charsaved = TRUE;
2831 #endif
2832 cc += 32 / sizeof(pcre_uchar);
2833 }
2834
2835 /* Scanning the necessary info. */
2836 ccbegin = cc;
2837 compares = 0;
2838 while (*cc != XCL_END)
2839 {
2840 compares++;
2841 if (*cc == XCL_SINGLE)
2842 {
2843 cc += 2;
2844 #ifdef SUPPORT_UTF
2845 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2846 #endif
2847 #ifdef SUPPORT_UCP
2848 needschar = TRUE;
2849 #endif
2850 }
2851 else if (*cc == XCL_RANGE)
2852 {
2853 cc += 2;
2854 #ifdef SUPPORT_UTF
2855 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2856 #endif
2857 cc++;
2858 #ifdef SUPPORT_UTF
2859 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2860 #endif
2861 #ifdef SUPPORT_UCP
2862 needschar = TRUE;
2863 #endif
2864 }
2865 #ifdef SUPPORT_UCP
2866 else
2867 {
2868 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
2869 cc++;
2870 switch(*cc)
2871 {
2872 case PT_ANY:
2873 break;
2874
2875 case PT_LAMP:
2876 case PT_GC:
2877 case PT_PC:
2878 case PT_ALNUM:
2879 needstype = TRUE;
2880 break;
2881
2882 case PT_SC:
2883 needsscript = TRUE;
2884 break;
2885
2886 case PT_SPACE:
2887 case PT_PXSPACE:
2888 case PT_WORD:
2889 needstype = TRUE;
2890 needschar = TRUE;
2891 break;
2892
2893 default:
2894 SLJIT_ASSERT_STOP();
2895 break;
2896 }
2897 cc += 2;
2898 }
2899 #endif
2900 }
2901
2902 #ifdef SUPPORT_UCP
2903 /* Simple register allocation. TMP1 is preferred if possible. */
2904 if (needstype || needsscript)
2905 {
2906 if (needschar && !charsaved)
2907 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2908 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2909 if (needschar)
2910 {
2911 if (needstype)
2912 {
2913 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2914 typereg = RETURN_ADDR;
2915 }
2916
2917 if (needsscript)
2918 scriptreg = TMP3;
2919 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2920 }
2921 else if (needstype && needsscript)
2922 scriptreg = TMP3;
2923 /* In all other cases only one of them was specified, and that can goes to TMP1. */
2924
2925 if (needsscript)
2926 {
2927 if (scriptreg == TMP1)
2928 {
2929 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2930 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
2931 }
2932 else
2933 {
2934 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
2935 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2936 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
2937 }
2938 }
2939 }
2940 #endif
2941
2942 /* Generating code. */
2943 cc = ccbegin;
2944 charoffset = 0;
2945 numberofcmps = 0;
2946 #ifdef SUPPORT_UCP
2947 typeoffset = 0;
2948 #endif
2949
2950 while (*cc != XCL_END)
2951 {
2952 compares--;
2953 invertcmp = (compares == 0 && list != fallbacks);
2954 jump = NULL;
2955
2956 if (*cc == XCL_SINGLE)
2957 {
2958 cc ++;
2959 #ifdef SUPPORT_UTF
2960 if (common->utf)
2961 {
2962 GETCHARINC(c, cc);
2963 }
2964 else
2965 #endif
2966 c = *cc++;
2967
2968 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2969 {
2970 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2971 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2972 numberofcmps++;
2973 }
2974 else if (numberofcmps > 0)
2975 {
2976 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2977 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2978 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2979 numberofcmps = 0;
2980 }
2981 else
2982 {
2983 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2984 numberofcmps = 0;
2985 }
2986 }
2987 else if (*cc == XCL_RANGE)
2988 {
2989 cc ++;
2990 #ifdef SUPPORT_UTF
2991 if (common->utf)
2992 {
2993 GETCHARINC(c, cc);
2994 }
2995 else
2996 #endif
2997 c = *cc++;
2998 SET_CHAR_OFFSET(c);
2999 #ifdef SUPPORT_UTF
3000 if (common->utf)
3001 {
3002 GETCHARINC(c, cc);
3003 }
3004 else
3005 #endif
3006 c = *cc++;
3007 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3008 {
3009 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3010 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3011 numberofcmps++;
3012 }
3013 else if (numberofcmps > 0)
3014 {
3015 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3016 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3017 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3018 numberofcmps = 0;
3019 }
3020 else
3021 {
3022 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3023 numberofcmps = 0;
3024 }
3025 }
3026 #ifdef SUPPORT_UCP
3027 else
3028 {
3029 if (*cc == XCL_NOTPROP)
3030 invertcmp ^= 0x1;
3031 cc++;
3032 switch(*cc)
3033 {
3034 case PT_ANY:
3035 if (list != fallbacks)
3036 {
3037 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3038 continue;
3039 }
3040 else if (cc[-1] == XCL_NOTPROP)
3041 continue;
3042 jump = JUMP(SLJIT_JUMP);
3043 break;
3044
3045 case PT_LAMP:
3046 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3047 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3048 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3049 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3050 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3051 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3052 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3053 break;
3054
3055 case PT_GC:
3056 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3057 SET_TYPE_OFFSET(c);
3058 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3059 break;
3060
3061 case PT_PC:
3062 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3063 break;
3064
3065 case PT_SC:
3066 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3067 break;
3068
3069 case PT_SPACE:
3070 case PT_PXSPACE:
3071 if (*cc == PT_SPACE)
3072 {
3073 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3074 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3075 }
3076 SET_CHAR_OFFSET(9);
3077 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3078 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3079 if (*cc == PT_SPACE)
3080 JUMPHERE(jump);
3081
3082 SET_TYPE_OFFSET(ucp_Zl);
3083 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3084 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3085 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3086 break;
3087
3088 case PT_WORD:
3089 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3090 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3091 /* ... fall through */
3092
3093 case PT_ALNUM:
3094 SET_TYPE_OFFSET(ucp_Ll);
3095 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3096 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3097 SET_TYPE_OFFSET(ucp_Nd);
3098 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3099 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3100 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3101 break;
3102 }
3103 cc += 2;
3104 }
3105 #endif
3106
3107 if (jump != NULL)
3108 add_jump(compiler, compares > 0 ? list : fallbacks, jump);
3109 }
3110
3111 if (found != NULL)
3112 set_jumps(found, LABEL());
3113 }
3114
3115 #undef SET_TYPE_OFFSET
3116 #undef SET_CHAR_OFFSET
3117
3118 #endif
3119
3120 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
3121 {
3122 DEFINE_COMPILER;
3123 int length;
3124 unsigned int c, oc, bit;
3125 compare_context context;
3126 struct sljit_jump *jump[4];
3127 #ifdef SUPPORT_UTF
3128 struct sljit_label *label;
3129 #ifdef SUPPORT_UCP
3130 pcre_uchar propdata[5];
3131 #endif
3132 #endif
3133
3134 switch(type)
3135 {
3136 case OP_SOD:
3137 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3138 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3139 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3140 return cc;
3141
3142 case OP_SOM:
3143 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3144 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3145 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3146 return cc;
3147
3148 case OP_NOT_WORD_BOUNDARY:
3149 case OP_WORD_BOUNDARY:
3150 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3151 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3152 return cc;
3153
3154 case OP_NOT_DIGIT:
3155 case OP_DIGIT:
3156 fallback_at_str_end(common, fallbacks);
3157 read_char8_type(common);
3158 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3159 add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3160 return cc;
3161
3162 case OP_NOT_WHITESPACE:
3163 case OP_WHITESPACE:
3164 fallback_at_str_end(common, fallbacks);
3165 read_char8_type(common);
3166 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3167 add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3168 return cc;
3169
3170 case OP_NOT_WORDCHAR:
3171 case OP_WORDCHAR:
3172 fallback_at_str_end(common, fallbacks);
3173 read_char8_type(common);
3174 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3175 add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3176 return cc;
3177
3178 case OP_ANY:
3179 fallback_at_str_end(common, fallbacks);
3180 read_char(common);
3181 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3182 {
3183 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3184 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3185 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3186 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
3187 JUMPHERE(jump[1]);
3188 JUMPHERE(jump[0]);
3189 }
3190 else
3191 check_newlinechar(common, common->nltype, fallbacks, TRUE);
3192 return cc;
3193
3194 case OP_ALLANY:
3195 fallback_at_str_end(common, fallbacks);
3196 #ifdef SUPPORT_UTF
3197 if (common->utf)
3198 {
3199 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3200 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3201 #ifdef COMPILE_PCRE8
3202 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3203 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3204 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3205 #else /* COMPILE_PCRE8 */
3206 #ifdef COMPILE_PCRE16
3207 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3208 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3209 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3210 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3211 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3212 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3213 #endif /* COMPILE_PCRE16 */
3214 #endif /* COMPILE_PCRE8 */
3215 JUMPHERE(jump[0]);
3216 return cc;
3217 }
3218 #endif
3219 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3220 return cc;
3221
3222 case OP_ANYBYTE:
3223 fallback_at_str_end(common, fallbacks);
3224 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3225 return cc;
3226
3227 #ifdef SUPPORT_UTF
3228 #ifdef SUPPORT_UCP
3229 case OP_NOTPROP:
3230 case OP_PROP:
3231 propdata[0] = 0;
3232 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3233 propdata[2] = cc[0];
3234 propdata[3] = cc[1];
3235 propdata[4] = XCL_END;
3236 compile_xclass_hotpath(common, propdata, fallbacks);
3237 return cc + 2;
3238 #endif
3239 #endif
3240
3241 case OP_ANYNL:
3242 fallback_at_str_end(common, fallbacks);
3243 read_char(common);
3244 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3245 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3246 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3247 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3248 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3249 jump[3] = JUMP(SLJIT_JUMP);
3250 JUMPHERE(jump[0]);
3251 check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
3252 JUMPHERE(jump[1]);
3253 JUMPHERE(jump[2]);
3254 JUMPHERE(jump[3]);
3255 return cc;
3256
3257 case OP_NOT_HSPACE:
3258 case OP_HSPACE:
3259 fallback_at_str_end(common, fallbacks);
3260 read_char(common);
3261 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3262 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3263 return cc;
3264
3265 case OP_NOT_VSPACE:
3266 case OP_VSPACE:
3267 fallback_at_str_end(common, fallbacks);
3268 read_char(common);
3269 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3270 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3271 return cc;
3272
3273 #ifdef SUPPORT_UCP
3274 case OP_EXTUNI:
3275 fallback_at_str_end(common, fallbacks);
3276 read_char(common);
3277 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3278 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3279 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
3280
3281 label = LABEL();
3282 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3283 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3284 read_char(common);
3285 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3286 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3287 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
3288
3289 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3290 JUMPHERE(jump[0]);
3291 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
3292 {
3293 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3294 check_partial(common);
3295 JUMPHERE(jump[0]);
3296 }
3297 return cc;
3298 #endif
3299
3300 case OP_EODN:
3301 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3302 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3303 {
3304 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3305 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3306 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3307 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3308 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3309 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3310 }
3311 else if (common->nltype == NLTYPE_FIXED)
3312 {
3313 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3314 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3315 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3316 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3317 }
3318 else
3319 {
3320 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3321 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3322 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3323 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3324 jump[2] = JUMP(SLJIT_C_GREATER);
3325 add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
3326 /* Equal. */
3327 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3328 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3329 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3330
3331 JUMPHERE(jump[1]);
3332 if (common->nltype == NLTYPE_ANYCRLF)
3333 {
3334 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3335 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3336 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3337 }
3338 else
3339 {
3340 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3341 read_char(common);
3342 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3343 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3344 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3345 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3346 }
3347 JUMPHERE(jump[2]);
3348 JUMPHERE(jump[3]);
3349 }
3350 JUMPHERE(jump[0]);
3351 check_partial(common);
3352 return cc;
3353
3354 case OP_EOD:
3355 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3356 check_partial(common);
3357 return cc;
3358
3359 case OP_CIRC:
3360 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3361 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3362 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3363 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3364 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3365 return cc;
3366
3367 case OP_CIRCM:
3368 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3369 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3370 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3371 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3372 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3373 jump[0] = JUMP(SLJIT_JUMP);
3374 JUMPHERE(jump[1]);
3375
3376 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3377 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3378 {
3379 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3380 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3381 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3382 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3383 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3384 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3385 }
3386 else
3387 {
3388 skip_char_back(common);
3389 read_char(common);
3390 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3391 }
3392 JUMPHERE(jump[0]);
3393 return cc;
3394
3395 case OP_DOLL:
3396 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3397 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3398 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3399
3400 if (!common->endonly)
3401 compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
3402 else
3403 {
3404 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3405 check_partial(common);
3406 }
3407 return cc;
3408
3409 case OP_DOLLM:
3410 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3411 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3412 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3413 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3414 check_partial(common);
3415 jump[0] = JUMP(SLJIT_JUMP);
3416 JUMPHERE(jump[1]);
3417
3418 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3419 {
3420 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3421 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3422 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3423 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3424 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3425 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3426 }
3427 else
3428 {
3429 peek_char(common);
3430 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3431 }
3432 JUMPHERE(jump[0]);
3433 return cc;
3434
3435 case OP_CHAR:
3436 case OP_CHARI:
3437 length = 1;
3438 #ifdef SUPPORT_UTF
3439 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3440 #endif
3441 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
3442 {
3443 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3444 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3445
3446 context.length = IN_UCHARS(length);
3447 context.sourcereg = -1;
3448 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3449 context.ucharptr = 0;
3450 #endif
3451 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3452 }
3453 fallback_at_str_end(common, fallbacks);
3454 read_char(common);
3455 #ifdef SUPPORT_UTF
3456 if (common->utf)
3457 {
3458 GETCHAR(c, cc);
3459 }
3460 else
3461 #endif
3462 c = *cc;
3463 if (type == OP_CHAR || !char_has_othercase(common, cc))
3464 {
3465 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
3466 return cc + length;
3467 }
3468 oc = char_othercase(common, c);
3469 bit = c ^ oc;
3470 if (ispowerof2(bit))
3471 {
3472 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3473 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3474 return cc + length;
3475 }
3476 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3477 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3478 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3479 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3480 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3481 return cc + length;
3482
3483 case OP_NOT:
3484 case OP_NOTI:
3485 fallback_at_str_end(common, fallbacks);
3486 length = 1;
3487 #ifdef SUPPORT_UTF
3488 if (common->utf)
3489 {
3490 #ifdef COMPILE_PCRE8
3491 c = *cc;
3492 if (c < 128)
3493 {
3494 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3495 if (type == OP_NOT || !char_has_othercase(common, cc))
3496 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3497 else
3498 {
3499 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3500 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3501 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3502 }
3503 /* Skip the variable-length character. */
3504 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3505 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3506 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3507 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3508 JUMPHERE(jump[0]);
3509 return cc + 1;
3510 }
3511 else
3512 #endif /* COMPILE_PCRE8 */
3513 {
3514 GETCHARLEN(c, cc, length);
3515 read_char(common);
3516 }
3517 }
3518 else
3519 #endif /* SUPPORT_UTF */
3520 {
3521 read_char(common);
3522 c = *cc;
3523 }
3524
3525 if (type == OP_NOT || !char_has_othercase(common, cc))
3526 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3527 else
3528 {
3529 oc = char_othercase(common, c);
3530 bit = c ^ oc;
3531 if (ispowerof2(bit))
3532 {
3533 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3534 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3535 }
3536 else
3537 {
3538 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3539 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3540 }
3541 }
3542 return cc + 1;
3543
3544 case OP_CLASS:
3545 case OP_NCLASS:
3546 fallback_at_str_end(common, fallbacks);
3547 read_char(common);
3548 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3549 jump[0] = NULL;
3550 #ifdef COMPILE_PCRE8
3551 /* This check only affects 8 bit mode. In other modes, we
3552 always need to compare the value with 255. */
3553 if (common->utf)
3554 #endif /* COMPILE_PCRE8 */
3555 {
3556 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3557 if (type == OP_CLASS)
3558 {
3559 add_jump(compiler, fallbacks, jump[0]);
3560 jump[0] = NULL;
3561 }
3562 }
3563 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3564 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3565 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3566 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3567 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3568 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3569 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3570 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3571 if (jump[0] != NULL)
3572 JUMPHERE(jump[0]);
3573 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3574 return cc + 32 / sizeof(pcre_uchar);
3575
3576 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3577 case OP_XCLASS:
3578 compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3579 return cc + GET(cc, 0) - 1;
3580 #endif
3581
3582 case OP_REVERSE:
3583 length = GET(cc, 0);
3584 SLJIT_ASSERT(length > 0);
3585 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3586 #ifdef SUPPORT_UTF
3587 if (common->utf)
3588 {
3589 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3590 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3591 label = LABEL();
3592 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
3593 skip_char_back(common);
3594 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3595 JUMPTO(SLJIT_C_NOT_ZERO, label);
3596 }
3597 else
3598 #endif
3599 {
3600 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3601 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3602 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3603 }
3604 check_start_used_ptr(common);
3605 return cc + LINK_SIZE;
3606 }
3607 SLJIT_ASSERT_STOP();
3608 return cc;
3609 }
3610
3611 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3612 {
3613 /* This function consumes at least one input character. */
3614 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3615 DEFINE_COMPILER;
3616 pcre_uchar *ccbegin = cc;
3617 compare_context context;
3618 int size;
3619
3620 context.length = 0;
3621 do
3622 {
3623 if (cc >= ccend)
3624 break;
3625
3626 if (*cc == OP_CHAR)
3627 {
3628 size = 1;
3629 #ifdef SUPPORT_UTF
3630 if (common->utf && HAS_EXTRALEN(cc[1]))
3631 size += GET_EXTRALEN(cc[1]);
3632 #endif
3633 }
3634 else if (*cc == OP_CHARI)
3635 {
3636 size = 1;
3637 #ifdef SUPPORT_UTF
3638 if (common->utf)
3639 {
3640 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3641 size = 0;
3642 else if (HAS_EXTRALEN(cc[1]))
3643 size += GET_EXTRALEN(cc[1]);
3644 }
3645 else
3646 #endif
3647 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3648 size = 0;
3649 }
3650 else
3651 size = 0;
3652
3653 cc += 1 + size;
3654 context.length += IN_UCHARS(size);
3655 }
3656 while (size > 0 && context.length <= 128);
3657
3658 cc = ccbegin;
3659 if (context.length > 0)
3660 {
3661 /* We have a fixed-length byte sequence. */
3662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3663 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3664
3665 context.sourcereg = -1;
3666 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3667 context.ucharptr = 0;
3668 #endif
3669 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3670 return cc;
3671 }
3672
3673 /* A non-fixed length character will be checked if length == 0. */
3674 return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3675 }
3676
3677 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3678 {
3679 DEFINE_COMPILER;
3680 int offset = GET2(cc, 1) << 1;
3681
3682 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3683 if (!common->jscript_compat)
3684 {
3685 if (fallbacks == NULL)
3686 {
3687 /* OVECTOR(1) contains the "string begin - 1" constant. */
3688 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3689 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3690 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3691 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3692 return JUMP(SLJIT_C_NOT_ZERO);
3693 }
3694 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3695 }
3696 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3697 }
3698
3699 /* Forward definitions. */
3700 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3701 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
3702
/* Allocates a zero-filled fallback record of `size` bytes from the compiler's
memory, stores `ccstart` in it and links it in as the new parent->top. If the
compiler reports an error after the allocation, the enclosing function
returns `error`. Relies on the local names `compiler`, `fallback` and
`parent` being in scope at the point of use; `size` is evaluated twice. */
#define PUSH_FALLBACK(size, ccstart, error) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)
3715
/* Variant of PUSH_FALLBACK for functions returning void: allocates a
zero-filled fallback record of `size` bytes, stores `ccstart` in it and links
it in as the new parent->top, or returns from the enclosing function if the
compiler reports an error. Relies on the local names `compiler`, `fallback`
and `parent` being in scope at the point of use; `size` is evaluated twice. */
#define PUSH_FALLBACK_NOVALUE(size, ccstart) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)
3728
/* Views the local `fallback` pointer as a pointer to the given record type. */
#define FALLBACK_AS(type) ((type *)(fallback))
3730
3731 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3732 {
3733 DEFINE_COMPILER;
3734 int offset = GET2(cc, 1) << 1;
3735 struct sljit_jump *jump = NULL;
3736 struct sljit_jump *partial;
3737 struct sljit_jump *nopartial;
3738
3739 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3740 /* OVECTOR(1) contains the "string begin - 1" constant. */
3741 if (withchecks && !common->jscript_compat)
3742 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3743
3744 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3745 if (common->utf && *cc == OP_REFI)
3746 {
3747 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3748 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3749 if (withchecks)
3750 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3751
3752 /* Needed to save important temporary registers. */
3753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3754 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3755 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0);
3756 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3757 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3758 if (common->mode == JIT_COMPILE)
3759 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
3760 else
3761 {
3762 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3763 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3764 check_partial(common);
3765 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3766 JUMPHERE(nopartial);
3767 }
3768 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3769 }
3770 else
3771 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3772 {
3773 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3774 if (withchecks)
3775 jump = JUMP(SLJIT_C_ZERO);
3776
3777 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3778 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
3779 if (common->mode == JIT_COMPILE)
3780 add_jump(compiler, fallbacks, partial);
3781
3782 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3783 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3784
3785 if (common->mode != JIT_COMPILE)
3786 {
3787 nopartial = JUMP(SLJIT_JUMP);
3788 JUMPHERE(partial);
3789 /* TMP2 -= STR_END - STR_PTR */
3790 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
3791 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
3792 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
3793 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
3794 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3795 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3796 JUMPHERE(partial);
3797 check_partial(common);
3798 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3799 JUMPHERE(nopartial);
3800 }
3801 }
3802
3803 if (jump != NULL)
3804 {
3805 if (emptyfail)
3806 add_jump(compiler, fallbacks, jump);
3807 else
3808 JUMPHERE(jump);
3809 }
3810 return cc + 1 + IMM2_SIZE;
3811 }
3812
3813 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3814 {
3815 DEFINE_COMPILER;
3816 fallback_common *fallback;
3817 pcre_uchar type;
3818 struct sljit_label *label;
3819 struct sljit_jump *zerolength;
3820 struct sljit_jump *jump = NULL;
3821 pcre_uchar *ccbegin = cc;
3822 int min = 0, max = 0;
3823 BOOL minimize;
3824
3825 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3826
3827 type = cc[1 + IMM2_SIZE];
3828 minimize = (type & 0x1) != 0;
3829 switch(type)
3830 {
3831 case OP_CRSTAR:
3832 case OP_CRMINSTAR:
3833 min = 0;
3834 max = 0;
3835 cc += 1 + IMM2_SIZE + 1;
3836 break;
3837 case OP_CRPLUS:
3838 case OP_CRMINPLUS:
3839 min = 1;
3840 max = 0;
3841 cc += 1 + IMM2_SIZE + 1;
3842 break;
3843 case OP_CRQUERY:
3844 case OP_CRMINQUERY:
3845 min = 0;
3846 max = 1;
3847 cc += 1 + IMM2_SIZE + 1;
3848 break;
3849 case OP_CRRANGE:
3850 case OP_CRMINRANGE:
3851 min = GET2(cc, 1 + IMM2_SIZE + 1);
3852 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
3853 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
3854 break;
3855 default:
3856 SLJIT_ASSERT_STOP();
3857 break;
3858 }
3859
3860 if (!minimize)
3861 {
3862 if (min == 0)
3863 {
3864 allocate_stack(common, 2);
3865 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3866 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3867 /* Temporary release of STR_PTR. */
3868 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3869 zerolength = compile_ref_checks(common, ccbegin, NULL);
3870 /* Restore if not zero length. */
3871 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3872 }
3873 else
3874 {
3875 allocate_stack(common, 1);
3876 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3877 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3878 }
3879
3880 if (min > 1 || max > 1)
3881 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
3882
3883 label = LABEL();
3884 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
3885
3886 if (min > 1 || max > 1)
3887 {
3888 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3889 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3890 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
3891 if (min > 1)
3892 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
3893 if (max > 1)
3894 {
3895 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
3896 allocate_stack(common, 1);
3897 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3898 JUMPTO(SLJIT_JUMP, label);
3899 JUMPHERE(jump);
3900 }
3901 }
3902
3903 if (max == 0)
3904 {
3905 /* Includes min > 1 case as well. */
3906 allocate_stack(common, 1);
3907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3908 JUMPTO(SLJIT_JUMP, label);
3909 }
3910
3911 JUMPHERE(zerolength);
3912 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3913
3914 decrease_call_count(common);
3915 return cc;
3916 }
3917
3918 allocate_stack(common, 2);
3919 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3920 if (type != OP_CRMINSTAR)
3921 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3922
3923 if (min == 0)
3924 {
3925 zerolength = compile_ref_checks(common, ccbegin, NULL);
3926 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3927 jump = JUMP(SLJIT_JUMP);
3928 }
3929 else
3930 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3931
3932 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3933 if (max > 0)
3934 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
3935
3936 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
3937 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3938
3939 if (min > 1)
3940 {
3941 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3942 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3943 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3944 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath);
3945 }
3946 else if (max > 0)
3947 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
3948
3949 if (jump != NULL)
3950 JUMPHERE(jump);
3951 JUMPHERE(zerolength);
3952
3953 decrease_call_count(common);
3954 return cc;
3955 }
3956
3957 static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3958 {
3959 DEFINE_COMPILER;
3960 fallback_common *fallback;
3961 recurse_entry *entry = common->entries;
3962 recurse_entry *prev = NULL;
3963 int start = GET(cc, 1);
3964
3965 PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL);
3966 while (entry != NULL)
3967 {
3968 if (entry->start == start)
3969 break;
3970 prev = entry;
3971 entry = entry->next;
3972 }
3973
3974 if (entry == NULL)
3975 {
3976 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
3977 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3978 return NULL;
3979 entry->next = NULL;
3980 entry->entry = NULL;
3981 entry->calls = NULL;
3982 entry->start = start;
3983
3984 if (prev != NULL)
3985 prev->next = entry;
3986 else
3987 common->entries = entry;
3988 }
3989
3990 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
3991 allocate_stack(common, 1);
3992 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
3993
3994 if (entry->entry == NULL)
3995 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
3996 else
3997 JUMPTO(SLJIT_FAST_CALL, entry->entry);
3998 /* Leave if the match is failed. */
3999 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4000 return cc + 1 + LINK_SIZE;
4001 }
4002
4003 static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional)
4004 {
4005 DEFINE_COMPILER;
4006 int framesize;
4007 int localptr;
4008 fallback_common altfallback;
4009 pcre_uchar *ccbegin;
4010 pcre_uchar opcode;
4011 pcre_uchar bra = OP_BRA;
4012 jump_list *tmp = NULL;
4013 jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks;
4014 jump_list **found;
4015 /* Saving previous accept variables. */
4016 struct sljit_label *save_acceptlabel = common->acceptlabel;
4017 struct sljit_jump *jump;
4018 struct sljit_jump *brajump = NULL;
4019 jump_list *save_accept = common->accept;
4020
4021 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4022 {
4023 SLJIT_ASSERT(!conditional);
4024 bra = *cc;
4025 cc++;
4026 }
4027 localptr = PRIV_DATA(cc);
4028 SLJIT_ASSERT(localptr != 0);
4029 framesize = get_framesize(common, cc, FALSE);
4030 fallback->framesize = framesize;
4031 fallback->localptr = localptr;
4032 opcode = *cc;
4033 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
4034 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
4035 ccbegin = cc;
4036 cc += GET(cc, 1);
4037
4038 if (bra == OP_BRAMINZERO)
4039 {
4040 /* This is a braminzero fallback path. */
4041 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4042 free_stack(common, 1);
4043 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4044 }
4045
4046 if (framesize < 0)
4047 {
4048 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4049 allocate_stack(common, 1);
4050 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4051 }
4052 else
4053 {
4054 allocate_stack(common, framesize + 2);
4055 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4056 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
4057 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4058 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4059 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4060 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
4061 }
4062
4063 memset(&altfallback, 0, sizeof(fallback_common));
4064 while (1)
4065 {
4066 common->acceptlabel = NULL;
4067 common->accept = NULL;
4068 altfallback.top = NULL;
4069 altfallback.topfallbacks = NULL;
4070
4071 if (*ccbegin == OP_ALT)
4072 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4073
4074 altfallback.cc = ccbegin;
4075 compile_hotpath(common, ccbegin + 1 + LINK_SIZE, cc, &altfallback);
4076 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4077 {
4078 common->acceptlabel = save_acceptlabel;
4079 common->accept = save_accept;
4080 return NULL;
4081 }
4082 common->acceptlabel = LABEL();
4083 if (common->accept != NULL)
4084 set_jumps(common->accept, common->acceptlabel);
4085
4086 /* Reset stack. */
4087 if (framesize < 0)
4088 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4089 else {
4090 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
4091 {
4092 /* We don't need to keep the STR_PTR, only the previous localptr. */
4093 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4094 }
4095 else
4096 {
4097 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4098 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4099 }
4100 }
4101
4102 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
4103 {
4104 /* We know that STR_PTR was stored on the top of the stack. */
4105 if (conditional)
4106 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4107 else if (bra == OP_BRAZERO)
4108 {
4109 if (framesize < 0)
4110 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4111 else
4112 {
4113 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4114 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
4115 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4116 }
4117 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4118 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4119 }
4120 else if (framesize >= 0)
4121 {
4122 /* For OP_BRA and OP_BRAMINZERO. */
4123 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4124 }
4125 }
4126 add_jump(compiler, found, JUMP(SLJIT_JUMP));
4127
4128 compile_fallbackpath(common, altfallback.top);
4129 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4130 {
4131 common->acceptlabel = save_acceptlabel;
4132 common->accept = save_accept;
4133 return NULL;
4134 }
4135 set_jumps(altfallback.topfallbacks, LABEL());
4136
4137 if (*cc != OP_ALT)
4138 break;
4139
4140 ccbegin = cc;
4141 cc += GET(cc, 1);
4142 }
4143 /* None of them matched. */
4144
4145 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
4146 {
4147 /* Assert is failed. */
4148 if (conditional || bra == OP_BRAZERO)
4149 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4150
4151 if (framesize < 0)
4152 {
4153 /* The topmost item should be 0. */
4154 if (bra == OP_BRAZERO)
4155 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4156 else
4157 free_stack(common, 1);
4158 }
4159 else
4160 {
4161 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4162 /* The topmost item should be 0. */
4163 if (bra == OP_BRAZERO)
4164 {
4165 free_stack(common, framesize + 1);
4166 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4167 }
4168 else
4169 free_stack(common, framesize + 2);
4170 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4171 }
4172 jump = JUMP(SLJIT_JUMP);
4173 if (bra != OP_BRAZERO)
4174 add_jump(compiler, target, jump);
4175
4176 /* Assert is successful. */
4177 set_jumps(tmp, LABEL());
4178 if (framesize < 0)
4179 {
4180 /* We know that STR_PTR was stored on the top of the stack. */
4181 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4182 /* Keep the STR_PTR on the top of the stack. */
4183 if (bra == OP_BRAZERO)
4184 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4185 else if (bra == OP_BRAMINZERO)
4186 {
4187 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4188 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4189 }
4190 }
4191 else
4192 {
4193 if (bra == OP_BRA)
4194 {
4195 /* We don't need to keep the STR_PTR, only the previous localptr. */
4196 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4197 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4198 }
4199 else
4200 {
4201 /* We don't need to keep the STR_PTR, only the previous localptr. */
4202 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
4203 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4204 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
4205 }
4206 }
4207
4208 if (bra == OP_BRAZERO)
4209 {
4210 fallback->hotpath = LABEL();
4211 sljit_set_label(jump, fallback->hotpath);
4212 }
4213 else if (bra == OP_BRAMINZERO)
4214 {
4215 JUMPTO(SLJIT_JUMP, fallback->hotpath);
4216 JUMPHERE(brajump);
4217 if (framesize >= 0)
4218 {
4219 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4220 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4221 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4222 }
4223 set_jumps(fallback->common.topfallbacks, LABEL());
4224 }
4225 }
4226 else
4227 {
4228 /* AssertNot is successful. */
4229 if (framesize < 0)
4230 {
4231 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4232 if (bra != OP_BRA)
4233 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4234 else
4235 free_stack(common, 1);
4236 }
4237 else
4238 {
4239 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4240 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4241 /* The topmost item should be 0. */
4242 if (bra != OP_BRA)
4243 {
4244 free_stack(common, framesize + 1);
4245 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4246 }
4247 else
4248 free_stack(common, framesize + 2);
4249 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4250 }
4251
4252 if (bra == OP_BRAZERO)
4253 fallback->hotpath = LABEL();
4254 else if (bra == OP_BRAMINZERO)
4255 {
4256 JUMPTO(SLJIT_JUMP, fallback->hotpath);
4257 JUMPHERE(brajump);
4258 }
4259
4260 if (bra != OP_BRA)
4261 {
4262 SLJIT_ASSERT(found == &fallback->common.topfallbacks);
4263 set_jumps(fallback->common.topfallbacks, LABEL());
4264 fallback->common.topfallbacks = NULL;
4265 }
4266 }
4267
4268 common->acceptlabel = save_acceptlabel;
4269 common->accept = save_accept;
4270 return cc + 1 + LINK_SIZE;
4271 }
4272
4273 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
4274 {
4275 int condition = FALSE;
4276 pcre_uchar *slotA = name_table;
4277 pcre_uchar *slotB;
4278 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4279 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4280 sljit_w no_capture;
4281 int i;
4282
4283 locals += OVECTOR_START / sizeof(sljit_w);
4284 no_capture = locals[1];
4285
4286 for (i = 0; i < name_count; i++)
4287 {
4288 if (GET2(slotA, 0) == refno) break;
4289 slotA += name_entry_size;
4290 }
4291
4292 if (i < name_count)
4293 {
4294 /* Found a name for the number - there can be only one; duplicate names
4295 for different numbers are allowed, but not vice versa. First scan down
4296 for duplicates. */
4297
4298 slotB = slotA;
4299 while (slotB > name_table)
4300 {
4301 slotB -= name_entry_size;
4302 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4303 {
4304 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4305 if (condition) break;
4306 }
4307 else break;
4308 }
4309
4310 /* Scan up for duplicates */
4311 if (!condition)
4312 {
4313 slotB = slotA;
4314 for (i++; i < name_count; i++)
4315 {
4316 slotB += name_entry_size;
4317 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4318 {
4319 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4320 if (condition) break;
4321 }
4322 else break;
4323 }
4324 }
4325 }
4326 return condition;
4327 }
4328
4329 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
4330 {
4331 int condition = FALSE;
4332 pcre_uchar *slotA = name_table;
4333 pcre_uchar *slotB;
4334 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4335 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4336 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
4337 int i;
4338
4339 for (i = 0; i < name_count; i++)
4340 {
4341 if (GET2(slotA, 0) == recno) break;
4342 slotA += name_entry_size;
4343 }
4344
4345 if (i < name_count)
4346 {
4347 /* Found a name for the number - there can be only one; duplicate
4348 names for different numbers are allowed, but not vice versa. First
4349 scan down for duplicates. */
4350
4351 slotB = slotA;
4352 while (slotB > name_table)
4353 {
4354 slotB -= name_entry_size;
4355 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4356 {
4357 condition = GET2(slotB, 0) == group_num;
4358 if (condition) break;
4359 }
4360 else break;
4361 }
4362
4363 /* Scan up for duplicates */
4364 if (!condition)
4365 {
4366 slotB = slotA;
4367 for (i++; i < name_count; i++)
4368 {
4369 slotB += name_entry_size;
4370 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4371 {
4372 condition = GET2(slotB, 0) == group_num;
4373 if (condition) break;
4374 }
4375 else break;
4376 }
4377 }
4378 }
4379 return condition;
4380 }
4381
/*
  Handling bracketed expressions is probably the most complex part.

  Stack layout naming characters:
    S - Push the current STR_PTR
    0 - Push a 0 (NULL)
    A - Push the current STR_PTR. Needed for restoring the STR_PTR
        before the next alternative. Not pushed if there are no alternatives.
    M - Any values pushed by the current alternative. Can be empty, or anything.
    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
    L - Push the previous local (pointed by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times

  The following list shows the regular expression templates, their PCRE byte codes
  and stack layout supported by pcre-sljit.

  (?:)                     OP_BRA     | OP_KET                A M
  ()                       OP_CBRA    | OP_KET                C M
  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*


  Stack layout naming characters:
    A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
    M - Any values pushed by the current alternative. Can be empty, or anything.

  The next list shows the possible content of a bracket:
  (|)     OP_*BRA    | OP_ALT ...         M A
  (?()|)  OP_*COND   | OP_ALT             M A
  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
  (?>|)   OP_ONCE_NC | OP_ALT ...         [stack trace] M A
                                          Or nothing, if trace is unnecessary
*/
4435
4436 static pcre_uchar *compile_bracket_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4437 {
4438 DEFINE_COMPILER;
4439 fallback_common *fallback;
4440 pcre_uchar opcode;
4441 int localptr = 0;
4442 int offset = 0;
4443 int stacksize;
4444 pcre_uchar *ccbegin;
4445 pcre_uchar *hotpath;
4446 pcre_uchar bra = OP_BRA;
4447 pcre_uchar ket;
4448 assert_fallback *assert;
4449 BOOL has_alternatives;
4450 struct sljit_jump *jump;
4451 struct sljit_jump *skip;
4452 struct sljit_label *rmaxlabel = NULL;
4453 struct sljit_jump *braminzerojump = NULL;
4454
4455 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
4456
4457 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4458 {
4459 bra = *cc;
4460 cc++;
4461 opcode = *cc;
4462 }
4463
4464 opcode = *cc;
4465 ccbegin = cc;
4466 hotpath = ccbegin + 1 + LINK_SIZE;
4467
4468 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
4469 {
4470 /* Drop this bracket_fallback. */
4471 parent->top = fallback->prev;
4472 return bracketend(cc);
4473 }
4474
4475 ket = *(bracketend(cc) - 1 - LINK_SIZE);
4476 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
4477 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
4478 cc += GET(cc, 1);
4479
4480 has_alternatives = *cc == OP_ALT;
4481 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
4482 {
4483 has_alternatives = (*hotpath == OP_RREF) ? FALSE : TRUE;
4484 if (*hotpath == OP_NRREF)
4485 {
4486 stacksize = GET2(hotpath, 1);
4487 if (common->currententry == NULL || stacksize == RREF_ANY)
4488 has_alternatives = FALSE;
4489 else if (common->currententry->start == 0)
4490 has_alternatives = stacksize != 0;
4491 else
4492 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4493 }
4494 }
4495
4496 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
4497 opcode = OP_SCOND;
4498 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
4499 opcode = OP_ONCE;
4500
4501 if (opcode == OP_CBRA || opcode == OP_SCBRA)
4502 {
4503 /* Capturing brackets has a pre-allocated space. */
4504 offset = GET2(ccbegin, 1 + LINK_SIZE);
4505 localptr = OVECTOR_PRIV(offset);
4506 offset <<= 1;
4507 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4508 hotpath += IMM2_SIZE;
4509 }
4510 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
4511 {
4512 /* Other brackets simply allocate the next entry. */
4513 localptr = PRIV_DATA(ccbegin);
4514 SLJIT_ASSERT(localptr != 0);
4515 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4516 if (opcode == OP_ONCE)
4517 FALLBACK_AS(bracket_fallback)->u.framesize = get_framesize(common, ccbegin, FALSE);
4518 }
4519
4520 /* Instructions before the first alternative. */
4521 stacksize = 0;
4522 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4523 stacksize++;
4524 if (bra == OP_BRAZERO)
4525 stacksize++;
4526
4527 if (stacksize > 0)
4528 allocate_stack(common, stacksize);
4529
4530 stacksize = 0;
4531 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4532 {
4533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4534 stacksize++;
4535 }
4536
4537 if (bra == OP_BRAZERO)
4538 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4539
4540 if (bra == OP_BRAMINZERO)
4541 {
4542 /* This is a fallback path! (Since the hot-path of OP_BRAMINZERO matches to the empty string) */
4543 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4544 if (ket != OP_KETRMIN)
4545 {
4546 free_stack(common, 1);
4547 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4548 }
4549 else
4550 {
4551 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4552 {
4553 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4554 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4555 /* Nothing stored during the first run. */
4556 skip = JUMP(SLJIT_JUMP);
4557 JUMPHERE(jump);
4558 /* Checking zero-length iteration. */
4559 if (opcode != OP_ONCE || FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4560 {
4561 /* When we come from outside, localptr contains the previous STR_PTR. */
4562 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4563 }
4564 else
4565 {
4566 /* Except when the whole stack frame must be saved. */
4567 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4568 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (FALLBACK_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w));
4569 }
4570 JUMPHERE(skip);
4571 }
4572 else
4573 {
4574 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4575 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4576 JUMPHERE(jump);
4577 }
4578 }
4579 }
4580
4581 if (ket == OP_KETRMIN)
4582 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4583
4584 if (ket == OP_KETRMAX)
4585 {
4586 rmaxlabel = LABEL();
4587 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
4588 FALLBACK_AS(bracket_fallback)->althotpath = rmaxlabel;
4589 }
4590
4591 /* Handling capturing brackets and alternatives. */
4592 if (opcode == OP_ONCE)
4593 {
4594 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4595 {
4596 /* Neither capturing brackets nor recursions are found in the block. */
4597 if (ket == OP_KETRMIN)
4598 {
4599 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4600 allocate_stack(common, 2);
4601 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4602 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4603 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4604 }
4605 else if (ket == OP_KETRMAX || has_alternatives)
4606 {
4607 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4608 allocate_stack(common, 1);
4609 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4610 }
4611 else
4612 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4613 }
4614 else
4615 {
4616 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
4617 {
4618 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 2);
4619 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4620 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize + 1));
4621 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4622 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4623 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4624 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize + 1, 2, FALSE);
4625 }
4626 else
4627 {
4628 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 1);
4629 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4630 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize));
4631 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4632 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4633 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize, 1, FALSE);
4634 }
4635 }
4636 }
4637 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
4638 {
4639 /* Saving the previous values. */
4640 allocate_stack(common, 3);
4641 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4642 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4643 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4644 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4645 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4646 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4647 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
4648 }
4649 else if (opcode == OP_SBRA || opcode == OP_SCOND)
4650 {
4651 /* Saving the previous value. */
4652 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4653 allocate_stack(common, 1);
4654 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4655 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4656 }
4657 else if (has_alternatives)
4658 {
4659 /* Pushing the starting string pointer. */
4660 allocate_stack(common, 1);
4661 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4662 }
4663
4664 /* Generating code for the first alternative. */
4665 if (opcode == OP_COND || opcode == OP_SCOND)
4666 {
4667 if (*hotpath == OP_CREF)
4668 {
4669 SLJIT_ASSERT(has_alternatives);
4670 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed),
4671 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4672 hotpath += 1 + IMM2_SIZE;
4673 }
4674 else if (*hotpath == OP_NCREF)
4675 {
4676 SLJIT_ASSERT(has_alternatives);
4677 stacksize = GET2(hotpath, 1);
4678 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4679
4680 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4681 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4682 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4683 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4684 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4685 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4686 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
4687 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4688 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4689
4690 JUMPHERE(jump);
4691 hotpath += 1 + IMM2_SIZE;
4692 }
4693 else if (*hotpath == OP_RREF || *hotpath == OP_NRREF)
4694 {
4695 /* Never has other case. */
4696 FALLBACK_AS(bracket_fallback)->u.condfailed = NULL;
4697
4698 stacksize = GET2(hotpath, 1);
4699 if (common->currententry == NULL)
4700 stacksize = 0;
4701 else if (stacksize == RREF_ANY)
4702 stacksize = 1;
4703 else if (common->currententry->start == 0)
4704 stacksize = stacksize == 0;
4705 else
4706 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4707
4708 if (*hotpath == OP_RREF || stacksize || common->currententry == NULL)
4709 {
4710 SLJIT_ASSERT(!has_alternatives);
4711 if (stacksize != 0)
4712 hotpath += 1 + IMM2_SIZE;
4713 else
4714 {
4715 if (*cc == OP_ALT)
4716 {
4717 hotpath = cc + 1 + LINK_SIZE;
4718 cc += GET(cc, 1);
4719 }
4720 else
4721 hotpath = cc;
4722 }
4723 }
4724 else
4725 {
4726 SLJIT_ASSERT(has_alternatives);
4727
4728 stacksize = GET2(hotpath, 1);
4729 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4730 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4731 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4732 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
4733 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4734 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4735 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4736 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
4737 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4738 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4739 hotpath += 1 + IMM2_SIZE;
4740 }
4741 }
4742 else
4743 {
4744 SLJIT_ASSERT(has_alternatives && *hotpath >= OP_ASSERT && *hotpath <= OP_ASSERTBACK_NOT);
4745 /* Similar code as PUSH_FALLBACK macro. */
4746 assert = sljit_alloc_memory(compiler, sizeof(assert_fallback));
4747 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4748 return NULL;
4749 memset(assert, 0, sizeof(assert_fallback));
4750 assert->common.cc = hotpath;
4751 FALLBACK_AS(bracket_fallback)->u.assert = assert;
4752 hotpath = compile_assert_hotpath(common, hotpath, assert, TRUE);
4753 }
4754 }
4755
4756 compile_hotpath(common, hotpath, cc, fallback);
4757 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4758 return NULL;
4759
4760 if (opcode == OP_ONCE)
4761 {
4762 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4763 {
4764 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4765 /* TMP2 which is set here used by OP_KETRMAX below. */
4766 if (ket == OP_KETRMAX)
4767 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
4768 else if (ket == OP_KETRMIN)
4769 {
4770 /* Move the STR_PTR to the localptr. */
4771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
4772 }
4773 }
4774 else
4775 {
4776 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
4777 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (FALLBACK_AS(bracket_fallback)->u.framesize + stacksize) * sizeof(sljit_w));
4778 if (ket == OP_KETRMAX)
4779 {
4780 /* TMP2 which is set here used by OP_KETRMAX below. */
4781 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4782 }
4783 }
4784 }
4785
4786 stacksize = 0;
4787 if (ket != OP_KET || bra != OP_BRA)
4788 stacksize++;
4789 if (has_alternatives && opcode != OP_ONCE)
4790 stacksize++;
4791
4792 if (stacksize > 0)
4793 allocate_stack(common, stacksize);
4794
4795 stacksize = 0;
4796 if (ket != OP_KET)
4797 {
4798 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4799 stacksize++;
4800 }
4801 else if (bra != OP_BRA)
4802 {
4803 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4804 stacksize++;
4805 }
4806
4807 if (has_alternatives)
4808 {
4809 if (opcode != OP_ONCE)
4810 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4811 if (ket != OP_KETRMAX)
4812 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4813 }
4814
4815 /* Must be after the hotpath label. */
4816 if (offset != 0)
4817 {
4818 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4819 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4820 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
4821 }
4822
4823 if (ket == OP_KETRMAX)
4824 {
4825 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4826 {
4827 if (has_alternatives)
4828 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4829 /* Checking zero-length iteration. */
4830 if (opcode != OP_ONCE)
4831 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
4832 else
4833 /* TMP2 must contain the starting STR_PTR. */
4834 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
4835 }
4836 else
4837 JUMPTO(SLJIT_JUMP, rmaxlabel);
4838 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4839 }
4840
4841 if (bra == OP_BRAZERO)
4842 FALLBACK_AS(bracket_fallback)->zerohotpath = LABEL();
4843
4844 if (bra == OP_BRAMINZERO)
4845 {
4846 /* This is a fallback path! (From the viewpoint of OP_BRAMINZERO) */
4847 JUMPTO(SLJIT_JUMP, ((braminzero_fallback *)parent)->hotpath);
4848 if (braminzerojump != NULL)
4849 {
4850 JUMPHERE(braminzerojump);
4851 /* We need to release the end pointer to perform the
4852 fallback for the zero-length iteration. When
4853 framesize is < 0, OP_ONCE will do the release itself. */
4854 if (opcode == OP_ONCE && FALLBACK_AS(bracket_fallback)->u.framesize >= 0)
4855 {
4856 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4857 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4858 }
4859 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
4860 free_stack(common, 1);
4861 }
4862 /* Continue to the normal fallback. */
4863 }
4864
4865 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
4866 decrease_call_count(common);
4867
4868 /* Skip the other alternatives. */
4869 while (*cc == OP_ALT)
4870 cc += GET(cc, 1);
4871 cc += 1 + LINK_SIZE;
4872 return cc;
4873 }
4874
4875 static pcre_uchar *compile_bracketpos_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4876 {
4877 DEFINE_COMPILER;
4878 fallback_common *fallback;
4879 pcre_uchar opcode;
4880 int localptr;
4881 int cbraprivptr = 0;
4882 int framesize;
4883 int stacksize;
4884 int offset = 0;
4885 BOOL zero = FALSE;
4886 pcre_uchar *ccbegin = NULL;
4887 int stack;
4888 struct sljit_label *loop = NULL;
4889 struct jump_list *emptymatch = NULL;
4890
4891 PUSH_FALLBACK(sizeof(bracketpos_fallback), cc, NULL);
4892 if (*cc == OP_BRAPOSZERO)
4893 {
4894 zero = TRUE;
4895 cc++;
4896 }
4897
4898 opcode = *cc;
4899 localptr = PRIV_DATA(cc);
4900 SLJIT_ASSERT(localptr != 0);
4901 FALLBACK_AS(bracketpos_fallback)->localptr = localptr;
4902 switch(opcode)
4903 {
4904 case OP_BRAPOS:
4905 case OP_SBRAPOS:
4906 ccbegin = cc + 1 + LINK_SIZE;
4907 break;
4908
4909 case OP_CBRAPOS:
4910 case OP_SCBRAPOS:
4911 offset = GET2(cc, 1 + LINK_SIZE);
4912 cbraprivptr = OVECTOR_PRIV(offset);
4913 offset <<= 1;
4914 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
4915 break;
4916
4917 default:
4918 SLJIT_ASSERT_STOP();
4919 break;
4920 }
4921
4922 framesize = get_framesize(common, cc, FALSE);
4923 FALLBACK_AS(bracketpos_fallback)->framesize = framesize;
4924 if (framesize < 0)
4925 {
4926 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
4927 if (!zero)
4928 stacksize++;
4929 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4930 allocate_stack(common, stacksize);
4931 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4932
4933 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4934 {
4935 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4936 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4937 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4938 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4939 }
4940 else
4941 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4942
4943 if (!zero)
4944 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
4945 }
4946 else
4947 {
4948 stacksize = framesize + 1;
4949 if (!zero)
4950 stacksize++;
4951 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4952 stacksize++;
4953 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4954 allocate_stack(common, stacksize);
4955
4956 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4957 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
4958 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4959 stack = 0;
4960 if (!zero)
4961 {
4962 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
4963 stack++;
4964 }
4965 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
4966 {
4967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
4968 stack++;
4969 }
4970 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
4971 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
4972 }
4973
4974 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4975 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4976
4977 loop = LABEL();
4978 while (*cc != OP_KETRPOS)
4979 {
4980 fallback->top = NULL;
4981 fallback->topfallbacks = NULL;
4982 cc += GET(cc, 1);
4983
4984 compile_hotpath(common, ccbegin, cc, fallback);
4985 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4986 return NULL;
4987
4988 if (framesize < 0)
4989 {
4990 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4991
4992 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4993 {
4994 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
4995 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4996 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
4997 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
4998 }
4999 else
5000 {
5001 if (opcode == OP_SBRAPOS)
5002 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5003 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5004 }
5005
5006 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5007 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5008
5009 if (!zero)
5010 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5011 }
5012 else
5013 {
5014 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5015 {
5016 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
5017 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5018 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5019 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5020 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5021 }
5022 else
5023 {
5024 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5025 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
5026 if (opcode == OP_SBRAPOS)
5027 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5028 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
5029 }
5030
5031 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5032 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5033
5034 if (!zero)
5035 {
5036 if (framesize < 0)
5037 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5038 else
5039 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5040 }
5041 }
5042 JUMPTO(SLJIT_JUMP, loop);
5043 flush_stubs(common);
5044
5045 compile_fallbackpath(common, fallback->top);
5046 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5047 return NULL;
5048 set_jumps(fallback->topfallbacks, LABEL());
5049
5050 if (framesize < 0)
5051 {
5052 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5053 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5054 else
5055 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5056 }
5057 else
5058 {
5059 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5060 {
5061 /* Last alternative. */
5062 if (*cc == OP_KETRPOS)
5063 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5064 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5065 }
5066 else
5067 {
5068 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5069 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5070 }
5071 }
5072
5073 if (*cc == OP_KETRPOS)
5074 break;
5075 ccbegin = cc + 1 + LINK_SIZE;
5076 }
5077
5078 fallback->topfallbacks = NULL;
5079 if (!zero)
5080 {
5081 if (framesize < 0)
5082 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
5083 else /* TMP2 is set to [localptr] above. */
5084 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
5085 }
5086
5087 /* None of them matched. */
5088 set_jumps(emptymatch, LABEL());
5089 decrease_call_count(common);
5090 return cc + 1 + LINK_SIZE;
5091 }
5092
5093 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
5094 {
5095 int class_len;
5096
5097 *opcode = *cc;
5098 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
5099 {
5100 cc++;
5101 *type = OP_CHAR;
5102 }
5103 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
5104 {
5105 cc++;
5106 *type = OP_CHARI;
5107 *opcode -= OP_STARI - OP_STAR;
5108 }
5109 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
5110 {
5111 cc++;
5112 *type = OP_NOT;
5113 *opcode -= OP_NOTSTAR - OP_STAR;
5114 }
5115 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
5116 {
5117 cc++;
5118 *type = OP_NOTI;
5119 *opcode -= OP_NOTSTARI - OP_STAR;
5120 }
5121 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
5122 {
5123 cc++;
5124 *opcode -= OP_TYPESTAR - OP_STAR;
5125 *type = 0;
5126 }
5127 else
5128 {
5129 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
5130 *type = *opcode;
5131 cc++;
5132 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
5133 *opcode = cc[class_len - 1];
5134 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
5135 {
5136 *opcode -= OP_CRSTAR - OP_STAR;
5137 if (end != NULL)
5138 *end = cc + class_len;
5139 }
5140 else
5141 {
5142 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
5143 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
5144 *arg2 = GET2(cc, class_len);
5145
5146 if (*arg2 == 0)
5147 {
5148 SLJIT_ASSERT(*arg1 != 0);
5149 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
5150 }
5151 if (*arg1 == *arg2)
5152 *opcode = OP_EXACT;
5153
5154 if (end != NULL)
5155 *end = cc + class_len + 2 * IMM2_SIZE;
5156 }
5157 return cc;
5158 }
5159
5160 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
5161 {
5162 *arg1 = GET2(cc, 0);
5163 cc += IMM2_SIZE;
5164 }
5165
5166 if (*type == 0)
5167 {
5168 *type = *cc;
5169 if (end != NULL)
5170 *end = next_opcode(common, cc);
5171 cc++;
5172 return cc;
5173 }
5174
5175 if (end != NULL)
5176 {
5177 *end = cc + 1;
5178 #ifdef SUPPORT_UTF
5179 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
5180 #endif
5181 }
5182 return cc;
5183 }
5184
5185 static pcre_uchar *compile_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
5186 {
5187 DEFINE_COMPILER;
5188 fallback_common *fallback;
5189 pcre_uchar opcode;
5190 pcre_uchar type;
5191 int arg1 = -1, arg2 = -1;
5192 pcre_uchar* end;
5193 jump_list *nomatch = NULL;
5194 struct sljit_jump *jump = NULL;
5195 struct sljit_label *label;
5196
5197 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
5198
5199 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
5200
5201 switch(opcode)
5202 {
5203 case OP_STAR:
5204 case OP_PLUS:
5205 case OP_UPTO:
5206 case OP_CRRANGE:
5207 if (type == OP_ANYNL || type == OP_EXTUNI)
5208 {
5209 if (opcode == OP_STAR || opcode == OP_UPTO)
5210 {
5211 allocate_stack(common, 2);
5212 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5213 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5214 }
5215 else
5216 {
5217 allocate_stack(common, 1);
5218 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5219 }
5220 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5221 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5222
5223 label = LABEL();
5224 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5225 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5226 {
5227 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5228 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5229 if (opcode == OP_CRRANGE && arg2 > 0)
5230 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
5231 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
5232 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
5233 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5234 }
5235
5236 allocate_stack(common, 1);
5237 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5238 JUMPTO(SLJIT_JUMP, label);
5239 if (jump != NULL)
5240 JUMPHERE(jump);
5241 }
5242 else
5243 {
5244 allocate_stack(common, 2);
5245 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5247 label = LABEL();
5248 compile_char1_hotpath(common, type, cc, &nomatch);
5249 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5250 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
5251 {
5252 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5253 JUMPTO(SLJIT_JUMP, label);
5254 }
5255 else
5256 {
5257 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5258 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5259 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5260 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5261 }
5262 set_jumps(nomatch, LABEL());
5263 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5264 add_jump(compiler, &fallback->topfallbacks,
5265 CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, opcode == OP_PLUS ? 2 : arg2 + 1));
5266 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5267 }
5268 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5269 break;
5270
5271 case OP_MINSTAR:
5272 case OP_MINPLUS:
5273 allocate_stack(common, 1);
5274 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5275 if (opcode == OP_MINPLUS)
5276 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5277 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5278 break;
5279
5280 case OP_MINUPTO:
5281 case OP_CRMINRANGE:
5282 allocate_stack(common, 2);
5283 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5284 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5285 if (opcode == OP_CRMINRANGE)
5286 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5287 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5288 break;
5289
5290 case OP_QUERY:
5291 case OP_MINQUERY:
5292 allocate_stack(common, 1);
5293 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5294 if (opcode == OP_QUERY)
5295 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5296 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5297 break;
5298
5299 case OP_EXACT:
5300 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5301 label = LABEL();
5302 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5304 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5305 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5306 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5307 break;
5308
5309 case OP_POSSTAR:
5310 case OP_POSPLUS:
5311 case OP_POSUPTO:
5312 if (opcode != OP_POSSTAR)
5313 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5314 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5315 label = LABEL();
5316 compile_char1_hotpath(common, type, cc, &nomatch);
5317 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5318 if (opcode != OP_POSUPTO)
5319 {
5320 if (opcode == OP_POSPLUS)
5321 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
5322 JUMPTO(SLJIT_JUMP, label);
5323 }
5324 else
5325 {
5326 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5327 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5328 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5329 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5330 }
5331 set_jumps(nomatch, LABEL());
5332 if (opcode == OP_POSPLUS)
5333 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
5334 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5335 break;
5336
5337 case OP_POSQUERY:
5338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5339 compile_char1_hotpath(common, type, cc, &nomatch);
5340 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5341 set_jumps(nomatch, LABEL());
5342 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5343 break;
5344
5345 default:
5346 SLJIT_ASSERT_STOP();
5347 break;
5348 }
5349
5350 decrease_call_count(common);
5351 return end;
5352 }
5353
5354 static SLJIT_INLINE pcre_uchar *compile_fail_accept_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
5355 {
5356 DEFINE_COMPILER;
5357 fallback_common *fallback;
5358
5359 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
5360
5361 if (*cc == OP_FAIL)
5362 {
5363 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5364 return cc + 1;
5365 }
5366
5367 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
5368 {
5369 /* No need to check notempty conditions. */
5370 if (common->acceptlabel == NULL)
5371 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
5372 else
5373 JUMPTO(SLJIT_JUMP, common->acceptlabel);
5374 return cc + 1;
5375 }
5376
5377 if (common->acceptlabel == NULL)
5378 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
5379 else
5380 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
5381 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5382 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
5383 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5384 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
5385 if (common->acceptlabel == NULL)
5386 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5387 else
5388 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
5389 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5390 if (common->acceptlabel == NULL)
5391 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
5392 else
5393 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
5394 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5395 return cc + 1;
5396 }
5397
5398 static SLJIT_INLINE pcre_uchar *compile_close_hotpath(compiler_common *common, pcre_uchar *cc)
5399 {
5400 DEFINE_COMPILER;
5401 int offset = GET2(cc, 1);
5402
5403 /* Data will be discarded anyway... */
5404 if (common->currententry != NULL)
5405 return cc + 1 + IMM2_SIZE;
5406
5407 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
5408 offset <<= 1;
5409 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5410 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5411 return cc + 1 + IMM2_SIZE;
5412 }
5413
5414 static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent)
5415 {
5416 DEFINE_COMPILER;
5417 fallback_common *fallback;
5418
5419 while (cc < ccend)
5420 {
5421 switch(*cc)
5422 {
5423 case OP_SOD:
5424 case OP_SOM:
5425 case OP_NOT_WORD_BOUNDARY:
5426 case OP_WORD_BOUNDARY:
5427 case OP_NOT_DIGIT:
5428 case OP_DIGIT:
5429 case OP_NOT_WHITESPACE:
5430 case OP_WHITESPACE:
5431 case OP_NOT_WORDCHAR:
5432 case OP_WORDCHAR:
5433 case OP_ANY:
5434 case OP_ALLANY:
5435 case OP_ANYBYTE:
5436 case OP_NOTPROP:
5437 case OP_PROP:
5438 case OP_ANYNL:
5439 case OP_NOT_HSPACE:
5440 case OP_HSPACE:
5441 case OP_NOT_VSPACE:
5442 case OP_VSPACE:
5443 case OP_EXTUNI:
5444 case OP_EODN:
5445 case OP_EOD:
5446 case OP_CIRC:
5447 case OP_CIRCM:
5448 case OP_DOLL:
5449 case OP_DOLLM:
5450 case OP_NOT:
5451 case OP_NOTI:
5452 case OP_REVERSE:
5453 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5454 break;
5455
5456 case OP_SET_SOM:
5457 PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
5458 allocate_stack(common, 1);
5459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5460 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
5461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5462 cc++;
5463 break;
5464
5465 case OP_CHAR:
5466 case OP_CHARI:
5467 if (common->mode == JIT_COMPILE)
5468 cc = compile_charn_hotpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5469 else
5470 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5471 break;
5472
5473 case OP_STAR:
5474 case OP_MINSTAR:
5475 case OP_PLUS:
5476 case OP_MINPLUS:
5477 case OP_QUERY:
5478 case OP_MINQUERY:
5479 case OP_UPTO:
5480 case OP_MINUPTO:
5481 case OP_EXACT:
5482 case OP_POSSTAR:
5483 case OP_POSPLUS:
5484 case OP_POSQUERY:
5485 case OP_POSUPTO:
5486 case OP_STARI:
5487 case OP_MINSTARI:
5488 case OP_PLUSI:
5489 case OP_MINPLUSI:
5490 case OP_QUERYI:
5491 case OP_MINQUERYI:
5492 case OP_UPTOI:
5493 case OP_MINUPTOI:
5494 case OP_EXACTI:
5495 case OP_POSSTARI:
5496 case OP_POSPLUSI:
5497 case OP_POSQUERYI:
5498 case OP_POSUPTOI:
5499 case OP_NOTSTAR:
5500 case OP_NOTMINSTAR:
5501 case OP_NOTPLUS:
5502 case OP_NOTMINPLUS:
5503 case OP_NOTQUERY:
5504 case OP_NOTMINQUERY:
5505 case OP_NOTUPTO:
5506 case OP_NOTMINUPTO:
5507 case OP_NOTEXACT:
5508 case OP_NOTPOSSTAR:
5509 case OP_NOTPOSPLUS:
5510 case OP_NOTPOSQUERY:
5511 case OP_NOTPOSUPTO:
5512 case OP_NOTSTARI:
5513 case OP_NOTMINSTARI:
5514 case OP_NOTPLUSI:
5515 case OP_NOTMINPLUSI:
5516 case OP_NOTQUERYI:
5517 case OP_NOTMINQUERYI:
5518 case OP_NOTUPTOI:
5519 case OP_NOTMINUPTOI:
5520 case OP_NOTEXACTI:
5521 case OP_NOTPOSSTARI:
5522 case OP_NOTPOSPLUSI:
5523 case OP_NOTPOSQUERYI:
5524 case OP_NOTPOSUPTOI:
5525 case OP_TYPESTAR:
5526 case OP_TYPEMINSTAR:
5527 case OP_TYPEPLUS:
5528 case OP_TYPEMINPLUS:
5529 case OP_TYPEQUERY:
5530 case OP_TYPEMINQUERY:
5531 case OP_TYPEUPTO:
5532 case OP_TYPEMINUPTO:
5533 case OP_TYPEEXACT:
5534 case OP_TYPEPOSSTAR:
5535 case OP_TYPEPOSPLUS:
5536 case OP_TYPEPOSQUERY:
5537 case OP_TYPEPOSUPTO:
5538 cc = compile_iterator_hotpath(common, cc, parent);
5539 break;
5540
5541 case OP_CLASS:
5542 case OP_NCLASS:
5543 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
5544 cc = compile_iterator_hotpath(common, cc, parent);
5545 else
5546 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5547 break;
5548
5549 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
5550 case OP_XCLASS:
5551 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
5552 cc = compile_iterator_hotpath(common, cc, parent);
5553 else
5554 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5555 break;
5556 #endif
5557
5558 case OP_REF:
5559 case OP_REFI:
5560 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
5561 cc = compile_ref_iterator_hotpath(common, cc, parent);
5562 else
5563 cc = compile_ref_hotpath(common, cc, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE);
5564 break;
5565
5566 case OP_RECURSE:
5567 cc = compile_recurse_hotpath(common, cc, parent);
5568 break;
5569
5570 case OP_ASSERT:
5571 case OP_ASSERT_NOT:
5572 case OP_ASSERTBACK:
5573 case OP_ASSERTBACK_NOT:
5574 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5575 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5576 break;
5577
5578 case OP_BRAMINZERO:
5579 PUSH_FALLBACK_NOVALUE(sizeof(braminzero_fallback), cc);
5580 cc = bracketend(cc + 1);
5581 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
5582 {
5583 allocate_stack(common, 1);
5584 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5585 }
5586 else
5587 {
5588 allocate_stack(common, 2);
5589 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5590 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
5591 }
5592 FALLBACK_AS(braminzero_fallback)->hotpath = LABEL();
5593 if (cc[1] > OP_ASSERTBACK_NOT)
5594 decrease_call_count(common);
5595 break;
5596
5597 case OP_ONCE:
5598 case OP_ONCE_NC:
5599 case OP_BRA:
5600 case OP_CBRA:
5601 case OP_COND:
5602 case OP_SBRA:
5603 case OP_SCBRA:
5604 case OP_SCOND:
5605 cc = compile_bracket_hotpath(common, cc, parent);
5606 break;
5607
5608 case OP_BRAZERO:
5609 if (cc[1] > OP_ASSERTBACK_NOT)
5610 cc = compile_bracket_hotpath(common, cc, parent);
5611 else
5612 {
5613 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5614 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5615 }
5616 break;
5617
5618 case OP_BRAPOS:
5619 case OP_CBRAPOS:
5620 case OP_SBRAPOS:
5621 case OP_SCBRAPOS:
5622 case OP_BRAPOSZERO:
5623 cc = compile_bracketpos_hotpath(common, cc, parent);
5624 break;
5625
5626 case OP_FAIL:
5627 case OP_ACCEPT:
5628 case OP_ASSERT_ACCEPT:
5629 cc = compile_fail_accept_hotpath(common, cc, parent);
5630 break;
5631
5632 case OP_CLOSE:
5633 cc = compile_close_hotpath(common, cc);
5634 break;
5635
5636 case OP_SKIPZERO:
5637 cc = bracketend(cc + 1);
5638 break;
5639
5640 default:
5641 SLJIT_ASSERT_STOP();
5642 return;
5643 }
5644 if (cc == NULL)
5645 return;
5646 }
5647 SLJIT_ASSERT(cc == ccend);
5648 }
5649
/* The helper macros of the hot path compilers are dropped here; the
fallback path compilers below use the two macros defined next. */
#undef PUSH_FALLBACK
#undef PUSH_FALLBACK_NOVALUE
#undef FALLBACK_AS

/* Compiles the fallback path of "current" and returns from the enclosing
function (without a value) when the sljit compiler reports an error.
Expects "common" and "compiler" to be visible at the point of use. */
#define COMPILE_FALLBACKPATH(current) \
  do \
    { \
    compile_fallbackpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Casts the variable named "current" to a pointer to the given type. */
#define CURRENT_AS(type) ((type *)current)
5664
5665 static void compile_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5666 {
5667 DEFINE_COMPILER;
5668 pcre_uchar *cc = current->cc;
5669 pcre_uchar opcode;
5670 pcre_uchar type;
5671 int arg1 = -1, arg2 = -1;
5672 struct sljit_label *label = NULL;
5673 struct sljit_jump *jump = NULL;
5674
5675 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
5676
5677 switch(opcode)
5678 {
5679 case OP_STAR:
5680 case OP_PLUS:
5681 case OP_UPTO:
5682 case OP_CRRANGE:
5683 if (type == OP_ANYNL || type == OP_EXTUNI)
5684 {
5685 set_jumps(current->topfallbacks, LABEL());
5686 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5687 free_stack(common, 1);
5688 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5689 }
5690 else
5691 {
5692 if (opcode == OP_STAR || opcode == OP_UPTO)
5693 arg2 = 0;
5694 else if (opcode == OP_PLUS)
5695 arg2 = 1;
5696 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1);
5697 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5698 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5699 skip_char_back(common);
5700 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5701 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5702 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5703 set_jumps(current->topfallbacks, LABEL());
5704 JUMPHERE(jump);
5705 free_stack(common, 2);
5706 }
5707 break;
5708
5709 case OP_MINSTAR:
5710 case OP_MINPLUS:
5711 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5712 if (opcode == OP_MINPLUS)
5713 {
5714 set_jumps(current->topfallbacks, LABEL());
5715 current->topfallbacks = NULL;
5716 }
5717 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5718 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5719 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5720 set_jumps(current->topfallbacks, LABEL());
5721 free_stack(common, 1);
5722 break;
5723
5724 case OP_MINUPTO:
5725 case OP_CRMINRANGE:
5726 if (opcode == OP_CRMINRANGE)
5727 {
5728 set_jumps(current->topfallbacks, LABEL());
5729 current->topfallbacks = NULL;
5730 label = LABEL();
5731 }
5732 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5733 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5734
5735 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5736 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5737 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5739
5740 if (opcode == OP_CRMINRANGE)
5741 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
5742
5743 if (opcode == OP_CRMINRANGE && arg1 == 0)
5744 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5745 else
5746 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_fallback)->hotpath);
5747
5748 set_jumps(current->topfallbacks, LABEL());
5749 free_stack(common, 2);
5750 break;
5751
5752 case OP_QUERY:
5753 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5754 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5755 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5756 jump = JUMP(SLJIT_JUMP);
5757 set_jumps(current->topfallbacks, LABEL());
5758 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5759 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5760 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5761 JUMPHERE(jump);
5762 free_stack(common, 1);
5763 break;
5764
5765 case OP_MINQUERY:
5766 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5767 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5768 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5769 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5770 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5771 set_jumps(current->topfallbacks, LABEL());
5772 JUMPHERE(jump);
5773 free_stack(common, 1);
5774 break;
5775
5776 case OP_EXACT:
5777 case OP_POSPLUS:
5778 set_jumps(current->topfallbacks, LABEL());
5779 break;
5780
5781 case OP_POSSTAR:
5782 case OP_POSQUERY:
5783 case OP_POSUPTO:
5784 break;
5785
5786 default:
5787 SLJIT_ASSERT_STOP();
5788 break;
5789 }
5790 }
5791
5792 static void compile_ref_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5793 {
5794 DEFINE_COMPILER;
5795 pcre_uchar *cc = current->cc;
5796 pcre_uchar type;
5797
5798 type = cc[1 + IMM2_SIZE];
5799 if ((type & 0x1) == 0)
5800 {
5801 set_jumps(current->topfallbacks, LABEL());
5802 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5803 free_stack(common, 1);
5804 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5805 return;
5806 }
5807
5808 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5809 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5810 set_jumps(current->topfallbacks, LABEL());
5811 free_stack(common, 2);
5812 }
5813
5814 static void compile_recurse_fallbackpath(compiler_common *common, struct fallback_common *current)
5815 {
5816 DEFINE_COMPILER;
5817
5818 set_jumps(current->topfallbacks, LABEL());
5819 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5820 free_stack(common, 1);
5821 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
5822 }
5823
5824 static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current)
5825 {
5826 DEFINE_COMPILER;
5827 pcre_uchar *cc = current->cc;
5828 pcre_uchar bra = OP_BRA;
5829 struct sljit_jump *brajump = NULL;
5830
5831 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
5832 if (*cc == OP_BRAZERO)
5833 {
5834 bra = *cc;
5835 cc++;
5836 }
5837
5838 if (bra == OP_BRAZERO)
5839 {
5840 SLJIT_ASSERT(current->topfallbacks == NULL);
5841 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5842 }
5843
5844 if (CURRENT_AS(assert_fallback)->framesize < 0)
5845 {
5846 set_jumps(current->topfallbacks, LABEL());
5847
5848 if (bra == OP_BRAZERO)
5849 {
5850 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5851 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5852 free_stack(common, 1);
5853 }
5854 return;
5855 }
5856
5857 if (bra == OP_BRAZERO)
5858 {
5859 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
5860 {
5861 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5862 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5863 free_stack(common, 1);
5864 return;
5865 }
5866 free_stack(common, 1);
5867 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5868 }
5869
5870 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
5871 {
5872 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr);
5873 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5874 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_fallback)->framesize * sizeof(sljit_w));
5875
5876 set_jumps(current->topfallbacks, LABEL());
5877 }
5878 else
5879 set_jumps(current->topfallbacks, LABEL());
5880
5881 if (bra == OP_BRAZERO)
5882 {
5883 /* We know there is enough place on the stack. */
5884 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5885 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5886 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_fallback)->hotpath);
5887 JUMPHERE(brajump);
5888 }
5889 }
5890
5891 static void compile_bracket_fallbackpath(compiler_common *common, struct fallback_common *current)
5892 {
5893 DEFINE_COMPILER;
5894 int opcode;
5895 int offset = 0;
5896 int localptr = CURRENT_AS(bracket_fallback)->localptr;
5897 int stacksize;
5898 int count;
5899 pcre_uchar *cc = current->cc;
5900 pcre_uchar *ccbegin;
5901 pcre_uchar *ccprev;
5902 jump_list *jumplist = NULL;
5903 jump_list *jumplistitem = NULL;
5904 pcre_uchar bra = OP_BRA;
5905 pcre_uchar ket;
5906 assert_fallback *assert;
5907 BOOL has_alternatives;
5908 struct sljit_jump *brazero = NULL;
5909 struct sljit_jump *once = NULL;
5910 struct sljit_jump *cond = NULL;
5911 struct sljit_label *rminlabel = NULL;
5912
5913 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5914 {
5915 bra = *cc;
5916 cc++;
5917 }
5918
5919 opcode = *cc;
5920 ccbegin = cc;
5921 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
5922 cc += GET(cc, 1);
5923 has_alternatives = *cc == OP_ALT;
5924 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5925 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_fallback)->u.condfailed != NULL;
5926 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5927 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
5928 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5929 opcode = OP_SCOND;
5930 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5931 opcode = OP_ONCE;
5932
5933 if (ket == OP_KETRMAX)
5934 {
5935 if (bra != OP_BRAZERO)
5936 free_stack(common, 1);
5937 else
5938 {
5939 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5940 free_stack(common, 1);
5941 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5942 }
5943 }
5944 else if (ket == OP_KETRMIN)
5945 {
5946 if (bra != OP_BRAMINZERO)
5947 {
5948 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5949 if (opcode >= OP_SBRA || opcode == OP_ONCE)
5950 {
5951 /* Checking zero-length iteration. */
5952 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize < 0)
5953 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_fallback)->recursivehotpath);
5954 else
5955 {
5956 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5957 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_fallback)->recursivehotpath);
5958 }
5959 if (opcode != OP_ONCE)
5960 free_stack(common, 1);
5961 }
5962 else
5963 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->recursivehotpath);
5964 }
5965 rminlabel = LABEL();
5966 }
5967 else if (bra == OP_BRAZERO)
5968 {
5969 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5970 free_stack(common, 1);
5971 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5972 }
5973
5974 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
5975 {
5976 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
5977 {
5978 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5979 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5980 }
5981 once = JUMP(SLJIT_JUMP);
5982 }
5983 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5984 {
5985 if (has_alternatives)
5986 {
5987 /* Always exactly one alternative. */
5988 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5989 free_stack(common, 1);
5990
5991 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
5992 if (SLJIT_UNLIKELY(!jumplistitem))
5993 return;
5994 jumplist = jumplistitem;
5995 jumplistitem->next = NULL;
5996 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
5997 }
5998 }
5999 else if (*cc == OP_ALT)
6000 {
6001 /* Build a jump list. Get the last successfully matched branch index. */
6002 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6003 free_stack(common, 1);
6004 count = 1;
6005 do
6006 {
6007 /* Append as the last item. */
6008 if (jumplist != NULL)
6009 {
6010 jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
6011 jumplistitem = jumplistitem->next;
6012 }
6013 else
6014 {
6015 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
6016 jumplist = jumplistitem;
6017 }
6018
6019 if (SLJIT_UNLIKELY(!jumplistitem))
6020 return;
6021
6022 jumplistitem->next = NULL;
6023 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
6024 cc += GET(cc, 1);
6025 }
6026 while (*cc == OP_ALT);
6027
6028 cc = ccbegin + GET(ccbegin, 1);
6029 }
6030
6031 COMPILE_FALLBACKPATH(current->top);
6032 if (current->topfallbacks)
6033 set_jumps(current->topfallbacks, LABEL());
6034
6035 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6036 {
6037 /* Conditional block always has at most one alternative. */
6038 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
6039 {
6040 SLJIT_ASSERT(has_alternatives);
6041 assert = CURRENT_AS(bracket_fallback)->u.assert;
6042 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
6043 {
6044 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6045 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6047 }
6048 cond = JUMP(SLJIT_JUMP);
6049 set_jumps(CURRENT_AS(bracket_fallback)->u.assert->condfailed, LABEL());
6050 }
6051 else if (CURRENT_AS(bracket_fallback)->u.condfailed != NULL)
6052 {
6053 SLJIT_ASSERT(has_alternatives);
6054 cond = JUMP(SLJIT_JUMP);
6055 set_jumps(CURRENT_AS(bracket_fallback)->u.condfailed, LABEL());
6056 }
6057 else
6058 SLJIT_ASSERT(!has_alternatives);
6059 }
6060
6061 if (has_alternatives)
6062 {
6063 count = 1;
6064 do
6065 {
6066 current->top = NULL;
6067 current->topfallbacks = NULL;
6068 current->nextfallbacks = NULL;
6069 if (*cc == OP_ALT)
6070 {
6071 ccprev = cc + 1 + LINK_SIZE;
6072 cc += GET(cc, 1);
6073 if (opcode != OP_COND && opcode != OP_SCOND)
6074 {
6075 if (localptr != 0 && opcode != OP_ONCE)
6076 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6077 else
6078 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6079 }
6080 compile_hotpath(common, ccprev, cc, current);
6081 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6082 return;
6083 }
6084
6085 /* Instructions after the current alternative is succesfully matched. */
6086 /* There is a similar code in compile_bracket_hotpath. */
6087 if (opcode == OP_ONCE)
6088 {
6089 if (CURRENT_AS(bracket_fallback)->u.framesize < 0)
6090 {
6091 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6092 /* TMP2 which is set here used by OP_KETRMAX below. */
6093 if (ket == OP_KETRMAX)
6094 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6095 else if (ket == OP_KETRMIN)
6096 {
6097 /* Move the STR_PTR to the localptr. */
6098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
6099 }
6100 }
6101 else
6102 {
6103 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_fallback)->u.framesize + 2) * sizeof(sljit_w));
6104 if (ket == OP_KETRMAX)
6105 {
6106 /* TMP2 which is set here used by OP_KETRMAX below. */
6107 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6108 }
6109 }
6110 }
6111
6112 stacksize = 0;
6113 if (opcode != OP_ONCE)
6114 stacksize++;
6115 if (ket != OP_KET || bra != OP_BRA)
6116 stacksize++;
6117
6118 if (stacksize > 0) {
6119 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6120 allocate_stack(common, stacksize);
6121 else
6122 {
6123 /* We know we have place at least for one item on the top of the stack. */
6124 SLJIT_ASSERT(stacksize == 1);
6125 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
6126 }
6127 }
6128
6129 stacksize = 0;
6130 if (ket != OP_KET || bra != OP_BRA)
6131 {
6132 if (ket != OP_KET)
6133 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6134 else
6135 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6136 stacksize++;
6137 }
6138
6139 if (opcode != OP_ONCE)
6140 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
6141
6142 if (offset != 0)
6143 {
6144 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6145 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6146 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
6147 }
6148
6149 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->althotpath);
6150
6151 if (opcode != OP_ONCE)
6152 {
6153 SLJIT_ASSERT(jumplist);
6154 JUMPHERE(jumplist->jump);
6155 jumplist = jumplist->next;
6156 }
6157
6158 COMPILE_FALLBACKPATH(current->top);
6159 if (current->topfallbacks)
6160 set_jumps(current->topfallbacks, LABEL());
6161 SLJIT_ASSERT(!current->nextfallbacks);
6162 }
6163 while (*cc == OP_ALT);
6164 SLJIT_ASSERT(!jumplist);
6165
6166 if (cond != NULL)
6167 {
6168 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
6169 assert = CURRENT_AS(bracket_fallback)->u.assert;
6170 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
6171
6172 {
6173 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6174 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6175 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6176 }
6177 JUMPHERE(cond);
6178 }
6179
6180 /* Free the STR_PTR. */
6181 if (localptr == 0)
6182 free_stack(common, 1);
6183 }
6184
6185 if (offset != 0)
6186 {
6187 /* Using both tmp register is better for instruction scheduling. */
6188 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6189 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6191 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6192 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2));
6193 free_stack(common, 3);
6194 }
6195 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6196 {
6197 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0));
6198 free_stack(common, 1);
6199 }
6200 else if (opcode == OP_ONCE)
6201 {
6202 cc = ccbegin + GET(ccbegin, 1);
6203 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6204 {
6205 /* Reset head and drop saved frame. */
6206 stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
6207 free_stack(common, CURRENT_AS(bracket_fallback)->u.framesize + stacksize);
6208 }
6209 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
6210 {
6211 /* The STR_PTR must be released. */
6212 free_stack(common, 1);
6213 }
6214
6215 JUMPHERE(once);
6216 /* Restore previous localptr */
6217 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6218 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_fallback)->u.framesize * sizeof(sljit_w));
6219 else if (ket == OP_KETRMIN)
6220 {
6221 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6222 /* See the comment below. */
6223 free_stack(common, 2);
6224 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
6225 }
6226 }
6227
6228 if (ket == OP_KETRMAX)
6229 {
6230 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6231 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_fallback)->recursivehotpath);
6232 if (bra == OP_BRAZERO)
6233 {
6234 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6235 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
6236 JUMPHERE(brazero);
6237 }
6238 free_stack(common, 1);
6239 }
6240 else if (ket == OP_KETRMIN)
6241 {
6242 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6243
6244 /* OP_ONCE removes everything in case of a fallback, so we don't
6245 need to explicitly release the STR_PTR. The extra release would
6246 affect badly the free_stack(2) above. */
6247 if (opcode != OP_ONCE)
6248 free_stack(common, 1);
6249 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
6250 if (opcode == OP_ONCE)
6251 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
6252 else if (bra == OP_BRAMINZERO)
6253 free_stack(common, 1);
6254 }
6255 else if (bra == OP_BRAZERO)
6256 {
6257 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6258 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
6259 JUMPHERE(brazero);
6260 }
6261 }
6262
6263 static void compile_bracketpos_fallbackpath(compiler_common *common, struct fallback_common *current)
6264 {
6265 DEFINE_COMPILER;
6266 int offset;
6267 struct sljit_jump *jump;
6268
6269 if (CURRENT_AS(bracketpos_fallback)->framesize < 0)
6270 {
6271 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
6272 {
6273 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
6274 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6275 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6276 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6277 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6278 }
6279 set_jumps(current->topfallbacks, LABEL());
6280 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
6281 return;
6282 }
6283
6284 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr);
6285 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6286
6287 if (current->topfallbacks)
6288 {
6289 jump = JUMP(SLJIT_JUMP);
6290 set_jumps(current->topfallbacks, LABEL());
6291 /* Drop the stack frame. */
6292 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
6293 JUMPHERE(jump);
6294 }
6295 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_fallback)->framesize * sizeof(sljit_w));
6296 }
6297
6298 static void compile_braminzero_fallbackpath(compiler_common *common, struct fallback_common *current)
6299 {
6300 assert_fallback fallback;
6301
6302 current->top = NULL;
6303 current->topfallbacks = NULL;
6304 current->nextfallbacks = NULL;
6305 if (current->cc[1] > OP_ASSERTBACK_NOT)
6306 {
6307 /* Manual call of compile_bracket_hotpath and compile_bracket_fallbackpath. */
6308 compile_bracket_hotpath(common, current->cc, current);
6309 compile_bracket_fallbackpath(common, current->top);
6310 }
6311 else
6312 {
6313 memset(&fallback, 0, sizeof(fallback));
6314 fallback.common.cc = current->cc;
6315 fallback.hotpath = CURRENT_AS(braminzero_fallback)->hotpath;
6316 /* Manual call of compile_assert_hotpath. */
6317 compile_assert_hotpath(common, current->cc, &fallback, FALSE);
6318 }
6319 SLJIT_ASSERT(!current->nextfallbacks && !current->topfallbacks);
6320 }
6321
6322 static void compile_fallbackpath(compiler_common *common, struct fallback_common *current)
6323 {
6324 DEFINE_COMPILER;
6325
6326 while (current)
6327 {
6328 if (current->nextfallbacks != NULL)
6329 set_jumps(current->nextfallbacks, LABEL());
6330 switch(*current->cc)
6331 {
6332 case OP_SET_SOM:
6333 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6334 free_stack(common, 1);
6335 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
6336 break;
6337
6338 case OP_STAR:
6339 case OP_MINSTAR:
6340 case OP_PLUS:
6341 case OP_MINPLUS:
6342 case OP_QUERY:
6343 case OP_MINQUERY:
6344 case OP_UPTO:
6345 case OP_MINUPTO:
6346 case OP_EXACT:
6347 case OP_POSSTAR:
6348 case OP_POSPLUS:
6349 case OP_POSQUERY:
6350 case OP_POSUPTO:
6351 case OP_STARI:
6352 case OP_MINSTARI:
6353 case OP_PLUSI:
6354 case OP_MINPLUSI:
6355 case OP_QUERYI:
6356 case OP_MINQUERYI:
6357 case OP_UPTOI:
6358 case OP_MINUPTOI:
6359 case OP_EXACTI:
6360 case OP_POSSTARI:
6361 case OP_POSPLUSI:
6362 case OP_POSQUERYI:
6363 case OP_POSUPTOI:
6364 case OP_NOTSTAR:
6365 case OP_NOTMINSTAR:
6366 case OP_NOTPLUS:
6367 case OP_NOTMINPLUS:
6368 case OP_NOTQUERY:
6369 case OP_NOTMINQUERY:
6370 case OP_NOTUPTO:
6371 case OP_NOTMINUPTO:
6372 case OP_NOTEXACT:
6373 case OP_NOTPOSSTAR:
6374 case OP_NOTPOSPLUS:
6375 case OP_NOTPOSQUERY:
6376 case OP_NOTPOSUPTO:
6377 case OP_NOTSTARI:
6378 case OP_NOTMINSTARI:
6379 case OP_NOTPLUSI:
6380 case OP_NOTMINPLUSI:
6381 case OP_NOTQUERYI:
6382 case OP_NOTMINQUERYI:
6383 case OP_NOTUPTOI:
6384 case OP_NOTMINUPTOI:
6385 case OP_NOTEXACTI:
6386 case OP_NOTPOSSTARI:
6387 case OP_NOTPOSPLUSI:
6388 case OP_NOTPOSQUERYI:
6389 case OP_NOTPOSUPTOI:
6390 case OP_TYPESTAR:
6391 case OP_TYPEMINSTAR:
6392 case OP_TYPEPLUS:
6393 case OP_TYPEMINPLUS:
6394 case OP_TYPEQUERY:
6395 case OP_TYPEMINQUERY:
6396 case OP_TYPEUPTO:
6397 case OP_TYPEMINUPTO:
6398 case OP_TYPEEXACT:
6399 case OP_TYPEPOSSTAR:
6400 case OP_TYPEPOSPLUS:
6401 case OP_TYPEPOSQUERY:
6402 case OP_TYPEPOSUPTO:
6403 case OP_CLASS:
6404 case OP_NCLASS:
6405 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6406 case OP_XCLASS:
6407 #endif
6408 compile_iterator_fallbackpath(common, current);
6409 break;
6410
6411 case OP_REF:
6412 case OP_REFI:
6413 compile_ref_iterator_fallbackpath(common, current);
6414 break;
6415
6416 case OP_RECURSE:
6417 compile_recurse_fallbackpath(common, current);
6418 break;
6419
6420 case OP_ASSERT:
6421 case OP_ASSERT_NOT:
6422 case OP_ASSERTBACK:
6423 case OP_ASSERTBACK_NOT:
6424 compile_assert_fallbackpath(common, current);
6425 break;
6426
6427 case OP_ONCE:
6428 case OP_ONCE_NC:
6429 case OP_BRA:
6430 case OP_CBRA:
6431 case OP_COND:
6432 case OP_SBRA:
6433 case OP_SCBRA:
6434 case OP_SCOND:
6435 compile_bracket_fallbackpath(common, current);
6436 break;
6437
6438 case OP_BRAZERO:
6439 if (current->cc[1] > OP_ASSERTBACK_NOT)
6440 compile_bracket_fallbackpath(common, current);
6441 else
6442 compile_assert_fallbackpath(common, current);
6443 break;
6444
6445 case OP_BRAPOS:
6446 case OP_CBRAPOS:
6447 case OP_SBRAPOS:
6448 case OP_SCBRAPOS:
6449 case OP_BRAPOSZERO:
6450 compile_bracketpos_fallbackpath(common, current);
6451 break;
6452
6453 case OP_BRAMINZERO:
6454 compile_braminzero_fallbackpath(common, current);
6455 break;
6456
6457 case OP_FAIL:
6458 case OP_ACCEPT:
6459 case OP_ASSERT_ACCEPT:
6460 set_jumps(current->topfallbacks, LABEL());
6461 break;
6462
6463 default:
6464 SLJIT_ASSERT_STOP();
6465 break;
6466 }
6467 current = current->prev;
6468 }
6469 }
6470
6471 static SLJIT_INLINE void compile_recurse(compiler_common *common)
6472 {
6473 DEFINE_COMPILER;
6474 pcre_uchar *cc = common->start + common->currententry->start;
6475 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
6476 pcre_uchar *ccend = bracketend(cc);
6477 int localsize = get_localsize(common, ccbegin, ccend);
6478 int framesize = get_framesize(common, cc, TRUE);
6479 int alternativesize;
6480 BOOL needsframe;
6481 fallback_common altfallback;
6482 struct sljit_jump *jump;
6483
6484 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
6485 needsframe = framesize >= 0;
6486 if (!needsframe)
6487 framesize = 0;
6488 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
6489
6490 SLJIT_ASSERT(common->currententry->entry == NULL);
6491 common->currententry->entry = LABEL();
6492 set_jumps(common->currententry->calls, common->currententry->entry);
6493
6494 sljit_emit_fast_enter(compiler, TMP2, 0, 1, 5, 5, common->localsize);
6495 allocate_stack(common, localsize + framesize + alternativesize);
6496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
6497 copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
6498 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, STACK_TOP, 0);
6499 if (needsframe)
6500 init_frame(common, cc, framesize + alternativesize - 1, alternativesize, FALSE);
6501
6502 if (alternativesize > 0)
6503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6504
6505 memset(&altfallback, 0, sizeof(fallback_common));
6506 common->acceptlabel = NULL;
6507 common->accept = NULL;
6508 altfallback.cc = ccbegin;
6509 cc += GET(cc, 1);
6510 while (1)
6511 {
6512 altfallback.top = NULL;
6513 altfallback.topfallbacks = NULL;
6514
6515 if (altfallback.cc != ccbegin)
6516 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6517
6518 compile_hotpath(common, altfallback.cc, cc, &altfallback);
6519 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6520 return;
6521
6522 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6523
6524 compile_fallbackpath(common, altfallback.top);
6525 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6526 return;
6527 set_jumps(altfallback.topfallbacks, LABEL());
6528
6529 if (*cc != OP_ALT)
6530 break;
6531
6532 altfallback.cc = cc + 1 + LINK_SIZE;
6533 cc += GET(cc, 1);
6534 }
6535 /* None of them matched. */
6536 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6537 jump = JUMP(SLJIT_JUMP);
6538
6539 set_jumps(common->accept, LABEL());
6540 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD);
6541 if (needsframe)
6542 {
6543 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6544 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6545 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6546 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6547 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP3, 0);
6548 }
6549 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
6550
6551 JUMPHERE(jump);
6552 copy_locals(common, ccbegin, ccend, FALSE, localsize + framesize + alternativesize, framesize + alternativesize);
6553 free_stack(common, localsize + framesize + alternativesize);
6554 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_w));
6555 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
6556 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, TMP2, 0);
6557 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
6558 }
6559
6560 #undef COMPILE_FALLBACKPATH
6561 #undef CURRENT_AS
6562
6563 void
6564 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
6565 {
6566 struct sljit_compiler *compiler;
6567 fallback_common rootfallback;
6568 compiler_common common_data;
6569 compiler_common *common = &common_data;
6570 const pcre_uint8 *tables = re->tables;
6571 pcre_study_data *study;
6572 pcre_uchar *ccend;
6573 executable_functions *functions;
6574 void *executable_func;
6575 sljit_uw executable_size;
6576 struct sljit_label *leave;
6577 struct sljit_label *mainloop = NULL;
6578 struct sljit_label *empty_match_found;
6579 struct sljit_label *empty_match_fallback;
6580 struct sljit_jump *jump;
6581 struct sljit_jump *reqbyte_notfound = NULL;
6582 struct sljit_jump *empty_match;
6583
6584 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
6585 study = extra->study_data;
6586
6587 if (!tables)
6588 tables = PRIV(default_tables);
6589
6590 memset(&rootfallback, 0, sizeof(fallback_common));
6591 rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
6592
6593 common->compiler = NULL;
6594 common->start = rootfallback.cc;
6595 common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w);
6596 common->fcc = tables + fcc_offset;
6597 common->lcc = (sljit_w)(tables + lcc_offset);
6598 common->mode = mode;
6599 common->nltype = NLTYPE_FIXED;
6600 switch(re->options & PCRE_NEWLINE_BITS)
6601 {
6602 case 0:
6603 /* Compile-time default */
6604 switch (NEWLINE)
6605 {
6606 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6607 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6608 default: common->newline = NEWLINE; break;
6609 }
6610 break;
6611 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
6612 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
6613 case PCRE_NEWLINE_CR+
6614 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
6615 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6616 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6617 default: return;
6618 }
6619 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
6620 common->bsr_nltype = NLTYPE_ANYCRLF;
6621 else if ((re->options & PCRE_BSR_UNICODE) != 0)
6622 common->bsr_nltype = NLTYPE_ANY;
6623 else
6624 {
6625 #ifdef BSR_ANYCRLF
6626 common->bsr_nltype = NLTYPE_ANYCRLF;
6627 #else
6628 common->bsr_nltype = NLTYPE_ANY;
6629 #endif
6630 }
6631 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6632 common->ctypes = (sljit_w)(tables + ctypes_offset);
6633 common->name_table = (sljit_w)((pcre_uchar *)re + re->name_table_offset);
6634 common->name_count = re->name_count;
6635 common->name_entry_size = re->name_entry_size;
6636 common->partialmatchlabel = NULL;
6637 common->acceptlabel = NULL;
6638 common->stubs = NULL;
6639 common->entries = NULL;
6640 common->currententry = NULL;
6641 common->partialmatch = NULL;
6642 common->accept = NULL;
6643 common->calllimit = NULL;
6644 common->stackalloc = NULL;
6645 common->revertframes = NULL;
6646 common->wordboundary = NULL;
6647 common->anynewline = NULL;
6648 common->hspace = NULL;
6649 common->vspace = NULL;
6650 common->casefulcmp = NULL;
6651 common->caselesscmp = NULL;
6652 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6653 #ifdef SUPPORT_UTF
6654 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6655 common->utf = (re->options & PCRE_UTF8) != 0;
6656 #ifdef SUPPORT_UCP
6657 common->use_ucp = (re->options & PCRE_UCP) != 0;
6658 #endif
6659 common->utfreadchar = NULL;
6660 #ifdef COMPILE_PCRE8
6661 common->utfreadtype8 = NULL;
6662 #endif
6663 #endif /* SUPPORT_UTF */
6664 #ifdef SUPPORT_UCP
6665 common->getucd = NULL;
6666 #endif
6667 ccend = bracketend(rootfallback.cc);
6668 SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
6669 common->localsize = get_localspace(common, rootfallback.cc, ccend);
6670 if (common->localsize < 0)
6671 return;
6672 common->localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
6673 if (common->localsize > SLJIT_MAX_LOCAL_SIZE)
6674 return;
6675 common->localptrs = (int *)SLJIT_MALLOC((ccend - rootfallback.cc) * sizeof(int));
6676 if (!common->localptrs)
6677 return;
6678 memset(common->localptrs, 0, (ccend - rootfallback.cc) * sizeof(int));
6679 set_localptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w), ccend);
6680
6681 compiler = sljit_create_compiler();
6682 if (!compiler)
6683 {
6684 SLJIT_FREE(common->localptrs);
6685 return;
6686 }
6687 common->compiler = compiler;
6688
6689 /* Main pcre_jit_exec entry. */
6690 sljit_emit_enter(compiler, 1, 5, 5, common->localsize);
6691
6692 /* Register init. */
6693 reset_ovector(common, (re->top_bracket + 1) * 2);
6694 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0)
6695 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, SLJIT_TEMPORARY_REG1, 0);
6696
6697 OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_SAVED_REG1, 0);
6698 OP1(SLJIT_MOV, TMP1, 0, SLJIT_SAVED_REG1, 0);
6699 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6700 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
6701 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
6702 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, calllimit));
6703 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, base));
6704 OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, limit));
6705 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT, TMP1, 0);
6706
6707 if (mode == JIT_PARTIAL_SOFT_COMPILE)
6708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), HIT_START, SLJIT_IMM, 0);
6709
6710 /* Main part of the matching */
6711 if ((re->options & PCRE_ANCHORED) == 0)
6712 {
6713 mainloop = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0, (re->options & PCRE_FIRSTLINE) != 0);
6714 /* Forward search if possible. */
6715 if ((re->flags & PCRE_FIRSTSET) != 0)
6716 fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0, (re->options & PCRE_FIRSTLINE) != 0);
6717 else if ((re->flags & PCRE_STARTLINE) != 0)
6718 fast_forward_newline(common, (re->options & PCRE_FIRSTLINE) != 0);
6719 else if ((re->flags & PCRE_STARTLINE) == 0 && study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)
6720 fast_forward_start_bits(common, (sljit_uw)study->start_bits, (re->options & PCRE_FIRSTLINE) != 0);
6721 }
6722 if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0)
6723 reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0);
6724
6725 /* Store the current STR_PTR in OVECTOR(0). */
6726 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6727 /* Copy the limit of allowed recursions. */
6728 OP1(SLJIT_MOV, CALL_COUNT, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CALL_LIMIT);
6729 /* Copy the beginning of the string. */
6730 if (mode == JIT_PARTIAL_SOFT_COMPILE)
6731 {
6732 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), HIT_START, SLJIT_IMM, 0);
6733 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0);
67