/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 918 - (show annotations)
Thu Feb 16 06:39:20 2012 UTC (7 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 224167 byte(s)
Error occurred while calculating annotation data.
Handle remaining partial matching cases in JIT
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Memory taken from the machine stack: fast to obtain, but limited in size. */
#define LOCAL_SPACE_SIZE 32768

#define STACK_GROWTH_RATE 8192

/* When SLJIT debugging is switched on, allocate_stack() deliberately
overwrites its temporaries, so code that wrongly relies on them surviving
an allocation is exposed. */
#if defined(SLJIT_DEBUG) && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
77
78 /*
79 Short summary about the backtracking mechanism employed by the jit code generator:
80
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct fallback_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called as the hot path (expected path), and the other is called as
93 the 'fallback' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the hot path.
95
96 Greedy star operator (*) :
97 Hot path: match happens.
98 Fallback path: match failed.
99 Non-greedy star operator (*?) :
100 Hot path: no need to perform a match.
101 Fallback path: match is required.
102
103 The following example shows the code generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A hot path
112 '(' hot path (pushing arguments to the stack)
113 B hot path
114 ')' hot path (pushing arguments to the stack)
115 D hot path
116 return with successful match
117
118 D fallback path
119 ')' fallback path (If we arrived from "C" jump to the fallback of "C")
120 B fallback path
121 C expected path
122 jump to D hot path
123 C fallback path
124 A fallback path
125
126 Notice that the order of the fallback code paths is the opposite of the fast
127 code paths. In this way the topmost value on the stack always belongs
128 to the current fallback code path. The fallback code path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the hot path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the fallback code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when the fallback mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *ptr;
156 /* Everything else after. */
157 int offsetcount;
158 int calllimit;
159 pcre_uint8 notbol;
160 pcre_uint8 noteol;
161 pcre_uint8 notempty;
162 pcre_uint8 notempty_atstart;
163 } jit_arguments;
164
165 typedef struct executable_functions {
166 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
167 PUBL(jit_callback) callback;
168 void *userdata;
169 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
170 } executable_functions;
171
/* Node of a singly linked list of jumps. New nodes are pushed at the head
by add_jump(), and set_jumps() binds every jump in a list to one label. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
176
/* Kinds of out-of-line stubs; only stack allocation exists. */
enum stub_types {
  stack_alloc
};

/* One pending stub, kept on a singly linked list. add_stub() records the
jump that enters the stub (start) and the label where execution resumes
(leave); flush_stubs() later emits the stub code between the two. */
typedef struct stub_list {
  enum stub_types type;
  int data;
  struct sljit_jump *start;
  struct sljit_label *leave;
  struct stub_list *next;
} stub_list;
186
187 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
188
189 /* The following structure is the key data type for the recursive
190 code generator. It is allocated by compile_hotpath, and contains
191 the aguments for compile_fallbackpath. Must be the first member
192 of its descendants. */
193 typedef struct fallback_common {
194 /* Concatenation stack. */
195 struct fallback_common *prev;
196 jump_list *nextfallbacks;
197 /* Internal stack (for component operators). */
198 struct fallback_common *top;
199 jump_list *topfallbacks;
200 /* Opcode pointer. */
201 pcre_uchar *cc;
202 } fallback_common;
203
204 typedef struct assert_fallback {
205 fallback_common common;
206 jump_list *condfailed;
207 /* Less than 0 (-1) if a frame is not needed. */
208 int framesize;
209 /* Points to our private memory word on the stack. */
210 int localptr;
211 /* For iterators. */
212 struct sljit_label *hotpath;
213 } assert_fallback;
214
215 typedef struct bracket_fallback {
216 fallback_common common;
217 /* Where to coninue if an alternative is successfully matched. */
218 struct sljit_label *althotpath;
219 /* For rmin and rmax iterators. */
220 struct sljit_label *recursivehotpath;
221 /* For greedy ? operator. */
222 struct sljit_label *zerohotpath;
223 /* Contains the branches of a failed condition. */
224 union {
225 /* Both for OP_COND, OP_SCOND. */
226 jump_list *condfailed;
227 assert_fallback *assert;
228 /* For OP_ONCE. -1 if not needed. */
229 int framesize;
230 } u;
231 /* Points to our private memory word on the stack. */
232 int localptr;
233 } bracket_fallback;
234
235 typedef struct bracketpos_fallback {
236 fallback_common common;
237 /* Points to our private memory word on the stack. */
238 int localptr;
239 /* Reverting stack is needed. */
240 int framesize;
241 /* Allocated stack size. */
242 int stacksize;
243 } bracketpos_fallback;
244
245 typedef struct braminzero_fallback {
246 fallback_common common;
247 struct sljit_label *hotpath;
248 } braminzero_fallback;
249
250 typedef struct iterator_fallback {
251 fallback_common common;
252 /* Next iteration. */
253 struct sljit_label *hotpath;
254 } iterator_fallback;
255
256 typedef struct recurse_entry {
257 struct recurse_entry *next;
258 /* Contains the function entry. */
259 struct sljit_label *entry;
260 /* Collects the calls until the function is not created. */
261 jump_list *calls;
262 /* Points to the starting opcode. */
263 int start;
264 } recurse_entry;
265
266 typedef struct recurse_fallback {
267 fallback_common common;
268 } recurse_fallback;
269
270 typedef struct compiler_common {
271 struct sljit_compiler *compiler;
272 pcre_uchar *start;
273 int localsize;
274 int *localptrs;
275 const pcre_uint8 *fcc;
276 sljit_w lcc;
277 int cbraptr;
278 int mode;
279 int nltype;
280 int newline;
281 int bsr_nltype;
282 int endonly;
283 sljit_w ctypes;
284 sljit_uw name_table;
285 sljit_w name_count;
286 sljit_w name_entry_size;
287 struct sljit_label *partialmatchlabel;
288 struct sljit_label *acceptlabel;
289 stub_list *stubs;
290 recurse_entry *entries;
291 recurse_entry *currententry;
292 jump_list *partialmatch;
293 jump_list *accept;
294 jump_list *calllimit;
295 jump_list *stackalloc;
296 jump_list *revertframes;
297 jump_list *wordboundary;
298 jump_list *anynewline;
299 jump_list *hspace;
300 jump_list *vspace;
301 jump_list *casefulcmp;
302 jump_list *caselesscmp;
303 BOOL jscript_compat;
304 #ifdef SUPPORT_UTF
305 BOOL utf;
306 #ifdef SUPPORT_UCP
307 BOOL use_ucp;
308 #endif
309 jump_list *utfreadchar;
310 #ifdef COMPILE_PCRE8
311 jump_list *utfreadtype8;
312 #endif
313 #endif /* SUPPORT_UTF */
314 #ifdef SUPPORT_UCP
315 jump_list *getucd;
316 #endif
317 } compiler_common;
318
/* Context used by byte_sequence_compare. The two unions (c and oc) and
ucharptr only exist when SLJIT_UNALIGNED is set; each union overlays an
int, an unsigned short and an array of character units. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } c;
  union {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } oc;
#endif
} compare_context;
352
/* Marker words stored in saved stack frames: frame_end is written by
init_frame() after the last item of a frame, frame_setstrbegin precedes a
saved copy of OVECTOR(0). */
enum {
  frame_end = 0,
  frame_setstrbegin = -1
};
357
/* Drop the sljit definition of CMP; this file defines its own below. */
#undef CMP

/* Byte offset of stack element i: element 0 sits one machine word below
the address used as base, element 1 two words below, and so on. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))

/* Register assignment used by the generated code. */
#define TMP1 SLJIT_TEMPORARY_REG1
#define TMP2 SLJIT_TEMPORARY_REG3
#define TMP3 SLJIT_TEMPORARY_EREG2
#define STR_PTR SLJIT_SAVED_REG1
#define STR_END SLJIT_SAVED_REG2
#define STACK_TOP SLJIT_TEMPORARY_REG2
#define STACK_LIMIT SLJIT_SAVED_REG3
#define ARGUMENTS SLJIT_SAVED_EREG1
#define CALL_COUNT SLJIT_SAVED_EREG2
#define RETURN_ADDR SLJIT_TEMPORARY_EREG1
374
/* Locals layout: every macro is a byte offset, in units of one machine
word (sizeof(sljit_w)). None of the macros below refers to word 7. */
/* Two scratch words at the disposal of the opcode being compiled. */
#define LOCALS0 (0 * sizeof(sljit_w))
#define LOCALS1 (1 * sizeof(sljit_w))
/* Two words reserved for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_w))
#define POSSESSIVE1 (3 * sizeof(sljit_w))
/* Head of the last recursion. */
#define RECURSIVE_HEAD (4 * sizeof(sljit_w))
/* Max limit of recursions. */
#define CALL_LIMIT (5 * sizeof(sljit_w))
/* Last known position of the requested byte. Shares its word with
START_USED_PTR, because partial matching and req_char are exclusive. */
#define REQ_CHAR_PTR (6 * sizeof(sljit_w))
/* First inspected character for partial matching. Shares its word with
REQ_CHAR_PTR, because partial matching and req_char are exclusive. */
#define START_USED_PTR (6 * sizeof(sljit_w))
/* Starting pointer for partial soft matches. */
#define HIT_START (8 * sizeof(sljit_w))
/* End pointer of the first line. */
#define FIRSTLINE_END (9 * sizeof(sljit_w))
/* The output vector lives on the stack and holds character pointers. It
consists of two groups: the start / end pointers of the capturing groups
come first, followed by the start pointers of groups whose end has not
been reached yet. */
#define OVECTOR_START (10 * sizeof(sljit_w))
#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
/* Private data offset recorded for the opcode at cc. */
#define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])
404
405 #ifdef COMPILE_PCRE8
406 #define MOV_UCHAR SLJIT_MOV_UB
407 #define MOVU_UCHAR SLJIT_MOVU_UB
408 #else
409 #ifdef COMPILE_PCRE16
410 #define MOV_UCHAR SLJIT_MOV_UH
411 #define MOVU_UCHAR SLJIT_MOVU_UH
412 #else
413 #error Unsupported compiling mode
414 #endif
415 #endif
416
/* Shortcuts for the sljit emitter calls. All of them except
DEFINE_COMPILER expect a local variable called "compiler", which
DEFINE_COMPILER declares from common->compiler. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
/* One and two source operand instructions. */
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
/* Labels and jumps. JUMPTO binds a new jump to an existing label,
JUMPHERE binds an existing jump to a label emitted at the current position. */
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
/* Compare-and-jump; CMPTO also binds the resulting jump to a label. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define COND_VALUE(op, dst, dstw, type) \
  sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
438
439 static pcre_uchar* bracketend(pcre_uchar* cc)
440 {
441 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
442 do cc += GET(cc, 1); while (*cc == OP_ALT);
443 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
444 cc += 1 + LINK_SIZE;
445 return cc;
446 }
447
448 /* Functions which might need modification whenever a new opcode is supported:
449 next_opcode
450 get_localspace
451 set_localptrs
452 get_framesize
453 init_frame
454 get_localsize
455 copy_locals
456 compile_hotpath
457 compile_fallbackpath
458 */
459
460 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
461 {
462 SLJIT_UNUSED_ARG(common);
463 switch(*cc)
464 {
465 case OP_SOD:
466 case OP_SOM:
467 case OP_SET_SOM:
468 case OP_NOT_WORD_BOUNDARY:
469 case OP_WORD_BOUNDARY:
470 case OP_NOT_DIGIT:
471 case OP_DIGIT:
472 case OP_NOT_WHITESPACE:
473 case OP_WHITESPACE:
474 case OP_NOT_WORDCHAR:
475 case OP_WORDCHAR:
476 case OP_ANY:
477 case OP_ALLANY:
478 case OP_ANYNL:
479 case OP_NOT_HSPACE:
480 case OP_HSPACE:
481 case OP_NOT_VSPACE:
482 case OP_VSPACE:
483 case OP_EXTUNI:
484 case OP_EODN:
485 case OP_EOD:
486 case OP_CIRC:
487 case OP_CIRCM:
488 case OP_DOLL:
489 case OP_DOLLM:
490 case OP_TYPESTAR:
491 case OP_TYPEMINSTAR:
492 case OP_TYPEPLUS:
493 case OP_TYPEMINPLUS:
494 case OP_TYPEQUERY:
495 case OP_TYPEMINQUERY:
496 case OP_TYPEPOSSTAR:
497 case OP_TYPEPOSPLUS:
498 case OP_TYPEPOSQUERY:
499 case OP_CRSTAR:
500 case OP_CRMINSTAR:
501 case OP_CRPLUS:
502 case OP_CRMINPLUS:
503 case OP_CRQUERY:
504 case OP_CRMINQUERY:
505 case OP_DEF:
506 case OP_BRAZERO:
507 case OP_BRAMINZERO:
508 case OP_BRAPOSZERO:
509 case OP_FAIL:
510 case OP_ACCEPT:
511 case OP_ASSERT_ACCEPT:
512 case OP_SKIPZERO:
513 return cc + 1;
514
515 case OP_ANYBYTE:
516 #ifdef SUPPORT_UTF
517 if (common->utf) return NULL;
518 #endif
519 return cc + 1;
520
521 case OP_CHAR:
522 case OP_CHARI:
523 case OP_NOT:
524 case OP_NOTI:
525 case OP_STAR:
526 case OP_MINSTAR:
527 case OP_PLUS:
528 case OP_MINPLUS:
529 case OP_QUERY:
530 case OP_MINQUERY:
531 case OP_POSSTAR:
532 case OP_POSPLUS:
533 case OP_POSQUERY:
534 case OP_STARI:
535 case OP_MINSTARI:
536 case OP_PLUSI:
537 case OP_MINPLUSI:
538 case OP_QUERYI:
539 case OP_MINQUERYI:
540 case OP_POSSTARI:
541 case OP_POSPLUSI:
542 case OP_POSQUERYI:
543 case OP_NOTSTAR:
544 case OP_NOTMINSTAR:
545 case OP_NOTPLUS:
546 case OP_NOTMINPLUS:
547 case OP_NOTQUERY:
548 case OP_NOTMINQUERY:
549 case OP_NOTPOSSTAR:
550 case OP_NOTPOSPLUS:
551 case OP_NOTPOSQUERY:
552 case OP_NOTSTARI:
553 case OP_NOTMINSTARI:
554 case OP_NOTPLUSI:
555 case OP_NOTMINPLUSI:
556 case OP_NOTQUERYI:
557 case OP_NOTMINQUERYI:
558 case OP_NOTPOSSTARI:
559 case OP_NOTPOSPLUSI:
560 case OP_NOTPOSQUERYI:
561 cc += 2;
562 #ifdef SUPPORT_UTF
563 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
564 #endif
565 return cc;
566
567 case OP_UPTO:
568 case OP_MINUPTO:
569 case OP_EXACT:
570 case OP_POSUPTO:
571 case OP_UPTOI:
572 case OP_MINUPTOI:
573 case OP_EXACTI:
574 case OP_POSUPTOI:
575 case OP_NOTUPTO:
576 case OP_NOTMINUPTO:
577 case OP_NOTEXACT:
578 case OP_NOTPOSUPTO:
579 case OP_NOTUPTOI:
580 case OP_NOTMINUPTOI:
581 case OP_NOTEXACTI:
582 case OP_NOTPOSUPTOI:
583 cc += 2 + IMM2_SIZE;
584 #ifdef SUPPORT_UTF
585 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
586 #endif
587 return cc;
588
589 case OP_NOTPROP:
590 case OP_PROP:
591 return cc + 1 + 2;
592
593 case OP_TYPEUPTO:
594 case OP_TYPEMINUPTO:
595 case OP_TYPEEXACT:
596 case OP_TYPEPOSUPTO:
597 case OP_REF:
598 case OP_REFI:
599 case OP_CREF:
600 case OP_NCREF:
601 case OP_RREF:
602 case OP_NRREF:
603 case OP_CLOSE:
604 cc += 1 + IMM2_SIZE;
605 return cc;
606
607 case OP_CRRANGE:
608 case OP_CRMINRANGE:
609 return cc + 1 + 2 * IMM2_SIZE;
610
611 case OP_CLASS:
612 case OP_NCLASS:
613 return cc + 1 + 32 / sizeof(pcre_uchar);
614
615 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
616 case OP_XCLASS:
617 return cc + GET(cc, 1);
618 #endif
619
620 case OP_RECURSE:
621 case OP_ASSERT:
622 case OP_ASSERT_NOT:
623 case OP_ASSERTBACK:
624 case OP_ASSERTBACK_NOT:
625 case OP_REVERSE:
626 case OP_ONCE:
627 case OP_ONCE_NC:
628 case OP_BRA:
629 case OP_BRAPOS:
630 case OP_COND:
631 case OP_SBRA:
632 case OP_SBRAPOS:
633 case OP_SCOND:
634 case OP_ALT:
635 case OP_KET:
636 case OP_KETRMAX:
637 case OP_KETRMIN:
638 case OP_KETRPOS:
639 return cc + 1 + LINK_SIZE;
640
641 case OP_CBRA:
642 case OP_CBRAPOS:
643 case OP_SCBRA:
644 case OP_SCBRAPOS:
645 return cc + 1 + LINK_SIZE + IMM2_SIZE;
646
647 default:
648 return NULL;
649 }
650 }
651
652 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
653 {
654 int localspace = 0;
655 pcre_uchar *alternative;
656 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
657 while (cc < ccend)
658 {
659 switch(*cc)
660 {
661 case OP_ASSERT:
662 case OP_ASSERT_NOT:
663 case OP_ASSERTBACK:
664 case OP_ASSERTBACK_NOT:
665 case OP_ONCE:
666 case OP_ONCE_NC:
667 case OP_BRAPOS:
668 case OP_SBRA:
669 case OP_SBRAPOS:
670 case OP_SCOND:
671 localspace += sizeof(sljit_w);
672 cc += 1 + LINK_SIZE;
673 break;
674
675 case OP_CBRAPOS:
676 case OP_SCBRAPOS:
677 localspace += sizeof(sljit_w);
678 cc += 1 + LINK_SIZE + IMM2_SIZE;
679 break;
680
681 case OP_COND:
682 /* Might be a hidden SCOND. */
683 alternative = cc + GET(cc, 1);
684 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
685 localspace += sizeof(sljit_w);
686 cc += 1 + LINK_SIZE;
687 break;
688
689 default:
690 cc = next_opcode(common, cc);
691 if (cc == NULL)
692 return -1;
693 break;
694 }
695 }
696 return localspace;
697 }
698
699 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
700 {
701 pcre_uchar *cc = common->start;
702 pcre_uchar *alternative;
703 while (cc < ccend)
704 {
705 switch(*cc)
706 {
707 case OP_ASSERT:
708 case OP_ASSERT_NOT:
709 case OP_ASSERTBACK:
710 case OP_ASSERTBACK_NOT:
711 case OP_ONCE:
712 case OP_ONCE_NC:
713 case OP_BRAPOS:
714 case OP_SBRA:
715 case OP_SBRAPOS:
716 case OP_SCOND:
717 common->localptrs[cc - common->start] = localptr;
718 localptr += sizeof(sljit_w);
719 cc += 1 + LINK_SIZE;
720 break;
721
722 case OP_CBRAPOS:
723 case OP_SCBRAPOS:
724 common->localptrs[cc - common->start] = localptr;
725 localptr += sizeof(sljit_w);
726 cc += 1 + LINK_SIZE + IMM2_SIZE;
727 break;
728
729 case OP_COND:
730 /* Might be a hidden SCOND. */
731 alternative = cc + GET(cc, 1);
732 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
733 {
734 common->localptrs[cc - common->start] = localptr;
735 localptr += sizeof(sljit_w);
736 }
737 cc += 1 + LINK_SIZE;
738 break;
739
740 default:
741 cc = next_opcode(common, cc);
742 SLJIT_ASSERT(cc != NULL);
743 break;
744 }
745 }
746 }
747
748 /* Returns with -1 if no need for frame. */
749 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
750 {
751 pcre_uchar *ccend = bracketend(cc);
752 int length = 0;
753 BOOL possessive = FALSE;
754 BOOL setsom_found = FALSE;
755
756 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
757 {
758 length = 3;
759 possessive = TRUE;
760 }
761
762 cc = next_opcode(common, cc);
763 SLJIT_ASSERT(cc != NULL);
764 while (cc < ccend)
765 switch(*cc)
766 {
767 case OP_SET_SOM:
768 case OP_RECURSE:
769 if (!setsom_found)
770 {
771 length += 2;
772 setsom_found = TRUE;
773 }
774 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
775 break;
776
777 case OP_CBRA:
778 case OP_CBRAPOS:
779 case OP_SCBRA:
780 case OP_SCBRAPOS:
781 length += 3;
782 cc += 1 + LINK_SIZE + IMM2_SIZE;
783 break;
784
785 default:
786 cc = next_opcode(common, cc);
787 SLJIT_ASSERT(cc != NULL);
788 break;
789 }
790
791 /* Possessive quantifiers can use a special case. */
792 if (SLJIT_UNLIKELY(possessive) && length == 3)
793 return -1;
794
795 if (length > 0)
796 return length + 1;
797 return -1;
798 }
799
800 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
801 {
802 DEFINE_COMPILER;
803 pcre_uchar *ccend = bracketend(cc);
804 BOOL setsom_found = FALSE;
805 int offset;
806
807 /* >= 1 + shortest item size (2) */
808 SLJIT_UNUSED_ARG(stacktop);
809 SLJIT_ASSERT(stackpos >= stacktop + 2);
810
811 stackpos = STACK(stackpos);
812 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
813 cc = next_opcode(common, cc);
814 SLJIT_ASSERT(cc != NULL);
815 while (cc < ccend)
816 switch(*cc)
817 {
818 case OP_SET_SOM:
819 case OP_RECURSE:
820 if (!setsom_found)
821 {
822 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
823 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
824 stackpos += (int)sizeof(sljit_w);
825 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
826 stackpos += (int)sizeof(sljit_w);
827 setsom_found = TRUE;
828 }
829 cc += (*cc == OP_SET_SOM) ? 1 : 1 + LINK_SIZE;
830 break;
831
832 case OP_CBRA:
833 case OP_CBRAPOS:
834 case OP_SCBRA:
835 case OP_SCBRAPOS:
836 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
837 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
838 stackpos += (int)sizeof(sljit_w);
839 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
840 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
841 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
842 stackpos += (int)sizeof(sljit_w);
843 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
844 stackpos += (int)sizeof(sljit_w);
845
846 cc += 1 + LINK_SIZE + IMM2_SIZE;
847 break;
848
849 default:
850 cc = next_opcode(common, cc);
851 SLJIT_ASSERT(cc != NULL);
852 break;
853 }
854
855 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
856 SLJIT_ASSERT(stackpos == STACK(stacktop));
857 }
858
859 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
860 {
861 int localsize = 2;
862 pcre_uchar *alternative;
863 /* Calculate the sum of the local variables. */
864 while (cc < ccend)
865 {
866 switch(*cc)
867 {
868 case OP_ASSERT:
869 case OP_ASSERT_NOT:
870 case OP_ASSERTBACK:
871 case OP_ASSERTBACK_NOT:
872 case OP_ONCE:
873 case OP_ONCE_NC:
874 case OP_BRAPOS:
875 case OP_SBRA:
876 case OP_SBRAPOS:
877 case OP_SCOND:
878 localsize++;
879 cc += 1 + LINK_SIZE;
880 break;
881
882 case OP_CBRA:
883 case OP_SCBRA:
884 localsize++;
885 cc += 1 + LINK_SIZE + IMM2_SIZE;
886 break;
887
888 case OP_CBRAPOS:
889 case OP_SCBRAPOS:
890 localsize += 2;
891 cc += 1 + LINK_SIZE + IMM2_SIZE;
892 break;
893
894 case OP_COND:
895 /* Might be a hidden SCOND. */
896 alternative = cc + GET(cc, 1);
897 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
898 localsize++;
899 cc += 1 + LINK_SIZE;
900 break;
901
902 default:
903 cc = next_opcode(common, cc);
904 SLJIT_ASSERT(cc != NULL);
905 break;
906 }
907 }
908 SLJIT_ASSERT(cc == ccend);
909 return localsize;
910 }
911
912 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
913 BOOL save, int stackptr, int stacktop)
914 {
915 DEFINE_COMPILER;
916 int srcw[2];
917 int count;
918 BOOL tmp1next = TRUE;
919 BOOL tmp1empty = TRUE;
920 BOOL tmp2empty = TRUE;
921 pcre_uchar *alternative;
922 enum {
923 start,
924 loop,
925 end
926 } status;
927
928 status = save ? start : loop;
929 stackptr = STACK(stackptr - 2);
930 stacktop = STACK(stacktop - 1);
931
932 if (!save)
933 {
934 stackptr += sizeof(sljit_w);
935 if (stackptr < stacktop)
936 {
937 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
938 stackptr += sizeof(sljit_w);
939 tmp1empty = FALSE;
940 }
941 if (stackptr < stacktop)
942 {
943 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
944 stackptr += sizeof(sljit_w);
945 tmp2empty = FALSE;
946 }
947 /* The tmp1next must be TRUE in either way. */
948 }
949
950 while (status != end)
951 {
952 count = 0;
953 switch(status)
954 {
955 case start:
956 SLJIT_ASSERT(save);
957 count = 1;
958 srcw[0] = RECURSIVE_HEAD;
959 status = loop;
960 break;
961
962 case loop:
963 if (cc >= ccend)
964 {
965 status = end;
966 break;
967 }
968
969 switch(*cc)
970 {
971 case OP_ASSERT:
972 case OP_ASSERT_NOT:
973 case OP_ASSERTBACK:
974 case OP_ASSERTBACK_NOT:
975 case OP_ONCE:
976 case OP_ONCE_NC:
977 case OP_BRAPOS:
978 case OP_SBRA:
979 case OP_SBRAPOS:
980 case OP_SCOND:
981 count = 1;
982 srcw[0] = PRIV_DATA(cc);
983 SLJIT_ASSERT(srcw[0] != 0);
984 cc += 1 + LINK_SIZE;
985 break;
986
987 case OP_CBRA:
988 case OP_SCBRA:
989 count = 1;
990 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
991 cc += 1 + LINK_SIZE + IMM2_SIZE;
992 break;
993
994 case OP_CBRAPOS:
995 case OP_SCBRAPOS:
996 count = 2;
997 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
998 srcw[0] = PRIV_DATA(cc);
999 SLJIT_ASSERT(srcw[0] != 0);
1000 cc += 1 + LINK_SIZE + IMM2_SIZE;
1001 break;
1002
1003 case OP_COND:
1004 /* Might be a hidden SCOND. */
1005 alternative = cc + GET(cc, 1);
1006 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1007 {
1008 count = 1;
1009 srcw[0] = PRIV_DATA(cc);
1010 SLJIT_ASSERT(srcw[0] != 0);
1011 }
1012 cc += 1 + LINK_SIZE;
1013 break;
1014
1015 default:
1016 cc = next_opcode(common, cc);
1017 SLJIT_ASSERT(cc != NULL);
1018 break;
1019 }
1020 break;
1021
1022 case end:
1023 SLJIT_ASSERT_STOP();
1024 break;
1025 }
1026
1027 while (count > 0)
1028 {
1029 count--;
1030 if (save)
1031 {
1032 if (tmp1next)
1033 {
1034 if (!tmp1empty)
1035 {
1036 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1037 stackptr += sizeof(sljit_w);
1038 }
1039 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1040 tmp1empty = FALSE;
1041 tmp1next = FALSE;
1042 }
1043 else
1044 {
1045 if (!tmp2empty)
1046 {
1047 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1048 stackptr += sizeof(sljit_w);
1049 }
1050 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1051 tmp2empty = FALSE;
1052 tmp1next = TRUE;
1053 }
1054 }
1055 else
1056 {
1057 if (tmp1next)
1058 {
1059 SLJIT_ASSERT(!tmp1empty);
1060 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1061 tmp1empty = stackptr >= stacktop;
1062 if (!tmp1empty)
1063 {
1064 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1065 stackptr += sizeof(sljit_w);
1066 }
1067 tmp1next = FALSE;
1068 }
1069 else
1070 {
1071 SLJIT_ASSERT(!tmp2empty);
1072 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1073 tmp2empty = stackptr >= stacktop;
1074 if (!tmp2empty)
1075 {
1076 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1077 stackptr += sizeof(sljit_w);
1078 }
1079 tmp1next = TRUE;
1080 }
1081 }
1082 }
1083 }
1084
1085 if (save)
1086 {
1087 if (tmp1next)
1088 {
1089 if (!tmp1empty)
1090 {
1091 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1092 stackptr += sizeof(sljit_w);
1093 }
1094 if (!tmp2empty)
1095 {
1096 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1097 stackptr += sizeof(sljit_w);
1098 }
1099 }
1100 else
1101 {
1102 if (!tmp2empty)
1103 {
1104 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1105 stackptr += sizeof(sljit_w);
1106 }
1107 if (!tmp1empty)
1108 {
1109 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1110 stackptr += sizeof(sljit_w);
1111 }
1112 }
1113 }
1114 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1115 }
1116
1117 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1118 {
1119 return (value & (value - 1)) == 0;
1120 }
1121
1122 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1123 {
1124 while (list)
1125 {
1126 /* sljit_set_label is clever enough to do nothing
1127 if either the jump or the label is NULL */
1128 sljit_set_label(list->jump, label);
1129 list = list->next;
1130 }
1131 }
1132
1133 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1134 {
1135 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1136 if (list_item)
1137 {
1138 list_item->next = *list;
1139 list_item->jump = jump;
1140 *list = list_item;
1141 }
1142 }
1143
1144 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1145 {
1146 DEFINE_COMPILER;
1147 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1148
1149 if (list_item)
1150 {
1151 list_item->type = type;
1152 list_item->data = data;
1153 list_item->start = start;
1154 list_item->leave = LABEL();
1155 list_item->next = common->stubs;
1156 common->stubs = list_item;
1157 }
1158 }
1159
1160 static void flush_stubs(compiler_common *common)
1161 {
1162 DEFINE_COMPILER;
1163 stub_list* list_item = common->stubs;
1164
1165 while (list_item)
1166 {
1167 JUMPHERE(list_item->start);
1168 switch(list_item->type)
1169 {
1170 case stack_alloc:
1171 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1172 break;
1173 }
1174 JUMPTO(SLJIT_JUMP, list_item->leave);
1175 list_item = list_item->next;
1176 }
1177 common->stubs = NULL;
1178 }
1179
/* Emits code that subtracts one from CALL_COUNT and records a jump, taken
when the result reaches zero, on the common->calllimit list. */
1180 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1181 {
1182 DEFINE_COMPILER;
1183
/* SLJIT_SET_E is requested so that the JUMP(SLJIT_C_ZERO) below can test
the result of the subtraction. */
1184 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1185 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1186 }
1187
/* Emits code that advances STACK_TOP by `size` machine words (sljit_w units)
and registers a stack_alloc stub that is entered when the new STACK_TOP is
greater than STACK_LIMIT. */
1188 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1189 {
1190 /* May destroy all locals and registers except TMP2. */
1191 DEFINE_COMPILER;
1192
1193 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1194 #ifdef DESTROY_REGISTERS
/* Overwrites TMP1, TMP3, RETURN_ADDR, LOCALS0 and LOCALS1 with a dummy value.
NOTE(review): presumably a debugging aid that exposes callers relying on
values this function is allowed to destroy -- confirm. */
1195 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1196 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1197 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1198 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1199 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1200 #endif
/* The compare-and-jump is the entry of the stub; add_stub() also records the
current position as the point where execution resumes after the stub. */
1201 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1202 }
1203
/* Emits code that moves STACK_TOP back by `size` machine words (sljit_w
units). No limit check is emitted. */
1204 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1205 {
1206 DEFINE_COMPILER;
1207 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1208 }
1209
/* Emits code that stores one value, jit_arguments.begin minus one character
(IN_UCHARS(1)), into the first `length` OVECTOR slots. The jit_arguments
structure is addressed through SLJIT_SAVED_REG1. */
1210 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1211 {
1212 DEFINE_COMPILER;
1213 struct sljit_label *loop;
1214 int i;
1215 /* At this point we can freely use all temporary registers. */
1216 /* TMP1 returns with begin - 1. */
1217 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1218 if (length < 8)
1219 {
/* Short vectors: one store instruction is emitted per slot. */
1220 for (i = 0; i < length; i++)
1221 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1222 }
1223 else
1224 {
/* Long vectors: a run-time loop counted down in SLJIT_TEMPORARY_REG3. The
pointer in SLJIT_TEMPORARY_REG2 starts one word before OVECTOR_START and
every store addresses pointer + sizeof(sljit_w).
NOTE(review): SLJIT_MOVU presumably also writes the updated address back
to the base register, which is what advances the pointer -- confirm. */
1225 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START - sizeof(sljit_w));
1226 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1227 loop = LABEL();
1228 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1229 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1230 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1231 }
1232 }
1233
/* Emits the code that runs after a successful match: it converts the
pointers kept in the OVECTOR locals into offsets relative to
jit_arguments.begin, writes them as ints to jit_arguments.offsets, and
leaves the match return value in SLJIT_RETURN_REG. `topbracket` selects
how many capture pairs are inspected when the return value is computed. */
1234 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1235 {
1236 DEFINE_COMPILER;
1237 struct sljit_label *loop;
1238 struct sljit_jump *earlyexit;
1239
1240 /* At this point we can freely use all registers. */
/* The previous OVECTOR(1) value is kept in SLJIT_SAVED_REG3 before the slot is
overwritten with STR_PTR; the scan for the return value compares against it. */
1241 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1242 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1243
/* Register roles for the copy loop:
SLJIT_TEMPORARY_REG2 = offsetcount (loop counter),
SLJIT_TEMPORARY_REG3 = offsets - sizeof(int) (destination, one int early),
SLJIT_TEMPORARY_REG1 = begin,
SLJIT_SAVED_REG1 = address of the first OVECTOR word (source). */
1244 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1245 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1246 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1247 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1248 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START);
1249 /* Unlikely, but possible */
/* The copy loop is skipped entirely when offsetcount is zero. */
1250 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1251 loop = LABEL();
1252 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1253 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1254 /* Copy the integer value to the output buffer */
1255 #ifdef COMPILE_PCRE16
/* The pointer difference is halved to give an offset in 16 bit units. */
1256 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1257 #endif
/* NOTE(review): SLJIT_MOVU_SI presumably stores at pointer + sizeof(int) and
updates the pointer, which is why the destination starts one int early. */
1258 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1259 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1260 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1261 JUMPHERE(earlyexit);
1262
1263 /* Calculate the return value, which is the maximum ovector value. */
1264 if (topbracket > 1)
1265 {
/* Walks downwards from the pair of `topbracket`, two words at a time,
decrementing the candidate return value, until a word differs from the
value saved in SLJIT_SAVED_REG3. */
1266 OP2(SLJIT_ADD, SLJIT_TEMPORARY_REG1, 0, SLJIT_LOCALS_REG, 0, SLJIT_IMM, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1267 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1268
1269 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1270 loop = LABEL();
1271 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1272 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1273 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1274 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1275 }
1276 else
1277 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1278 }
1279
/* Emits the exit sequence for a partial match: the return value is set to
PCRE_ERROR_PARTIAL and, when at least two offsets are available, the first
two entries of jit_arguments.offsets are filled in before jumping to
`leave`. */
1280 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *leave)
1281 {
1282 DEFINE_COMPILER;
1283
/* SLJIT_SAVED_REG2 is overwritten below while STR_END is read in the same
instruction, hence the compile time check. */
1284 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1285
1286 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1287 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1288 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
/* With fewer than two offsets nothing is stored; only the return value is set. */
1289 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, leave);
1290
1291 /* Store match begin and end. */
1292 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1293 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
/* The start position comes from START_USED_PTR in hard partial mode and from
HIT_START otherwise. */
1294 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? START_USED_PTR : HIT_START);
/* offsets[1] = STR_END - begin (halved for 16 bit units). */
1295 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1296 #ifdef COMPILE_PCRE16
1297 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1298 #endif
1299 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1300
/* offsets[0] = start position - begin (halved for 16 bit units). */
1301 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1302 #ifdef COMPILE_PCRE16
1303 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1304 #endif
1305 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1306
1307 JUMPTO(SLJIT_JUMP, leave);
1308 }
1309
/* Emits code that lowers the START_USED_PTR local to STR_PTR when the stored
value is not below STR_PTR. Nothing is emitted unless the mode is one of
the two partial matching modes. */
1310 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1311 {
1312 /* May destroy TMP1. */
1313 DEFINE_COMPILER;
1314 struct sljit_jump *jump;
1315
1316 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1317 {
1318 /* The value of -1 must be kept for START_USED_PTR! */
/* Adding one turns -1 into 0, so the single compare below skips the store
both for the -1 marker and for values below STR_PTR. */
1319 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, SLJIT_IMM, 1);
1320 /* Jumps if START_USED_PTR < STR_PTR, or START_USED_PTR == -1. Although overwriting
1321 is not necessary if START_USED_PTR == STR_PTR, it does not hurt as well. */
1322 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1323 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0);
1324 JUMPHERE(jump);
1325 }
1326 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1327 {
/* Hard mode compares the stored value directly; no -1 special case is handled. */
1328 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0);
1329 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0);
1330 JUMPHERE(jump);
1331 }
1332 }
1333
1334 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1335 {
1336 /* Detects if the character has an othercase. */
1337 unsigned int c;
1338
1339 #ifdef SUPPORT_UTF
1340 if (common->utf)
1341 {
1342 GETCHAR(c, cc);
1343 if (c > 127)
1344 {
1345 #ifdef SUPPORT_UCP
1346 return c != UCD_OTHERCASE(c);
1347 #else
1348 return FALSE;
1349 #endif
1350 }
1351 #ifndef COMPILE_PCRE8
1352 return common->fcc[c] != c;
1353 #endif
1354 }
1355 else
1356 #endif
1357 c = *cc;
1358 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1359 }
1360
1361 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1362 {
1363 /* Returns with the othercase. */
1364 #ifdef SUPPORT_UTF
1365 if (common->utf && c > 127)
1366 {
1367 #ifdef SUPPORT_UCP
1368 return UCD_OTHERCASE(c);
1369 #else
1370 return c;
1371 #endif
1372 }
1373 #endif
1374 return TABLE_GET(c, common->fcc, c);
1375 }
1376
/* Computes how the character at cc differs from its other case. The caller
must pass a character that has an other case (asserted below). Returns 0
when the two differ in more than one bit; otherwise returns
(index << 8) | mask, where mask is the differing bit within one code unit
byte and index selects that byte.
NOTE(review): index looks like a byte offset from the start of the encoded
character -- confirm against the callers. */
1377 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1378 {
1379 /* Detects if the character and its othercase has only 1 bit difference. */
1380 unsigned int c, oc, bit;
1381 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1382 int n;
1383 #endif
1384
/* Fetch the character and its other case, using the same lookup rules as
char_othercase(). */
1385 #ifdef SUPPORT_UTF
1386 if (common->utf)
1387 {
1388 GETCHAR(c, cc);
1389 if (c <= 127)
1390 oc = common->fcc[c];
1391 else
1392 {
1393 #ifdef SUPPORT_UCP
1394 oc = UCD_OTHERCASE(c);
1395 #else
1396 oc = c;
1397 #endif
1398 }
1399 }
1400 else
1401 {
1402 c = *cc;
1403 oc = TABLE_GET(c, common->fcc, c);
1404 }
1405 #else
1406 c = *cc;
1407 oc = TABLE_GET(c, common->fcc, c);
1408 #endif
1409
1410 SLJIT_ASSERT(c != oc);
1411
1412 bit = c ^ oc;
1413 /* Optimized for English alphabet. */
1414 if (c <= 127 && bit == 0x20)
1415 return (0 << 8) | 0x20;
1416
1417 /* Since c != oc, they must have at least 1 bit difference. */
1418 if (!ispowerof2(bit))
1419 return 0;
1420
1421 #ifdef COMPILE_PCRE8
1422
1423 #ifdef SUPPORT_UTF
1424 if (common->utf && c > 127)
1425 {
/* Every trailing UTF-8 byte carries six payload bits. Starting from the last
byte (index GET_EXTRALEN), move towards the first byte until the differing
bit falls inside the current six-bit group. The loop terminates because
bit is non-zero. */
1426 n = GET_EXTRALEN(*cc);
1427 while ((bit & 0x3f) == 0)
1428 {
1429 n--;
1430 bit >>= 6;
1431 }
1432 return (n << 8) | bit;
1433 }
1434 #endif /* SUPPORT_UTF */
1435 return (0 << 8) | bit;
1436
1437 #else /* COMPILE_PCRE8 */
1438
1439 #ifdef COMPILE_PCRE16
1440 #ifdef SUPPORT_UTF
1441 if (common->utf && c > 65535)
1442 {
/* A bit at position 10 or above is shifted down by 10 and then reported with
index 0 or 1 by the common return below; a lower bit is reported here with
index 2 or 3. */
1443 if (bit >= (1 << 10))
1444 bit >>= 10;
1445 else
1446 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1447 }
1448 #endif /* SUPPORT_UTF */
/* Index 0 when the bit is in the low eight bits of the unit, index 1 when it
is in the high eight bits. */
1449 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1450 #endif /* COMPILE_PCRE16 */
1451
1452 #endif /* COMPILE_PCRE8 */
1453 }
1454
/* Emits the partial match check used when the end of the subject has been
reached. Nothing is emitted in JIT_COMPILE mode. `force` may only be TRUE
in a partial mode; in hard partial mode it removes the START_USED_PTR
guard so the partial match exit is taken unconditionally. */
1455 static void check_partial(compiler_common *common, BOOL force)
1456 {
1457 /* Checks whether a partial match has occurred. Does not modify registers. */
1458 DEFINE_COMPILER;
1459 struct sljit_jump *jump = NULL;
1460
1461 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1462
1463 if (common->mode == JIT_COMPILE)
1464 return;
1465
/* Guard: the action below is skipped when START_USED_PTR >= STR_PTR. Soft
mode always emits the guard, even when force is set. */
1466 if (!force || common->mode == JIT_PARTIAL_SOFT_COMPILE)
1467 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0);
1468
/* Soft mode only records the hit by storing -1 in HIT_START and continues;
hard mode jumps to the partial match exit. */
1469 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1470 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), HIT_START, SLJIT_IMM, -1);
1471 else
1472 {
/* Jump directly when the exit label already exists, otherwise queue the jump
on common->partialmatch. */
1473 if (common->partialmatchlabel != NULL)
1474 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1475 else
1476 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1477 }
1478
1479 if (jump != NULL)
1480 JUMPHERE(jump);
1481 }
1482
/* Emits an end-of-subject test and returns the jump that is taken when
STR_PTR has reached STR_END and matching should continue on the caller's
end-of-subject path. In the partial modes the emitted code additionally
records (soft) or immediately reports (hard) a partial match when
START_USED_PTR is below STR_PTR. The caller must bind the returned jump. */
1483 static struct sljit_jump *check_str_end(compiler_common *common)
1484 {
1485 /* Does not affect registers. Usually used in a tight spot. */
1486 DEFINE_COMPILER;
1487 struct sljit_jump *jump;
1488 struct sljit_jump *nohit;
1489 struct sljit_jump *return_value;
1490
1491 if (common->mode == JIT_COMPILE)
1492 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1493
/* Partial modes: everything below is skipped while STR_PTR < STR_END. */
1494 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
1495 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1496 {
/* Soft: store -1 in HIT_START unless START_USED_PTR >= STR_PTR, then always
leave through the returned (unconditional) jump. */
1497 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0);
1498 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), HIT_START, SLJIT_IMM, -1);
1499 JUMPHERE(nohit);
1500 return_value = JUMP(SLJIT_JUMP);
1501 }
1502 else
1503 {
/* Hard: the returned jump is taken when START_USED_PTR >= STR_PTR; otherwise
control goes to the partial match exit. */
1504 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0);
1505 if (common->partialmatchlabel != NULL)
1506 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1507 else
1508 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1509 }
1510 JUMPHERE(jump);
1511 return return_value;
1512 }
1513
/* Emits an end-of-subject test whose failure jumps are appended to
*fallbacks. In JIT_COMPILE mode this is a single compare of STR_PTR against
STR_END. In the partial modes a partial match is recorded (soft) or
reported (hard) first when START_USED_PTR is below STR_PTR. */
1514 static void fallback_at_str_end(compiler_common *common, jump_list **fallbacks)
1515 {
1516 DEFINE_COMPILER;
1517 struct sljit_jump *jump;
1518
1519 if (common->mode == JIT_COMPILE)
1520 {
1521 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
1522 return;
1523 }
1524
1525 /* Partial matching mode. */
/* Nothing below runs while STR_PTR < STR_END. */
1526 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
/* At the end of the subject with START_USED_PTR >= STR_PTR: plain fallback. */
1527 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), START_USED_PTR, STR_PTR, 0));
1528 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1529 {
/* Soft: store -1 in HIT_START, then take the fallback path as well. */
1530 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), HIT_START, SLJIT_IMM, -1);
1531 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
1532 }
1533 else
1534 {
/* Hard: go to the partial match exit, directly if its label is known. */
1535 if (common->partialmatchlabel != NULL)
1536 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
1537 else
1538 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
1539 }
1540 JUMPHERE(jump);
1541 }
1542
/* Emits code that loads the character at STR_PTR into TMP1 and advances
STR_PTR past it. In UTF mode, units that may start a multi-unit character
are decoded by the shared routine queued on common->utfreadchar. */
1543 static void read_char(compiler_common *common)
1544 {
1545 /* Reads the character into TMP1, updates STR_PTR.
1546 Does not check STR_END. TMP2 Destroyed. */
1547 DEFINE_COMPILER;
1548 #ifdef SUPPORT_UTF
1549 struct sljit_jump *jump;
1550 #endif
1551
1552 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1553 #ifdef SUPPORT_UTF
1554 if (common->utf)
1555 {
/* Units below 0xc0 (8 bit) or 0xd800 (16 bit) are complete characters and
skip the decoder call. */
1556 #ifdef COMPILE_PCRE8
1557 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1558 #else
1559 #ifdef COMPILE_PCRE16
1560 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1561 #endif
1562 #endif /* COMPILE_PCRE8 */
1563 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
1564 JUMPHERE(jump);
1565 }
1566 #endif
/* Step over the first unit; the decoder has already stepped over any extra
units it consumed. */
1567 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1568 }
1569
/* Emits code that loads the character at STR_PTR into TMP1 without moving
STR_PTR. In UTF mode the shared decoder queued on common->utfreadchar is
used for multi-unit characters and its STR_PTR advance is undone
afterwards. */
1570 static void peek_char(compiler_common *common)
1571 {
1572 /* Reads the character into TMP1, keeps STR_PTR.
1573 Does not check STR_END. TMP2 Destroyed. */
1574 DEFINE_COMPILER;
1575 #ifdef SUPPORT_UTF
1576 struct sljit_jump *jump;
1577 #endif
1578
1579 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1580 #ifdef SUPPORT_UTF
1581 if (common->utf)
1582 {
1583 #ifdef COMPILE_PCRE8
1584 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1585 #else
1586 #ifdef COMPILE_PCRE16
1587 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1588 #endif
1589 #endif /* COMPILE_PCRE8 */
1590 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
/* Undo the decoder's advance. NOTE(review): this relies on the decoder leaving
the consumed extra length in TMP2 on every return path -- verify for each
utfreadchar variant. */
1591 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1592 JUMPHERE(jump);
1593 }
1594 #endif
1595 }
1596
/* Emits code that advances STR_PTR over one character and loads its entry of
the common->ctypes table into TMP1. The character unit is held in TMP2
while the code runs. Where a range check is emitted, characters above 255
yield type 0. */
1597 static void read_char8_type(compiler_common *common)
1598 {
1599 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
1600 DEFINE_COMPILER;
1601 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
1602 struct sljit_jump *jump;
1603 #endif
1604
1605 #ifdef SUPPORT_UTF
1606 if (common->utf)
1607 {
1608 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1609 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1610 #ifdef COMPILE_PCRE8
1611 /* This can be an extra read in some situations, but hopefully
1612 it is needed in most cases. */
1613 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* Lead bytes (>= 0xc0) are handled by the routine queued on
common->utfreadtype8; STR_PTR already points past the lead byte. */
1614 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
1615 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
1616 JUMPHERE(jump);
1617 #else
1618 #ifdef COMPILE_PCRE16
/* Type 0 is preloaded and the table read is skipped for units above 255. */
1619 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1620 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1621 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1622 JUMPHERE(jump);
1623 /* Skip low surrogate if necessary. */
/* TMP2 becomes the result of the test (unit & 0xfc00) == 0xd800, is doubled,
and is added to STR_PTR, so one more 16 bit unit is skipped when the unit
read was a high surrogate. */
1624 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
1625 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
1626 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1627 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
1628 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
1629 #endif
1630 #endif /* COMPILE_PCRE8 */
1631 return;
1632 }
1633 #endif
/* Non-UTF path: one unit is one character. */
1634 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
1635 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1636 #ifdef COMPILE_PCRE16
1637 /* The ctypes array contains only 256 values. */
1638 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1639 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1640 #endif
1641 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1642 #ifdef COMPILE_PCRE16
1643 JUMPHERE(jump);
1644 #endif
1645 }
1646
/* Emits code that moves STR_PTR back to the start of the previous character.
Outside UTF mode this is a single subtraction of one unit. */
1647 static void skip_char_back(compiler_common *common)
1648 {
1649 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
1650 DEFINE_COMPILER;
1651 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1652 struct sljit_label *label;
1653
1654 if (common->utf)
1655 {
/* Step back one byte at a time while the byte just passed has the form
10xxxxxx, i.e. (byte & 0xc0) == 0x80. */
1656 label = LABEL();
1657 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1658 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1659 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
1660 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
1661 return;
1662 }
1663 #endif
1664 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1665 if (common->utf)
1666 {
1667 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
1668 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1669 /* Skip low surrogate if necessary. */
/* TMP1 becomes the result of the test (unit & 0xfc00) == 0xdc00, is doubled,
and is subtracted from STR_PTR, so one more 16 bit unit is skipped when the
unit just passed was a low surrogate. */
1670 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1671 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
1672 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1673 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1674 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1675 return;
1676 }
1677 #endif
1678 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1679 }
1680
/* Emits a newline test for the character in TMP1 and appends the resulting
jump to *fallbacks. The jump is taken when the character is a newline if
jumpiftrue is TRUE, and when it is not a newline otherwise. `nltype`
selects which newline convention is tested. */
1681 static void check_newlinechar(compiler_common *common, int nltype, jump_list **fallbacks, BOOL jumpiftrue)
1682 {
1683 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
1684 DEFINE_COMPILER;
1685
1686 if (nltype == NLTYPE_ANY)
1687 {
/* The test itself is done by the shared routine queued on common->anynewline.
NOTE(review): that routine presumably returns with the zero flag clear for
a newline -- confirm in its definition. */
1688 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
1689 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1690 }
1691 else if (nltype == NLTYPE_ANYCRLF)
1692 {
/* TMP2 = (TMP1 == CHAR_CR) | (TMP1 == CHAR_NL); the final OR sets the flags
tested by the jump. */
1693 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
1694 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
1695 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
1696 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
1697 add_jump(compiler, fallbacks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
1698 }
1699 else
1700 {
/* Fixed newline: only the single-unit form (value below 256) is handled here. */
1701 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
1702 add_jump(compiler, fallbacks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
1703 }
1704 }
1705
1706 #ifdef SUPPORT_UTF
1707
1708 #ifdef COMPILE_PCRE8
/* Emits the shared UTF-8 decoder routine (entered by fast call). On entry
TMP1 holds the lead byte and STR_PTR still points at it. On return TMP1
holds the decoded character, TMP2 holds the number of extra bytes, and
STR_PTR has been advanced by that number. Two, three and four byte
sequences are handled; no validity checks are emitted. */
1709 static void do_utfreadchar(compiler_common *common)
1710 {
1711 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
1712 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
1713 DEFINE_COMPILER;
1714 struct sljit_jump *jump;
1715
1716 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1717 /* Searching for the first zero. */
/* Bit 0x20 clear in the lead byte selects the two byte form. */
1718 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
1719 jump = JUMP(SLJIT_C_NOT_ZERO);
1720 /* Two byte sequence. */
/* Result = (lead & 0x1f) << 6 | (byte1 & 0x3f). */
1721 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1722 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1723 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
1724 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
1725 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1726 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1727 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1728 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1729 JUMPHERE(jump);
1730
/* Bit 0x10 clear in the lead byte selects the three byte form. */
1731 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
1732 jump = JUMP(SLJIT_C_NOT_ZERO);
1733 /* Three byte sequence. */
/* Result = (lead & 0x0f) << 12 | (byte1 & 0x3f) << 6 | (byte2 & 0x3f). */
1734 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1735 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
1736 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
1737 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1738 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1739 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1740 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1741 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
1742 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1743 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1744 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
1745 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1746 JUMPHERE(jump);
1747
1748 /* Four byte sequence. */
/* Result = (lead & 0x07) << 18 | (byte1 & 0x3f) << 12 | (byte2 & 0x3f) << 6
| (byte3 & 0x3f). Every remaining lead byte value is treated as this form. */
1749 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1750 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
1751 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
1752 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1753 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
1754 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1755 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
1756 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1757 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1758 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1759 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
1760 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
1761 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
1762 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1763 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
1764 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1765 }
1766
/* Emits the shared routine (entered by fast call) that produces the ctypes
entry of a multi-byte UTF-8 character. On entry TMP2 holds the lead byte
and STR_PTR already points at the byte after it (see read_char8_type). On
return TMP1 holds the type, or 0 for characters above 255, and STR_PTR
points past the whole character. */
1767 static void do_utfreadtype8(compiler_common *common)
1768 {
1769 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
1770 of the character (>= 0xc0). Return value in TMP1. */
1771 DEFINE_COMPILER;
1772 struct sljit_jump *jump;
1773 struct sljit_jump *compare;
1774
1775 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1776
/* Bit 0x20 set in the lead byte means a sequence longer than two bytes. */
1777 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
1778 jump = JUMP(SLJIT_C_NOT_ZERO);
1779 /* Two byte sequence. */
/* Only a two byte sequence can encode a value of 255 or less, so the
character is decoded into TMP2 and range checked before the table lookup. */
1780 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1781 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1782 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
1783 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
1784 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
1785 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
1786 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
1787 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
1788 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1789
1790 JUMPHERE(compare);
1791 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1792 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1793 JUMPHERE(jump);
1794
1795 /* We only have types for characters less than 256. */
/* Longer sequences are not decoded: the utf8_table4 entry for the lead byte
(indexed by lead - 0xc0) is added to STR_PTR and type 0 is returned. */
1796 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
1797 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1798 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
1799 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1800 }
1801
1802 #else /* COMPILE_PCRE8 */
1803
1804 #ifdef COMPILE_PCRE16
1805 static void do_utfreadchar(compiler_common *common)
1806 {
1807 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
1808 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
1809 DEFINE_COMPILER;
1810 struct sljit_jump *jump;
1811
1812 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
1813 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
1814 /* Do nothing, only return. */
1815 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1816
1817 JUMPHERE(jump);
1818 /* Combine two 16 bit characters. */
1819 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
1820 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1821 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
1822 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
1823 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
1824 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
1825 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
1826 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
1827 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1828 }
1829 #endif /* COMPILE_PCRE16 */
1830
1831 #endif /* COMPILE_PCRE8 */
1832
1833 #endif /* SUPPORT_UTF */
1834
1835 #ifdef SUPPORT_UCP
1836
1837 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift that split a character into a block number (high bits) and
an offset within the block (low seven bits). */
1838 #define UCD_BLOCK_MASK 127
1839 #define UCD_BLOCK_SHIFT 7
1840
/* Emits the shared routine (entered by fast call) that looks up the Unicode
record of the character in TMP1 through the two-stage ucd_stage1 /
ucd_stage2 tables. */
1841 static void do_getucd(compiler_common *common)
1842 {
1843 /* Search the UCD record for the character comes in TMP1.
1844 Returns chartype in TMP1 and UCD offset in TMP2. */
1845 DEFINE_COMPILER;
1846
/* The shift amounts used below depend on these sizes. */
1847 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
1848
1849 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
/* TMP2 = ucd_stage1[c >> 7], read as an unsigned byte. */
1850 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1851 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
/* TMP1 = (c & 127) + (TMP2 << 7): the index into ucd_stage2. */
1852 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
1853 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
1854 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1], read as an unsigned 16 bit value; the trailing 1
is the index shift (two bytes per entry). */
1855 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
1856 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = ucd_records[TMP2].chartype; the index shift of 3 matches the eight
byte record size asserted above. */
1857 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
1858 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
1859 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
1860 }
1861 #endif
1862
/* Emits the entry of the main matching loop and returns the label that the
caller jumps to in order to advance STR_PTR by one character before the
next match attempt. The first pass enters after the advance code, so the
initial position is tried as is. `firstline` additionally emits a scan that
stores the end of the first line in the FIRSTLINE_END local. `hascrorlf`
and `firstline` both disable the special stepping over a two-unit newline. */
1863 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
1864 {
1865 DEFINE_COMPILER;
1866 struct sljit_label *mainloop;
1867 struct sljit_label *newlinelabel = NULL;
1868 struct sljit_jump *start;
1869 struct sljit_jump *end = NULL;
1870 struct sljit_jump *nl = NULL;
1871 #ifdef SUPPORT_UTF
1872 struct sljit_jump *singlechar;
1873 #endif
1874 jump_list *newline = NULL;
1875 BOOL newlinecheck = FALSE;
1876 BOOL readuchar = FALSE;
1877
/* newlinecheck: when advancing, a two-unit newline sequence is stepped over
as a whole (see the newlinelabel code below). */
1878 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
1879 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
1880 newlinecheck = TRUE;
1881
1882 if (firstline)
1883 {
1884 /* Search for the end of the first line. */
/* STR_PTR is saved in LOCALS0 during the scan and restored afterwards.
FIRSTLINE_END defaults to STR_END when no newline is found. */
1885 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
1886 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_END, 0);
1887
1888 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
1889 {
/* Two-unit fixed newline: compare the previous and the current unit against
the high and low byte of common->newline; FIRSTLINE_END is set to the
position of the first unit. */
1890 mainloop = LABEL();
1891 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1892 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1893 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
1894 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
1895 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
1896 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
1897 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1898 }
1899 else
1900 {
/* Other newline types: read characters one by one; the position before each
read is stored so that it is already in FIRSTLINE_END when a newline is hit. */
1901 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1902 mainloop = LABEL();
1903 /* Continual stores do not cause a data dependency. */
1904 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1905 read_char(common);
1906 check_newlinechar(common, common->nltype, &newline, TRUE);
1907 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
1908 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END, STR_PTR, 0);
1909 set_jumps(newline, LABEL());
1910 }
1911
1912 JUMPHERE(end);
1913 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
1914 }
1915
/* The first entry jumps over the advance code emitted below. */
1916 start = JUMP(SLJIT_JUMP);
1917
1918 if (newlinecheck)
1919 {
/* Reached from the main loop when the current unit equals the high byte of
common->newline: step one unit and, unless the end was reached, one more
unit when the next unit equals the low byte of common->newline. */
1920 newlinelabel = LABEL();
1921 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1922 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1923 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1924 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
1925 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1926 #ifdef COMPILE_PCRE16
1927 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1928 #endif
1929 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1930 nl = JUMP(SLJIT_JUMP);
1931 }
1932
/* This is the label returned to the caller. */
1933 mainloop = LABEL();
1934
1935 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is only loaded when something below needs it: the UTF
length handling or the newline check. */
1936 #ifdef SUPPORT_UTF
1937 if (common->utf) readuchar = TRUE;
1938 #endif
1939 if (newlinecheck) readuchar = TRUE;
1940
1941 if (readuchar)
1942 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1943
1944 if (newlinecheck)
1945 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
1946
1947 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1948 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1949 if (common->utf)
1950 {
/* UTF-8: for lead bytes (>= 0xc0) add the utf8_table4 entry of the lead byte
to STR_PTR as well. */
1951 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
1952 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
1953 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1954 JUMPHERE(singlechar);
1955 }
1956 #endif
1957 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
1958 if (common->utf)
1959 {
/* UTF-16: skip one more unit when (unit & 0xfc00) == 0xd800, i.e. after a
high surrogate. */
1960 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
1961 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
1962 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
1963 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
1964 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
1965 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
1966 JUMPHERE(singlechar);
1967 }
1968 #endif
/* All paths meet here: the initial entry and both newline-skip exits. */
1969 JUMPHERE(start);
1970
1971 if (newlinecheck)
1972 {
1973 JUMPHERE(end);
1974 JUMPHERE(nl);
1975 }
1976
1977 return mainloop;
1978 }
1979
/* Emits a loop that advances STR_PTR until the unit at STR_PTR equals
`first_char` (or, when `caseless` is set, its other case) or STR_PTR
reaches STR_END. With `firstline` set, STR_END is temporarily replaced by
FIRSTLINE_END (the original is kept in the POSSESSIVE0 local) so the
search does not leave the first line. */
1980 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
1981 {
1982 DEFINE_COMPILER;
1983 struct sljit_label *start;
1984 struct sljit_jump *leave;
1985 struct sljit_jump *found;
1986 pcre_uchar oc, bit;
1987
1988 if (firstline)
1989 {
1990 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
1991 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
1992 }
1993
1994 start = LABEL();
1995 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
1996 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
1997
/* Determine the other case at compile time; oc stays equal to first_char
when the search is case sensitive. */
1998 oc = first_char;
1999 if (caseless)
2000 {
2001 oc = TABLE_GET(first_char, common->fcc, first_char);
2002 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2003 if (first_char > 127 && common->utf)
2004 oc = UCD_OTHERCASE(first_char);
2005 #endif
2006 }
2007 if (first_char == oc)
2008 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2009 else
2010 {
2011 bit = first_char ^ oc;
2012 if (ispowerof2(bit))
2013 {
/* The two cases differ in a single bit: force that bit on and compare once. */
2014 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2015 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2016 }
2017 else
2018 {
/* Otherwise test both values: TMP2 = (TMP1 == first_char) | (TMP1 == oc). */
2019 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2020 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2021 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2022 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2023 found = JUMP(SLJIT_C_NOT_ZERO);
2024 }
2025 }
2026
/* No match at this position: advance by one whole character and retry. */
2027 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2028 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2029 if (common->utf)
2030 {
/* UTF-8: lead bytes (>= 0xc0) also add their utf8_table4 entry to STR_PTR. */
2031 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2032 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2033 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2034 }
2035 #endif
2036 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2037 if (common->utf)
2038 {
/* UTF-16: skip one more unit after a high surrogate. */
2039 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2040 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2041 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2042 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2043 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2044 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2045 }
2046 #endif
2047 JUMPTO(SLJIT_JUMP, start);
2048 JUMPHERE(found);
2049 JUMPHERE(leave);
2050
/* Restore the real STR_END saved at the top. */
2051 if (firstline)
2052 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2053 }
2054
/* Emits code that moves STR_PTR forward to just after a newline. Nothing is
done when STR_PTR is at or before the "str" field of jit_arguments, and the
scan stops when STR_PTR reaches STR_END.

Arguments:
  common     compiler state; common->nltype and common->newline are read
  firstline  when TRUE the scan limit is the value stored at FIRSTLINE_END
             instead of the real STR_END (which is restored afterwards) */
2055 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2056 {
2057 DEFINE_COMPILER;
2058 struct sljit_label *loop;
2059 struct sljit_jump *lastchar;
2060 struct sljit_jump *firstchar;
2061 struct sljit_jump *leave;
2062 struct sljit_jump *foundcr = NULL;
2063 struct sljit_jump *notfoundnl;
2064 jump_list *newline = NULL;
2065
/* Park the real STR_END in the POSSESSIVE0 local and scan up to FIRSTLINE_END only. */
2066 if (firstline)
2067 {
2068 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2069 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
2070 }
2071
/* Fixed newline whose value does not fit in 8 bits: it is treated below as a
two character sequence, (newline >> 8) & 0xff followed by newline & 0xff. */
2072 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2073 {
2074 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2075 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2076 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2077 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2078 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2079
/* Step STR_PTR back by one character, but only when it is at least two
characters past "begin" (the comparison result is used as the step size,
assuming COND_VALUE yields 0/1; it is doubled in 16 bit mode). */
2080 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2081 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2082 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2083 #ifdef COMPILE_PCRE16
2084 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2085 #endif
2086 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2087
/* Advance one character at a time until the two characters immediately
before STR_PTR are the newline pair, or STR_END is reached. */
2088 loop = LABEL();
2089 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2090 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2091 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2092 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2093 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2094 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2095
2096 JUMPHERE(leave);
2097 JUMPHERE(firstchar);
2098 JUMPHERE(lastchar);
2099
2100 if (firstline)
2101 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2102 return;
2103 }
2104
/* All other newline types. Nothing to do when STR_PTR is at or before "str";
otherwise go back one character so the loop re-reads the previous one. */
2105 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2106 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2107 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2108 skip_char_back(common);
2109
/* Read characters until one is recognised as a newline or STR_END is reached.
NOTE(review): the jumps collected in "newline" are bound to the loop head, so
with the FALSE argument check_newlinechar presumably jumps when the character
is NOT a newline - confirm against its definition. */
2110 loop = LABEL();
2111 read_char(common);
2112 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2113 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2114 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2115 check_newlinechar(common, common->nltype, &newline, FALSE);
2116 set_jumps(newline, loop);
2117
2118 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2119 {
2120 leave = JUMP(SLJIT_JUMP);
/* A CR was read: when the next character exists and is NL, step over it too
so that CR NL counts as a single newline. */
2121 JUMPHERE(foundcr);
2122 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2123 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2124 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2125 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2126 #ifdef COMPILE_PCRE16
2127 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2128 #endif
2129 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2130 JUMPHERE(notfoundnl);
2131 JUMPHERE(leave);
2132 }
2133 JUMPHERE(lastchar);
2134 JUMPHERE(firstchar);
2135
/* Put the real STR_END back. */
2136 if (firstline)
2137 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2138 }
2139
/* Emits a scan loop that advances STR_PTR until the character it points at
has its bit set in the bitmap located at address start_bits, or until STR_PTR
reaches STR_END.

Arguments:
  common      compiler state; common->utf is read here
  start_bits  address of the bitmap; bit (c & 7) of byte (c >> 3) is tested
  firstline   when TRUE the scan limit is the value stored at FIRSTLINE_END
              instead of the real STR_END (which is restored afterwards) */
2140 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2141 {
2142 DEFINE_COMPILER;
2143 struct sljit_label *start;
2144 struct sljit_jump *leave;
2145 struct sljit_jump *found;
2146 #ifndef COMPILE_PCRE8
2147 struct sljit_jump *jump;
2148 #endif
2149
/* Park the real STR_END in the POSSESSIVE0 local and scan up to FIRSTLINE_END only. */
2150 if (firstline)
2151 {
2152 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2153 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), FIRSTLINE_END);
2154 }
2155
2156 start = LABEL();
2157 leave = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2158 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* In UTF mode keep the unmodified code unit in TMP3: TMP1 is overwritten by
the bitmap lookup but is needed again for the character length below. */
2159 #ifdef SUPPORT_UTF
2160 if (common->utf)
2161 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2162 #endif
/* Wide code units: every value that is not below 255 is looked up as 255. */
2163 #ifndef COMPILE_PCRE8
2164 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2165 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2166 JUMPHERE(jump);
2167 #endif
/* Bitmap test: TMP2 = 1 << (c & 7), TMP1 = byte at start_bits + (c >> 3). */
2168 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2169 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2170 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2171 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2172 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2173 found = JUMP(SLJIT_C_NOT_ZERO);
2174
/* Bit not set: restore the code unit (UTF only) and step over the character. */
2175 #ifdef SUPPORT_UTF
2176 if (common->utf)
2177 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2178 #endif
2179 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2180 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2181 if (common->utf)
2182 {
/* Bytes below 0xc0 need no extra skip. Otherwise add the utf8_table4 entry
indexed by (byte - 0xc0) to STR_PTR. */
2183 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2184 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2185 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2186 }
2187 #endif
2188 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2189 if (common->utf)
2190 {
/* Units below 0xd800 need no extra skip. When (unit & 0xfc00) == 0xd800 one
more 16-bit unit is skipped (assuming COND_VALUE yields 0/1). */
2191 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2192 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2193 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2194 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2195 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2196 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2197 }
2198 #endif
2199 JUMPTO(SLJIT_JUMP, start);
/* Both exits (bit found, end of subject) continue here. */
2200 JUMPHERE(found);
2201 JUMPHERE(leave);
2202
/* Put the real STR_END back. */
2203 if (firstline)
2204 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2205 }
2206
/* Emits a forward search for req_char (or, when caseless is set, its
other-case form) between STR_PTR and STR_END. The position where it was found
is stored in the REQ_CHAR_PTR local. STR_PTR itself is not modified.

Arguments:
  common        compiler state; common->fcc and common->utf are read here
  req_char      the character that must occur
  caseless      when TRUE the other case of req_char is accepted as well
  has_firstchar when TRUE the search starts one character after STR_PTR

Returns the jump that is taken when STR_END is reached without finding the
character; it is left unbound for the caller. */
2207 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2208 {
2209 DEFINE_COMPILER;
2210 struct sljit_label *loop;
2211 struct sljit_jump *toolong;
2212 struct sljit_jump *alreadyfound;
2213 struct sljit_jump *found;
2214 struct sljit_jump *foundoc = NULL;
2215 struct sljit_jump *notfound;
2216 pcre_uchar oc, bit;
2217
/* Two ways to skip the search entirely:
   toolong       STR_PTR + REQ_BYTE_MAX is still before STR_END
   alreadyfound  the position saved in REQ_CHAR_PTR is beyond STR_PTR */
2218 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR);
2219 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2220 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2221 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2222
/* TMP1 is the search cursor. */
2223 if (has_firstchar)
2224 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2225 else
2226 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2227
2228 loop = LABEL();
2229 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2230
2231 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* The other-case value (oc) is determined here, while generating code. It
stays equal to req_char when the match is caseful. */
2232 oc = req_char;
2233 if (caseless)
2234 {
2235 oc = TABLE_GET(req_char, common->fcc, req_char);
2236 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2237 if (req_char > 127 && common->utf)
2238 oc = UCD_OTHERCASE(req_char);
2239 #endif
2240 }
2241 if (req_char == oc)
2242 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2243 else
2244 {
2245 bit = req_char ^ oc;
2246 if (ispowerof2(bit))
2247 {
/* The two values differ in one bit: force it on and compare once. */
2248 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2249 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2250 }
2251 else
2252 {
/* Unrelated values: two separate compares, each with its own jump. */
2253 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2254 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2255 }
2256 }
/* The cursor moves one code unit at a time, in UTF mode as well. */
2257 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2258 JUMPTO(SLJIT_JUMP, loop);
2259
/* Found: remember the position so that later calls can take the
alreadyfound shortcut. */
2260 JUMPHERE(found);
2261 if (foundoc)
2262 JUMPHERE(foundoc);
2263 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), REQ_CHAR_PTR, TMP1, 0);
2264 JUMPHERE(alreadyfound);
2265 JUMPHERE(toolong);
2266 return notfound;
2267 }
2268
/* Emits a fast-call routine (sljit_emit_fast_enter / sljit_emit_fast_return)
that walks the records starting at STACK_TOP and undoes them until a
frame_end marker is found. TMP1 is the read cursor and TMP2 holds the first
word of the current record. STACK_TOP itself is not written here. */
2269 static void do_revertframes(compiler_common *common)
2270 {
2271 DEFINE_COMPILER;
2272 struct sljit_jump *jump;
2273 struct sljit_label *mainloop;
2274
2275 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2276 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2277
2278 /* Drop frames until we reach STACK_TOP. */
2279 mainloop = LABEL();
2280 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Values that are (signed) <= frame_end are commands and are decoded below.
Any other value is used as an offset from SLJIT_LOCALS_REG: the next two
words of the record are copied to that location and the cursor advances by
three words. */
2281 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2282 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_LOCALS_REG, 0);
2283 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2284 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2285 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2286 JUMPTO(SLJIT_JUMP, mainloop);
2287
2288 JUMPHERE(jump);
2289 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2290 /* End of dropping frames. */
2291 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2292
2293 JUMPHERE(jump);
2294 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
2295 /* Set string begin. */
/* The second word of this two word record is written to OVECTOR(0). */
2296 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2297 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2298 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
2299 JUMPTO(SLJIT_JUMP, mainloop);
2300
2301 JUMPHERE(jump);
2302 /* Unknown command. */
/* It is skipped as a two word record and the walk continues. */
2303 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
2304 JUMPTO(SLJIT_JUMP, mainloop);
2305 }
2306
/* Emits a fast-call routine that compares the "word character" status of the
character before STR_PTR with that of the character at STR_PTR.

The status of the previous character is kept in the LOCALS1 local (0 when
STR_PTR is at or before the "begin" field of jit_arguments), the status of
the current character in TMP2 (0 when check_str_end takes its jump). The
routine finishes by XOR-ing the two with SLJIT_SET_E, so the outcome is left
in the flags for the caller. The return address is held in the LOCALS0 local.
TMP1 and TMP2 are overwritten. */
2307 static void check_wordboundary(compiler_common *common)
2308 {
2309 DEFINE_COMPILER;
2310 struct sljit_jump *skipread;
2311 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
2312 struct sljit_jump *jump;
2313 #endif
2314
/* The table lookups below shift by 4 and rely on ctype_word being bit 4. */
2315 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
2316
2317 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, 1, 5, 5, common->localsize);
2318 /* Get type of the previous char, and put it to LOCALS1. */
2319 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2320 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2321 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
2322 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again; read_char is expected to leave
the character in TMP1 and STR_PTR where it was before skip_char_back
(both helpers are defined outside this section). */
2323 skip_char_back(common);
2324 check_start_used_ptr(common);
2325 read_char(common);
2326
2327 /* Testing char type. */
2328 #ifdef SUPPORT_UCP
2329 if (common->use_ucp)
2330 {
/* TMP2 starts as 1 and is kept when the character is an underscore.
Otherwise the getucd routine is called and TMP2 becomes the OR of two
unsigned range tests on its result: ucp_Ll..ucp_Lu and ucp_Nd..ucp_No. */
2331 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2332 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2333 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2334 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2335 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2336 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2337 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2338 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2339 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2340 JUMPHERE(jump);
2341 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
2342 }
2343 else
2344 #endif
2345 {
/* Table based test: characters above 255 skip the lookup and keep the 0
already stored in LOCALS1; otherwise bit 4 of the common->ctypes entry is
extracted. */
2346 #ifndef COMPILE_PCRE8
2347 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2348 #elif defined SUPPORT_UTF
2349 /* Here LOCALS1 has already been zeroed. */
2350 jump = NULL;
2351 if (common->utf)
2352 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2353 #endif /* COMPILE_PCRE8 */
2354 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
2355 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
2356 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2357 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2358 #ifndef COMPILE_PCRE8
2359 JUMPHERE(jump);
2360 #elif defined SUPPORT_UTF
2361 if (jump != NULL)
2362 JUMPHERE(jump);
2363 #endif /* COMPILE_PCRE8 */
2364 }
2365 JUMPHERE(skipread);
2366
/* Now the character at STR_PTR. TMP2 = 0 is the result used when
check_str_end takes its jump. */
2367 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2368 skipread = check_str_end(common);
2369 peek_char(common);
2370
2371 /* Testing char type. This is a code duplication. */
2372 #ifdef SUPPORT_UCP
2373 if (common->use_ucp)
2374 {
/* Same test as above, but the result stays in TMP2. */
2375 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
2376 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
2377 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2378 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
2379 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
2380 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2381 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
2382 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
2383 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2384 JUMPHERE(jump);
2385 }
2386 else
2387 #endif
2388 {
2389 #ifndef COMPILE_PCRE8
2390 /* TMP2 may be destroyed by peek_char. */
2391 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2392 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2393 #elif defined SUPPORT_UTF
2394 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2395 jump = NULL;
2396 if (common->utf)
2397 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2398 #endif
2399 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
2400 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
2401 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2402 #ifndef COMPILE_PCRE8
2403 JUMPHERE(jump);
2404 #elif defined SUPPORT_UTF
2405 if (jump != NULL)
2406 JUMPHERE(jump);
2407 #endif /* COMPILE_PCRE8 */
2408 }
2409 JUMPHERE(skipread);
2410
/* The XOR result is zero when both sides have the same status; only the
flags are kept (destination is SLJIT_UNUSED). */
2411 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2412 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2413 }
2414
/* Emits a fast-call routine that tests the character in TMP1 against the
newline code points 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in 16 bit mode
or when common->utf is set. */
2415 static void check_anynewline(compiler_common *common)
2416 {
2417 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* TMP1 is modified as well (rebased by 0x0a, low bit possibly set). The
result is accumulated in TMP2 and the final OR is emitted with SLJIT_SET_E,
so it is also available in the flags on return. */
2418 DEFINE_COMPILER;
2419
2420 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2421
/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range test. */
2422 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2423 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2424 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2425 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2426 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2427 #ifdef COMPILE_PCRE8
2428 if (common->utf)
2429 {
2430 #endif
/* Record the 0x85 result, then set the low bit so that 0x2028 and 0x2029
are covered by a single equality test. */
2431 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2432 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2433 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2434 #ifdef COMPILE_PCRE8
2435 }
2436 #endif
2437 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the last comparison emitted above (0x85, or 0x2028/0x2029). */
2438 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2439 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2440 }
2441
/* Emits a fast-call routine that tests the character in TMP1 against the
horizontal space code points 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
0x2000-0x200a, 0x202f, 0x205f and 0x3000 in 16 bit mode or when common->utf
is set. */
2442 static void check_hspace(compiler_common *common)
2443 {
2444 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
/* In the wide character path TMP1 is modified as well (rebased by 0x2000).
The result is accumulated in TMP2 and the final OR is emitted with
SLJIT_SET_E, so it is also available in the flags on return. */
2445 DEFINE_COMPILER;
2446
2447 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2448
2449 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
2450 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2451 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2452 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2453 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
2454 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2455 #ifdef COMPILE_PCRE8
2456 if (common->utf)
2457 {
2458 #endif
2459 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2460 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
2461 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2462 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
2463 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase to 0x2000: one unsigned range test covers 0x2000..0x200a and the
remaining constants are written relative to 0x2000. */
2464 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
2465 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
2466 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
2467 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
2468 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2469 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
2470 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2471 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
2472 #ifdef COMPILE_PCRE8
2473 }
2474 #endif
2475 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the last comparison emitted above (0xa0 or 0x3000). */
2476 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2477
2478 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2479 }
2480
/* Emits a fast-call routine that tests the character in TMP1 against the
vertical space code points 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in
16 bit mode or when common->utf is set. */
2481 static void check_vspace(compiler_common *common)
2482 {
2483 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
/* TMP1 is modified as well (rebased by 0x0a, low bit possibly set). The
result is accumulated in TMP2 and the final OR is emitted with SLJIT_SET_E,
so it is also available in the flags on return. */
2484 DEFINE_COMPILER;
2485
2486 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2487
/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range test. */
2488 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
2489 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
2490 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
2491 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
2492 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2493 #ifdef COMPILE_PCRE8
2494 if (common->utf)
2495 {
2496 #endif
/* Record the 0x85 result, then set the low bit so that 0x2028 and 0x2029
are covered by a single equality test. */
2497 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2498 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
2499 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
2500 #ifdef COMPILE_PCRE8
2501 }
2502 #endif
2503 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the last comparison emitted above (0x85, or 0x2028/0x2029). */
2504 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2505
2506 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2507 }
2508
2509 #define CHAR1 STR_END
2510 #define CHAR2 STACK_TOP
2511
/* Emits a fast-call routine that compares two character sequences code unit
by code unit. On entry TMP1 addresses one sequence and TMP2 holds the length
as a byte count (it is decremented by IN_UCHARS(1) per unit and must not be
zero); STR_PTR is first moved back by TMP2 and then addresses the other
sequence.

CHAR1 and CHAR2 are aliases of STR_END and STACK_TOP; their values are saved
in TMP3 and the LOCALS0 local and restored before returning. The loop ends at
the first differing unit or when TMP2 reaches zero.
NOTE(review): callers presumably test the flags left by the last compare or
subtract - confirm at the call sites. */
2512 static void do_casefulcmp(compiler_common *common)
2513 {
2514 DEFINE_COMPILER;
2515 struct sljit_jump *jump;
2516 struct sljit_label *label;
2517
2518 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2519 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2520 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
2521 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are biased back by one unit because the loads below use the
MOVU_UCHAR form with an IN_UCHARS(1) offset (presumably a load that also
advances the base register - see the macro definition). */
2522 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2523 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2524
2525 label = LABEL();
2526 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2527 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2528 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2529 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2530 JUMPTO(SLJIT_C_NOT_ZERO, label);
2531
2532 JUMPHERE(jump);
/* Undo the one unit bias on STR_PTR and restore the borrowed registers. */
2533 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2534 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
2535 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2536 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2537 }
2538
2539 #define LCC_TABLE STACK_LIMIT
2540
/* Emits a fast-call routine that compares two character sequences code unit
by code unit after mapping each unit through the common->lcc table. On entry
TMP1 addresses one sequence and TMP2 holds the length as a byte count (it is
decremented by IN_UCHARS(1) per unit and must not be zero); STR_PTR is first
moved back by TMP2 and then addresses the other sequence.

LCC_TABLE, CHAR1 and CHAR2 are aliases of STACK_LIMIT, STR_END and STACK_TOP;
their values are saved in TMP3, LOCALS0 and LOCALS1 and restored before
returning. When COMPILE_PCRE8 is not defined, units above 255 are compared
unmapped. */
2541 static void do_caselesscmp(compiler_common *common)
2542 {
2543 DEFINE_COMPILER;
2544 struct sljit_jump *jump;
2545 struct sljit_label *label;
2546
2547 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0, 1, 5, 5, common->localsize);
2548 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2549
/* Save the three borrowed registers, then load the table address. */
2550 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
2551 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
2552 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
2553 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are biased back by one unit because the loads below use the
MOVU_UCHAR form with an IN_UCHARS(1) offset (presumably a load that also
advances the base register - see the macro definition). */
2554 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2555 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2556
2557 label = LABEL();
2558 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
2559 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Map each unit through the table; the guards skip the lookup for units
above 255 so the table is never indexed out of its 0..255 range. */
2560 #ifndef COMPILE_PCRE8
2561 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
2562 #endif
2563 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
2564 #ifndef COMPILE_PCRE8
2565 JUMPHERE(jump);
2566 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
2567 #endif
2568 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
2569 #ifndef COMPILE_PCRE8
2570 JUMPHERE(jump);
2571 #endif
2572 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
2573 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2574 JUMPTO(SLJIT_C_NOT_ZERO, label);
2575
2576 JUMPHERE(jump);
/* Undo the one unit bias on STR_PTR and restore the borrowed registers. */
2577 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2578 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
2579 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2580 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
2581 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2582 }
2583
2584 #undef LCC_TABLE
2585 #undef CHAR1
2586 #undef CHAR2
2587
2588 #if defined SUPPORT_UTF && defined SUPPORT_UCP
2589
2590 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
2591 {
2592 /* This function would be ineffective to do in JIT level. */
2593 int c1, c2;
2594 const pcre_uchar *src2 = args->ptr;
2595 const pcre_uchar *end2 = args->end;
2596
2597 while (src1 < end1)
2598 {
2599 if (src2 >= end2)
2600 return (pcre_uchar*)1;
2601 GETCHARINC(c1, src1);
2602 GETCHARINC(c2, src2);
2603 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
2604 }
2605 return src2;
2606 }
2607
2608 #endif /* SUPPORT_UTF && SUPPORT_UCP */
2609
/* Emits the comparison of one pattern character at cc (every code unit of
it when common->utf is set and the character has extra units) with the
subject text at STR_PTR - context->length.

Arguments:
  common     compiler state
  caseless   when TRUE and the character has an other case, the code unit
             holding the differing bit is compared with that bit forced on
  cc         the pattern character
  context    running state shared by consecutive calls: remaining length in
             bytes (decremented here), the register that receives the next
             load (sourcereg, -1 before the first call) and, when unaligned
             reads are available, the units collected so far
  fallbacks  list that receives the "not equal" jumps

Returns cc advanced past the code units that were processed. */
2610 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
2611 compare_context* context, jump_list **fallbacks)
2612 {
2613 DEFINE_COMPILER;
2614 unsigned int othercasebit = 0;
2615 pcre_uchar *othercasechar = NULL;
2616 #ifdef SUPPORT_UTF
2617 int utflength;
2618 #endif
2619
2620 if (caseless && char_has_othercase(common, cc))
2621 {
2622 othercasebit = char_get_othercase_bit(common, cc);
2623 SLJIT_ASSERT(othercasebit);
2624 /* Extracting bit difference info. */
/* The value is unpacked into the code unit that differs (othercasechar) and
the bit mask to apply to that unit (othercasebit). */
2625 #ifdef COMPILE_PCRE8
2626 othercasechar = cc + (othercasebit >> 8);
2627 othercasebit &= 0xff;
2628 #else
2629 #ifdef COMPILE_PCRE16
2630 othercasechar = cc + (othercasebit >> 9);
2631 if ((othercasebit & 0x100) != 0)
2632 othercasebit = (othercasebit & 0xff) << 8;
2633 else
2634 othercasebit &= 0xff;
2635 #endif
2636 #endif
2637 }
2638
/* First call for this context: load the first chunk into TMP1 (as wide as
the remaining length and unaligned support allow) and make TMP2 the target
of the next load. */
2639 if (context->sourcereg == -1)
2640 {
2641 #ifdef COMPILE_PCRE8
2642 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2643 if (context->length >= 4)
2644 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2645 else if (context->length >= 2)
2646 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2647 else
2648 #endif
2649 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2650 #else
2651 #ifdef COMPILE_PCRE16
2652 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2653 if (context->length >= 4)
2654 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2655 else
2656 #endif
2657 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
2658 #endif
2659 #endif /* COMPILE_PCRE8 */
2660 context->sourcereg = TMP2;
2661 }
2662
/* One iteration per code unit of the character. Without SUPPORT_UTF the
do/while is compiled out and the body runs exactly once. */
2663 #ifdef SUPPORT_UTF
2664 utflength = 1;
2665 if (common->utf && HAS_EXTRALEN(*cc))
2666 utflength += GET_EXTRALEN(*cc);
2667
2668 do
2669 {
2670 #endif
2671
2672 context->length -= IN_UCHARS(1);
2673 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2674
2675 /* Unaligned read is supported. */
/* Collect the expected unit (c) and its case mask (oc) in the context. */
2676 if (othercasebit != 0 && othercasechar == cc)
2677 {
2678 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
2679 context->oc.asuchars[context->ucharptr] = othercasebit;
2680 }
2681 else
2682 {
2683 context->c.asuchars[context->ucharptr] = *cc;
2684 context->oc.asuchars[context->ucharptr] = 0;
2685 }
2686 context->ucharptr++;
2687
/* Emit a compare once the collected units match one of the load widths used
above (4 or 2 bytes, or a single byte in 8 bit mode), or when nothing is
left to collect. */
2688 #ifdef COMPILE_PCRE8
2689 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
2690 #else
2691 if (context->ucharptr >= 2 || context->length == 0)
2692 #endif
2693 {
/* Start loading the following chunk into the free register before the
current one is compared, then swap so that sourcereg names the register
holding the chunk to compare. */
2694 if (context->length >= 4)
2695 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2696 #ifdef COMPILE_PCRE8
2697 else if (context->length >= 2)
2698 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2699 else if (context->length >= 1)
2700 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2701 #else
2702 else if (context->length >= 2)
2703 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2704 #endif
2705 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2706
/* The case labels are unit counts: 4 / sizeof(pcre_uchar) units form a
4 byte chunk, 2 / sizeof(pcre_uchar) units a 2 byte chunk. The case mask is
OR-ed into the loaded value only when it is non-zero. */
2707 switch(context->ucharptr)
2708 {
2709 case 4 / sizeof(pcre_uchar):
2710 if (context->oc.asint != 0)
2711 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
2712 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
2713 break;
2714
2715 case 2 / sizeof(pcre_uchar):
2716 if (context->oc.asushort != 0)
2717 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
2718 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
2719 break;
2720
2721 #ifdef COMPILE_PCRE8
2722 case 1:
2723 if (context->oc.asbyte != 0)
2724 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
2725 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
2726 break;
2727 #endif
2728
2729 default:
2730 SLJIT_ASSERT_STOP();
2731 break;
2732 }
2733 context->ucharptr = 0;
2734 }
2735
2736 #else
2737
2738 /* Unaligned read is unsupported. */
/* One load and one compare per code unit: the next unit is loaded into the
free register first, then the registers are swapped and the current unit is
compared. */
2739 #ifdef COMPILE_PCRE8
2740 if (context->length > 0)
2741 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2742 #else
2743 if (context->length > 0)
2744 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
2745 #endif
2746 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
2747
2748 if (othercasebit != 0 && othercasechar == cc)
2749 {
2750 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
2751 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
2752 }
2753 else
2754 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
2755
2756 #endif
2757
2758 cc++;
2759 #ifdef SUPPORT_UTF
2760 utflength--;
2761 }
2762 while (utflength > 0);
2763 #endif
2764
2765 return cc;
2766 }
2767
2768 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2769
/* Rebases the value held in typereg: emits the SUB or ADD that moves the
current bias (typeoffset) to "value" and then records the new bias. Relies
on the local variables typereg and typeoffset of the function that uses it.
The expansion is an if statement followed by an assignment, so it is not a
single statement (do not use it as the unbraced body of an if/else), and
"value" is evaluated several times. */
2770 #define SET_TYPE_OFFSET(value) \
2771 if ((value) != typeoffset) \
2772 { \
2773 if ((value) > typeoffset) \
2774 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
2775 else \
2776 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
2777 } \
2778 typeoffset = (value);
2779
/* Rebases the character held in TMP1: emits the SUB or ADD that moves the
current bias (charoffset) to "value" and then records the new bias. Relies
on the local variable charoffset of the function that uses it. The expansion
is an if statement followed by an assignment, so it is not a single
statement (do not use it as the unbraced body of an if/else), and "value" is
evaluated several times. */
2780 #define SET_CHAR_OFFSET(value) \
2781 if ((value) != charoffset) \
2782 { \
2783 if ((value) > charoffset) \
2784 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
2785 else \
2786 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
2787 } \
2788 charoffset = (value);
2789
2790 static void compile_xclass_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
2791 {
2792 DEFINE_COMPILER;
2793 jump_list *found = NULL;
2794 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : fallbacks;
2795 unsigned int c;
2796 int compares;
2797 struct sljit_jump *jump = NULL;
2798 pcre_uchar *ccbegin;
2799 #ifdef SUPPORT_UCP
2800 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
2801 BOOL charsaved = FALSE;
2802 int typereg = TMP1, scriptreg = TMP1;
2803 unsigned int typeoffset;
2804 #endif
2805 int invertcmp, numberofcmps;
2806 unsigned int charoffset;
2807
2808 /* Although SUPPORT_UTF must be defined, we are not necessarily in UTF mode. */
2809 fallback_at_str_end(common, fallbacks);
2810 read_char(common);
2811
2812 if ((*cc++ & XCL_MAP) != 0)
2813 {
2814 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2815 #ifndef COMPILE_PCRE8
2816 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2817 #elif defined SUPPORT_UTF
2818 if (common->utf)
2819 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
2820 #endif
2821
2822 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2823 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2824 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
2825 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2826 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2827 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
2828
2829 #ifndef COMPILE_PCRE8
2830 JUMPHERE(jump);
2831 #elif defined SUPPORT_UTF
2832 if (common->utf)
2833 JUMPHERE(jump);
2834 #endif
2835 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2836 #ifdef SUPPORT_UCP
2837 charsaved = TRUE;
2838 #endif
2839 cc += 32 / sizeof(pcre_uchar);
2840 }
2841
2842 /* Scanning the necessary info. */
2843 ccbegin = cc;
2844 compares = 0;
2845 while (*cc != XCL_END)
2846 {
2847 compares++;
2848 if (*cc == XCL_SINGLE)
2849 {
2850 cc += 2;
2851 #ifdef SUPPORT_UTF
2852 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2853 #endif
2854 #ifdef SUPPORT_UCP
2855 needschar = TRUE;
2856 #endif
2857 }
2858 else if (*cc == XCL_RANGE)
2859 {
2860 cc += 2;
2861 #ifdef SUPPORT_UTF
2862 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2863 #endif
2864 cc++;
2865 #ifdef SUPPORT_UTF
2866 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2867 #endif
2868 #ifdef SUPPORT_UCP
2869 needschar = TRUE;
2870 #endif
2871 }
2872 #ifdef SUPPORT_UCP
2873 else
2874 {
2875 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
2876 cc++;
2877 switch(*cc)
2878 {
2879 case PT_ANY:
2880 break;
2881
2882 case PT_LAMP:
2883 case PT_GC:
2884 case PT_PC:
2885 case PT_ALNUM:
2886 needstype = TRUE;
2887 break;
2888
2889 case PT_SC:
2890 needsscript = TRUE;
2891 break;
2892
2893 case PT_SPACE:
2894 case PT_PXSPACE:
2895 case PT_WORD:
2896 needstype = TRUE;
2897 needschar = TRUE;
2898 break;
2899
2900 default:
2901 SLJIT_ASSERT_STOP();
2902 break;
2903 }
2904 cc += 2;
2905 }
2906 #endif
2907 }
2908
2909 #ifdef SUPPORT_UCP
2910 /* Simple register allocation. TMP1 is preferred if possible. */
2911 if (needstype || needsscript)
2912 {
2913 if (needschar && !charsaved)
2914 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2915 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
2916 if (needschar)
2917 {
2918 if (needstype)
2919 {
2920 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2921 typereg = RETURN_ADDR;
2922 }
2923
2924 if (needsscript)
2925 scriptreg = TMP3;
2926 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2927 }
2928 else if (needstype && needsscript)
2929 scriptreg = TMP3;
2930 /* In all other cases only one of them was specified, and that one can go to TMP1. */
2931
2932 if (needsscript)
2933 {
2934 if (scriptreg == TMP1)
2935 {
2936 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2937 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
2938 }
2939 else
2940 {
2941 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
2942 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
2943 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
2944 }
2945 }
2946 }
2947 #endif
2948
2949 /* Generating code. */
2950 cc = ccbegin;
2951 charoffset = 0;
2952 numberofcmps = 0;
2953 #ifdef SUPPORT_UCP
2954 typeoffset = 0;
2955 #endif
2956
2957 while (*cc != XCL_END)
2958 {
2959 compares--;
2960 invertcmp = (compares == 0 && list != fallbacks);
2961 jump = NULL;
2962
2963 if (*cc == XCL_SINGLE)
2964 {
2965 cc ++;
2966 #ifdef SUPPORT_UTF
2967 if (common->utf)
2968 {
2969 GETCHARINC(c, cc);
2970 }
2971 else
2972 #endif
2973 c = *cc++;
2974
2975 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
2976 {
2977 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2978 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
2979 numberofcmps++;
2980 }
2981 else if (numberofcmps > 0)
2982 {
2983 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
2984 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2985 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
2986 numberofcmps = 0;
2987 }
2988 else
2989 {
2990 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
2991 numberofcmps = 0;
2992 }
2993 }
2994 else if (*cc == XCL_RANGE)
2995 {
2996 cc ++;
2997 #ifdef SUPPORT_UTF
2998 if (common->utf)
2999 {
3000 GETCHARINC(c, cc);
3001 }
3002 else
3003 #endif
3004 c = *cc++;
3005 SET_CHAR_OFFSET(c);
3006 #ifdef SUPPORT_UTF
3007 if (common->utf)
3008 {
3009 GETCHARINC(c, cc);
3010 }
3011 else
3012 #endif
3013 c = *cc++;
3014 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3015 {
3016 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3017 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3018 numberofcmps++;
3019 }
3020 else if (numberofcmps > 0)
3021 {
3022 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3023 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3024 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3025 numberofcmps = 0;
3026 }
3027 else
3028 {
3029 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3030 numberofcmps = 0;
3031 }
3032 }
3033 #ifdef SUPPORT_UCP
3034 else
3035 {
3036 if (*cc == XCL_NOTPROP)
3037 invertcmp ^= 0x1;
3038 cc++;
3039 switch(*cc)
3040 {
3041 case PT_ANY:
3042 if (list != fallbacks)
3043 {
3044 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3045 continue;
3046 }
3047 else if (cc[-1] == XCL_NOTPROP)
3048 continue;
3049 jump = JUMP(SLJIT_JUMP);
3050 break;
3051
3052 case PT_LAMP:
3053 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3054 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3055 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3056 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3057 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3058 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3059 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3060 break;
3061
3062 case PT_GC:
3063 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3064 SET_TYPE_OFFSET(c);
3065 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3066 break;
3067
3068 case PT_PC:
3069 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3070 break;
3071
3072 case PT_SC:
3073 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3074 break;
3075
3076 case PT_SPACE:
3077 case PT_PXSPACE:
3078 if (*cc == PT_SPACE)
3079 {
3080 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3081 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3082 }
3083 SET_CHAR_OFFSET(9);
3084 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3085 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3086 if (*cc == PT_SPACE)
3087 JUMPHERE(jump);
3088
3089 SET_TYPE_OFFSET(ucp_Zl);
3090 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3091 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3092 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3093 break;
3094
3095 case PT_WORD:
3096 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3097 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3098 /* ... fall through */
3099
3100 case PT_ALNUM:
3101 SET_TYPE_OFFSET(ucp_Ll);
3102 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3103 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3104 SET_TYPE_OFFSET(ucp_Nd);
3105 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3106 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3107 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3108 break;
3109 }
3110 cc += 2;
3111 }
3112 #endif
3113
3114 if (jump != NULL)
3115 add_jump(compiler, compares > 0 ? list : fallbacks, jump);
3116 }
3117
3118 if (found != NULL)
3119 set_jumps(found, LABEL());
3120 }
3121
3122 #undef SET_TYPE_OFFSET
3123 #undef SET_CHAR_OFFSET
3124
3125 #endif
3126
3127 static pcre_uchar *compile_char1_hotpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **fallbacks)
3128 {
3129 DEFINE_COMPILER;
3130 int length;
3131 unsigned int c, oc, bit;
3132 compare_context context;
3133 struct sljit_jump *jump[4];
3134 #ifdef SUPPORT_UTF
3135 struct sljit_label *label;
3136 #ifdef SUPPORT_UCP
3137 pcre_uchar propdata[5];
3138 #endif
3139 #endif
3140
3141 switch(type)
3142 {
3143 case OP_SOD:
3144 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3145 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3146 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3147 return cc;
3148
3149 case OP_SOM:
3150 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3151 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3152 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
3153 return cc;
3154
3155 case OP_NOT_WORD_BOUNDARY:
3156 case OP_WORD_BOUNDARY:
3157 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
3158 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3159 return cc;
3160
3161 case OP_NOT_DIGIT:
3162 case OP_DIGIT:
3163 fallback_at_str_end(common, fallbacks);
3164 read_char8_type(common);
3165 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
3166 add_jump(compiler, fallbacks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3167 return cc;
3168
3169 case OP_NOT_WHITESPACE:
3170 case OP_WHITESPACE:
3171 fallback_at_str_end(common, fallbacks);
3172 read_char8_type(common);
3173 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
3174 add_jump(compiler, fallbacks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3175 return cc;
3176
3177 case OP_NOT_WORDCHAR:
3178 case OP_WORDCHAR:
3179 fallback_at_str_end(common, fallbacks);
3180 read_char8_type(common);
3181 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
3182 add_jump(compiler, fallbacks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
3183 return cc;
3184
3185 case OP_ANY:
3186 fallback_at_str_end(common, fallbacks);
3187 read_char(common);
3188 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3189 {
3190 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3191 if (common->mode == JIT_COMPILE)
3192 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3193 else
3194 {
3195 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3196 /* Since we successfully read a char above, partial matching must occure. */
3197 check_partial(common, TRUE);
3198 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3199 JUMPHERE(jump[1]);
3200 jump[1] = NULL;
3201 }
3202
3203 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3204 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
3205 if (jump[1] != NULL)
3206 JUMPHERE(jump[1]);
3207 JUMPHERE(jump[0]);
3208 }
3209 else
3210 check_newlinechar(common, common->nltype, fallbacks, TRUE);
3211 return cc;
3212
3213 case OP_ALLANY:
3214 fallback_at_str_end(common, fallbacks);
3215 #ifdef SUPPORT_UTF
3216 if (common->utf)
3217 {
3218 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3219 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3220 #ifdef COMPILE_PCRE8
3221 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3222 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3223 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3224 #else /* COMPILE_PCRE8 */
3225 #ifdef COMPILE_PCRE16
3226 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3227 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3228 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3229 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
3230 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3231 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3232 #endif /* COMPILE_PCRE16 */
3233 #endif /* COMPILE_PCRE8 */
3234 JUMPHERE(jump[0]);
3235 return cc;
3236 }
3237 #endif
3238 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3239 return cc;
3240
3241 case OP_ANYBYTE:
3242 fallback_at_str_end(common, fallbacks);
3243 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3244 return cc;
3245
3246 #ifdef SUPPORT_UTF
3247 #ifdef SUPPORT_UCP
3248 case OP_NOTPROP:
3249 case OP_PROP:
3250 propdata[0] = 0;
3251 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
3252 propdata[2] = cc[0];
3253 propdata[3] = cc[1];
3254 propdata[4] = XCL_END;
3255 compile_xclass_hotpath(common, propdata, fallbacks);
3256 return cc + 2;
3257 #endif
3258 #endif
3259
3260 case OP_ANYNL:
3261 fallback_at_str_end(common, fallbacks);
3262 read_char(common);
3263 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3264 /* We don't need to handle soft partial matching case. */
3265 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
3266 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3267 else
3268 jump[1] = check_str_end(common);
3269 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3270 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3271 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3272 jump[3] = JUMP(SLJIT_JUMP);
3273 JUMPHERE(jump[0]);
3274 check_newlinechar(common, common->bsr_nltype, fallbacks, FALSE);
3275 JUMPHERE(jump[1]);
3276 JUMPHERE(jump[2]);
3277 JUMPHERE(jump[3]);
3278 return cc;
3279
3280 case OP_NOT_HSPACE:
3281 case OP_HSPACE:
3282 fallback_at_str_end(common, fallbacks);
3283 read_char(common);
3284 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
3285 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3286 return cc;
3287
3288 case OP_NOT_VSPACE:
3289 case OP_VSPACE:
3290 fallback_at_str_end(common, fallbacks);
3291 read_char(common);
3292 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
3293 add_jump(compiler, fallbacks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
3294 return cc;
3295
3296 #ifdef SUPPORT_UCP
3297 case OP_EXTUNI:
3298 fallback_at_str_end(common, fallbacks);
3299 read_char(common);
3300 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3301 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3302 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
3303
3304 label = LABEL();
3305 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3306 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3307 read_char(common);
3308 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3309 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
3310 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
3311
3312 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3313 JUMPHERE(jump[0]);
3314 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
3315 {
3316 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3317 /* Since we successfully read a char above, partial matching must occure. */
3318 check_partial(common, TRUE);
3319 JUMPHERE(jump[0]);
3320 }
3321 return cc;
3322 #endif
3323
3324 case OP_EODN:
3325 /* Requires rather complex checks. */
3326 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3327 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3328 {
3329 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3330 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3331 if (common->mode == JIT_COMPILE)
3332 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3333 else
3334 {
3335 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
3336 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3337 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
3338 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
3339 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
3340 add_jump(compiler, fallbacks, JUMP(SLJIT_C_NOT_EQUAL));
3341 check_partial(common, TRUE);
3342 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3343 JUMPHERE(jump[1]);
3344 }
3345 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3346 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3347 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3348 }
3349 else if (common->nltype == NLTYPE_FIXED)
3350 {
3351 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3352 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3353 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
3354 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3355 }
3356 else
3357 {
3358 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3359 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3360 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3361 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
3362 jump[2] = JUMP(SLJIT_C_GREATER);
3363 add_jump(compiler, fallbacks, JUMP(SLJIT_C_LESS));
3364 /* Equal. */
3365 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3366 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
3367 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3368
3369 JUMPHERE(jump[1]);
3370 if (common->nltype == NLTYPE_ANYCRLF)
3371 {
3372 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3373 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
3374 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3375 }
3376 else
3377 {
3378 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
3379 read_char(common);
3380 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
3381 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3382 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3383 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3384 }
3385 JUMPHERE(jump[2]);
3386 JUMPHERE(jump[3]);
3387 }
3388 JUMPHERE(jump[0]);
3389 check_partial(common, FALSE);
3390 return cc;
3391
3392 case OP_EOD:
3393 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3394 check_partial(common, FALSE);
3395 return cc;
3396
3397 case OP_CIRC:
3398 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3399 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3400 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
3401 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3402 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3403 return cc;
3404
3405 case OP_CIRCM:
3406 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3407 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
3408 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
3409 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
3410 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3411 jump[0] = JUMP(SLJIT_JUMP);
3412 JUMPHERE(jump[1]);
3413
3414 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3415 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3416 {
3417 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3418 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
3419 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3420 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3421 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3422 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3423 }
3424 else
3425 {
3426 skip_char_back(common);
3427 read_char(common);
3428 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3429 }
3430 JUMPHERE(jump[0]);
3431 return cc;
3432
3433 case OP_DOLL:
3434 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3435 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3436 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3437
3438 if (!common->endonly)
3439 compile_char1_hotpath(common, OP_EODN, cc, fallbacks);
3440 else
3441 {
3442 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
3443 check_partial(common, FALSE);
3444 }
3445 return cc;
3446
3447 case OP_DOLLM:
3448 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
3449 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
3450 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
3451 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3452 check_partial(common, FALSE);
3453 jump[0] = JUMP(SLJIT_JUMP);
3454 JUMPHERE(jump[1]);
3455
3456 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3457 {
3458 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3459 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3460 if (common->mode == JIT_COMPILE)
3461 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
3462 else
3463 {
3464 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
3465 /* STR_PTR = STR_END - IN_UCHARS(1) */
3466 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3467 check_partial(common, TRUE);
3468 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3469 JUMPHERE(jump[1]);
3470 }
3471
3472 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3473 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
3474 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
3475 }
3476 else
3477 {
3478 peek_char(common);
3479 check_newlinechar(common, common->nltype, fallbacks, FALSE);
3480 }
3481 JUMPHERE(jump[0]);
3482 return cc;
3483
3484 case OP_CHAR:
3485 case OP_CHARI:
3486 length = 1;
3487 #ifdef SUPPORT_UTF
3488 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
3489 #endif
3490 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
3491 {
3492 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3493 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3494
3495 context.length = IN_UCHARS(length);
3496 context.sourcereg = -1;
3497 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3498 context.ucharptr = 0;
3499 #endif
3500 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, fallbacks);
3501 }
3502 fallback_at_str_end(common, fallbacks);
3503 read_char(common);
3504 #ifdef SUPPORT_UTF
3505 if (common->utf)
3506 {
3507 GETCHAR(c, cc);
3508 }
3509 else
3510 #endif
3511 c = *cc;
3512 if (type == OP_CHAR || !char_has_othercase(common, cc))
3513 {
3514 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
3515 return cc + length;
3516 }
3517 oc = char_othercase(common, c);
3518 bit = c ^ oc;
3519 if (ispowerof2(bit))
3520 {
3521 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3522 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3523 return cc + length;
3524 }
3525 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
3526 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3527 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
3528 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3529 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3530 return cc + length;
3531
3532 case OP_NOT:
3533 case OP_NOTI:
3534 fallback_at_str_end(common, fallbacks);
3535 length = 1;
3536 #ifdef SUPPORT_UTF
3537 if (common->utf)
3538 {
3539 #ifdef COMPILE_PCRE8
3540 c = *cc;
3541 if (c < 128)
3542 {
3543 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3544 if (type == OP_NOT || !char_has_othercase(common, cc))
3545 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3546 else
3547 {
3548 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
3549 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
3550 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
3551 }
3552 /* Skip the variable-length character. */
3553 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3554 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3555 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
3556 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3557 JUMPHERE(jump[0]);
3558 return cc + 1;
3559 }
3560 else
3561 #endif /* COMPILE_PCRE8 */
3562 {
3563 GETCHARLEN(c, cc, length);
3564 read_char(common);
3565 }
3566 }
3567 else
3568 #endif /* SUPPORT_UTF */
3569 {
3570 read_char(common);
3571 c = *cc;
3572 }
3573
3574 if (type == OP_NOT || !char_has_othercase(common, cc))
3575 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3576 else
3577 {
3578 oc = char_othercase(common, c);
3579 bit = c ^ oc;
3580 if (ispowerof2(bit))
3581 {
3582 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
3583 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
3584 }
3585 else
3586 {
3587 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
3588 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
3589 }
3590 }
3591 return cc + 1;
3592
3593 case OP_CLASS:
3594 case OP_NCLASS:
3595 fallback_at_str_end(common, fallbacks);
3596 read_char(common);
3597 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3598 jump[0] = NULL;
3599 #ifdef COMPILE_PCRE8
3600 /* This check only affects 8 bit mode. In other modes, we
3601 always need to compare the value with 255. */
3602 if (common->utf)
3603 #endif /* COMPILE_PCRE8 */
3604 {
3605 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3606 if (type == OP_CLASS)
3607 {
3608 add_jump(compiler, fallbacks, jump[0]);
3609 jump[0] = NULL;
3610 }
3611 }
3612 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3613 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3614 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3615 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3616 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3617 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3618 add_jump(compiler, fallbacks, JUMP(SLJIT_C_ZERO));
3619 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3620 if (jump[0] != NULL)
3621 JUMPHERE(jump[0]);
3622 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
3623 return cc + 32 / sizeof(pcre_uchar);
3624
3625 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3626 case OP_XCLASS:
3627 compile_xclass_hotpath(common, cc + LINK_SIZE, fallbacks);
3628 return cc + GET(cc, 0) - 1;
3629 #endif
3630
3631 case OP_REVERSE:
3632 length = GET(cc, 0);
3633 SLJIT_ASSERT(length > 0);
3634 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3635 #ifdef SUPPORT_UTF
3636 if (common->utf)
3637 {
3638 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3639 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
3640 label = LABEL();
3641 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
3642 skip_char_back(common);
3643 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3644 JUMPTO(SLJIT_C_NOT_ZERO, label);
3645 }
3646 else
3647 #endif
3648 {
3649 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3650 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
3651 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
3652 }
3653 check_start_used_ptr(common);
3654 return cc + LINK_SIZE;
3655 }
3656 SLJIT_ASSERT_STOP();
3657 return cc;
3658 }
3659
3660 static SLJIT_INLINE pcre_uchar *compile_charn_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **fallbacks)
3661 {
3662 /* This function consumes at least one input character. */
3663 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
3664 DEFINE_COMPILER;
3665 pcre_uchar *ccbegin = cc;
3666 compare_context context;
3667 int size;
3668
3669 context.length = 0;
3670 do
3671 {
3672 if (cc >= ccend)
3673 break;
3674
3675 if (*cc == OP_CHAR)
3676 {
3677 size = 1;
3678 #ifdef SUPPORT_UTF
3679 if (common->utf && HAS_EXTRALEN(cc[1]))
3680 size += GET_EXTRALEN(cc[1]);
3681 #endif
3682 }
3683 else if (*cc == OP_CHARI)
3684 {
3685 size = 1;
3686 #ifdef SUPPORT_UTF
3687 if (common->utf)
3688 {
3689 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3690 size = 0;
3691 else if (HAS_EXTRALEN(cc[1]))
3692 size += GET_EXTRALEN(cc[1]);
3693 }
3694 else
3695 #endif
3696 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
3697 size = 0;
3698 }
3699 else
3700 size = 0;
3701
3702 cc += 1 + size;
3703 context.length += IN_UCHARS(size);
3704 }
3705 while (size > 0 && context.length <= 128);
3706
3707 cc = ccbegin;
3708 if (context.length > 0)
3709 {
3710 /* We have a fixed-length byte sequence. */
3711 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
3712 add_jump(compiler, fallbacks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
3713
3714 context.sourcereg = -1;
3715 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3716 context.ucharptr = 0;
3717 #endif
3718 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, fallbacks); while (context.length > 0);
3719 return cc;
3720 }
3721
3722 /* A non-fixed length character will be checked if length == 0. */
3723 return compile_char1_hotpath(common, *cc, cc + 1, fallbacks);
3724 }
3725
3726 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks)
3727 {
3728 DEFINE_COMPILER;
3729 int offset = GET2(cc, 1) << 1;
3730
3731 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3732 if (!common->jscript_compat)
3733 {
3734 if (fallbacks == NULL)
3735 {
3736 /* OVECTOR(1) contains the "string begin - 1" constant. */
3737 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
3738 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3739 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3740 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3741 return JUMP(SLJIT_C_NOT_ZERO);
3742 }
3743 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3744 }
3745 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3746 }
3747
3748 /* Forward declarations. */
3749 static void compile_hotpath(compiler_common *, pcre_uchar *, pcre_uchar *, fallback_common *);
3750 static void compile_fallbackpath(compiler_common *, struct fallback_common *);
3751
/* Allocates a fallback descriptor of "size" bytes from the compiler's memory,
zero-fills it, records "ccstart" in its cc field and pushes it on top of
parent's chain. The names "compiler", "parent" and "fallback" must be in
scope where the macro is used. If the compiler reports an error after the
allocation, the enclosing function returns "error" at once. */
#define PUSH_FALLBACK(size, ccstart, error) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)

/* Same as PUSH_FALLBACK, for use inside functions that return void. */
#define PUSH_FALLBACK_NOVALUE(size, ccstart) \
  do \
    { \
    fallback = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(fallback, 0, (size)); \
    fallback->prev = parent->top; \
    fallback->cc = (ccstart); \
    parent->top = fallback; \
    } \
  while (0)

/* Views the current "fallback" pointer as a pointer to the given type. */
#define FALLBACK_AS(type) ((type *)fallback)
3779
3780 static pcre_uchar *compile_ref_hotpath(compiler_common *common, pcre_uchar *cc, jump_list **fallbacks, BOOL withchecks, BOOL emptyfail)
3781 {
3782 DEFINE_COMPILER;
3783 int offset = GET2(cc, 1) << 1;
3784 struct sljit_jump *jump = NULL;
3785 struct sljit_jump *partial;
3786 struct sljit_jump *nopartial;
3787
3788 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
3789 /* OVECTOR(1) contains the "string begin - 1" constant. */
3790 if (withchecks && !common->jscript_compat)
3791 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
3792
3793 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3794 if (common->utf && *cc == OP_REFI)
3795 {
3796 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
3797 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
3798 if (withchecks)
3799 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
3800
3801 /* Needed to save important temporary registers. */
3802 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
3803 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
3804 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, ptr), STR_PTR, 0);
3805 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
3806 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3807 if (common->mode == JIT_COMPILE)
3808 add_jump(compiler, fallbacks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
3809 else
3810 {
3811 add_jump(compiler, fallbacks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
3812 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3813 check_partial(common, FALSE);
3814 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3815 JUMPHERE(nopartial);
3816 }
3817 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
3818 }
3819 else
3820 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3821 {
3822 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
3823 if (withchecks)
3824 jump = JUMP(SLJIT_C_ZERO);
3825
3826 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3827 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
3828 if (common->mode == JIT_COMPILE)
3829 add_jump(compiler, fallbacks, partial);
3830
3831 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3832 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3833
3834 if (common->mode != JIT_COMPILE)
3835 {
3836 nopartial = JUMP(SLJIT_JUMP);
3837 JUMPHERE(partial);
3838 /* TMP2 -= STR_END - STR_PTR */
3839 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
3840 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
3841 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
3842 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
3843 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
3844 add_jump(compiler, fallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
3845 JUMPHERE(partial);
3846 check_partial(common, FALSE);
3847 add_jump(compiler, fallbacks, JUMP(SLJIT_JUMP));
3848 JUMPHERE(nopartial);
3849 }
3850 }
3851
3852 if (jump != NULL)
3853 {
3854 if (emptyfail)
3855 add_jump(compiler, fallbacks, jump);
3856 else
3857 JUMPHERE(jump);
3858 }
3859 return cc + 1 + IMM2_SIZE;
3860 }
3861
/* Emits the hot path (matching) code for a back reference that is followed
by a repeat opcode (OP_CRSTAR ... OP_CRMINRANGE).

Arguments:
  common    compiler state
  cc        points to the reference opcode; the repeat opcode is read from
            cc[1 + IMM2_SIZE]
  parent    fallback that becomes the owner of the iterator_fallback
            pushed here

Returns:    cc advanced past the reference and its repeat opcode (including
            the two IMM2_SIZE operands of the range forms). PUSH_FALLBACK is
            passed NULL as its third argument - presumably the value returned
            when the fallback cannot be allocated; confirm against the macro.

In this function max == 0 stands for "no upper limit": the star and plus
forms set it, and the max == 0 branches below loop without a counter check. */
3862 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
3863 {
3864 DEFINE_COMPILER;
3865 fallback_common *fallback;
3866 pcre_uchar type;
3867 struct sljit_label *label;
3868 struct sljit_jump *zerolength;
3869 struct sljit_jump *jump = NULL;
3870 pcre_uchar *ccbegin = cc;
3871 int min = 0, max = 0;
3872 BOOL minimize;
3873
3874 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
3875
/* The repeat opcode follows the reference opcode and its IMM2_SIZE operand.
An odd opcode value is treated as a minimizing repeat (NOTE(review): this
relies on the numbering of the OP_CRMIN* opcodes, which is defined outside
this chunk). */
3876 type = cc[1 + IMM2_SIZE];
3877 minimize = (type & 0x1) != 0;
3878 switch(type)
3879 {
3880 case OP_CRSTAR:
3881 case OP_CRMINSTAR:
3882 min = 0;
3883 max = 0;
3884 cc += 1 + IMM2_SIZE + 1;
3885 break;
3886 case OP_CRPLUS:
3887 case OP_CRMINPLUS:
3888 min = 1;
3889 max = 0;
3890 cc += 1 + IMM2_SIZE + 1;
3891 break;
3892 case OP_CRQUERY:
3893 case OP_CRMINQUERY:
3894 min = 0;
3895 max = 1;
3896 cc += 1 + IMM2_SIZE + 1;
3897 break;
3898 case OP_CRRANGE:
3899 case OP_CRMINRANGE:
/* The range forms carry two IMM2_SIZE operands after the opcode: min, max. */
3900 min = GET2(cc, 1 + IMM2_SIZE + 1);
3901 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
3902 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
3903 break;
3904 default:
3905 SLJIT_ASSERT_STOP();
3906 break;
3907 }
3908
/* Maximizing (greedy) repeat. */
3909 if (!minimize)
3910 {
3911 if (min == 0)
3912 {
/* Two stack slots: the current STR_PTR and a 0. */
3913 allocate_stack(common, 2);
3914 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3915 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3916 /* Temporary release of STR_PTR. */
3917 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
/* No fallback list is passed here; the returned jump is bound after the
loop below (JUMPHERE(zerolength)). */
3918 zerolength = compile_ref_checks(common, ccbegin, NULL);
3919 /* Restore if not zero length. */
3920 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
3921 }
3922 else
3923 {
/* At least one match is required: one slot holding 0, and the checks may
add jumps to the top fallback list. */
3924 allocate_stack(common, 1);
3925 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3926 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3927 }
3928
/* A counter is only needed when a limit greater than one exists; it is kept
in the POSSESSIVE0 local. */
3929 if (min > 1 || max > 1)
3930 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
3931
/* Loop head: one match of the referenced substring per iteration. */
3932 label = LABEL();
3933 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, FALSE, FALSE);
3934
3935 if (min > 1 || max > 1)
3936 {
/* counter++ */
3937 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3938 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3939 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: repeat without pushing anything. */
3940 if (min > 1)
3941 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
3942 if (max > 1)
3943 {
/* Below the maximum: push the current STR_PTR and try another iteration;
at the maximum the loop is left without pushing. */
3944 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
3945 allocate_stack(common, 1);
3946 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3947 JUMPTO(SLJIT_JUMP, label);
3948 JUMPHERE(jump);
3949 }
3950 }
3951
3952 if (max == 0)
3953 {
3954 /* Includes min > 1 case as well. */
/* Unlimited repeat: push STR_PTR and jump back unconditionally. */
3955 allocate_stack(common, 1);
3956 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3957 JUMPTO(SLJIT_JUMP, label);
3958 }
3959
3960 JUMPHERE(zerolength);
/* Record the label that follows the loop in the iterator_fallback. */
3961 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
3962
3963 decrease_call_count(common);
3964 return cc;
3965 }
3966
/* Minimizing repeat. Two stack slots are used: STACK(0) starts as 0 and later
holds STR_PTR, STACK(1) is the iteration counter (left unset for
OP_CRMINSTAR, where no counter is read below). */
3967 allocate_stack(common, 2);
3968 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
3969 if (type != OP_CRMINSTAR)
3970 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
3971
3972 if (min == 0)
3973 {
/* Zero iterations are acceptable: store STR_PTR and skip the first match
attempt by jumping over the code emitted below. */
3974 zerolength = compile_ref_checks(common, ccbegin, NULL);
3975 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3976 jump = JUMP(SLJIT_JUMP);
3977 }
3978 else
3979 zerolength = compile_ref_checks(common, ccbegin, &fallback->topfallbacks);
3980
/* The code from this label performs one more iteration; the label is stored
in the iterator_fallback. */
3981 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
/* Fail through the top fallback list once the counter has reached max. */
3982 if (max > 0)
3983 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
3984
3985 compile_ref_hotpath(common, ccbegin, &fallback->topfallbacks, TRUE, TRUE);
3986 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
3987
3988 if (min > 1)
3989 {
/* counter++, and keep iterating until the minimum is reached. */
3990 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
3991 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3992 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
3993 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, FALLBACK_AS(iterator_fallback)->hotpath);
3994 }
3995 else if (max > 0)
/* Only the upper limit needs the counter: increment it in place. */
3996 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
3997
/* jump is non-NULL only on the min == 0 path above. */
3998 if (jump != NULL)
3999 JUMPHERE(jump);
4000 JUMPHERE(zerolength);
4001
4002 decrease_call_count(common);
4003 return cc;
4004 }
4005
4006 static SLJIT_INLINE pcre_uchar *compile_recurse_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4007 {
4008 DEFINE_COMPILER;
4009 fallback_common *fallback;
4010 recurse_entry *entry = common->entries;
4011 recurse_entry *prev = NULL;
4012 int start = GET(cc, 1);
4013
4014 PUSH_FALLBACK(sizeof(recurse_fallback), cc, NULL);
4015 while (entry != NULL)
4016 {
4017 if (entry->start == start)
4018 break;
4019 prev = entry;
4020 entry = entry->next;
4021 }
4022
4023 if (entry == NULL)
4024 {
4025 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4026 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4027 return NULL;
4028 entry->next = NULL;
4029 entry->entry = NULL;
4030 entry->calls = NULL;
4031 entry->start = start;
4032
4033 if (prev != NULL)
4034 prev->next = entry;
4035 else
4036 common->entries = entry;
4037 }
4038
4039 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4040 allocate_stack(common, 1);
4041 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4042
4043 if (entry->entry == NULL)
4044 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4045 else
4046 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4047 /* Leave if the match is failed. */
4048 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4049 return cc + 1 + LINK_SIZE;
4050 }
4051
/* Emits the hot path code for an assertion group (opcode in the range
OP_ASSERT ... OP_ASSERTBACK_NOT, checked by SLJIT_ASSERT below).

Arguments:
  common       compiler state; its acceptlabel and accept members are saved
               on entry and restored before every return
  cc           points to the assertion opcode, or to an OP_BRAZERO /
               OP_BRAMINZERO placed in front of it (asserted not to occur
               when conditional is set)
  fallback     assert_fallback of this item; framesize and localptr are
               stored into it, and hotpath is set or read for the
               OP_BRAZERO / OP_BRAMINZERO forms
  conditional  selects the list that receives the failure jumps:
               fallback->condfailed when TRUE, otherwise
               fallback->common.topfallbacks

Returns:       cc + 1 + LINK_SIZE taken after the last alternative, or NULL
               when the sljit compiler reports an error

Every alternative is compiled against a temporary fallback_common
(altfallback), and the fallback path of the alternative is emitted right after
its hot path. */
4052 static pcre_uchar *compile_assert_hotpath(compiler_common *common, pcre_uchar *cc, assert_fallback *fallback, BOOL conditional)
4053 {
4054 DEFINE_COMPILER;
4055 int framesize;
4056 int localptr;
4057 fallback_common altfallback;
4058 pcre_uchar *ccbegin;
4059 pcre_uchar opcode;
4060 pcre_uchar bra = OP_BRA;
4061 jump_list *tmp = NULL;
4062 jump_list **target = (conditional) ? &fallback->condfailed : &fallback->common.topfallbacks;
4063 jump_list **found;
4064 /* Saving previous accept variables. */
4065 struct sljit_label *save_acceptlabel = common->acceptlabel;
4066 struct sljit_jump *jump;
4067 struct sljit_jump *brajump = NULL;
4068 jump_list *save_accept = common->accept;
4069
4070 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4071 {
4072 SLJIT_ASSERT(!conditional);
4073 bra = *cc;
4074 cc++;
4075 }
4076 localptr = PRIV_DATA(cc);
4077 SLJIT_ASSERT(localptr != 0);
4078 framesize = get_framesize(common, cc, FALSE);
4079 fallback->framesize = framesize;
4080 fallback->localptr = localptr;
4081 opcode = *cc;
4082 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* When an alternative of a positive assertion matches, control goes to the
local list tmp (bound at "Assert is successful" below). For a negative
assertion a matching alternative goes straight to the failure list. */
4083 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
4084 ccbegin = cc;
4085 cc += GET(cc, 1);
4086
4087 if (bra == OP_BRAMINZERO)
4088 {
4089 /* This is a braminzero fallback path. */
/* Pop the saved STR_PTR; a zero value skips the whole assertion (brajump is
bound near the end of this function). */
4090 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4091 free_stack(common, 1);
4092 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4093 }
4094
4095 if (framesize < 0)
4096 {
/* No frame: remember STACK_TOP in the local and push STR_PTR. */
4097 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4098 allocate_stack(common, 1);
4099 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4100 }
4101 else
4102 {
/* Frame needed: STACK(0) holds STR_PTR, STACK(1) the previous value of the
local, and init_frame() fills the remaining framesize slots. The local is
set to an address derived from the new STACK_TOP. */
4103 allocate_stack(common, framesize + 2);
4104 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4105 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
4106 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4107 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4108 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4109 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
4110 }
4111
/* One pass of this loop per alternative. */
4112 memset(&altfallback, 0, sizeof(fallback_common));
4113 while (1)
4114 {
4115 common->acceptlabel = NULL;
4116 common->accept = NULL;
4117 altfallback.top = NULL;
4118 altfallback.topfallbacks = NULL;
4119
/* Second and later alternatives restart from the saved STR_PTR. */
4120 if (*ccbegin == OP_ALT)
4121 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4122
4123 altfallback.cc = ccbegin;
4124 compile_hotpath(common, ccbegin + 1 + LINK_SIZE, cc, &altfallback);
4125 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4126 {
4127 common->acceptlabel = save_acceptlabel;
4128 common->accept = save_accept;
4129 return NULL;
4130 }
/* Accept jumps collected while compiling this alternative land here. */
4131 common->acceptlabel = LABEL();
4132 if (common->accept != NULL)
4133 set_jumps(common->accept, common->acceptlabel);
4134
4135 /* Reset stack. */
4136 if (framesize < 0)
4137 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4138 else {
4139 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
4140 {
4141 /* We don't need to keep the STR_PTR, only the previous localptr. */
4142 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4143 }
4144 else
4145 {
/* Non-conditional negative assertion: the frame is reverted through the
shared revertframes routine. */
4146 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4147 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4148 }
4149 }
4150
4151 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
4152 {
4153 /* We know that STR_PTR was stored on the top of the stack. */
4154 if (conditional)
4155 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4156 else if (bra == OP_BRAZERO)
4157 {
4158 if (framesize < 0)
4159 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4160 else
4161 {
/* Reload both the previous local value and STR_PTR from the frame. */
4162 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4163 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
4164 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4165 }
/* Leave a 0 in the topmost stack slot. */
4166 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4167 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4168 }
4169 else if (framesize >= 0)
4170 {
4171 /* For OP_BRA and OP_BRAMINZERO. */
4172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4173 }
4174 }
/* This alternative matched: leave through the list selected above. */
4175 add_jump(compiler, found, JUMP(SLJIT_JUMP));
4176
/* The fallback path of the alternative follows its hot path directly;
its top fallbacks are bound to the code emitted next. */
4177 compile_fallbackpath(common, altfallback.top);
4178 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4179 {
4180 common->acceptlabel = save_acceptlabel;
4181 common->accept = save_accept;
4182 return NULL;
4183 }
4184 set_jumps(altfallback.topfallbacks, LABEL());
4185
4186 if (*cc != OP_ALT)
4187 break;
4188
4189 ccbegin = cc;
4190 cc += GET(cc, 1);
4191 }
4192 /* None of them matched. */
4193
4194 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
4195 {
4196 /* Assert is failed. */
4197 if (conditional || bra == OP_BRAZERO)
4198 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4199
4200 if (framesize < 0)
4201 {
4202 /* The topmost item should be 0. */
4203 if (bra == OP_BRAZERO)
4204 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4205 else
4206 free_stack(common, 1);
4207 }
4208 else
4209 {
/* TMP1 carries the previous local value across the stack release. */
4210 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4211 /* The topmost item should be 0. */
4212 if (bra == OP_BRAZERO)
4213 {
4214 free_stack(common, framesize + 1);
4215 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4216 }
4217 else
4218 free_stack(common, framesize + 2);
4219 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4220 }
/* For OP_BRAZERO the jump is bound to fallback->hotpath further down
instead of being added to the failure list. */
4221 jump = JUMP(SLJIT_JUMP);
4222 if (bra != OP_BRAZERO)
4223 add_jump(compiler, target, jump);
4224
4225 /* Assert is successful. */
4226 set_jumps(tmp, LABEL());
4227 if (framesize < 0)
4228 {
4229 /* We know that STR_PTR was stored on the top of the stack. */
4230 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4231 /* Keep the STR_PTR on the top of the stack. */
4232 if (bra == OP_BRAZERO)
4233 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4234 else if (bra == OP_BRAMINZERO)
4235 {
/* For OP_BRAMINZERO a 0 is stored in place of STR_PTR. */
4236 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4237 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4238 }
4239 }
4240 else
4241 {
4242 if (bra == OP_BRA)
4243 {
4244 /* We don't need to keep the STR_PTR, only the previous localptr. */
4245 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
4246 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
4247 }
4248 else
4249 {
4250 /* We don't need to keep the STR_PTR, only the previous localptr. */
4251 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
4252 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* The top slot receives STR_PTR for OP_BRAZERO and an immediate 0 for
OP_BRAMINZERO. */
4253 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
4254 }
4255 }
4256
4257 if (bra == OP_BRAZERO)
4258 {
/* Both the failed and the successful path continue from this label. */
4259 fallback->hotpath = LABEL();
4260 sljit_set_label(jump, fallback->hotpath);
4261 }
4262 else if (bra == OP_BRAMINZERO)
4263 {
/* NOTE(review): fallback->hotpath is read here but not assigned on this
path inside this function - presumably set by the caller; confirm. */
4264 JUMPTO(SLJIT_JUMP, fallback->hotpath);
4265 JUMPHERE(brajump);
4266 if (framesize >= 0)
4267 {
4268 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4269 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4270 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
4271 }
4272 set_jumps(fallback->common.topfallbacks, LABEL());
4273 }
4274 }
4275 else
4276 {
4277 /* AssertNot is successful. */
4278 if (framesize < 0)
4279 {
4280 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* With OP_BRAZERO / OP_BRAMINZERO the slot is kept and set to 0,
otherwise it is released. */
4281 if (bra != OP_BRA)
4282 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4283 else
4284 free_stack(common, 1);
4285 }
4286 else
4287 {
4288 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4289 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4290 /* The topmost item should be 0. */
4291 if (bra != OP_BRA)
4292 {
4293 free_stack(common, framesize + 1);
4294 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4295 }
4296 else
4297 free_stack(common, framesize + 2);
/* Put the previous value back into the local. */
4298 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
4299 }
4300
4301 if (bra == OP_BRAZERO)
4302 fallback->hotpath = LABEL();
4303 else if (bra == OP_BRAMINZERO)
4304 {
4305 JUMPTO(SLJIT_JUMP, fallback->hotpath);
4306 JUMPHERE(brajump);
4307 }
4308
4309 if (bra != OP_BRA)
4310 {
/* The failure jumps are bound here, so the list is emptied afterwards. */
4311 SLJIT_ASSERT(found == &fallback->common.topfallbacks);
4312 set_jumps(fallback->common.topfallbacks, LABEL());
4313 fallback->common.topfallbacks = NULL;
4314 }
4315 }
4316
/* Restore the accept state of the enclosing pattern. */
4317 common->acceptlabel = save_acceptlabel;
4318 common->accept = save_accept;
4319 return cc + 1 + LINK_SIZE;
4320 }
4321
4322 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
4323 {
4324 int condition = FALSE;
4325 pcre_uchar *slotA = name_table;
4326 pcre_uchar *slotB;
4327 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4328 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4329 sljit_w no_capture;
4330 int i;
4331
4332 locals += OVECTOR_START / sizeof(sljit_w);
4333 no_capture = locals[1];
4334
4335 for (i = 0; i < name_count; i++)
4336 {
4337 if (GET2(slotA, 0) == refno) break;
4338 slotA += name_entry_size;
4339 }
4340
4341 if (i < name_count)
4342 {
4343 /* Found a name for the number - there can be only one; duplicate names
4344 for different numbers are allowed, but not vice versa. First scan down
4345 for duplicates. */
4346
4347 slotB = slotA;
4348 while (slotB > name_table)
4349 {
4350 slotB -= name_entry_size;
4351 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4352 {
4353 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4354 if (condition) break;
4355 }
4356 else break;
4357 }
4358
4359 /* Scan up for duplicates */
4360 if (!condition)
4361 {
4362 slotB = slotA;
4363 for (i++; i < name_count; i++)
4364 {
4365 slotB += name_entry_size;
4366 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4367 {
4368 condition = locals[GET2(slotB, 0) << 1] != no_capture;
4369 if (condition) break;
4370 }
4371 else break;
4372 }
4373 }
4374 }
4375 return condition;
4376 }
4377
4378 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
4379 {
4380 int condition = FALSE;
4381 pcre_uchar *slotA = name_table;
4382 pcre_uchar *slotB;
4383 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
4384 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
4385 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
4386 int i;
4387
4388 for (i = 0; i < name_count; i++)
4389 {
4390 if (GET2(slotA, 0) == recno) break;
4391 slotA += name_entry_size;
4392 }
4393
4394 if (i < name_count)
4395 {
4396 /* Found a name for the number - there can be only one; duplicate
4397 names for different numbers are allowed, but not vice versa. First
4398 scan down for duplicates. */
4399
4400 slotB = slotA;
4401 while (slotB > name_table)
4402 {
4403 slotB -= name_entry_size;
4404 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4405 {
4406 condition = GET2(slotB, 0) == group_num;
4407 if (condition) break;
4408 }
4409 else break;
4410 }
4411
4412 /* Scan up for duplicates */
4413 if (!condition)
4414 {
4415 slotB = slotA;
4416 for (i++; i < name_count; i++)
4417 {
4418 slotB += name_entry_size;
4419 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
4420 {
4421 condition = GET2(slotB, 0) == group_num;
4422 if (condition) break;
4423 }
4424 else break;
4425 }
4426 }
4427 }
4428 return condition;
4429 }
4430
4431 /*
4432 Handling bracketed expressions is probably the most complex part.
4433
4434 Stack layout naming characters:
4435 S - Push the current STR_PTR
4436 0 - Push a 0 (NULL)
4437 A - Push the current STR_PTR. Needed for restoring the STR_PTR
4438 before the next alternative. Not pushed if there are no alternatives.
4439 M - Any values pushed by the current alternative. Can be empty, or anything.
4440 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
4441 L - Push the previous local (pointed by localptr) to the stack
4442 () - optional values stored on the stack
4443 ()* - optional, can be stored multiple times
4444
4445 The following list shows the regular expression templates, their PCRE byte codes
4446 and stack layout supported by pcre-sljit.
4447
4448 (?:) OP_BRA | OP_KET A M
4449 () OP_CBRA | OP_KET C M
4450 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
4451 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
4452 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
4453 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
4454 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
4455 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
4456 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
4457 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
4458 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
4459 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
4460 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
4461 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
4462 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
4463 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
4464 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
4465 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
4466 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
4467 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
4468 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
4469 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
4470
4471
4472 Stack layout naming characters:
4473 A - Push the alternative index (starting from 0) on the stack.
4474 Not pushed if there are no alternatives.
4475 M - Any values pushed by the current alternative. Can be empty, or anything.
4476
4477 The next list shows the possible content of a bracket:
4478 (|) OP_*BRA | OP_ALT ... M A
4479 (?()|) OP_*COND | OP_ALT M A
4480 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
4481 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
4482 Or nothing, if trace is unnecessary
4483 */
4484
4485 static pcre_uchar *compile_bracket_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4486 {
4487 DEFINE_COMPILER;
4488 fallback_common *fallback;
4489 pcre_uchar opcode;
4490 int localptr = 0;
4491 int offset = 0;
4492 int stacksize;
4493 pcre_uchar *ccbegin;
4494 pcre_uchar *hotpath;
4495 pcre_uchar bra = OP_BRA;
4496 pcre_uchar ket;
4497 assert_fallback *assert;
4498 BOOL has_alternatives;
4499 struct sljit_jump *jump;
4500 struct sljit_jump *skip;
4501 struct sljit_label *rmaxlabel = NULL;
4502 struct sljit_jump *braminzerojump = NULL;
4503
4504 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
4505
4506 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4507 {
4508 bra = *cc;
4509 cc++;
4510 opcode = *cc;
4511 }
4512
4513 opcode = *cc;
4514 ccbegin = cc;
4515 hotpath = ccbegin + 1 + LINK_SIZE;
4516
4517 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
4518 {
4519 /* Drop this bracket_fallback. */
4520 parent->top = fallback->prev;
4521 return bracketend(cc);
4522 }
4523
4524 ket = *(bracketend(cc) - 1 - LINK_SIZE);
4525 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
4526 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
4527 cc += GET(cc, 1);
4528
4529 has_alternatives = *cc == OP_ALT;
4530 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
4531 {
4532 has_alternatives = (*hotpath == OP_RREF) ? FALSE : TRUE;
4533 if (*hotpath == OP_NRREF)
4534 {
4535 stacksize = GET2(hotpath, 1);
4536 if (common->currententry == NULL || stacksize == RREF_ANY)
4537 has_alternatives = FALSE;
4538 else if (common->currententry->start == 0)
4539 has_alternatives = stacksize != 0;
4540 else
4541 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4542 }
4543 }
4544
4545 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
4546 opcode = OP_SCOND;
4547 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
4548 opcode = OP_ONCE;
4549
4550 if (opcode == OP_CBRA || opcode == OP_SCBRA)
4551 {
4552 /* Capturing brackets has a pre-allocated space. */
4553 offset = GET2(ccbegin, 1 + LINK_SIZE);
4554 localptr = OVECTOR_PRIV(offset);
4555 offset <<= 1;
4556 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4557 hotpath += IMM2_SIZE;
4558 }
4559 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
4560 {
4561 /* Other brackets simply allocate the next entry. */
4562 localptr = PRIV_DATA(ccbegin);
4563 SLJIT_ASSERT(localptr != 0);
4564 FALLBACK_AS(bracket_fallback)->localptr = localptr;
4565 if (opcode == OP_ONCE)
4566 FALLBACK_AS(bracket_fallback)->u.framesize = get_framesize(common, ccbegin, FALSE);
4567 }
4568
4569 /* Instructions before the first alternative. */
4570 stacksize = 0;
4571 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4572 stacksize++;
4573 if (bra == OP_BRAZERO)
4574 stacksize++;
4575
4576 if (stacksize > 0)
4577 allocate_stack(common, stacksize);
4578
4579 stacksize = 0;
4580 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
4581 {
4582 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4583 stacksize++;
4584 }
4585
4586 if (bra == OP_BRAZERO)
4587 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4588
4589 if (bra == OP_BRAMINZERO)
4590 {
4591 /* This is a fallback path! (Since the hot-path of OP_BRAMINZERO matches to the empty string) */
4592 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4593 if (ket != OP_KETRMIN)
4594 {
4595 free_stack(common, 1);
4596 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4597 }
4598 else
4599 {
4600 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4601 {
4602 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4603 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4604 /* Nothing stored during the first run. */
4605 skip = JUMP(SLJIT_JUMP);
4606 JUMPHERE(jump);
4607 /* Checking zero-length iteration. */
4608 if (opcode != OP_ONCE || FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4609 {
4610 /* When we come from outside, localptr contains the previous STR_PTR. */
4611 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4612 }
4613 else
4614 {
4615 /* Except when the whole stack frame must be saved. */
4616 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4617 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (FALLBACK_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w));
4618 }
4619 JUMPHERE(skip);
4620 }
4621 else
4622 {
4623 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4624 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4625 JUMPHERE(jump);
4626 }
4627 }
4628 }
4629
4630 if (ket == OP_KETRMIN)
4631 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4632
4633 if (ket == OP_KETRMAX)
4634 {
4635 rmaxlabel = LABEL();
4636 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
4637 FALLBACK_AS(bracket_fallback)->althotpath = rmaxlabel;
4638 }
4639
4640 /* Handling capturing brackets and alternatives. */
4641 if (opcode == OP_ONCE)
4642 {
4643 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4644 {
4645 /* Neither capturing brackets nor recursions are not found in the block. */
4646 if (ket == OP_KETRMIN)
4647 {
4648 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4649 allocate_stack(common, 2);
4650 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4651 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4652 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4653 }
4654 else if (ket == OP_KETRMAX || has_alternatives)
4655 {
4656 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4657 allocate_stack(common, 1);
4658 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4659 }
4660 else
4661 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4662 }
4663 else
4664 {
4665 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
4666 {
4667 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 2);
4668 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4669 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize + 1));
4670 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4672 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4673 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize + 1, 2, FALSE);
4674 }
4675 else
4676 {
4677 allocate_stack(common, FALLBACK_AS(bracket_fallback)->u.framesize + 1);
4678 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4679 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(FALLBACK_AS(bracket_fallback)->u.framesize));
4680 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4681 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4682 init_frame(common, ccbegin, FALLBACK_AS(bracket_fallback)->u.framesize, 1, FALSE);
4683 }
4684 }
4685 }
4686 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
4687 {
4688 /* Saving the previous values. */
4689 allocate_stack(common, 3);
4690 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4691 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4692 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4694 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4695 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4696 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
4697 }
4698 else if (opcode == OP_SBRA || opcode == OP_SCOND)
4699 {
4700 /* Saving the previous value. */
4701 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4702 allocate_stack(common, 1);
4703 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
4704 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4705 }
4706 else if (has_alternatives)
4707 {
4708 /* Pushing the starting string pointer. */
4709 allocate_stack(common, 1);
4710 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4711 }
4712
4713 /* Generating code for the first alternative. */
4714 if (opcode == OP_COND || opcode == OP_SCOND)
4715 {
4716 if (*hotpath == OP_CREF)
4717 {
4718 SLJIT_ASSERT(has_alternatives);
4719 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed),
4720 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(hotpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4721 hotpath += 1 + IMM2_SIZE;
4722 }
4723 else if (*hotpath == OP_NCREF)
4724 {
4725 SLJIT_ASSERT(has_alternatives);
4726 stacksize = GET2(hotpath, 1);
4727 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4728
4729 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4730 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4731 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4732 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4733 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4734 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4735 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
4736 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4737 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4738
4739 JUMPHERE(jump);
4740 hotpath += 1 + IMM2_SIZE;
4741 }
4742 else if (*hotpath == OP_RREF || *hotpath == OP_NRREF)
4743 {
4744 /* Never has other case. */
4745 FALLBACK_AS(bracket_fallback)->u.condfailed = NULL;
4746
4747 stacksize = GET2(hotpath, 1);
4748 if (common->currententry == NULL)
4749 stacksize = 0;
4750 else if (stacksize == RREF_ANY)
4751 stacksize = 1;
4752 else if (common->currententry->start == 0)
4753 stacksize = stacksize == 0;
4754 else
4755 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
4756
4757 if (*hotpath == OP_RREF || stacksize || common->currententry == NULL)
4758 {
4759 SLJIT_ASSERT(!has_alternatives);
4760 if (stacksize != 0)
4761 hotpath += 1 + IMM2_SIZE;
4762 else
4763 {
4764 if (*cc == OP_ALT)
4765 {
4766 hotpath = cc + 1 + LINK_SIZE;
4767 cc += GET(cc, 1);
4768 }
4769 else
4770 hotpath = cc;
4771 }
4772 }
4773 else
4774 {
4775 SLJIT_ASSERT(has_alternatives);
4776
4777 stacksize = GET2(hotpath, 1);
4778 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
4779 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
4780 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
4781 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
4782 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
4783 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_LOCALS_REG, 0);
4784 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
4785 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
4786 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
4787 add_jump(compiler, &(FALLBACK_AS(bracket_fallback)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
4788 hotpath += 1 + IMM2_SIZE;
4789 }
4790 }
4791 else
4792 {
4793 SLJIT_ASSERT(has_alternatives && *hotpath >= OP_ASSERT && *hotpath <= OP_ASSERTBACK_NOT);
4794 /* Similar code as PUSH_FALLBACK macro. */
4795 assert = sljit_alloc_memory(compiler, sizeof(assert_fallback));
4796 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4797 return NULL;
4798 memset(assert, 0, sizeof(assert_fallback));
4799 assert->common.cc = hotpath;
4800 FALLBACK_AS(bracket_fallback)->u.assert = assert;
4801 hotpath = compile_assert_hotpath(common, hotpath, assert, TRUE);
4802 }
4803 }
4804
4805 compile_hotpath(common, hotpath, cc, fallback);
4806 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4807 return NULL;
4808
4809 if (opcode == OP_ONCE)
4810 {
4811 if (FALLBACK_AS(bracket_fallback)->u.framesize < 0)
4812 {
4813 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4814 /* TMP2 which is set here used by OP_KETRMAX below. */
4815 if (ket == OP_KETRMAX)
4816 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
4817 else if (ket == OP_KETRMIN)
4818 {
4819 /* Move the STR_PTR to the localptr. */
4820 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
4821 }
4822 }
4823 else
4824 {
4825 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
4826 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (FALLBACK_AS(bracket_fallback)->u.framesize + stacksize) * sizeof(sljit_w));
4827 if (ket == OP_KETRMAX)
4828 {
4829 /* TMP2 which is set here used by OP_KETRMAX below. */
4830 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4831 }
4832 }
4833 }
4834
4835 stacksize = 0;
4836 if (ket != OP_KET || bra != OP_BRA)
4837 stacksize++;
4838 if (has_alternatives && opcode != OP_ONCE)
4839 stacksize++;
4840
4841 if (stacksize > 0)
4842 allocate_stack(common, stacksize);
4843
4844 stacksize = 0;
4845 if (ket != OP_KET)
4846 {
4847 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
4848 stacksize++;
4849 }
4850 else if (bra != OP_BRA)
4851 {
4852 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4853 stacksize++;
4854 }
4855
4856 if (has_alternatives)
4857 {
4858 if (opcode != OP_ONCE)
4859 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
4860 if (ket != OP_KETRMAX)
4861 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4862 }
4863
4864 /* Must be after the hotpath label. */
4865 if (offset != 0)
4866 {
4867 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4868 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
4869 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
4870 }
4871
4872 if (ket == OP_KETRMAX)
4873 {
4874 if (opcode == OP_ONCE || opcode >= OP_SBRA)
4875 {
4876 if (has_alternatives)
4877 FALLBACK_AS(bracket_fallback)->althotpath = LABEL();
4878 /* Checking zero-length iteration. */
4879 if (opcode != OP_ONCE)
4880 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
4881 else
4882 /* TMP2 must contain the starting STR_PTR. */
4883 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
4884 }
4885 else
4886 JUMPTO(SLJIT_JUMP, rmaxlabel);
4887 FALLBACK_AS(bracket_fallback)->recursivehotpath = LABEL();
4888 }
4889
4890 if (bra == OP_BRAZERO)
4891 FALLBACK_AS(bracket_fallback)->zerohotpath = LABEL();
4892
4893 if (bra == OP_BRAMINZERO)
4894 {
4895 /* This is a fallback path! (From the viewpoint of OP_BRAMINZERO) */
4896 JUMPTO(SLJIT_JUMP, ((braminzero_fallback *)parent)->hotpath);
4897 if (braminzerojump != NULL)
4898 {
4899 JUMPHERE(braminzerojump);
4900 /* We need to release the end pointer to perform the
4901 fallback for the zero-length iteration. When
4902 framesize is < 0, OP_ONCE will do the release itself. */
4903 if (opcode == OP_ONCE && FALLBACK_AS(bracket_fallback)->u.framesize >= 0)
4904 {
4905 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4906 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
4907 }
4908 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
4909 free_stack(common, 1);
4910 }
4911 /* Continue to the normal fallback. */
4912 }
4913
4914 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
4915 decrease_call_count(common);
4916
4917 /* Skip the other alternatives. */
4918 while (*cc == OP_ALT)
4919 cc += GET(cc, 1);
4920 cc += 1 + LINK_SIZE;
4921 return cc;
4922 }
4923
/* Generates the hot path for a bracket of the OP_BRAPOS family (OP_BRAPOS,
OP_SBRAPOS, OP_CBRAPOS, OP_SCBRAPOS), optionally preceded by OP_BRAPOSZERO.
The alternatives are compiled one after another until OP_KETRPOS is reached;
the fallback path of every alternative is emitted inline, right after the
alternative itself.

Arguments:
  common    compiler state
  cc        bytecode position of the bracket (or of the leading OP_BRAPOSZERO)
  parent    parent fallback record, consumed by the PUSH_FALLBACK macro
            (defined outside this block)

Returns:    the bytecode position after the closing OP_KETRPOS, or NULL when
            the sljit compiler reports an error
*/
4924 static pcre_uchar *compile_bracketpos_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
4925 {
4926 DEFINE_COMPILER;
4927 fallback_common *fallback;
4928 pcre_uchar opcode;
4929 int localptr;
4930 int cbraprivptr = 0;
4931 int framesize;
4932 int stacksize;
4933 int offset = 0;
4934 BOOL zero = FALSE;
4935 pcre_uchar *ccbegin = NULL;
4936 int stack;
4937 struct sljit_label *loop = NULL;
4938 struct jump_list *emptymatch = NULL;
4939
4940 PUSH_FALLBACK(sizeof(bracketpos_fallback), cc, NULL);
/* A leading OP_BRAPOSZERO is remembered in 'zero' and skipped. */
4941 if (*cc == OP_BRAPOSZERO)
4942 {
4943 zero = TRUE;
4944 cc++;
4945 }
4946
4947 opcode = *cc;
4948 localptr = PRIV_DATA(cc);
4949 SLJIT_ASSERT(localptr != 0);
4950 FALLBACK_AS(bracketpos_fallback)->localptr = localptr;
/* Locate the first alternative. For the capturing variants also fetch the
group number, its private slot, and turn the number into an ovector index
(two entries per group). */
4951 switch(opcode)
4952 {
4953 case OP_BRAPOS:
4954 case OP_SBRAPOS:
4955 ccbegin = cc + 1 + LINK_SIZE;
4956 break;
4957
4958 case OP_CBRAPOS:
4959 case OP_SCBRAPOS:
4960 offset = GET2(cc, 1 + LINK_SIZE);
4961 cbraprivptr = OVECTOR_PRIV(offset);
4962 offset <<= 1;
4963 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
4964 break;
4965
4966 default:
4967 SLJIT_ASSERT_STOP();
4968 break;
4969 }
4970
4971 framesize = get_framesize(common, cc, FALSE);
4972 FALLBACK_AS(bracketpos_fallback)->framesize = framesize;
/* Negative framesize: no init_frame() call is made. The stack block holds
either the saved OVECTOR pair of the group (capturing variants) or the
current STR_PTR, plus one extra flag slot when the bracket is not preceded
by OP_BRAPOSZERO. The flag starts as 1 and is cleared once an iteration
has matched. */
4973 if (framesize < 0)
4974 {
4975 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
4976 if (!zero)
4977 stacksize++;
4978 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
4979 allocate_stack(common, stacksize);
4980 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4981
4982 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
4983 {
4984 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4985 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4986 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
4987 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
4988 }
4989 else
4990 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4991
4992 if (!zero)
4993 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
4994 }
/* Non-negative framesize: the block holds, from STACK(0) upwards, the
optional "nothing matched yet" flag, the STR_PTR (non-capturing variants
only), the previous value of the localptr slot, and the frame written by
init_frame(). */
4995 else
4996 {
4997 stacksize = framesize + 1;
4998 if (!zero)
4999 stacksize++;
5000 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5001 stacksize++;
5002 FALLBACK_AS(bracketpos_fallback)->stacksize = stacksize;
5003 allocate_stack(common, stacksize);
5004
/* TMP1 keeps the old localptr value; the slot itself is redirected to
STACK_TOP - (-STACK(stacksize - 1)). */
5005 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5006 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5007 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5008 stack = 0;
5009 if (!zero)
5010 {
5011 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5012 stack++;
5013 }
5014 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5015 {
5016 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
5017 stack++;
5018 }
5019 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
5020 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
5021 }
5022
/* Capturing variants remember the start of the next iteration in the
private slot of the group. */
5023 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5024 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5025
/* Every successful iteration jumps back to this label. */
5026 loop = LABEL();
/* One pass of this loop per alternative: [ccbegin, cc) is the body of the
current alternative. */
5027 while (*cc != OP_KETRPOS)
5028 {
5029 fallback->top = NULL;
5030 fallback->topfallbacks = NULL;
5031 cc += GET(cc, 1);
5032
5033 compile_hotpath(common, ccbegin, cc, fallback);
5034 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5035 return NULL;
5036
/* The alternative matched: reset STACK_TOP from the localptr slot and
record the new position. */
5037 if (framesize < 0)
5038 {
5039 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5040
5041 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5042 {
/* TMP1 = start of this iteration; the OVECTOR pair of the group becomes
[start, STR_PTR] and the private slot is advanced to STR_PTR. */
5043 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5044 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5047 }
5048 else
5049 {
/* OP_SBRAPOS needs the previous position in TMP1 for the empty match
check below. */
5050 if (opcode == OP_SBRAPOS)
5051 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5052 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5053 }
5054
/* S-variants leave the loop when the iteration did not move STR_PTR. */
5055 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5056 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5057
/* Clear the "nothing matched yet" flag. */
5058 if (!zero)
5059 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5060 }
5061 else
5062 {
5063 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5064 {
5065 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
5066 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5067 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5068 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5069 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5070 }
5071 else
5072 {
/* The saved STR_PTR is addressed relative to the localptr value (TMP2),
at (framesize + 1) machine words. */
5073 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5074 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
5075 if (opcode == OP_SBRAPOS)
5076 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5077 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
5078 }
5079
5080 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5081 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5082
5083 if (!zero)
5084 {
/* NOTE(review): this code sits in the framesize >= 0 branch, so the
framesize < 0 arm below can never be selected. */
5085 if (framesize < 0)
5086 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5087 else
5088 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5089 }
5090 }
5091 JUMPTO(SLJIT_JUMP, loop);
5092 flush_stubs(common);
5093
/* Fallback path of this alternative, followed by the landing point of its
failing jumps. */
5094 compile_fallbackpath(common, fallback->top);
5095 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5096 return NULL;
5097 set_jumps(fallback->topfallbacks, LABEL());
5098
/* Restore STR_PTR to the last recorded position before trying the next
alternative (or leaving the bracket). */
5099 if (framesize < 0)
5100 {
5101 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5102 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5103 else
5104 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5105 }
5106 else
5107 {
5108 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5109 {
5110 /* Last alternative. */
5111 if (*cc == OP_KETRPOS)
5112 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5113 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5114 }
5115 else
5116 {
5117 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5118 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5119 }
5120 }
5121
5122 if (*cc == OP_KETRPOS)
5123 break;
5124 ccbegin = cc + 1 + LINK_SIZE;
5125 }
5126
/* Without OP_BRAPOSZERO at least one iteration is required: a flag that is
still non-zero sends execution to the fallback list of this bracket. */
5127 fallback->topfallbacks = NULL;
5128 if (!zero)
5129 {
5130 if (framesize < 0)
5131 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
5132 else /* TMP2 is set to [localptr] above. */
5133 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
5134 }
5135
5136 /* None of them matched. */
5137 set_jumps(emptymatch, LABEL());
5138 decrease_call_count(common);
5139 return cc + 1 + LINK_SIZE;
5140 }
5141
5142 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
5143 {
5144 int class_len;
5145
5146 *opcode = *cc;
5147 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
5148 {
5149 cc++;
5150 *type = OP_CHAR;
5151 }
5152 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
5153 {
5154 cc++;
5155 *type = OP_CHARI;
5156 *opcode -= OP_STARI - OP_STAR;
5157 }
5158 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
5159 {
5160 cc++;
5161 *type = OP_NOT;
5162 *opcode -= OP_NOTSTAR - OP_STAR;
5163 }
5164 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
5165 {
5166 cc++;
5167 *type = OP_NOTI;
5168 *opcode -= OP_NOTSTARI - OP_STAR;
5169 }
5170 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
5171 {
5172 cc++;
5173 *opcode -= OP_TYPESTAR - OP_STAR;
5174 *type = 0;
5175 }
5176 else
5177 {
5178 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
5179 *type = *opcode;
5180 cc++;
5181 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
5182 *opcode = cc[class_len - 1];
5183 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
5184 {
5185 *opcode -= OP_CRSTAR - OP_STAR;
5186 if (end != NULL)
5187 *end = cc + class_len;
5188 }
5189 else
5190 {
5191 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
5192 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
5193 *arg2 = GET2(cc, class_len);
5194
5195 if (*arg2 == 0)
5196 {
5197 SLJIT_ASSERT(*arg1 != 0);
5198 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
5199 }
5200 if (*arg1 == *arg2)
5201 *opcode = OP_EXACT;
5202
5203 if (end != NULL)
5204 *end = cc + class_len + 2 * IMM2_SIZE;
5205 }
5206 return cc;
5207 }
5208
5209 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
5210 {
5211 *arg1 = GET2(cc, 0);
5212 cc += IMM2_SIZE;
5213 }
5214
5215 if (*type == 0)
5216 {
5217 *type = *cc;
5218 if (end != NULL)
5219 *end = next_opcode(common, cc);
5220 cc++;
5221 return cc;
5222 }
5223
5224 if (end != NULL)
5225 {
5226 *end = cc + 1;
5227 #ifdef SUPPORT_UTF
5228 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
5229 #endif
5230 }
5231 return cc;
5232 }
5233
/* Generates the hot path for a repeated single item (character, negated
character, character type or character class). The repeat is first decoded
by get_iterator_parameters(), then one code sequence is emitted per repeat
kind.

Arguments:
  common    compiler state
  cc        bytecode position of the repeat (or of the class opcode)
  parent    parent fallback record, consumed by the PUSH_FALLBACK macro
            (defined outside this block)

Returns:    the bytecode position after the repeated item, as reported by
            get_iterator_parameters() through its 'end' argument
*/
5234 static pcre_uchar *compile_iterator_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
5235 {
5236 DEFINE_COMPILER;
5237 fallback_common *fallback;
5238 pcre_uchar opcode;
5239 pcre_uchar type;
5240 int arg1 = -1, arg2 = -1;
5241 pcre_uchar* end;
5242 jump_list *nomatch = NULL;
5243 struct sljit_jump *jump = NULL;
5244 struct sljit_label *label;
5245
5246 PUSH_FALLBACK(sizeof(iterator_fallback), cc, NULL);
5247
5248 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
5249
5250 switch(opcode)
5251 {
/* Greedy repeats. */
5252 case OP_STAR:
5253 case OP_PLUS:
5254 case OP_UPTO:
5255 case OP_CRRANGE:
/* For OP_ANYNL and OP_EXTUNI every reached position is pushed onto the
stack; a zero entry is stored below the saved positions. */
5256 if (type == OP_ANYNL || type == OP_EXTUNI)
5257 {
5258 if (opcode == OP_STAR || opcode == OP_UPTO)
5259 {
5260 allocate_stack(common, 2);
5261 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5262 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5263 }
5264 else
5265 {
5266 allocate_stack(common, 1);
5267 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5268 }
/* Counted variants keep the number of matched items in POSSESSIVE0. */
5269 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5270 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5271
5272 label = LABEL();
5273 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5274 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
5275 {
5276 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5277 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
/* NOTE(review): the jump back for "below arg2" is taken before TMP1 is
written back to POSSESSIVE0 a few lines below - confirm this is intended. */
5278 if (opcode == OP_CRRANGE && arg2 > 0)
5279 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
/* Leave the loop without saving the position once arg1 items matched. */
5280 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
5281 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
5282 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5283 }
5284
/* Save the position reached and try to match one more item. */
5285 allocate_stack(common, 1);
5286 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5287 JUMPTO(SLJIT_JUMP, label);
5288 if (jump != NULL)
5289 JUMPHERE(jump);
5290 }
/* All other types use two stack slots: STACK(0) holds the position after
the last matched item, STACK(1) a counter that starts at 1. */
5291 else
5292 {
5293 allocate_stack(common, 2);
5294 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5295 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5296 label = LABEL();
5297 compile_char1_hotpath(common, type, cc, &nomatch);
5298 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* No upper limit: count and loop unconditionally. */
5299 if (opcode <= OP_PLUS || (opcode == OP_CRRANGE && arg1 == 0))
5300 {
5301 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5302 JUMPTO(SLJIT_JUMP, label);
5303 }
/* Upper limit: loop only while the counter is below arg1 + 1. */
5304 else
5305 {
5306 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5307 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5308 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5309 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5310 }
5311 set_jumps(nomatch, LABEL());
/* Enforce the minimum: one item for OP_PLUS, arg2 items for OP_CRRANGE
(the counter is one ahead of the number of matched items). */
5312 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5313 add_jump(compiler, &fallback->topfallbacks,
5314 CMP(SLJIT_C_LESS, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, opcode == OP_PLUS ? 2 : arg2 + 1));
/* Continue from the position after the last matched item. */
5315 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5316 }
5317 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5318 break;
5319
/* Lazy star / plus: only the current position is saved here. OP_MINPLUS
jumps straight to the fallback list. */
5320 case OP_MINSTAR:
5321 case OP_MINPLUS:
5322 allocate_stack(common, 1);
5323 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5324 if (opcode == OP_MINPLUS)
5325 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5326 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5327 break;
5328
/* Lazy counted repeats: position in STACK(0), counter (starting at 1) in
STACK(1). OP_CRMINRANGE jumps straight to the fallback list. */
5329 case OP_MINUPTO:
5330 case OP_CRMINRANGE:
5331 allocate_stack(common, 2);
5332 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5333 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5334 if (opcode == OP_CRMINRANGE)
5335 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5336 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5337 break;
5338
/* Optional item: the starting position is saved; only the greedy form
tries to match the item on the hot path. */
5339 case OP_QUERY:
5340 case OP_MINQUERY:
5341 allocate_stack(common, 1);
5342 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5343 if (opcode == OP_QUERY)
5344 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5345 FALLBACK_AS(iterator_fallback)->hotpath = LABEL();
5346 break;
5347
/* Exact count: loop until the POSSESSIVE0 counter (starting at 1) reaches
arg1 + 1; any mismatch goes to the fallback list. No stack is used. */
5348 case OP_EXACT:
5349 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5350 label = LABEL();
5351 compile_char1_hotpath(common, type, cc, &fallback->topfallbacks);
5352 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5353 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5354 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5355 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5356 break;
5357
/* Possessive repeats: no stack entries are created. POSSESSIVE1 tracks the
position after the last matched item, POSSESSIVE0 the counter / "matched at
least once" marker. */
5358 case OP_POSSTAR:
5359 case OP_POSPLUS:
5360 case OP_POSUPTO:
5361 if (opcode != OP_POSSTAR)
5362 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 1);
5363 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5364 label = LABEL();
5365 compile_char1_hotpath(common, type, cc, &nomatch);
5366 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5367 if (opcode != OP_POSUPTO)
5368 {
5369 if (opcode == OP_POSPLUS)
5370 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2);
5371 JUMPTO(SLJIT_JUMP, label);
5372 }
5373 else
5374 {
5375 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5376 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5377 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5378 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
5379 }
5380 set_jumps(nomatch, LABEL());
/* OP_POSPLUS fails when POSSESSIVE0 was never raised to 2. */
5381 if (opcode == OP_POSPLUS)
5382 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_LESS, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 2));
5383 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5384 break;
5385
/* Possessive optional item: a mismatch simply restores the saved position. */
5386 case OP_POSQUERY:
5387 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5388 compile_char1_hotpath(common, type, cc, &nomatch);
5389 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STR_PTR, 0);
5390 set_jumps(nomatch, LABEL());
5391 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5392 break;
5393
5394 default:
5395 SLJIT_ASSERT_STOP();
5396 break;
5397 }
5398
5399 decrease_call_count(common);
5400 return end;
5401 }
5402
5403 static SLJIT_INLINE pcre_uchar *compile_fail_accept_hotpath(compiler_common *common, pcre_uchar *cc, fallback_common *parent)
5404 {
5405 DEFINE_COMPILER;
5406 fallback_common *fallback;
5407
5408 PUSH_FALLBACK(sizeof(bracket_fallback), cc, NULL);
5409
5410 if (*cc == OP_FAIL)
5411 {
5412 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5413 return cc + 1;
5414 }
5415
5416 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
5417 {
5418 /* No need to check notempty conditions. */
5419 if (common->acceptlabel == NULL)
5420 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
5421 else
5422 JUMPTO(SLJIT_JUMP, common->acceptlabel);
5423 return cc + 1;
5424 }
5425
5426 if (common->acceptlabel == NULL)
5427 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
5428 else
5429 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
5430 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5431 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
5432 add_jump(compiler, &fallback->topfallbacks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5433 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
5434 if (common->acceptlabel == NULL)
5435 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5436 else
5437 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
5438 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5439 if (common->acceptlabel == NULL)
5440 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
5441 else
5442 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
5443 add_jump(compiler, &fallback->topfallbacks, JUMP(SLJIT_JUMP));
5444 return cc + 1;
5445 }
5446
5447 static SLJIT_INLINE pcre_uchar *compile_close_hotpath(compiler_common *common, pcre_uchar *cc)
5448 {
5449 DEFINE_COMPILER;
5450 int offset = GET2(cc, 1);
5451
5452 /* Data will be discarded anyway... */
5453 if (common->currententry != NULL)
5454 return cc + 1 + IMM2_SIZE;
5455
5456 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
5457 offset <<= 1;
5458 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5459 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5460 return cc + 1 + IMM2_SIZE;
5461 }
5462
/* Generates the hot path for the bytecode range [cc, ccend) by dispatching
every opcode to its dedicated generator.

Arguments:
  common    compiler state
  cc        first opcode to compile
  ccend     end of the range; the walk is expected to stop exactly here
  parent    fallback record that collects the fallbacks of the compiled items

The function returns early when a generator returns NULL or an unknown
opcode is found; callers in this file check sljit_get_compiler_error()
after the call.
*/
5463 static void compile_hotpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, fallback_common *parent)
5464 {
5465 DEFINE_COMPILER;
5466 fallback_common *fallback;
5467
5468 while (cc < ccend)
5469 {
5470 switch(*cc)
5471 {
/* Single items without their own fallback record: a failure jumps to the
nextfallbacks list of the most recent fallback of the parent, or to the
topfallbacks list of the parent when there is none yet. */
5472 case OP_SOD:
5473 case OP_SOM:
5474 case OP_NOT_WORD_BOUNDARY:
5475 case OP_WORD_BOUNDARY:
5476 case OP_NOT_DIGIT:
5477 case OP_DIGIT:
5478 case OP_NOT_WHITESPACE:
5479 case OP_WHITESPACE:
5480 case OP_NOT_WORDCHAR:
5481 case OP_WORDCHAR:
5482 case OP_ANY:
5483 case OP_ALLANY:
5484 case OP_ANYBYTE:
5485 case OP_NOTPROP:
5486 case OP_PROP:
5487 case OP_ANYNL:
5488 case OP_NOT_HSPACE:
5489 case OP_HSPACE:
5490 case OP_NOT_VSPACE:
5491 case OP_VSPACE:
5492 case OP_EXTUNI:
5493 case OP_EODN:
5494 case OP_EOD:
5495 case OP_CIRC:
5496 case OP_CIRCM:
5497 case OP_DOLL:
5498 case OP_DOLLM:
5499 case OP_NOT:
5500 case OP_NOTI:
5501 case OP_REVERSE:
5502 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5503 break;
5504
/* OP_SET_SOM: OVECTOR(0) is replaced by STR_PTR and its previous value is
saved on the stack. */
5505 case OP_SET_SOM:
5506 PUSH_FALLBACK_NOVALUE(sizeof(fallback_common), cc);
5507 allocate_stack(common, 1);
5508 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
5510 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5511 cc++;
5512 break;
5513
/* Literal characters: the multi-character generator is used only when
common->mode is JIT_COMPILE. */
5514 case OP_CHAR:
5515 case OP_CHARI:
5516 if (common->mode == JIT_COMPILE)
5517 cc = compile_charn_hotpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5518 else
5519 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5520 break;
5521
/* Repeated characters, negated characters and character types. */
5522 case OP_STAR:
5523 case OP_MINSTAR:
5524 case OP_PLUS:
5525 case OP_MINPLUS:
5526 case OP_QUERY:
5527 case OP_MINQUERY:
5528 case OP_UPTO:
5529 case OP_MINUPTO:
5530 case OP_EXACT:
5531 case OP_POSSTAR:
5532 case OP_POSPLUS:
5533 case OP_POSQUERY:
5534 case OP_POSUPTO:
5535 case OP_STARI:
5536 case OP_MINSTARI:
5537 case OP_PLUSI:
5538 case OP_MINPLUSI:
5539 case OP_QUERYI:
5540 case OP_MINQUERYI:
5541 case OP_UPTOI:
5542 case OP_MINUPTOI:
5543 case OP_EXACTI:
5544 case OP_POSSTARI:
5545 case OP_POSPLUSI:
5546 case OP_POSQUERYI:
5547 case OP_POSUPTOI:
5548 case OP_NOTSTAR:
5549 case OP_NOTMINSTAR:
5550 case OP_NOTPLUS:
5551 case OP_NOTMINPLUS:
5552 case OP_NOTQUERY:
5553 case OP_NOTMINQUERY:
5554 case OP_NOTUPTO:
5555 case OP_NOTMINUPTO:
5556 case OP_NOTEXACT:
5557 case OP_NOTPOSSTAR:
5558 case OP_NOTPOSPLUS:
5559 case OP_NOTPOSQUERY:
5560 case OP_NOTPOSUPTO:
5561 case OP_NOTSTARI:
5562 case OP_NOTMINSTARI:
5563 case OP_NOTPLUSI:
5564 case OP_NOTMINPLUSI:
5565 case OP_NOTQUERYI:
5566 case OP_NOTMINQUERYI:
5567 case OP_NOTUPTOI:
5568 case OP_NOTMINUPTOI:
5569 case OP_NOTEXACTI:
5570 case OP_NOTPOSSTARI:
5571 case OP_NOTPOSPLUSI:
5572 case OP_NOTPOSQUERYI:
5573 case OP_NOTPOSUPTOI:
5574 case OP_TYPESTAR:
5575 case OP_TYPEMINSTAR:
5576 case OP_TYPEPLUS:
5577 case OP_TYPEMINPLUS:
5578 case OP_TYPEQUERY:
5579 case OP_TYPEMINQUERY:
5580 case OP_TYPEUPTO:
5581 case OP_TYPEMINUPTO:
5582 case OP_TYPEEXACT:
5583 case OP_TYPEPOSSTAR:
5584 case OP_TYPEPOSPLUS:
5585 case OP_TYPEPOSQUERY:
5586 case OP_TYPEPOSUPTO:
5587 cc = compile_iterator_hotpath(common, cc, parent);
5588 break;
5589
/* Character classes: a class followed by a repeat opcode (OP_CRSTAR ...
OP_CRMINRANGE) is handled as an iterator, otherwise as a single item. The
repeat opcode sits after the opcode and the 32 byte bitmap. */
5590 case OP_CLASS:
5591 case OP_NCLASS:
5592 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
5593 cc = compile_iterator_hotpath(common, cc, parent);
5594 else
5595 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5596 break;
5597
/* Extended classes store their own length right after the opcode. */
5598 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
5599 case OP_XCLASS:
5600 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
5601 cc = compile_iterator_hotpath(common, cc, parent);
5602 else
5603 cc = compile_char1_hotpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks);
5604 break;
5605 #endif
5606
/* Back references, with or without a following repeat opcode. */
5607 case OP_REF:
5608 case OP_REFI:
5609 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
5610 cc = compile_ref_iterator_hotpath(common, cc, parent);
5611 else
5612 cc = compile_ref_hotpath(common, cc, parent->top != NULL ? &parent->top->nextfallbacks : &parent->topfallbacks, TRUE, FALSE);
5613 break;
5614
5615 case OP_RECURSE:
5616 cc = compile_recurse_hotpath(common, cc, parent);
5617 break;
5618
/* Assertions get their own fallback record. */
5619 case OP_ASSERT:
5620 case OP_ASSERT_NOT:
5621 case OP_ASSERTBACK:
5622 case OP_ASSERTBACK_NOT:
5623 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5624 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5625 break;
5626
/* OP_BRAMINZERO: the bracket itself is skipped on the hot path (cc is moved
to the result of bracketend()); only the current position is saved, with an
extra zero entry when the bracket ends with OP_KETRMIN. */
5627 case OP_BRAMINZERO:
5628 PUSH_FALLBACK_NOVALUE(sizeof(braminzero_fallback), cc);
5629 cc = bracketend(cc + 1);
5630 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
5631 {
5632 allocate_stack(common, 1);
5633 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5634 }
5635 else
5636 {
5637 allocate_stack(common, 2);
5638 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5639 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
5640 }
5641 FALLBACK_AS(braminzero_fallback)->hotpath = LABEL();
/* NOTE(review): cc was already advanced by bracketend() above, so cc[1] is
no longer the opcode that follows OP_BRAMINZERO - confirm this is intended. */
5642 if (cc[1] > OP_ASSERTBACK_NOT)
5643 decrease_call_count(common);
5644 break;
5645
/* Ordinary brackets, atomic groups and conditions. */
5646 case OP_ONCE:
5647 case OP_ONCE_NC:
5648 case OP_BRA:
5649 case OP_CBRA:
5650 case OP_COND:
5651 case OP_SBRA:
5652 case OP_SCBRA:
5653 case OP_SCOND:
5654 cc = compile_bracket_hotpath(common, cc, parent);
5655 break;
5656
/* OP_BRAZERO is followed either by a bracket or by an assertion (opcodes up
to OP_ASSERTBACK_NOT). */
5657 case OP_BRAZERO:
5658 if (cc[1] > OP_ASSERTBACK_NOT)
5659 cc = compile_bracket_hotpath(common, cc, parent);
5660 else
5661 {
5662 PUSH_FALLBACK_NOVALUE(sizeof(assert_fallback), cc);
5663 cc = compile_assert_hotpath(common, cc, FALLBACK_AS(assert_fallback), FALSE);
5664 }
5665 break;
5666
/* Brackets of the OP_BRAPOS family. */
5667 case OP_BRAPOS:
5668 case OP_CBRAPOS:
5669 case OP_SBRAPOS:
5670 case OP_SCBRAPOS:
5671 case OP_BRAPOSZERO:
5672 cc = compile_bracketpos_hotpath(common, cc, parent);
5673 break;
5674
5675 case OP_FAIL:
5676 case OP_ACCEPT:
5677 case OP_ASSERT_ACCEPT:
5678 cc = compile_fail_accept_hotpath(common, cc, parent);
5679 break;
5680
5681 case OP_CLOSE:
5682 cc = compile_close_hotpath(common, cc);
5683 break;
5684
/* OP_SKIPZERO: no code is generated for the bracket that follows. */
5685 case OP_SKIPZERO:
5686 cc = bracketend(cc + 1);
5687 break;
5688
5689 default:
5690 SLJIT_ASSERT_STOP();
5691 return;
5692 }
/* A NULL result from a generator stops the compilation of this range. */
5693 if (cc == NULL)
5694 return;
5695 }
5696 SLJIT_ASSERT(cc == ccend);
5697 }
5698
/* The helper macros of the hot path generators are retired here; the
fallback path generators below use the two macros defined next. */
5699 #undef PUSH_FALLBACK
5700 #undef PUSH_FALLBACK_NOVALUE
5701 #undef FALLBACK_AS
5702
/* Runs compile_fallbackpath() on 'current' and returns from the enclosing
function when the sljit compiler reports an error. It expects 'common' and
'compiler' to be in scope and may only be used in functions returning void. */
5703 #define COMPILE_FALLBACKPATH(current) \
5704 do \
5705 { \
5706 compile_fallbackpath(common, (current)); \
5707 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5708 return; \
5709 } \
5710 while (0)
5711
/* Casts the variable named 'current' of the enclosing function to a pointer
to the given fallback structure type. */
5712 #define CURRENT_AS(type) ((type *)current)
5713
5714 static void compile_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5715 {
5716 DEFINE_COMPILER;
5717 pcre_uchar *cc = current->cc;
5718 pcre_uchar opcode;
5719 pcre_uchar type;
5720 int arg1 = -1, arg2 = -1;
5721 struct sljit_label *label = NULL;
5722 struct sljit_jump *jump = NULL;
5723
5724 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
5725
5726 switch(opcode)
5727 {
5728 case OP_STAR:
5729 case OP_PLUS:
5730 case OP_UPTO:
5731 case OP_CRRANGE:
5732 if (type == OP_ANYNL || type == OP_EXTUNI)
5733 {
5734 set_jumps(current->topfallbacks, LABEL());
5735 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5736 free_stack(common, 1);
5737 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5738 }
5739 else
5740 {
5741 if (opcode == OP_STAR || opcode == OP_UPTO)
5742 arg2 = 0;
5743 else if (opcode == OP_PLUS)
5744 arg2 = 1;
5745 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, arg2 + 1);
5746 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5747 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5748 skip_char_back(common);
5749 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5750 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5751 if (opcode == OP_PLUS || opcode == OP_CRRANGE)
5752 set_jumps(current->topfallbacks, LABEL());
5753 JUMPHERE(jump);
5754 free_stack(common, 2);
5755 }
5756 break;
5757
5758 case OP_MINSTAR:
5759 case OP_MINPLUS:
5760 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5761 if (opcode == OP_MINPLUS)
5762 {
5763 set_jumps(current->topfallbacks, LABEL());
5764 current->topfallbacks = NULL;
5765 }
5766 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5767 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5768 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5769 set_jumps(current->topfallbacks, LABEL());
5770 free_stack(common, 1);
5771 break;
5772
5773 case OP_MINUPTO:
5774 case OP_CRMINRANGE:
5775 if (opcode == OP_CRMINRANGE)
5776 {
5777 set_jumps(current->topfallbacks, LABEL());
5778 current->topfallbacks = NULL;
5779 label = LABEL();
5780 }
5781 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5782 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5783
5784 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5785 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5786 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5787 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5788
5789 if (opcode == OP_CRMINRANGE)
5790 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
5791
5792 if (opcode == OP_CRMINRANGE && arg1 == 0)
5793 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5794 else
5795 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_fallback)->hotpath);
5796
5797 set_jumps(current->topfallbacks, LABEL());
5798 free_stack(common, 2);
5799 break;
5800
5801 case OP_QUERY:
5802 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5803 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5804 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5805 jump = JUMP(SLJIT_JUMP);
5806 set_jumps(current->topfallbacks, LABEL());
5807 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5808 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5809 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5810 JUMPHERE(jump);
5811 free_stack(common, 1);
5812 break;
5813
5814 case OP_MINQUERY:
5815 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5816 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5817 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5818 compile_char1_hotpath(common, type, cc, &current->topfallbacks);
5819 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_fallback)->hotpath);
5820 set_jumps(current->topfallbacks, LABEL());
5821 JUMPHERE(jump);
5822 free_stack(common, 1);
5823 break;
5824
5825 case OP_EXACT:
5826 case OP_POSPLUS:
5827 set_jumps(current->topfallbacks, LABEL());
5828 break;
5829
5830 case OP_POSSTAR:
5831 case OP_POSQUERY:
5832 case OP_POSUPTO:
5833 break;
5834
5835 default:
5836 SLJIT_ASSERT_STOP();
5837 break;
5838 }
5839 }
5840
5841 static void compile_ref_iterator_fallbackpath(compiler_common *common, struct fallback_common *current)
5842 {
5843 DEFINE_COMPILER;
5844 pcre_uchar *cc = current->cc;
5845 pcre_uchar type;
5846
5847 type = cc[1 + IMM2_SIZE];
5848 if ((type & 0x1) == 0)
5849 {
5850 set_jumps(current->topfallbacks, LABEL());
5851 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5852 free_stack(common, 1);
5853 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5854 return;
5855 }
5856
5857 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5858 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_fallback)->hotpath);
5859 set_jumps(current->topfallbacks, LABEL());
5860 free_stack(common, 2);
5861 }
5862
5863 static void compile_recurse_fallbackpath(compiler_common *common, struct fallback_common *current)
5864 {
5865 DEFINE_COMPILER;
5866
5867 set_jumps(current->topfallbacks, LABEL());
5868 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5869 free_stack(common, 1);
5870 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
5871 }
5872
5873 static void compile_assert_fallbackpath(compiler_common *common, struct fallback_common *current)
5874 {
5875 DEFINE_COMPILER;
5876 pcre_uchar *cc = current->cc;
5877 pcre_uchar bra = OP_BRA;
5878 struct sljit_jump *brajump = NULL;
5879
5880 SLJIT_ASSERT(*cc != OP_BRAMINZERO);
5881 if (*cc == OP_BRAZERO)
5882 {
5883 bra = *cc;
5884 cc++;
5885 }
5886
5887 if (bra == OP_BRAZERO)
5888 {
5889 SLJIT_ASSERT(current->topfallbacks == NULL);
5890 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5891 }
5892
5893 if (CURRENT_AS(assert_fallback)->framesize < 0)
5894 {
5895 set_jumps(current->topfallbacks, LABEL());
5896
5897 if (bra == OP_BRAZERO)
5898 {
5899 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5900 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5901 free_stack(common, 1);
5902 }
5903 return;
5904 }
5905
5906 if (bra == OP_BRAZERO)
5907 {
5908 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
5909 {
5910 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5911 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_fallback)->hotpath);
5912 free_stack(common, 1);
5913 return;
5914 }
5915 free_stack(common, 1);
5916 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5917 }
5918
5919 if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
5920 {
5921 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr);
5922 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5923 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(assert_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(assert_fallback)->framesize * sizeof(sljit_w));
5924
5925 set_jumps(current->topfallbacks, LABEL());
5926 }
5927 else
5928 set_jumps(current->topfallbacks, LABEL());
5929
5930 if (bra == OP_BRAZERO)
5931 {
5932 /* We know there is enough place on the stack. */
5933 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5934 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5935 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_fallback)->hotpath);
5936 JUMPHERE(brajump);
5937 }
5938 }
5939
/* Generates the fallback path of a bracket: OP_BRA, OP_CBRA, OP_ONCE,
   OP_ONCE_NC, OP_COND and their S-prefixed variants, optionally preceded
   by OP_BRAZERO or OP_BRAMINZERO.

   Arguments:
     common   shared state of the JIT compiler
     current  fallback record, viewed as a bracket_fallback

   Outline of the generated code:
     1. prologue depending on the closing opcode (OP_KET, OP_KETRMAX,
        OP_KETRMIN) and on the zero prefix;
     2. dispatch on the saved alternative index through a list of jumps
        (or frame revert for OP_ONCE, or a single jump for conditions);
     3. fallback path of the first alternative, then the hotpath and the
        fallback path of every remaining alternative;
     4. restoring of the saved capture / local data;
     5. epilogue for repeated and zero-prefixed brackets.

   The function returns early, without finishing the code, when a
   jump_list item cannot be allocated or the compiler reports an error.
   The stack accounting depends on the exact order of the statements. */
5940 static void compile_bracket_fallbackpath(compiler_common *common, struct fallback_common *current)
5941 {
5942 DEFINE_COMPILER;
5943 int opcode;
5944 int offset = 0;
5945 int localptr = CURRENT_AS(bracket_fallback)->localptr;
5946 int stacksize;
5947 int count;
5948 pcre_uchar *cc = current->cc;
5949 pcre_uchar *ccbegin;
5950 pcre_uchar *ccprev;
5951 jump_list *jumplist = NULL;
5952 jump_list *jumplistitem = NULL;
5953 pcre_uchar bra = OP_BRA;
5954 pcre_uchar ket;
5955 assert_fallback *assert;
5956 BOOL has_alternatives;
5957 struct sljit_jump *brazero = NULL;
5958 struct sljit_jump *once = NULL;
5959 struct sljit_jump *cond = NULL;
5960 struct sljit_label *rminlabel = NULL;
5961
/* Remember and skip the optional zero prefix. */
5962 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5963 {
5964 bra = *cc;
5965 cc++;
5966 }
5967
/* ket is the closing opcode of the whole bracket; cc is moved to the end
   of the first alternative. */
5968 opcode = *cc;
5969 ccbegin = cc;
5970 ket = *(bracketend(ccbegin) - 1 - LINK_SIZE);
5971 cc += GET(cc, 1);
5972 has_alternatives = *cc == OP_ALT;
5973 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5974 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_fallback)->u.condfailed != NULL;
/* For capturing brackets offset is twice the number stored after the link
   field; it is used below as an OVECTOR index. */
5975 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5976 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
/* A repeated OP_COND is handled as OP_SCOND, and OP_ONCE_NC as OP_ONCE. */
5977 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5978 opcode = OP_SCOND;
5979 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5980 opcode = OP_ONCE;
5981
/* Prologue: depends on the closing opcode and on the zero prefix. */
5982 if (ket == OP_KETRMAX)
5983 {
5984 if (bra != OP_BRAZERO)
5985 free_stack(common, 1);
5986 else
5987 {
5988 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5989 free_stack(common, 1);
5990 brazero = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0);
5991 }
5992 }
5993 else if (ket == OP_KETRMIN)
5994 {
5995 if (bra != OP_BRAMINZERO)
5996 {
5997 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5998 if (opcode >= OP_SBRA || opcode == OP_ONCE)
5999 {
6000 /* Checking zero-length iteration. */
6001 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize < 0)
6002 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, CURRENT_AS(bracket_fallback)->recursivehotpath);
6003 else
6004 {
6005 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6006 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (CURRENT_AS(bracket_fallback)->u.framesize + 1) * sizeof(sljit_w), CURRENT_AS(bracket_fallback)->recursivehotpath);
6007 }
6008 if (opcode != OP_ONCE)
6009 free_stack(common, 1);
6010 }
6011 else
6012 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->recursivehotpath);
6013 }
/* The OP_KETRMIN epilogue at the end of the function jumps back here. */
6014 rminlabel = LABEL();
6015 }
6016 else if (bra == OP_BRAZERO)
6017 {
6018 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6019 free_stack(common, 1);
6020 brazero = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
6021 }
6022
/* Dispatch: decide which fallback code has to be executed. */
6023 if (SLJIT_UNLIKELY(opcode == OP_ONCE))
6024 {
6025 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6026 {
6027 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6028 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6029 }
6030 once = JUMP(SLJIT_JUMP);
6031 }
6032 else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6033 {
6034 if (has_alternatives)
6035 {
6036 /* Always exactly one alternative. */
6037 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6038 free_stack(common, 1);
6039
6040 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
6041 if (SLJIT_UNLIKELY(!jumplistitem))
6042 return;
6043 jumplist = jumplistitem;
6044 jumplistitem->next = NULL;
6045 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 1);
6046 }
6047 }
6048 else if (*cc == OP_ALT)
6049 {
6050 /* Build a jump list. Get the last successfully matched branch index. */
6051 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6052 free_stack(common, 1);
6053 count = 1;
6054 do
6055 {
6056 /* Append as the last item. */
6057 if (jumplist != NULL)
6058 {
6059 jumplistitem->next = sljit_alloc_memory(compiler, sizeof(jump_list));
6060 jumplistitem = jumplistitem->next;
6061 }
6062 else
6063 {
6064 jumplistitem = sljit_alloc_memory(compiler, sizeof(jump_list));
6065 jumplist = jumplistitem;
6066 }
6067
6068 if (SLJIT_UNLIKELY(!jumplistitem))
6069 return;
6070
6071 jumplistitem->next = NULL;
6072 jumplistitem->jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, count++);
6073 cc += GET(cc, 1);
6074 }
6075 while (*cc == OP_ALT);
6076
/* Rewind cc to the end of the first alternative. */
6077 cc = ccbegin + GET(ccbegin, 1);
6078 }
6079
/* Fallback path of the first alternative. */
6080 COMPILE_FALLBACKPATH(current->top);
6081 if (current->topfallbacks)
6082 set_jumps(current->topfallbacks, LABEL());
6083
6084 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6085 {
6086 /* Conditional block always has at most one alternative. */
6087 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
6088 {
6089 SLJIT_ASSERT(has_alternatives);
6090 assert = CURRENT_AS(bracket_fallback)->u.assert;
6091 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
6092 {
6093 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6094 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6095 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6096 }
6097 cond = JUMP(SLJIT_JUMP);
6098 set_jumps(CURRENT_AS(bracket_fallback)->u.assert->condfailed, LABEL());
6099 }
6100 else if (CURRENT_AS(bracket_fallback)->u.condfailed != NULL)
6101 {
6102 SLJIT_ASSERT(has_alternatives);
6103 cond = JUMP(SLJIT_JUMP);
6104 set_jumps(CURRENT_AS(bracket_fallback)->u.condfailed, LABEL());
6105 }
6106 else
6107 SLJIT_ASSERT(!has_alternatives);
6108 }
6109
/* Hotpath and fallback path of the remaining alternatives. The fields of
   `current` are reused as the parent record of each alternative. */
6110 if (has_alternatives)
6111 {
6112 count = 1;
6113 do
6114 {
6115 current->top = NULL;
6116 current->topfallbacks = NULL;
6117 current->nextfallbacks = NULL;
6118 if (*cc == OP_ALT)
6119 {
6120 ccprev = cc + 1 + LINK_SIZE;
6121 cc += GET(cc, 1);
6122 if (opcode != OP_COND && opcode != OP_SCOND)
6123 {
6124 if (localptr != 0 && opcode != OP_ONCE)
6125 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6126 else
6127 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6128 }
6129 compile_hotpath(common, ccprev, cc, current);
6130 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6131 return;
6132 }
6133
6134 /* Instructions after the current alternative is successfully matched. */
6135 /* There is a similar code in compile_bracket_hotpath. */
6136 if (opcode == OP_ONCE)
6137 {
6138 if (CURRENT_AS(bracket_fallback)->u.framesize < 0)
6139 {
6140 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6141 /* TMP2 which is set here used by OP_KETRMAX below. */
6142 if (ket == OP_KETRMAX)
6143 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6144 else if (ket == OP_KETRMIN)
6145 {
6146 /* Move the STR_PTR to the localptr. */
6147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
6148 }
6149 }
6150 else
6151 {
6152 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (CURRENT_AS(bracket_fallback)->u.framesize + 2) * sizeof(sljit_w));
6153 if (ket == OP_KETRMAX)
6154 {
6155 /* TMP2 which is set here used by OP_KETRMAX below. */
6156 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6157 }
6158 }
6159 }
6160
/* First pass: count the stack slots needed (one for the alternative
   index unless OP_ONCE, one more for repeated or zero-prefixed brackets). */
6161 stacksize = 0;
6162 if (opcode != OP_ONCE)
6163 stacksize++;
6164 if (ket != OP_KET || bra != OP_BRA)
6165 stacksize++;
6166
6167 if (stacksize > 0) {
6168 if (opcode != OP_ONCE || CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6169 allocate_stack(common, stacksize);
6170 else
6171 {
6172 /* We know we have place at least for one item on the top of the stack. */
6173 SLJIT_ASSERT(stacksize == 1);
6174 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
6175 }
6176 }
6177
/* Second pass: stacksize is reused as the index of the slot being filled. */
6178 stacksize = 0;
6179 if (ket != OP_KET || bra != OP_BRA)
6180 {
6181 if (ket != OP_KET)
6182 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6183 else
6184 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6185 stacksize++;
6186 }
6187
6188 if (opcode != OP_ONCE)
6189 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, count++);
6190
6191 if (offset != 0)
6192 {
6193 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6194 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6195 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
6196 }
6197
6198 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->althotpath);
6199
/* Bind the dispatch jump that belongs to this alternative. */
6200 if (opcode != OP_ONCE)
6201 {
6202 SLJIT_ASSERT(jumplist);
6203 JUMPHERE(jumplist->jump);
6204 jumplist = jumplist->next;
6205 }
6206
6207 COMPILE_FALLBACKPATH(current->top);
6208 if (current->topfallbacks)
6209 set_jumps(current->topfallbacks, LABEL());
6210 SLJIT_ASSERT(!current->nextfallbacks);
6211 }
6212 while (*cc == OP_ALT);
6213 SLJIT_ASSERT(!jumplist);
6214
6215 if (cond != NULL)
6216 {
6217 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
/* u.assert is only dereferenced below when the condition is a negative
   assertion (the opcode test is evaluated first). */
6218 assert = CURRENT_AS(bracket_fallback)->u.assert;
6219 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
6220
6221 {
6222 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr);
6223 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6224 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), assert->localptr, SLJIT_MEM1(STACK_TOP), assert->framesize * sizeof(sljit_w));
6225 }
6226 JUMPHERE(cond);
6227 }
6228
6229 /* Free the STR_PTR. */
6230 if (localptr == 0)
6231 free_stack(common, 1);
6232 }
6233
/* Restore the data saved when the bracket was entered. */
6234 if (offset != 0)
6235 {
6236 /* Using both tmp register is better for instruction scheduling. */
6237 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6238 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6239 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6240 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6241 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(2));
6242 free_stack(common, 3);
6243 }
6244 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6245 {
6246 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), STACK(0));
6247 free_stack(common, 1);
6248 }
6249 else if (opcode == OP_ONCE)
6250 {
6251 cc = ccbegin + GET(ccbegin, 1);
6252 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6253 {
6254 /* Reset head and drop saved frame. */
6255 stacksize = (ket == OP_KETRMAX || ket == OP_KETRMIN || *cc == OP_ALT) ? 2 : 1;
6256 free_stack(common, CURRENT_AS(bracket_fallback)->u.framesize + stacksize);
6257 }
6258 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
6259 {
6260 /* The STR_PTR must be released. */
6261 free_stack(common, 1);
6262 }
6263
6264 JUMPHERE(once);
6265 /* Restore previous localptr */
6266 if (CURRENT_AS(bracket_fallback)->u.framesize >= 0)
6267 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracket_fallback)->u.framesize * sizeof(sljit_w));
6268 else if (ket == OP_KETRMIN)
6269 {
6270 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6271 /* See the comment below. */
6272 free_stack(common, 2);
6273 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
6274 }
6275 }
6276
/* Epilogue for repeated and zero-prefixed brackets. */
6277 if (ket == OP_KETRMAX)
6278 {
6279 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6280 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_fallback)->recursivehotpath);
6281 if (bra == OP_BRAZERO)
6282 {
6283 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6284 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
6285 JUMPHERE(brazero);
6286 }
6287 free_stack(common, 1);
6288 }
6289 else if (ket == OP_KETRMIN)
6290 {
6291 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6292
6293 /* OP_ONCE removes everything in case of a fallback, so we don't
6294 need to explicitly release the STR_PTR. The extra release would
6295 affect badly the free_stack(2) above. */
6296 if (opcode != OP_ONCE)
6297 free_stack(common, 1);
6298 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rminlabel);
6299 if (opcode == OP_ONCE)
6300 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
6301 else if (bra == OP_BRAMINZERO)
6302 free_stack(common, 1);
6303 }
6304 else if (bra == OP_BRAZERO)
6305 {
6306 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6307 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_fallback)->zerohotpath);
6308 JUMPHERE(brazero);
6309 }
6310 }
6311
6312 static void compile_bracketpos_fallbackpath(compiler_common *common, struct fallback_common *current)
6313 {
6314 DEFINE_COMPILER;
6315 int offset;
6316 struct sljit_jump *jump;
6317
6318 if (CURRENT_AS(bracketpos_fallback)->framesize < 0)
6319 {
6320 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
6321 {
6322 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
6323 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6324 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6325 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6326 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP2, 0);
6327 }
6328 set_jumps(current->topfallbacks, LABEL());
6329 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
6330 return;
6331 }
6332
6333 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr);
6334 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6335
6336 if (current->topfallbacks)
6337 {
6338 jump = JUMP(SLJIT_JUMP);
6339 set_jumps(current->topfallbacks, LABEL());
6340 /* Drop the stack frame. */
6341 free_stack(common, CURRENT_AS(bracketpos_fallback)->stacksize);
6342 JUMPHERE(jump);
6343 }
6344 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), CURRENT_AS(bracketpos_fallback)->localptr, SLJIT_MEM1(STACK_TOP), CURRENT_AS(bracketpos_fallback)->framesize * sizeof(sljit_w));
6345 }
6346
6347 static void compile_braminzero_fallbackpath(compiler_common *common, struct fallback_common *current)
6348 {
6349 assert_fallback fallback;
6350
6351 current->top = NULL;
6352 current->topfallbacks = NULL;
6353 current->nextfallbacks = NULL;
6354 if (current->cc[1] > OP_ASSERTBACK_NOT)
6355 {
6356 /* Manual call of compile_bracket_hotpath and compile_bracket_fallbackpath. */
6357 compile_bracket_hotpath(common, current->cc, current);
6358 compile_bracket_fallbackpath(common, current->top);
6359 }
6360 else
6361 {
6362 memset(&fallback, 0, sizeof(fallback));
6363 fallback.common.cc = current->cc;
6364 fallback.hotpath = CURRENT_AS(braminzero_fallback)->hotpath;
6365 /* Manual call of compile_assert_hotpath. */
6366 compile_assert_hotpath(common, current->cc, &fallback, FALSE);
6367 }
6368 SLJIT_ASSERT(!current->nextfallbacks && !current->topfallbacks);
6369 }
6370
6371 static void compile_fallbackpath(compiler_common *common, struct fallback_common *current)
6372 {
6373 DEFINE_COMPILER;
6374
6375 while (current)
6376 {
6377 if (current->nextfallbacks != NULL)
6378 set_jumps(current->nextfallbacks, LABEL());
6379 switch(*current->cc)
6380 {
6381 case OP_SET_SOM:
6382 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6383 free_stack(common, 1);
6384 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP1, 0);
6385 break;
6386
6387 case OP_STAR:
6388 case OP_MINSTAR:
6389 case OP_PLUS:
6390 case OP_MINPLUS:
6391 case OP_QUERY:
6392 case OP_MINQUERY:
6393 case OP_UPTO:
6394 case OP_MINUPTO:
6395 case OP_EXACT:
6396 case OP_POSSTAR:
6397 case OP_POSPLUS:
6398 case OP_POSQUERY:
6399 case OP_POSUPTO:
6400 case OP_STARI:
6401 case OP_MINSTARI:
6402 case OP_PLUSI:
6403 case OP_MINPLUSI:
6404 case OP_QUERYI:
6405 case OP_MINQUERYI:
6406 case OP_UPTOI:
6407 case OP_MINUPTOI:
6408 case OP_EXACTI:
6409 case OP_POSSTARI:
6410 case OP_POSPLUSI:
6411 case OP_POSQUERYI:
6412 case OP_POSUPTOI:
6413 case OP_NOTSTAR:
6414 case OP_NOTMINSTAR:
6415 case OP_NOTPLUS:
6416 case OP_NOTMINPLUS:
6417 case OP_NOTQUERY:
6418 case OP_NOTMINQUERY:
6419 case OP_NOTUPTO:
6420 case OP_NOTMINUPTO:
6421 case OP_NOTEXACT:
6422 case OP_NOTPOSSTAR:
6423 case OP_NOTPOSPLUS:
6424 case OP_NOTPOSQUERY:
6425 case OP_NOTPOSUPTO:
6426 case OP_NOTSTARI:
6427 case OP_NOTMINSTARI:
6428 case OP_NOTPLUSI:
6429 case OP_NOTMINPLUSI:
6430 case OP_NOTQUERYI:
6431 case OP_NOTMINQUERYI:
6432 case OP_NOTUPTOI:
6433 case OP_NOTMINUPTOI:
6434 case OP_NOTEXACTI:
6435 case OP_NOTPOSSTARI:
6436 case OP_NOTPOSPLUSI:
6437 case OP_NOTPOSQUERYI:
6438 case OP_NOTPOSUPTOI:
6439 case OP_TYPESTAR:
6440 case OP_TYPEMINSTAR:
6441 case OP_TYPEPLUS:
6442 case OP_TYPEMINPLUS:
6443 case OP_TYPEQUERY:
6444 case OP_TYPEMINQUERY:
6445 case OP_TYPEUPTO:
6446 case OP_TYPEMINUPTO:
6447 case OP_TYPEEXACT:
6448 case OP_TYPEPOSSTAR:
6449 case OP_TYPEPOSPLUS:
6450 case OP_TYPEPOSQUERY:
6451 case OP_TYPEPOSUPTO:
6452 case OP_CLASS:
6453 case OP_NCLASS:
6454 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6455 case OP_XCLASS:
6456 #endif
6457 compile_iterator_fallbackpath(common, current);
6458 break;
6459
6460 case OP_REF:
6461 case OP_REFI:
6462 compile_ref_iterator_fallbackpath(common, current);
6463 break;
6464
6465 case OP_RECURSE:
6466 compile_recurse_fallbackpath(common, current);
6467 break;
6468
6469 case OP_ASSERT:
6470 case OP_ASSERT_NOT:
6471 case OP_ASSERTBACK:
6472 case OP_ASSERTBACK_NOT:
6473 compile_assert_fallbackpath(common, current);
6474 break;
6475
6476 case OP_ONCE:
6477 case OP_ONCE_NC:
6478 case OP_BRA:
6479 case OP_CBRA:
6480 case OP_COND:
6481 case OP_SBRA:
6482 case OP_SCBRA:
6483 case OP_SCOND:
6484 compile_bracket_fallbackpath(common, current);
6485 break;
6486
6487 case OP_BRAZERO:
6488 if (current->cc[1] > OP_ASSERTBACK_NOT)
6489 compile_bracket_fallbackpath(common, current);
6490 else
6491 compile_assert_fallbackpath(common, current);
6492 break;
6493
6494 case OP_BRAPOS:
6495 case OP_CBRAPOS:
6496 case OP_SBRAPOS:
6497 case OP_SCBRAPOS:
6498 case OP_BRAPOSZERO:
6499 compile_bracketpos_fallbackpath(common, current);
6500 break;
6501
6502 case OP_BRAMINZERO:
6503 compile_braminzero_fallbackpath(common, current);
6504 break;
6505
6506 case OP_FAIL:
6507 case OP_ACCEPT:
6508 case OP_ASSERT_ACCEPT:
6509 set_jumps(current->topfallbacks, LABEL());
6510 break;
6511
6512 default:
6513 SLJIT_ASSERT_STOP();
6514 break;
6515 }
6516 current = current->prev;
6517 }
6518 }
6519
6520 static SLJIT_INLINE void compile_recurse(compiler_common *common)
6521 {
6522 DEFINE_COMPILER;
6523 pcre_uchar *cc = common->start + common->currententry->start;
6524 pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
6525 pcre_uchar *ccend = bracketend(cc);
6526 int localsize = get_localsize(common, ccbegin, ccend);
6527 int framesize = get_framesize(common, cc, TRUE);
6528 int alternativesize;
6529 BOOL needsframe;
6530 fallback_common altfallback;
6531 struct sljit_jump *jump;
6532
6533 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
6534 needsframe = framesize >= 0;
6535 if (!needsframe)
6536 framesize = 0;
6537 alternativesize = *(cc + GET(cc, 1)) == OP_ALT ? 1 : 0;
6538
6539 SLJIT_ASSERT(common->currententry->entry == NULL);
6540 common->currententry->entry = LABEL();
6541 set_jumps(common->currententry->calls, common->currententry->entry);
6542
6543 sljit_emit_fast_enter(compiler, TMP2, 0, 1, 5, 5, common->localsize);
6544 allocate_stack(common, localsize + framesize + alternativesize);
6545 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(localsize + framesize + alternativesize - 1), TMP2, 0);
6546 copy_locals(common, ccbegin, ccend, TRUE, localsize + framesize + alternativesize, framesize + alternativesize);
6547 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, STACK_TOP, 0);
6548 if (needsframe)
6549 init_frame(common, cc, framesize + alternativesize - 1, alternativesize, FALSE);
6550
6551 if (alternativesize > 0)
6552 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6553
6554 memset(&altfallback, 0, sizeof(fallback_common));
6555 common->acceptlabel = NULL;
6556 common->accept = NULL;
6557 altfallback.cc = ccbegin;
6558 cc += GET(cc, 1);
6559 while (1)
6560 {
6561 altfallback.top = NULL;
6562 altfallback.topfallbacks = NULL;
6563
6564 if (altfallback.cc != ccbegin)
6565 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6566
6567 compile_hotpath(common, altfallback.cc, cc, &altfallback);
6568 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6569 return;
6570
6571 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6572
6573 compile_fallbackpath(common, altfallback.top);
6574 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6575 return;
6576 set_jumps(altfallback.topfallbacks, LABEL());
6577
6578 if (*cc != OP_ALT)
6579 break;
6580
6581 altfallback.cc = cc + 1 + LINK_SIZE;
6582 cc += GET(cc, 1);
6583 }
6584 /* None of them matched. */
6585 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6586 jump = JUMP(SLJIT_JUMP);
6587
6588 set_jumps(common->accept, LABEL());
6589 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD);
6590 if (needsframe)
6591 {
6592 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6593 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6594 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6595 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + alternativesize) * sizeof(sljit_w));
6596 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP3, 0);
6597 }
6598 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);
6599
6600 JUMPHERE(jump);
6601 copy_locals(common, ccbegin, ccend, FALSE, localsize + framesize + alternativesize, framesize + alternativesize);
6602 free_stack(common, localsize + framesize + alternativesize);
6603 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), sizeof(sljit_w));
6604 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
6605 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), RECURSIVE_HEAD, TMP2, 0);
6606 sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), 0);
6607 }
6608
/* Remove the two helper macros so that they are not visible to the code
   that follows. */
6609 #undef COMPILE_FALLBACKPATH
6610 #undef CURRENT_AS
6611
6612 void
6613 PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode)
6614 {
6615 struct sljit_compiler *compiler;
6616 fallback_common rootfallback;
6617 compiler_common common_data;
6618 compiler_common *common = &common_data;
6619 const pcre_uint8 *tables = re->tables;
6620 pcre_study_data *study;
6621 pcre_uchar *ccend;
6622 executable_functions *functions;
6623 void *executable_func;
6624 sljit_uw executable_size;
6625 struct sljit_label *leave;
6626 struct sljit_label *mainloop = NULL;
6627 struct sljit_label *empty_match_found;
6628 struct sljit_label *empty_match_fallback;
6629 struct sljit_jump *jump;
6630 struct sljit_jump *reqbyte_notfound = NULL;
6631 struct sljit_jump *empty_match;
6632
6633 SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0);
6634 study = extra->study_data;
6635
6636 if (!tables)
6637 tables = PRIV(default_tables);
6638
6639 memset(&rootfallback, 0, sizeof(fallback_common));
6640 rootfallback.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size;
6641
6642 common->compiler = NULL;
6643 common->start = rootfallback.cc;
6644 common->cbraptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_w);
6645 common->fcc = tables + fcc_offset;
6646 common->lcc = (sljit_w)(tables + lcc_offset);
6647 common->mode = mode;
6648 common->nltype = NLTYPE_FIXED;
6649 switch(re->options & PCRE_NEWLINE_BITS)
6650 {
6651 case 0:
6652 /* Compile-time default */
6653 switch (NEWLINE)
6654 {
6655 case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6656 case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6657 default: common->newline = NEWLINE; break;
6658 }
6659 break;
6660 case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break;
6661 case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break;
6662 case PCRE_NEWLINE_CR+
6663 PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
6664 case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
6665 case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
6666 default: return;
6667 }
6668 if ((re->options & PCRE_BSR_ANYCRLF) != 0)
6669 common->bsr_nltype = NLTYPE_ANYCRLF;
6670 else if ((re->options & PCRE_BSR_UNICODE) != 0)
6671 common->bsr_nltype = NLTYPE_ANY;
6672 else
6673 {
6674 #ifdef BSR_ANYCRLF
6675 common->bsr_nltype = NLTYPE_ANYCRLF;
6676 #else
6677 common->bsr_nltype = NLTYPE_ANY;
6678 #endif
6679 }
6680 common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6681 common->ctypes = (sljit_w)(tables + ctypes_offset);
6682 common->name_table = (sljit_w)((pcre_uchar *)re + re->name_table_offset);
6683 common->name_count = re->name_count;
6684 common->name_entry_size = re->name_entry_size;
6685 common->partialmatchlabel = NULL;
6686 common->acceptlabel = NULL;
6687 common->stubs = NULL;
6688 common->entries = NULL;
6689 common->currententry = NULL;
6690 common->partialmatch = NULL;
6691 common->accept = NULL;
6692 common->calllimit = NULL;
6693 common->stackalloc = NULL;
6694 common->revertframes = NULL;
6695 common->wordboundary = NULL;
6696 common->anynewline = NULL;
6697 common->hspace = NULL;
6698 common->vspace = NULL;
6699 common->casefulcmp = NULL;
6700 common->caselesscmp = NULL;
6701 common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6702 #ifdef SUPPORT_UTF
6703 /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6704 common->utf = (re->options & PCRE_UTF8) != 0;
6705 #ifdef SUPPORT_UCP
6706 common->use_ucp = (re->options & PCRE_UCP) != 0;
6707 #endif
6708 common->utfreadchar = NULL;
6709 #ifdef COMPILE_PCRE8
6710 common->utfreadtype8 = NULL;
6711 #endif
6712 #endif /* SUPPORT_UTF */
6713 #ifdef SUPPORT_UCP
6714 common->getucd = NULL;
6715 #endif
6716 ccend = bracketend(rootfallback.cc);
6717 SLJIT_ASSERT(*rootfallback.cc == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
6718 common->localsize = get_localspace(common, rootfallback.cc, ccend);
6719 if (common->localsize < 0)
6720 return;
6721 common->localsize += common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w);
6722 if (common->localsize > SLJIT_MAX_LOCAL_SIZE)
6723 return;
6724 common->localptrs = (int *)SLJIT_MALLOC((ccend - rootfallback.cc) * sizeof(int));
6725 if (!common->localptrs)
6726 return;
6727 memset(common->localptrs, 0, (ccend - rootfallback.cc) * sizeof(int));
6728 set_localptrs(common, common->cbraptr + (re->top_bracket + 1) * sizeof(sljit_w), ccend);
6729
6730 compiler = sljit_create_compiler();
6731 if (!compiler)
6732 {
6733 SLJIT_FREE(common->localptrs);
6734 return;
6735 }
6736 common->compiler = compiler;
6737
6738 /* Main pcre_jit_exec entry. */