/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 996 - (show annotations)
Thu Jul 12 10:10:51 2012 UTC (7 years, 3 months ago) by zherczeg
File MIME type: text/plain
File size: 249219 byte(s)
Improved cache flush for AIX
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
/* All-in-one: Since we use the JIT compiler only from here,
we just include it. This way we don't need to touch the build
system files. */

/* Allocation hooks: forward to whatever PUBL(malloc) / PUBL(free) name. */
#define SLJIT_MALLOC(size) (PUBL(malloc))(size)
#define SLJIT_FREE(ptr) (PUBL(free))(ptr)
/* Configuration switches; they must be defined before sljitLir.c is
included. SLJIT_DEBUG is also tested further down in this file. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0
#define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Allocate memory on the stack. Fast, but limited size. */
#define LOCAL_SPACE_SIZE 32768

/* NOTE(review): presumably the step used when the matcher stack is
enlarged - confirm against the users of this constant. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only active when SLJIT_DEBUG is defined and non-zero. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
77
78 /*
Short summary about the backtracking mechanism employed by the jit code generator:
80
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
that execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the try path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the try path.
95
96 Greedy star operator (*) :
97 Try path: match happens.
98 Backtrack path: match failed.
99 Non-greedy star operator (*?) :
100 Try path: no need to perform a match.
101 Backtrack path: match is required.
102
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A try path
112 '(' try path (pushing arguments to the stack)
113 B try path
114 ')' try path (pushing arguments to the stack)
115 D try path
116 return with successful match
117
118 D backtrack path
119 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
120 B backtrack path
C try path
122 jump to D try path
123 C backtrack path
124 A backtrack path
125
Notice that the order of the backtrack code paths is the opposite of the order
of the fast (try) code paths. In this way the topmost value on the stack always
belongs to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the try path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
132 */
133
134 /*
135 Saved stack frames:
136
Atomic blocks and asserts require reloading the values of local variables
when the backtrack mechanism is performed. Because of OP_RECURSE, the locals
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *uchar_ptr;
156 pcre_uchar *mark_ptr;
157 /* Everything else after. */
158 int offsetcount;
159 int calllimit;
160 pcre_uint8 notbol;
161 pcre_uint8 noteol;
162 pcre_uint8 notempty;
163 pcre_uint8 notempty_atstart;
164 } jit_arguments;
165
166 typedef struct executable_functions {
167 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
168 PUBL(jit_callback) callback;
169 void *userdata;
170 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
171 } executable_functions;
172
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
177
/* Kinds of stubs; stack_alloc is the only one defined here. */
enum stub_types { stack_alloc };

/* Node of a singly linked list of stubs. Each node carries its kind, an
integer payload, the jump stored in 'start' and the label stored in 'quit'. */
typedef struct stub_list {
  enum stub_types type;
  int data;
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
187
188 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
189
190 /* The following structure is the key data type for the recursive
191 code generator. It is allocated by compile_trypath, and contains
192 the aguments for compile_backtrackpath. Must be the first member
193 of its descendants. */
194 typedef struct backtrack_common {
195 /* Concatenation stack. */
196 struct backtrack_common *prev;
197 jump_list *nextbacktracks;
198 /* Internal stack (for component operators). */
199 struct backtrack_common *top;
200 jump_list *topbacktracks;
201 /* Opcode pointer. */
202 pcre_uchar *cc;
203 } backtrack_common;
204
205 typedef struct assert_backtrack {
206 backtrack_common common;
207 jump_list *condfailed;
208 /* Less than 0 (-1) if a frame is not needed. */
209 int framesize;
210 /* Points to our private memory word on the stack. */
211 int localptr;
212 /* For iterators. */
213 struct sljit_label *trypath;
214 } assert_backtrack;
215
216 typedef struct bracket_backtrack {
217 backtrack_common common;
218 /* Where to coninue if an alternative is successfully matched. */
219 struct sljit_label *alttrypath;
220 /* For rmin and rmax iterators. */
221 struct sljit_label *recursivetrypath;
222 /* For greedy ? operator. */
223 struct sljit_label *zerotrypath;
224 /* Contains the branches of a failed condition. */
225 union {
226 /* Both for OP_COND, OP_SCOND. */
227 jump_list *condfailed;
228 assert_backtrack *assert;
229 /* For OP_ONCE. -1 if not needed. */
230 int framesize;
231 } u;
232 /* Points to our private memory word on the stack. */
233 int localptr;
234 } bracket_backtrack;
235
236 typedef struct bracketpos_backtrack {
237 backtrack_common common;
238 /* Points to our private memory word on the stack. */
239 int localptr;
240 /* Reverting stack is needed. */
241 int framesize;
242 /* Allocated stack size. */
243 int stacksize;
244 } bracketpos_backtrack;
245
246 typedef struct braminzero_backtrack {
247 backtrack_common common;
248 struct sljit_label *trypath;
249 } braminzero_backtrack;
250
251 typedef struct iterator_backtrack {
252 backtrack_common common;
253 /* Next iteration. */
254 struct sljit_label *trypath;
255 } iterator_backtrack;
256
257 typedef struct recurse_entry {
258 struct recurse_entry *next;
259 /* Contains the function entry. */
260 struct sljit_label *entry;
261 /* Collects the calls until the function is not created. */
262 jump_list *calls;
263 /* Points to the starting opcode. */
264 int start;
265 } recurse_entry;
266
267 typedef struct recurse_backtrack {
268 backtrack_common common;
269 } recurse_backtrack;
270
/* Used to size compiler_common.digits, which has 2 + MAX_RANGE_SIZE
elements. */
#define MAX_RANGE_SIZE 6
272
273 typedef struct compiler_common {
274 struct sljit_compiler *compiler;
275 pcre_uchar *start;
276
277 /* Opcode local area direct map. */
278 int *localptrs;
279 int cbraptr;
280 /* OVector starting point. Must be divisible by 2. */
281 int ovector_start;
282 /* Last known position of the requested byte. */
283 int req_char_ptr;
284 /* Head of the last recursion. */
285 int recursive_head;
286 /* First inspected character for partial matching. */
287 int start_used_ptr;
288 /* Starting pointer for partial soft matches. */
289 int hit_start;
290 /* End pointer of the first line. */
291 int first_line_end;
292 /* Points to the marked string. */
293 int mark_ptr;
294
295 /* Flipped and lower case tables. */
296 const pcre_uint8 *fcc;
297 sljit_w lcc;
298 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
299 int mode;
300 /* Newline control. */
301 int nltype;
302 int newline;
303 int bsr_nltype;
304 /* Dollar endonly. */
305 int endonly;
306 BOOL has_set_som;
307 /* Tables. */
308 sljit_w ctypes;
309 int digits[2 + MAX_RANGE_SIZE];
310 /* Named capturing brackets. */
311 sljit_uw name_table;
312 sljit_w name_count;
313 sljit_w name_entry_size;
314
315 /* Labels and jump lists. */
316 struct sljit_label *partialmatchlabel;
317 struct sljit_label *quitlabel;
318 struct sljit_label *acceptlabel;
319 stub_list *stubs;
320 recurse_entry *entries;
321 recurse_entry *currententry;
322 jump_list *partialmatch;
323 jump_list *quit;
324 jump_list *accept;
325 jump_list *calllimit;
326 jump_list *stackalloc;
327 jump_list *revertframes;
328 jump_list *wordboundary;
329 jump_list *anynewline;
330 jump_list *hspace;
331 jump_list *vspace;
332 jump_list *casefulcmp;
333 jump_list *caselesscmp;
334 BOOL jscript_compat;
335 #ifdef SUPPORT_UTF
336 BOOL utf;
337 #ifdef SUPPORT_UCP
338 BOOL use_ucp;
339 #endif
340 jump_list *utfreadchar;
341 #ifdef COMPILE_PCRE8
342 jump_list *utfreadtype8;
343 #endif
344 #endif /* SUPPORT_UTF */
345 #ifdef SUPPORT_UCP
346 jump_list *getucd;
347 #endif
348 } compiler_common;
349
/* For byte_sequence_compare. */

/* 'length' and 'sourcereg' are always present. The remaining members exist
only when SLJIT_UNALIGNED is defined and non-zero: 'c' and 'oc' are two
identically shaped unions that overlay an int, an unsigned short and an
array of code units (four 8-bit units in 8-bit mode, two 16-bit units in
16-bit mode). */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } c;
  union {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } oc;
#endif
} compare_context;
383
/* Marker words written into saved stack frames by init_frame().
frame_end terminates a frame; frame_setstrbegin precedes the saved
OVECTOR(0) value; frame_setmark precedes the saved mark pointer. */
enum {
  frame_end = 0,
  frame_setstrbegin = -1,
  frame_setmark = -2
};
389
/* Undefine sljit macros. CMP is given a new definition among the
shortcut macros further down. */
#undef CMP

/* Used for accessing the elements of the stack. Slot i is at byte offset
-(i + 1) * sizeof(sljit_w), so slot 0 is one word below the base. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
395
/* Symbolic names for the sljit registers used by the generated code.
Note that TMP2 is TEMPORARY_REG3 while STACK_TOP is TEMPORARY_REG2. */
#define TMP1 SLJIT_TEMPORARY_REG1
#define TMP2 SLJIT_TEMPORARY_REG3
#define TMP3 SLJIT_TEMPORARY_EREG2
#define STR_PTR SLJIT_SAVED_REG1
#define STR_END SLJIT_SAVED_REG2
#define STACK_TOP SLJIT_TEMPORARY_REG2
#define STACK_LIMIT SLJIT_SAVED_REG3
#define ARGUMENTS SLJIT_SAVED_EREG1
#define CALL_COUNT SLJIT_SAVED_EREG2
#define RETURN_ADDR SLJIT_TEMPORARY_EREG1
406
/* Locals layout. All values are byte offsets in units of sizeof(sljit_w). */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_w))
#define LOCALS1 (1 * sizeof(sljit_w))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_w))
#define POSSESSIVE1 (3 * sizeof(sljit_w))
/* Max limit of recursions. */
#define CALL_LIMIT (4 * sizeof(sljit_w))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet reached. */
/* The macros below expect a variable named 'common' (a compiler_common
pointer) to be in scope at the point of use. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
/* Private data offset of the opcode at cc, looked up by its distance
from common->start. */
#define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])
424
425 #ifdef COMPILE_PCRE8
426 #define MOV_UCHAR SLJIT_MOV_UB
427 #define MOVU_UCHAR SLJIT_MOVU_UB
428 #else
429 #ifdef COMPILE_PCRE16
430 #define MOV_UCHAR SLJIT_MOV_UH
431 #define MOVU_UCHAR SLJIT_MOVU_UH
432 #else
433 #error Unsupported compiling mode
434 #endif
435 #endif
436
/* Shortcuts. Every macro except DEFINE_COMPILER expects a local variable
named 'compiler' to be in scope; DEFINE_COMPILER declares it from
common->compiler. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds 'jump' to it. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define COND_VALUE(op, dst, dstw, type) \
  sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
460
461 static pcre_uchar* bracketend(pcre_uchar* cc)
462 {
463 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
464 do cc += GET(cc, 1); while (*cc == OP_ALT);
465 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
466 cc += 1 + LINK_SIZE;
467 return cc;
468 }
469
/* Functions that might need modification for all new supported opcodes:
next_opcode
get_localspace
set_localptrs
get_framesize
init_frame
get_localsize
copy_locals
compile_trypath
compile_backtrackpath
*/
481
482 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
483 {
484 SLJIT_UNUSED_ARG(common);
485 switch(*cc)
486 {
487 case OP_SOD:
488 case OP_SOM:
489 case OP_SET_SOM:
490 case OP_NOT_WORD_BOUNDARY:
491 case OP_WORD_BOUNDARY:
492 case OP_NOT_DIGIT:
493 case OP_DIGIT:
494 case OP_NOT_WHITESPACE:
495 case OP_WHITESPACE:
496 case OP_NOT_WORDCHAR:
497 case OP_WORDCHAR:
498 case OP_ANY:
499 case OP_ALLANY:
500 case OP_ANYNL:
501 case OP_NOT_HSPACE:
502 case OP_HSPACE:
503 case OP_NOT_VSPACE:
504 case OP_VSPACE:
505 case OP_EXTUNI:
506 case OP_EODN:
507 case OP_EOD:
508 case OP_CIRC:
509 case OP_CIRCM:
510 case OP_DOLL:
511 case OP_DOLLM:
512 case OP_TYPESTAR:
513 case OP_TYPEMINSTAR:
514 case OP_TYPEPLUS:
515 case OP_TYPEMINPLUS:
516 case OP_TYPEQUERY:
517 case OP_TYPEMINQUERY:
518 case OP_TYPEPOSSTAR:
519 case OP_TYPEPOSPLUS:
520 case OP_TYPEPOSQUERY:
521 case OP_CRSTAR:
522 case OP_CRMINSTAR:
523 case OP_CRPLUS:
524 case OP_CRMINPLUS:
525 case OP_CRQUERY:
526 case OP_CRMINQUERY:
527 case OP_DEF:
528 case OP_BRAZERO:
529 case OP_BRAMINZERO:
530 case OP_BRAPOSZERO:
531 case OP_COMMIT:
532 case OP_FAIL:
533 case OP_ACCEPT:
534 case OP_ASSERT_ACCEPT:
535 case OP_SKIPZERO:
536 return cc + 1;
537
538 case OP_ANYBYTE:
539 #ifdef SUPPORT_UTF
540 if (common->utf) return NULL;
541 #endif
542 return cc + 1;
543
544 case OP_CHAR:
545 case OP_CHARI:
546 case OP_NOT:
547 case OP_NOTI:
548 case OP_STAR:
549 case OP_MINSTAR:
550 case OP_PLUS:
551 case OP_MINPLUS:
552 case OP_QUERY:
553 case OP_MINQUERY:
554 case OP_POSSTAR:
555 case OP_POSPLUS:
556 case OP_POSQUERY:
557 case OP_STARI:
558 case OP_MINSTARI:
559 case OP_PLUSI:
560 case OP_MINPLUSI:
561 case OP_QUERYI:
562 case OP_MINQUERYI:
563 case OP_POSSTARI:
564 case OP_POSPLUSI:
565 case OP_POSQUERYI:
566 case OP_NOTSTAR:
567 case OP_NOTMINSTAR:
568 case OP_NOTPLUS:
569 case OP_NOTMINPLUS:
570 case OP_NOTQUERY:
571 case OP_NOTMINQUERY:
572 case OP_NOTPOSSTAR:
573 case OP_NOTPOSPLUS:
574 case OP_NOTPOSQUERY:
575 case OP_NOTSTARI:
576 case OP_NOTMINSTARI:
577 case OP_NOTPLUSI:
578 case OP_NOTMINPLUSI:
579 case OP_NOTQUERYI:
580 case OP_NOTMINQUERYI:
581 case OP_NOTPOSSTARI:
582 case OP_NOTPOSPLUSI:
583 case OP_NOTPOSQUERYI:
584 cc += 2;
585 #ifdef SUPPORT_UTF
586 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
587 #endif
588 return cc;
589
590 case OP_UPTO:
591 case OP_MINUPTO:
592 case OP_EXACT:
593 case OP_POSUPTO:
594 case OP_UPTOI:
595 case OP_MINUPTOI:
596 case OP_EXACTI:
597 case OP_POSUPTOI:
598 case OP_NOTUPTO:
599 case OP_NOTMINUPTO:
600 case OP_NOTEXACT:
601 case OP_NOTPOSUPTO:
602 case OP_NOTUPTOI:
603 case OP_NOTMINUPTOI:
604 case OP_NOTEXACTI:
605 case OP_NOTPOSUPTOI:
606 cc += 2 + IMM2_SIZE;
607 #ifdef SUPPORT_UTF
608 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
609 #endif
610 return cc;
611
612 case OP_NOTPROP:
613 case OP_PROP:
614 return cc + 1 + 2;
615
616 case OP_TYPEUPTO:
617 case OP_TYPEMINUPTO:
618 case OP_TYPEEXACT:
619 case OP_TYPEPOSUPTO:
620 case OP_REF:
621 case OP_REFI:
622 case OP_CREF:
623 case OP_NCREF:
624 case OP_RREF:
625 case OP_NRREF:
626 case OP_CLOSE:
627 cc += 1 + IMM2_SIZE;
628 return cc;
629
630 case OP_CRRANGE:
631 case OP_CRMINRANGE:
632 return cc + 1 + 2 * IMM2_SIZE;
633
634 case OP_CLASS:
635 case OP_NCLASS:
636 return cc + 1 + 32 / sizeof(pcre_uchar);
637
638 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
639 case OP_XCLASS:
640 return cc + GET(cc, 1);
641 #endif
642
643 case OP_RECURSE:
644 case OP_ASSERT:
645 case OP_ASSERT_NOT:
646 case OP_ASSERTBACK:
647 case OP_ASSERTBACK_NOT:
648 case OP_REVERSE:
649 case OP_ONCE:
650 case OP_ONCE_NC:
651 case OP_BRA:
652 case OP_BRAPOS:
653 case OP_COND:
654 case OP_SBRA:
655 case OP_SBRAPOS:
656 case OP_SCOND:
657 case OP_ALT:
658 case OP_KET:
659 case OP_KETRMAX:
660 case OP_KETRMIN:
661 case OP_KETRPOS:
662 return cc + 1 + LINK_SIZE;
663
664 case OP_CBRA:
665 case OP_CBRAPOS:
666 case OP_SCBRA:
667 case OP_SCBRAPOS:
668 return cc + 1 + LINK_SIZE + IMM2_SIZE;
669
670 case OP_MARK:
671 return cc + 1 + 2 + cc[1];
672
673 default:
674 return NULL;
675 }
676 }
677
/* Groups of case labels shared by get_localspace() and set_localptrs().
The LOCAL1 groups are given one private word there and the LOCAL2A / LOCAL2B
groups two. The B groups are the opcodes that carry an IMM2 count. The TYPE
groups are the character type iterators, for which the two-word allocation
is skipped when the repeated type is OP_ANYNL or OP_EXTUNI. */
#define CASE_ITERATOR_LOCAL1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_LOCAL2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_LOCAL2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_LOCAL1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_LOCAL2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_LOCAL2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
729
730 static int get_class_iterator_size(pcre_uchar *cc)
731 {
732 switch(*cc)
733 {
734 case OP_CRSTAR:
735 case OP_CRPLUS:
736 return 2;
737
738 case OP_CRMINSTAR:
739 case OP_CRMINPLUS:
740 case OP_CRQUERY:
741 case OP_CRMINQUERY:
742 return 1;
743
744 case OP_CRRANGE:
745 case OP_CRMINRANGE:
746 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
747 return 0;
748 return 2;
749
750 default:
751 return 0;
752 }
753 }
754
755 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
756 {
757 int localspace = 0;
758 pcre_uchar *alternative;
759 pcre_uchar *end = NULL;
760 int space, size, bracketlen;
761
762 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
763 while (cc < ccend)
764 {
765 space = 0;
766 size = 0;
767 bracketlen = 0;
768 switch(*cc)
769 {
770 case OP_SET_SOM:
771 common->has_set_som = TRUE;
772 cc += 1;
773 break;
774
775 case OP_ASSERT:
776 case OP_ASSERT_NOT:
777 case OP_ASSERTBACK:
778 case OP_ASSERTBACK_NOT:
779 case OP_ONCE:
780 case OP_ONCE_NC:
781 case OP_BRAPOS:
782 case OP_SBRA:
783 case OP_SBRAPOS:
784 case OP_SCOND:
785 localspace += sizeof(sljit_w);
786 bracketlen = 1 + LINK_SIZE;
787 break;
788
789 case OP_CBRAPOS:
790 case OP_SCBRAPOS:
791 localspace += sizeof(sljit_w);
792 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
793 break;
794
795 case OP_COND:
796 /* Might be a hidden SCOND. */
797 alternative = cc + GET(cc, 1);
798 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
799 localspace += sizeof(sljit_w);
800 bracketlen = 1 + LINK_SIZE;
801 break;
802
803 case OP_BRA:
804 bracketlen = 1 + LINK_SIZE;
805 break;
806
807 case OP_CBRA:
808 case OP_SCBRA:
809 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
810 break;
811
812 CASE_ITERATOR_LOCAL1
813 space = 1;
814 size = -2;
815 break;
816
817 CASE_ITERATOR_LOCAL2A
818 space = 2;
819 size = -2;
820 break;
821
822 CASE_ITERATOR_LOCAL2B
823 space = 2;
824 size = -(2 + IMM2_SIZE);
825 break;
826
827 CASE_ITERATOR_TYPE_LOCAL1
828 space = 1;
829 size = 1;
830 break;
831
832 CASE_ITERATOR_TYPE_LOCAL2A
833 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
834 space = 2;
835 size = 1;
836 break;
837
838 CASE_ITERATOR_TYPE_LOCAL2B
839 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
840 space = 2;
841 size = 1 + IMM2_SIZE;
842 break;
843
844 case OP_CLASS:
845 case OP_NCLASS:
846 size += 1 + 32 / sizeof(pcre_uchar);
847 space = get_class_iterator_size(cc + size);
848 break;
849
850 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
851 case OP_XCLASS:
852 size = GET(cc, 1);
853 space = get_class_iterator_size(cc + size);
854 break;
855 #endif
856
857 case OP_RECURSE:
858 /* Set its value only once. */
859 if (common->recursive_head == 0)
860 {
861 common->recursive_head = common->ovector_start;
862 common->ovector_start += sizeof(sljit_w);
863 }
864 cc += 1 + LINK_SIZE;
865 break;
866
867 case OP_MARK:
868 if (common->mark_ptr == 0)
869 {
870 common->mark_ptr = common->ovector_start;
871 common->ovector_start += sizeof(sljit_w);
872 }
873 cc += 1 + 2 + cc[1];
874 break;
875
876 default:
877 cc = next_opcode(common, cc);
878 if (cc == NULL)
879 return -1;
880 break;
881 }
882
883 if (space > 0 && cc >= end)
884 localspace += sizeof(sljit_w) * space;
885
886 if (size != 0)
887 {
888 if (size < 0)
889 {
890 cc += -size;
891 #ifdef SUPPORT_UTF
892 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
893 #endif
894 }
895 else
896 cc += size;
897 }
898
899 if (bracketlen > 0)
900 {
901 if (cc >= end)
902 {
903 end = bracketend(cc);
904 if (end[-1 - LINK_SIZE] == OP_KET)
905 end = NULL;
906 }
907 cc += bracketlen;
908 }
909 }
910 return localspace;
911 }
912
913 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
914 {
915 pcre_uchar *cc = common->start;
916 pcre_uchar *alternative;
917 pcre_uchar *end = NULL;
918 int space, size, bracketlen;
919
920 while (cc < ccend)
921 {
922 space = 0;
923 size = 0;
924 bracketlen = 0;
925 switch(*cc)
926 {
927 case OP_ASSERT:
928 case OP_ASSERT_NOT:
929 case OP_ASSERTBACK:
930 case OP_ASSERTBACK_NOT:
931 case OP_ONCE:
932 case OP_ONCE_NC:
933 case OP_BRAPOS:
934 case OP_SBRA:
935 case OP_SBRAPOS:
936 case OP_SCOND:
937 common->localptrs[cc - common->start] = localptr;
938 localptr += sizeof(sljit_w);
939 bracketlen = 1 + LINK_SIZE;
940 break;
941
942 case OP_CBRAPOS:
943 case OP_SCBRAPOS:
944 common->localptrs[cc - common->start] = localptr;
945 localptr += sizeof(sljit_w);
946 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
947 break;
948
949 case OP_COND:
950 /* Might be a hidden SCOND. */
951 alternative = cc + GET(cc, 1);
952 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
953 {
954 common->localptrs[cc - common->start] = localptr;
955 localptr += sizeof(sljit_w);
956 }
957 bracketlen = 1 + LINK_SIZE;
958 break;
959
960 case OP_BRA:
961 bracketlen = 1 + LINK_SIZE;
962 break;
963
964 case OP_CBRA:
965 case OP_SCBRA:
966 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
967 break;
968
969 CASE_ITERATOR_LOCAL1
970 space = 1;
971 size = -2;
972 break;
973
974 CASE_ITERATOR_LOCAL2A
975 space = 2;
976 size = -2;
977 break;
978
979 CASE_ITERATOR_LOCAL2B
980 space = 2;
981 size = -(2 + IMM2_SIZE);
982 break;
983
984 CASE_ITERATOR_TYPE_LOCAL1
985 space = 1;
986 size = 1;
987 break;
988
989 CASE_ITERATOR_TYPE_LOCAL2A
990 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
991 space = 2;
992 size = 1;
993 break;
994
995 CASE_ITERATOR_TYPE_LOCAL2B
996 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
997 space = 2;
998 size = 1 + IMM2_SIZE;
999 break;
1000
1001 case OP_CLASS:
1002 case OP_NCLASS:
1003 size += 1 + 32 / sizeof(pcre_uchar);
1004 space = get_class_iterator_size(cc + size);
1005 break;
1006
1007 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1008 case OP_XCLASS:
1009 size = GET(cc, 1);
1010 space = get_class_iterator_size(cc + size);
1011 break;
1012 #endif
1013
1014 default:
1015 cc = next_opcode(common, cc);
1016 SLJIT_ASSERT(cc != NULL);
1017 break;
1018 }
1019
1020 if (space > 0 && cc >= end)
1021 {
1022 common->localptrs[cc - common->start] = localptr;
1023 localptr += sizeof(sljit_w) * space;
1024 }
1025
1026 if (size != 0)
1027 {
1028 if (size < 0)
1029 {
1030 cc += -size;
1031 #ifdef SUPPORT_UTF
1032 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1033 #endif
1034 }
1035 else
1036 cc += size;
1037 }
1038
1039 if (bracketlen > 0)
1040 {
1041 if (cc >= end)
1042 {
1043 end = bracketend(cc);
1044 if (end[-1 - LINK_SIZE] == OP_KET)
1045 end = NULL;
1046 }
1047 cc += bracketlen;
1048 }
1049 }
1050 }
1051
1052 /* Returns with -1 if no need for frame. */
1053 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1054 {
1055 pcre_uchar *ccend = bracketend(cc);
1056 int length = 0;
1057 BOOL possessive = FALSE;
1058 BOOL setsom_found = recursive;
1059 BOOL setmark_found = recursive;
1060
1061 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1062 {
1063 length = 3;
1064 possessive = TRUE;
1065 }
1066
1067 cc = next_opcode(common, cc);
1068 SLJIT_ASSERT(cc != NULL);
1069 while (cc < ccend)
1070 switch(*cc)
1071 {
1072 case OP_SET_SOM:
1073 SLJIT_ASSERT(common->has_set_som);
1074 if (!setsom_found)
1075 {
1076 length += 2;
1077 setsom_found = TRUE;
1078 }
1079 cc += 1;
1080 break;
1081
1082 case OP_MARK:
1083 SLJIT_ASSERT(common->mark_ptr != 0);
1084 if (!setmark_found)
1085 {
1086 length += 2;
1087 setmark_found = TRUE;
1088 }
1089 cc += 1 + 2 + cc[1];
1090 break;
1091
1092 case OP_RECURSE:
1093 if (common->has_set_som && !setsom_found)
1094 {
1095 length += 2;
1096 setsom_found = TRUE;
1097 }
1098 if (common->mark_ptr != 0 && !setmark_found)
1099 {
1100 length += 2;
1101 setmark_found = TRUE;
1102 }
1103 cc += 1 + LINK_SIZE;
1104 break;
1105
1106 case OP_CBRA:
1107 case OP_CBRAPOS:
1108 case OP_SCBRA:
1109 case OP_SCBRAPOS:
1110 length += 3;
1111 cc += 1 + LINK_SIZE + IMM2_SIZE;
1112 break;
1113
1114 default:
1115 cc = next_opcode(common, cc);
1116 SLJIT_ASSERT(cc != NULL);
1117 break;
1118 }
1119
1120 /* Possessive quantifiers can use a special case. */
1121 if (SLJIT_UNLIKELY(possessive) && length == 3)
1122 return -1;
1123
1124 if (length > 0)
1125 return length + 1;
1126 return -1;
1127 }
1128
1129 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1130 {
1131 DEFINE_COMPILER;
1132 pcre_uchar *ccend = bracketend(cc);
1133 BOOL setsom_found = recursive;
1134 BOOL setmark_found = recursive;
1135 int offset;
1136
1137 /* >= 1 + shortest item size (2) */
1138 SLJIT_UNUSED_ARG(stacktop);
1139 SLJIT_ASSERT(stackpos >= stacktop + 2);
1140
1141 stackpos = STACK(stackpos);
1142 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1143 cc = next_opcode(common, cc);
1144 SLJIT_ASSERT(cc != NULL);
1145 while (cc < ccend)
1146 switch(*cc)
1147 {
1148 case OP_SET_SOM:
1149 SLJIT_ASSERT(common->has_set_som);
1150 if (!setsom_found)
1151 {
1152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1153 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1154 stackpos += (int)sizeof(sljit_w);
1155 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1156 stackpos += (int)sizeof(sljit_w);
1157 setsom_found = TRUE;
1158 }
1159 cc += 1;
1160 break;
1161
1162 case OP_MARK:
1163 SLJIT_ASSERT(common->mark_ptr != 0);
1164 if (!setmark_found)
1165 {
1166 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1167 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1168 stackpos += (int)sizeof(sljit_w);
1169 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1170 stackpos += (int)sizeof(sljit_w);
1171 setmark_found = TRUE;
1172 }
1173 cc += 1 + 2 + cc[1];
1174 break;
1175
1176 case OP_RECURSE:
1177 if (common->has_set_som && !setsom_found)
1178 {
1179 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1180 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1181 stackpos += (int)sizeof(sljit_w);
1182 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1183 stackpos += (int)sizeof(sljit_w);
1184 setsom_found = TRUE;
1185 }
1186 if (common->mark_ptr != 0 && !setmark_found)
1187 {
1188 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1189 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1190 stackpos += (int)sizeof(sljit_w);
1191 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1192 stackpos += (int)sizeof(sljit_w);
1193 setmark_found = TRUE;
1194 }
1195 cc += 1 + LINK_SIZE;
1196 break;
1197
1198 case OP_CBRA:
1199 case OP_CBRAPOS:
1200 case OP_SCBRA:
1201 case OP_SCBRAPOS:
1202 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1204 stackpos += (int)sizeof(sljit_w);
1205 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1206 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1208 stackpos += (int)sizeof(sljit_w);
1209 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1210 stackpos += (int)sizeof(sljit_w);
1211
1212 cc += 1 + LINK_SIZE + IMM2_SIZE;
1213 break;
1214
1215 default:
1216 cc = next_opcode(common, cc);
1217 SLJIT_ASSERT(cc != NULL);
1218 break;
1219 }
1220
1221 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1222 SLJIT_ASSERT(stackpos == STACK(stacktop));
1223 }
1224
1225 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1226 {
1227 int localsize = 2;
1228 int size;
1229 pcre_uchar *alternative;
1230 /* Calculate the sum of the local variables. */
1231 while (cc < ccend)
1232 {
1233 size = 0;
1234 switch(*cc)
1235 {
1236 case OP_ASSERT:
1237 case OP_ASSERT_NOT:
1238 case OP_ASSERTBACK:
1239 case OP_ASSERTBACK_NOT:
1240 case OP_ONCE:
1241 case OP_ONCE_NC:
1242 case OP_BRAPOS:
1243 case OP_SBRA:
1244 case OP_SBRAPOS:
1245 case OP_SCOND:
1246 localsize++;
1247 cc += 1 + LINK_SIZE;
1248 break;
1249
1250 case OP_CBRA:
1251 case OP_SCBRA:
1252 localsize++;
1253 cc += 1 + LINK_SIZE + IMM2_SIZE;
1254 break;
1255
1256 case OP_CBRAPOS:
1257 case OP_SCBRAPOS:
1258 localsize += 2;
1259 cc += 1 + LINK_SIZE + IMM2_SIZE;
1260 break;
1261
1262 case OP_COND:
1263 /* Might be a hidden SCOND. */
1264 alternative = cc + GET(cc, 1);
1265 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1266 localsize++;
1267 cc += 1 + LINK_SIZE;
1268 break;
1269
1270 CASE_ITERATOR_LOCAL1
1271 if (PRIV_DATA(cc))
1272 localsize++;
1273 cc += 2;
1274 #ifdef SUPPORT_UTF
1275 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1276 #endif
1277 break;
1278
1279 CASE_ITERATOR_LOCAL2A
1280 if (PRIV_DATA(cc))
1281 localsize += 2;
1282 cc += 2;
1283 #ifdef SUPPORT_UTF
1284 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1285 #endif
1286 break;
1287
1288 CASE_ITERATOR_LOCAL2B
1289 if (PRIV_DATA(cc))
1290 localsize += 2;
1291 cc += 2 + IMM2_SIZE;
1292 #ifdef SUPPORT_UTF
1293 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1294 #endif
1295 break;
1296
1297 CASE_ITERATOR_TYPE_LOCAL1
1298 if (PRIV_DATA(cc))
1299 localsize++;
1300 cc += 1;
1301 break;
1302
1303 CASE_ITERATOR_TYPE_LOCAL2A
1304 if (PRIV_DATA(cc))
1305 localsize += 2;
1306 cc += 1;
1307 break;
1308
1309 CASE_ITERATOR_TYPE_LOCAL2B
1310 if (PRIV_DATA(cc))
1311 localsize += 2;
1312 cc += 1 + IMM2_SIZE;
1313 break;
1314
1315 case OP_CLASS:
1316 case OP_NCLASS:
1317 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1318 case OP_XCLASS:
1319 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1320 #else
1321 size = 1 + 32 / (int)sizeof(pcre_uchar);
1322 #endif
1323 if (PRIV_DATA(cc))
1324 localsize += get_class_iterator_size(cc + size);
1325 cc += size;
1326 break;
1327
1328 default:
1329 cc = next_opcode(common, cc);
1330 SLJIT_ASSERT(cc != NULL);
1331 break;
1332 }
1333 }
1334 SLJIT_ASSERT(cc == ccend);
1335 return localsize;
1336 }
1337
1338 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1339 BOOL save, int stackptr, int stacktop)
1340 {
1341 DEFINE_COMPILER;
1342 int srcw[2];
1343 int count, size;
1344 BOOL tmp1next = TRUE;
1345 BOOL tmp1empty = TRUE;
1346 BOOL tmp2empty = TRUE;
1347 pcre_uchar *alternative;
1348 enum {
1349 start,
1350 loop,
1351 end
1352 } status;
1353
1354 status = save ? start : loop;
1355 stackptr = STACK(stackptr - 2);
1356 stacktop = STACK(stacktop - 1);
1357
1358 if (!save)
1359 {
1360 stackptr += sizeof(sljit_w);
1361 if (stackptr < stacktop)
1362 {
1363 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1364 stackptr += sizeof(sljit_w);
1365 tmp1empty = FALSE;
1366 }
1367 if (stackptr < stacktop)
1368 {
1369 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1370 stackptr += sizeof(sljit_w);
1371 tmp2empty = FALSE;
1372 }
1373 /* The tmp1next must be TRUE in either way. */
1374 }
1375
1376 while (status != end)
1377 {
1378 count = 0;
1379 switch(status)
1380 {
1381 case start:
1382 SLJIT_ASSERT(save && common->recursive_head != 0);
1383 count = 1;
1384 srcw[0] = common->recursive_head;
1385 status = loop;
1386 break;
1387
1388 case loop:
1389 if (cc >= ccend)
1390 {
1391 status = end;
1392 break;
1393 }
1394
1395 switch(*cc)
1396 {
1397 case OP_ASSERT:
1398 case OP_ASSERT_NOT:
1399 case OP_ASSERTBACK:
1400 case OP_ASSERTBACK_NOT:
1401 case OP_ONCE:
1402 case OP_ONCE_NC:
1403 case OP_BRAPOS:
1404 case OP_SBRA:
1405 case OP_SBRAPOS:
1406 case OP_SCOND:
1407 count = 1;
1408 srcw[0] = PRIV_DATA(cc);
1409 SLJIT_ASSERT(srcw[0] != 0);
1410 cc += 1 + LINK_SIZE;
1411 break;
1412
1413 case OP_CBRA:
1414 case OP_SCBRA:
1415 count = 1;
1416 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1417 cc += 1 + LINK_SIZE + IMM2_SIZE;
1418 break;
1419
1420 case OP_CBRAPOS:
1421 case OP_SCBRAPOS:
1422 count = 2;
1423 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1424 srcw[1] = PRIV_DATA(cc);
1425 SLJIT_ASSERT(srcw[0] != 0);
1426 cc += 1 + LINK_SIZE + IMM2_SIZE;
1427 break;
1428
1429 case OP_COND:
1430 /* Might be a hidden SCOND. */
1431 alternative = cc + GET(cc, 1);
1432 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1433 {
1434 count = 1;
1435 srcw[0] = PRIV_DATA(cc);
1436 SLJIT_ASSERT(srcw[0] != 0);
1437 }
1438 cc += 1 + LINK_SIZE;
1439 break;
1440
1441 CASE_ITERATOR_LOCAL1
1442 if (PRIV_DATA(cc))
1443 {
1444 count = 1;
1445 srcw[0] = PRIV_DATA(cc);
1446 }
1447 cc += 2;
1448 #ifdef SUPPORT_UTF
1449 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1450 #endif
1451 break;
1452
1453 CASE_ITERATOR_LOCAL2A
1454 if (PRIV_DATA(cc))
1455 {
1456 count = 2;
1457 srcw[0] = PRIV_DATA(cc);
1458 srcw[1] = PRIV_DATA(cc) + sizeof(sljit_w);
1459 }
1460 cc += 2;
1461 #ifdef SUPPORT_UTF
1462 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1463 #endif
1464 break;
1465
1466 CASE_ITERATOR_LOCAL2B
1467 if (PRIV_DATA(cc))
1468 {
1469 count = 2;
1470 srcw[0] = PRIV_DATA(cc);
1471 srcw[1] = PRIV_DATA(cc) + sizeof(sljit_w);
1472 }
1473 cc += 2 + IMM2_SIZE;
1474 #ifdef SUPPORT_UTF
1475 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1476 #endif
1477 break;
1478
1479 CASE_ITERATOR_TYPE_LOCAL1
1480 if (PRIV_DATA(cc))
1481 {
1482 count = 1;
1483 srcw[0] = PRIV_DATA(cc);
1484 }
1485 cc += 1;
1486 break;
1487
1488 CASE_ITERATOR_TYPE_LOCAL2A
1489 if (PRIV_DATA(cc))
1490 {
1491 count = 2;
1492 srcw[0] = PRIV_DATA(cc);
1493 srcw[1] = srcw[0] + sizeof(sljit_w);
1494 }
1495 cc += 1;
1496 break;
1497
1498 CASE_ITERATOR_TYPE_LOCAL2B
1499 if (PRIV_DATA(cc))
1500 {
1501 count = 2;
1502 srcw[0] = PRIV_DATA(cc);
1503 srcw[1] = srcw[0] + sizeof(sljit_w);
1504 }
1505 cc += 1 + IMM2_SIZE;
1506 break;
1507
1508 case OP_CLASS:
1509 case OP_NCLASS:
1510 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1511 case OP_XCLASS:
1512 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1513 #else
1514 size = 1 + 32 / (int)sizeof(pcre_uchar);
1515 #endif
1516 if (PRIV_DATA(cc))
1517 switch(get_class_iterator_size(cc + size))
1518 {
1519 case 1:
1520 count = 1;
1521 srcw[0] = PRIV_DATA(cc);
1522 break;
1523
1524 case 2:
1525 count = 2;
1526 srcw[0] = PRIV_DATA(cc);
1527 srcw[1] = srcw[0] + sizeof(sljit_w);
1528 break;
1529
1530 default:
1531 SLJIT_ASSERT_STOP();
1532 break;
1533 }
1534 cc += size;
1535 break;
1536
1537 default:
1538 cc = next_opcode(common, cc);
1539 SLJIT_ASSERT(cc != NULL);
1540 break;
1541 }
1542 break;
1543
1544 case end:
1545 SLJIT_ASSERT_STOP();
1546 break;
1547 }
1548
1549 while (count > 0)
1550 {
1551 count--;
1552 if (save)
1553 {
1554 if (tmp1next)
1555 {
1556 if (!tmp1empty)
1557 {
1558 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1559 stackptr += sizeof(sljit_w);
1560 }
1561 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1562 tmp1empty = FALSE;
1563 tmp1next = FALSE;
1564 }
1565 else
1566 {
1567 if (!tmp2empty)
1568 {
1569 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1570 stackptr += sizeof(sljit_w);
1571 }
1572 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1573 tmp2empty = FALSE;
1574 tmp1next = TRUE;
1575 }
1576 }
1577 else
1578 {
1579 if (tmp1next)
1580 {
1581 SLJIT_ASSERT(!tmp1empty);
1582 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1583 tmp1empty = stackptr >= stacktop;
1584 if (!tmp1empty)
1585 {
1586 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1587 stackptr += sizeof(sljit_w);
1588 }
1589 tmp1next = FALSE;
1590 }
1591 else
1592 {
1593 SLJIT_ASSERT(!tmp2empty);
1594 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1595 tmp2empty = stackptr >= stacktop;
1596 if (!tmp2empty)
1597 {
1598 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1599 stackptr += sizeof(sljit_w);
1600 }
1601 tmp1next = TRUE;
1602 }
1603 }
1604 }
1605 }
1606
1607 if (save)
1608 {
1609 if (tmp1next)
1610 {
1611 if (!tmp1empty)
1612 {
1613 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1614 stackptr += sizeof(sljit_w);
1615 }
1616 if (!tmp2empty)
1617 {
1618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1619 stackptr += sizeof(sljit_w);
1620 }
1621 }
1622 else
1623 {
1624 if (!tmp2empty)
1625 {
1626 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1627 stackptr += sizeof(sljit_w);
1628 }
1629 if (!tmp1empty)
1630 {
1631 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1632 stackptr += sizeof(sljit_w);
1633 }
1634 }
1635 }
1636 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1637 }
1638
1639 #undef CASE_ITERATOR_LOCAL1
1640 #undef CASE_ITERATOR_LOCAL2A
1641 #undef CASE_ITERATOR_LOCAL2B
1642 #undef CASE_ITERATOR_TYPE_LOCAL1
1643 #undef CASE_ITERATOR_TYPE_LOCAL2A
1644 #undef CASE_ITERATOR_TYPE_LOCAL2B
1645
1646 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1647 {
1648 return (value & (value - 1)) == 0;
1649 }
1650
1651 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1652 {
1653 while (list)
1654 {
1655 /* sljit_set_label is clever enough to do nothing
1656 if either the jump or the label is NULL. */
1657 sljit_set_label(list->jump, label);
1658 list = list->next;
1659 }
1660 }
1661
1662 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1663 {
1664 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1665 if (list_item)
1666 {
1667 list_item->next = *list;
1668 list_item->jump = jump;
1669 *list = list_item;
1670 }
1671 }
1672
1673 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1674 {
1675 DEFINE_COMPILER;
1676 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1677
1678 if (list_item)
1679 {
1680 list_item->type = type;
1681 list_item->data = data;
1682 list_item->start = start;
1683 list_item->quit = LABEL();
1684 list_item->next = common->stubs;
1685 common->stubs = list_item;
1686 }
1687 }
1688
1689 static void flush_stubs(compiler_common *common)
1690 {
1691 DEFINE_COMPILER;
1692 stub_list* list_item = common->stubs;
1693
1694 while (list_item)
1695 {
1696 JUMPHERE(list_item->start);
1697 switch(list_item->type)
1698 {
1699 case stack_alloc:
1700 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1701 break;
1702 }
1703 JUMPTO(SLJIT_JUMP, list_item->quit);
1704 list_item = list_item->next;
1705 }
1706 common->stubs = NULL;
1707 }
1708
1709 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1710 {
1711 DEFINE_COMPILER;
1712
1713 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1714 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1715 }
1716
1717 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1718 {
1719 /* May destroy all locals and registers except TMP2. */
1720 DEFINE_COMPILER;
1721
1722 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1723 #ifdef DESTROY_REGISTERS
1724 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1725 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1726 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1727 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1728 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1729 #endif
1730 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1731 }
1732
1733 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1734 {
1735 DEFINE_COMPILER;
1736 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1737 }
1738
1739 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1740 {
1741 DEFINE_COMPILER;
1742 struct sljit_label *loop;
1743 int i;
1744 /* At this point we can freely use all temporary registers. */
1745 /* TMP1 returns with begin - 1. */
1746 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1747 if (length < 8)
1748 {
1749 for (i = 0; i < length; i++)
1750 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1751 }
1752 else
1753 {
1754 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1755 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1756 loop = LABEL();
1757 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1758 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1759 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1760 }
1761 }
1762
1763 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1764 {
1765 DEFINE_COMPILER;
1766 struct sljit_label *loop;
1767 struct sljit_jump *earlyexit;
1768
1769 /* At this point we can freely use all registers. */
1770 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1772
1773 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1774 if (common->mark_ptr != 0)
1775 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1776 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1777 if (common->mark_ptr != 0)
1778 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1779 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1780 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1781 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1782 /* Unlikely, but possible */
1783 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1784 loop = LABEL();
1785 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1786 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1787 /* Copy the integer value to the output buffer */
1788 #ifdef COMPILE_PCRE16
1789 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1790 #endif
1791 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1792 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1793 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1794 JUMPHERE(earlyexit);
1795
1796 /* Calculate the return value, which is the maximum ovector value. */
1797 if (topbracket > 1)
1798 {
1799 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1800 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1801
1802 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1803 loop = LABEL();
1804 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1805 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1806 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1807 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1808 }
1809 else
1810 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1811 }
1812
1813 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1814 {
1815 DEFINE_COMPILER;
1816
1817 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1818 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1819
1820 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1821 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1822 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1823 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, quit);
1824
1825 /* Store match begin and end. */
1826 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1827 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1828 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1829 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1830 #ifdef COMPILE_PCRE16
1831 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1832 #endif
1833 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1834
1835 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1836 #ifdef COMPILE_PCRE16
1837 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1838 #endif
1839 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1840
1841 JUMPTO(SLJIT_JUMP, quit);
1842 }
1843
1844 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1845 {
1846 /* May destroy TMP1. */
1847 DEFINE_COMPILER;
1848 struct sljit_jump *jump;
1849
1850 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1851 {
1852 /* The value of -1 must be kept for start_used_ptr! */
1853 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1854 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1855 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1856 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1857 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1858 JUMPHERE(jump);
1859 }
1860 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1861 {
1862 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1863 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1864 JUMPHERE(jump);
1865 }
1866 }
1867
1868 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1869 {
1870 /* Detects if the character has an othercase. */
1871 unsigned int c;
1872
1873 #ifdef SUPPORT_UTF
1874 if (common->utf)
1875 {
1876 GETCHAR(c, cc);
1877 if (c > 127)
1878 {
1879 #ifdef SUPPORT_UCP
1880 return c != UCD_OTHERCASE(c);
1881 #else
1882 return FALSE;
1883 #endif
1884 }
1885 #ifndef COMPILE_PCRE8
1886 return common->fcc[c] != c;
1887 #endif
1888 }
1889 else
1890 #endif
1891 c = *cc;
1892 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1893 }
1894
1895 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1896 {
1897 /* Returns with the othercase. */
1898 #ifdef SUPPORT_UTF
1899 if (common->utf && c > 127)
1900 {
1901 #ifdef SUPPORT_UCP
1902 return UCD_OTHERCASE(c);
1903 #else
1904 return c;
1905 #endif
1906 }
1907 #endif
1908 return TABLE_GET(c, common->fcc, c);
1909 }
1910
1911 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1912 {
1913 /* Detects if the character and its othercase has only 1 bit difference. */
1914 unsigned int c, oc, bit;
1915 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1916 int n;
1917 #endif
1918
1919 #ifdef SUPPORT_UTF
1920 if (common->utf)
1921 {
1922 GETCHAR(c, cc);
1923 if (c <= 127)
1924 oc = common->fcc[c];
1925 else
1926 {
1927 #ifdef SUPPORT_UCP
1928 oc = UCD_OTHERCASE(c);
1929 #else
1930 oc = c;
1931 #endif
1932 }
1933 }
1934 else
1935 {
1936 c = *cc;
1937 oc = TABLE_GET(c, common->fcc, c);
1938 }
1939 #else
1940 c = *cc;
1941 oc = TABLE_GET(c, common->fcc, c);
1942 #endif
1943
1944 SLJIT_ASSERT(c != oc);
1945
1946 bit = c ^ oc;
1947 /* Optimized for English alphabet. */
1948 if (c <= 127 && bit == 0x20)
1949 return (0 << 8) | 0x20;
1950
1951 /* Since c != oc, they must have at least 1 bit difference. */
1952 if (!ispowerof2(bit))
1953 return 0;
1954
1955 #ifdef COMPILE_PCRE8
1956
1957 #ifdef SUPPORT_UTF
1958 if (common->utf && c > 127)
1959 {
1960 n = GET_EXTRALEN(*cc);
1961 while ((bit & 0x3f) == 0)
1962 {
1963 n--;
1964 bit >>= 6;
1965 }
1966 return (n << 8) | bit;
1967 }
1968 #endif /* SUPPORT_UTF */
1969 return (0 << 8) | bit;
1970
1971 #else /* COMPILE_PCRE8 */
1972
1973 #ifdef COMPILE_PCRE16
1974 #ifdef SUPPORT_UTF
1975 if (common->utf && c > 65535)
1976 {
1977 if (bit >= (1 << 10))
1978 bit >>= 10;
1979 else
1980 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1981 }
1982 #endif /* SUPPORT_UTF */
1983 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1984 #endif /* COMPILE_PCRE16 */
1985
1986 #endif /* COMPILE_PCRE8 */
1987 }
1988
1989 static void check_partial(compiler_common *common, BOOL force)
1990 {
1991 /* Checks whether a partial matching is occured. Does not modify registers. */
1992 DEFINE_COMPILER;
1993 struct sljit_jump *jump = NULL;
1994
1995 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1996
1997 if (common->mode == JIT_COMPILE)
1998 return;
1999
2000 if (!force)
2001 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2002 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2003 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2004
2005 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2006 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2007 else
2008 {
2009 if (common->partialmatchlabel != NULL)
2010 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2011 else
2012 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2013 }
2014
2015 if (jump != NULL)
2016 JUMPHERE(jump);
2017 }
2018
2019 static struct sljit_jump *check_str_end(compiler_common *common)
2020 {
2021 /* Does not affect registers. Usually used in a tight spot. */
2022 DEFINE_COMPILER;
2023 struct sljit_jump *jump;
2024 struct sljit_jump *nohit;
2025 struct sljit_jump *return_value;
2026
2027 if (common->mode == JIT_COMPILE)
2028 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2029
2030 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2031 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2032 {
2033 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2034 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2035 JUMPHERE(nohit);
2036 return_value = JUMP(SLJIT_JUMP);
2037 }
2038 else
2039 {
2040 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2041 if (common->partialmatchlabel != NULL)
2042 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2043 else
2044 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2045 }
2046 JUMPHERE(jump);
2047 return return_value;
2048 }
2049
2050 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2051 {
2052 DEFINE_COMPILER;
2053 struct sljit_jump *jump;
2054
2055 if (common->mode == JIT_COMPILE)
2056 {
2057 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2058 return;
2059 }
2060
2061 /* Partial matching mode. */
2062 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2063 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2064 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2065 {
2066 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2067 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2068 }
2069 else
2070 {
2071 if (common->partialmatchlabel != NULL)
2072 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2073 else
2074 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2075 }
2076 JUMPHERE(jump);
2077 }
2078
2079 static void read_char(compiler_common *common)
2080 {
2081 /* Reads the character into TMP1, updates STR_PTR.
2082 Does not check STR_END. TMP2 Destroyed. */
2083 DEFINE_COMPILER;
2084 #ifdef SUPPORT_UTF
2085 struct sljit_jump *jump;
2086 #endif
2087
2088 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2089 #ifdef SUPPORT_UTF
2090 if (common->utf)
2091 {
2092 #ifdef COMPILE_PCRE8
2093 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2094 #else
2095 #ifdef COMPILE_PCRE16
2096 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2097 #endif
2098 #endif /* COMPILE_PCRE8 */
2099 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2100 JUMPHERE(jump);
2101 }
2102 #endif
2103 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2104 }
2105
2106 static void peek_char(compiler_common *common)
2107 {
2108 /* Reads the character into TMP1, keeps STR_PTR.
2109 Does not check STR_END. TMP2 Destroyed. */
2110 DEFINE_COMPILER;
2111 #ifdef SUPPORT_UTF
2112 struct sljit_jump *jump;
2113 #endif
2114
2115 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2116 #ifdef SUPPORT_UTF
2117 if (common->utf)
2118 {
2119 #ifdef COMPILE_PCRE8
2120 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2121 #else
2122 #ifdef COMPILE_PCRE16
2123 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2124 #endif
2125 #endif /* COMPILE_PCRE8 */
2126 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2127 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2128 JUMPHERE(jump);
2129 }
2130 #endif
2131 }
2132
2133 static void read_char8_type(compiler_common *common)
2134 {
2135 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2136 DEFINE_COMPILER;
2137 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2138 struct sljit_jump *jump;
2139 #endif
2140
2141 #ifdef SUPPORT_UTF
2142 if (common->utf)
2143 {
2144 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2145 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2146 #ifdef COMPILE_PCRE8
2147 /* This can be an extra read in some situations, but hopefully
2148 it is needed in most cases. */
2149 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2150 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2151 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2152 JUMPHERE(jump);
2153 #else
2154 #ifdef COMPILE_PCRE16
2155 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2156 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2157 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2158 JUMPHERE(jump);
2159 /* Skip low surrogate if necessary. */
2160 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2161 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2162 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2163 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2164 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2165 #endif
2166 #endif /* COMPILE_PCRE8 */
2167 return;
2168 }
2169 #endif
2170 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2171 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2172 #ifdef COMPILE_PCRE16
2173 /* The ctypes array contains only 256 values. */
2174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2175 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2176 #endif
2177 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2178 #ifdef COMPILE_PCRE16
2179 JUMPHERE(jump);
2180 #endif
2181 }
2182
2183 static void skip_char_back(compiler_common *common)
2184 {
2185 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2186 DEFINE_COMPILER;
2187 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2188 struct sljit_label *label;
2189
2190 if (common->utf)
2191 {
2192 label = LABEL();
2193 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2194 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2195 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2196 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2197 return;
2198 }
2199 #endif
2200 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2201 if (common->utf)
2202 {
2203 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2204 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2205 /* Skip low surrogate if necessary. */
2206 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2207 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2208 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2209 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2210 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2211 return;
2212 }
2213 #endif
2214 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2215 }
2216
2217 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2218 {
2219 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2220 DEFINE_COMPILER;
2221
2222 if (nltype == NLTYPE_ANY)
2223 {
2224 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2225 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2226 }
2227 else if (nltype == NLTYPE_ANYCRLF)
2228 {
2229 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2230 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2231 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2232 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2233 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2234 }
2235 else
2236 {
2237 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2238 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2239 }
2240 }
2241
2242 #ifdef SUPPORT_UTF
2243
2244 #ifdef COMPILE_PCRE8
2245 static void do_utfreadchar(compiler_common *common)
2246 {
2247 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2248 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2249 DEFINE_COMPILER;
2250 struct sljit_jump *jump;
2251
2252 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2253 /* Searching for the first zero. */
2254 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2255 jump = JUMP(SLJIT_C_NOT_ZERO);
2256 /* Two byte sequence. */
2257 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2258 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2259 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2260 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2261 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2262 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2263 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2264 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2265 JUMPHERE(jump);
2266
2267 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2268 jump = JUMP(SLJIT_C_NOT_ZERO);
2269 /* Three byte sequence. */
2270 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2271 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2272 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2273 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2274 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2275 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2276 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2277 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2278 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2279 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2280 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2281 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2282 JUMPHERE(jump);
2283
2284 /* Four byte sequence. */
2285 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2286 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2287 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2288 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2289 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2290 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2291 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2292 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2293 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2294 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2295 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2296 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2297 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2298 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2299 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2300 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2301 }
2302
2303 static void do_utfreadtype8(compiler_common *common)
2304 {
2305 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2306 of the character (>= 0xc0). Return value in TMP1. */
2307 DEFINE_COMPILER;
2308 struct sljit_jump *jump;
2309 struct sljit_jump *compare;
2310
2311 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2312
2313 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2314 jump = JUMP(SLJIT_C_NOT_ZERO);
2315 /* Two byte sequence. */
2316 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2317 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2318 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2319 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2320 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2321 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2322 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2323 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2324 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2325
2326 JUMPHERE(compare);
2327 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2328 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2329 JUMPHERE(jump);
2330
2331 /* We only have types for characters less than 256. */
2332 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
2333 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2334 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2335 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2336 }
2337
2338 #else /* COMPILE_PCRE8 */
2339
2340 #ifdef COMPILE_PCRE16
2341 static void do_utfreadchar(compiler_common *common)
2342 {
2343 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2344 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2345 DEFINE_COMPILER;
2346 struct sljit_jump *jump;
2347
2348 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2349 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2350 /* Do nothing, only return. */
2351 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2352
2353 JUMPHERE(jump);
2354 /* Combine two 16 bit characters. */
2355 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2356 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2357 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2358 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2359 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2360 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2361 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2362 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2363 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2364 }
2365 #endif /* COMPILE_PCRE16 */
2366
2367 #endif /* COMPILE_PCRE8 */
2368
2369 #endif /* SUPPORT_UTF */
2370
2371 #ifdef SUPPORT_UCP
2372
2373 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2374 #define UCD_BLOCK_MASK 127
2375 #define UCD_BLOCK_SHIFT 7
2376
2377 static void do_getucd(compiler_common *common)
2378 {
2379 /* Search the UCD record for the character comes in TMP1.
2380 Returns chartype in TMP1 and UCD offset in TMP2. */
2381 DEFINE_COMPILER;
2382
2383 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2384
2385 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2386 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2387 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
2388 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2389 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2390 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2391 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
2392 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2393 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2394 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2395 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2396 }
2397 #endif
2398
2399 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2400 {
2401 DEFINE_COMPILER;
2402 struct sljit_label *mainloop;
2403 struct sljit_label *newlinelabel = NULL;
2404 struct sljit_jump *start;
2405 struct sljit_jump *end = NULL;
2406 struct sljit_jump *nl = NULL;
2407 #ifdef SUPPORT_UTF
2408 struct sljit_jump *singlechar;
2409 #endif
2410 jump_list *newline = NULL;
2411 BOOL newlinecheck = FALSE;
2412 BOOL readuchar = FALSE;
2413
2414 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2415 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2416 newlinecheck = TRUE;
2417
2418 if (firstline)
2419 {
2420 /* Search for the end of the first line. */
2421 SLJIT_ASSERT(common->first_line_end != 0);
2422 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2423
2424 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2425 {
2426 mainloop = LABEL();
2427 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2428 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2429 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2430 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2431 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2432 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2433 JUMPHERE(end);
2434 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2435 }
2436 else
2437 {
2438 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2439 mainloop = LABEL();
2440 /* Continual stores does not cause data dependency. */
2441 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2442 read_char(common);
2443 check_newlinechar(common, common->nltype, &newline, TRUE);
2444 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2445 JUMPHERE(end);
2446 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2447 set_jumps(newline, LABEL());
2448 }
2449
2450 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2451 }
2452
2453 start = JUMP(SLJIT_JUMP);
2454
2455 if (newlinecheck)
2456 {
2457 newlinelabel = LABEL();
2458 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2459 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2460 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2461 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2462 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2463 #ifdef COMPILE_PCRE16
2464 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2465 #endif
2466 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2467 nl = JUMP(SLJIT_JUMP);
2468 }
2469
2470 mainloop = LABEL();
2471
2472 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2473 #ifdef SUPPORT_UTF
2474 if (common->utf) readuchar = TRUE;
2475 #endif
2476 if (newlinecheck) readuchar = TRUE;
2477
2478 if (readuchar)
2479 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2480
2481 if (newlinecheck)
2482 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2483
2484 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2485 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2486 if (common->utf)
2487 {
2488 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2489 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2490 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2491 JUMPHERE(singlechar);
2492 }
2493 #endif
2494 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2495 if (common->utf)
2496 {
2497 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2498 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2499 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2500 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2501 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2502 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2503 JUMPHERE(singlechar);
2504 }
2505 #endif
2506 JUMPHERE(start);
2507
2508 if (newlinecheck)
2509 {
2510 JUMPHERE(end);
2511 JUMPHERE(nl);
2512 }
2513
2514 return mainloop;
2515 }
2516
2517 static SLJIT_INLINE BOOL fast_forward_first_two_chars(compiler_common *common, BOOL firstline)
2518 {
2519 DEFINE_COMPILER;
2520 struct sljit_label *start;
2521 struct sljit_jump *quit;
2522 struct sljit_jump *found;
2523 pcre_int32 chars[4];
2524 pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
2525 int location = 0;
2526 pcre_int32 len, c, bit, caseless;
2527 BOOL must_end;
2528
2529 #ifdef COMPILE_PCRE8
2530 union {
2531 sljit_uh ascombined;
2532 sljit_ub asuchars[2];
2533 } pair;
2534 #else
2535 union {
2536 sljit_ui ascombined;
2537 sljit_uh asuchars[2];
2538 } pair;
2539 #endif
2540
2541 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2542 return FALSE;
2543
2544 while (TRUE)
2545 {
2546 caseless = 0;
2547 must_end = TRUE;
2548 switch(*cc)
2549 {
2550 case OP_CHAR:
2551 must_end = FALSE;
2552 cc++;
2553 break;
2554
2555 case OP_CHARI:
2556 caseless = 1;
2557 must_end = FALSE;
2558 cc++;
2559 break;
2560
2561 case OP_SOD:
2562 case OP_SOM:
2563 case OP_SET_SOM:
2564 case OP_NOT_WORD_BOUNDARY:
2565 case OP_WORD_BOUNDARY:
2566 case OP_EODN:
2567 case OP_EOD:
2568 case OP_CIRC:
2569 case OP_CIRCM:
2570 case OP_DOLL:
2571 case OP_DOLLM:
2572 /* Zero width assertions. */
2573 cc++;
2574 continue;
2575
2576 case OP_PLUS:
2577 case OP_MINPLUS:
2578 case OP_POSPLUS:
2579 cc++;
2580 break;
2581
2582 case OP_EXACT:
2583 cc += 1 + IMM2_SIZE;
2584 break;
2585
2586 case OP_PLUSI:
2587 case OP_MINPLUSI:
2588 case OP_POSPLUSI:
2589 caseless = 1;
2590 cc++;
2591 break;
2592
2593 case OP_EXACTI:
2594 caseless = 1;
2595 cc += 1 + IMM2_SIZE;
2596 break;
2597
2598 default:
2599 return FALSE;
2600 }
2601
2602 len = 1;
2603 #ifdef SUPPORT_UTF
2604 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2605 #endif
2606
2607 if (caseless && char_has_othercase(common, cc))
2608 {
2609 caseless = char_get_othercase_bit(common, cc);
2610 if (caseless == 0)
2611 return FALSE;
2612 #ifdef COMPILE_PCRE8
2613 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2614 #else
2615 if ((caseless & 0x100) != 0)
2616 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2617 else
2618 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2619 #endif
2620 }
2621 else
2622 caseless = 0;
2623
2624 while (len > 0 && location < 2 * 2)
2625 {
2626 c = *cc;
2627 bit = 0;
2628 if (len == (caseless & 0xff))
2629 {
2630 bit = caseless >> 8;
2631 c |= bit;
2632 }
2633
2634 chars[location] = c;
2635 chars[location + 1] = bit;
2636
2637 len--;
2638 location += 2;
2639 cc++;
2640 }
2641
2642 if (location == 2 * 2)
2643 break;
2644 else if (must_end)
2645 return FALSE;
2646 }
2647
2648 if (firstline)
2649 {
2650 SLJIT_ASSERT(common->first_line_end != 0);
2651 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2652 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, 1);
2653 }
2654 else
2655 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2656
2657 start = LABEL();
2658 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2659 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2660 #ifdef COMPILE_PCRE8
2661 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2662 #else /* COMPILE_PCRE8 */
2663 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2664 #endif
2665
2666 #else /* SLJIT_UNALIGNED */
2667
2668 #if defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN
2669 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2670 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2671 #else /* SLJIT_BIG_ENDIAN */
2672 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2673 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2674 #endif /* SLJIT_BIG_ENDIAN */
2675
2676 #ifdef COMPILE_PCRE8
2677 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 8);
2678 #else /* COMPILE_PCRE8 */
2679 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
2680 #endif
2681 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2682
2683 #endif
2684
2685 if (chars[1] != 0 || chars[3] != 0)
2686 {
2687 pair.asuchars[0] = chars[1];
2688 pair.asuchars[1] = chars[3];
2689 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, pair.ascombined);
2690 }
2691
2692 pair.asuchars[0] = chars[0];
2693 pair.asuchars[1] = chars[2];
2694 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, pair.ascombined);
2695
2696 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2697 JUMPTO(SLJIT_JUMP, start);
2698 JUMPHERE(found);
2699 JUMPHERE(quit);
2700
2701 if (firstline)
2702 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2703 else
2704 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2705 return TRUE;
2706 }
2707
2708 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2709 {
2710 DEFINE_COMPILER;
2711 struct sljit_label *start;
2712 struct sljit_jump *quit;
2713 struct sljit_jump *found;
2714 pcre_uchar oc, bit;
2715
2716 if (firstline)
2717 {
2718 SLJIT_ASSERT(common->first_line_end != 0);
2719 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2720 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2721 }
2722
2723 start = LABEL();
2724 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2725 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2726
2727 oc = first_char;
2728 if (caseless)
2729 {
2730 oc = TABLE_GET(first_char, common->fcc, first_char);
2731 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2732 if (first_char > 127 && common->utf)
2733 oc = UCD_OTHERCASE(first_char);
2734 #endif
2735 }
2736 if (first_char == oc)
2737 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2738 else
2739 {
2740 bit = first_char ^ oc;
2741 if (ispowerof2(bit))
2742 {
2743 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2744 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2745 }
2746 else
2747 {
2748 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2749 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2750 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2751 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2752 found = JUMP(SLJIT_C_NOT_ZERO);
2753 }
2754 }
2755
2756 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2757 JUMPTO(SLJIT_JUMP, start);
2758 JUMPHERE(found);
2759 JUMPHERE(quit);
2760
2761 if (firstline)
2762 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2763 }
2764
2765 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2766 {
2767 DEFINE_COMPILER;
2768 struct sljit_label *loop;
2769 struct sljit_jump *lastchar;
2770 struct sljit_jump *firstchar;
2771 struct sljit_jump *quit;
2772 struct sljit_jump *foundcr = NULL;
2773 struct sljit_jump *notfoundnl;
2774 jump_list *newline = NULL;
2775
2776 if (firstline)
2777 {
2778 SLJIT_ASSERT(common->first_line_end != 0);
2779 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2780 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2781 }
2782
2783 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2784 {
2785 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2786 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2787 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2788 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2789 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2790
2791 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2792 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2793 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2794 #ifdef COMPILE_PCRE16
2795 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2796 #endif
2797 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2798
2799 loop = LABEL();
2800 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2801 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2802 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2803 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2804 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2805 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2806
2807 JUMPHERE(quit);
2808 JUMPHERE(firstchar);
2809 JUMPHERE(lastchar);
2810
2811 if (firstline)
2812 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2813 return;
2814 }
2815
2816 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2817 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2818 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2819 skip_char_back(common);
2820
2821 loop = LABEL();
2822 read_char(common);
2823 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2824 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2825 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2826 check_newlinechar(common, common->nltype, &newline, FALSE);
2827 set_jumps(newline, loop);
2828
2829 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2830 {
2831 quit = JUMP(SLJIT_JUMP);
2832 JUMPHERE(foundcr);
2833 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2834 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2835 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2836 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2837 #ifdef COMPILE_PCRE16
2838 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2839 #endif
2840 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2841 JUMPHERE(notfoundnl);
2842 JUMPHERE(quit);
2843 }
2844 JUMPHERE(lastchar);
2845 JUMPHERE(firstchar);
2846
2847 if (firstline)
2848 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2849 }
2850
2851 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2852 {
2853 DEFINE_COMPILER;
2854 struct sljit_label *start;
2855 struct sljit_jump *quit;
2856 struct sljit_jump *found;
2857 #ifndef COMPILE_PCRE8
2858 struct sljit_jump *jump;
2859 #endif
2860
2861 if (firstline)
2862 {
2863 SLJIT_ASSERT(common->first_line_end != 0);
2864 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
2865 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2866 }
2867
2868 start = LABEL();
2869 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2870 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2871 #ifdef SUPPORT_UTF
2872 if (common->utf)
2873 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2874 #endif
2875 #ifndef COMPILE_PCRE8
2876 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2877 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2878 JUMPHERE(jump);
2879 #endif
2880 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2881 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2882 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2883 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2884 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2885 found = JUMP(SLJIT_C_NOT_ZERO);
2886
2887 #ifdef SUPPORT_UTF
2888 if (common->utf)
2889 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2890 #endif
2891 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2892 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2893 if (common->utf)
2894 {
2895 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2896 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2897 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2898 }
2899 #endif
2900 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2901 if (common->utf)
2902 {
2903 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2904 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2905 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2906 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2907 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2908 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2909 }
2910 #endif
2911 JUMPTO(SLJIT_JUMP, start);
2912 JUMPHERE(found);
2913 JUMPHERE(quit);
2914
2915 if (firstline)
2916 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
2917 }
2918
2919 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2920 {
2921 DEFINE_COMPILER;
2922 struct sljit_label *loop;
2923 struct sljit_jump *toolong;
2924 struct sljit_jump *alreadyfound;
2925 struct sljit_jump *found;
2926 struct sljit_jump *foundoc = NULL;
2927 struct sljit_jump *notfound;
2928 pcre_uchar oc, bit;
2929
2930 SLJIT_ASSERT(common->req_char_ptr != 0);
2931 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2932 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2933 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2934 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2935
2936 if (has_firstchar)
2937 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2938 else
2939 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2940
2941 loop = LABEL();
2942 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2943
2944 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2945 oc = req_char;
2946 if (caseless)
2947 {
2948 oc = TABLE_GET(req_char, common->fcc, req_char);
2949 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2950 if (req_char > 127 && common->utf)
2951 oc = UCD_OTHERCASE(req_char);
2952 #endif
2953 }
2954 if (req_char == oc)
2955 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2956 else
2957 {
2958 bit = req_char ^ oc;
2959 if (ispowerof2(bit))
2960 {
2961 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2962 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2963 }
2964 else
2965 {
2966 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2967 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2968 }
2969 }
2970 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2971 JUMPTO(SLJIT_JUMP, loop);
2972
2973 JUMPHERE(found);
2974 if (foundoc)
2975 JUMPHERE(foundoc);
2976 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2977 JUMPHERE(alreadyfound);
2978 JUMPHERE(toolong);
2979 return notfound;
2980 }
2981
2982 static void do_revertframes(compiler_common *common)
2983 {
2984 DEFINE_COMPILER;
2985 struct sljit_jump *jump;
2986 struct sljit_label *mainloop;
2987
2988 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2989 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2990 GET_LOCAL_BASE(TMP3, 0, 0);
2991
2992 /* Drop frames until we reach STACK_TOP. */
2993 mainloop = LABEL();
2994 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2995 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2996 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
2997 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2998 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2999 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
3000 JUMPTO(SLJIT_JUMP, mainloop);
3001
3002 JUMPHERE(jump);
3003 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3004 /* End of dropping frames. */
3005 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3006
3007 JUMPHERE(jump);
3008 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
3009 /* Set string begin. */
3010 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3011 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3012 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3013 JUMPTO(SLJIT_JUMP, mainloop);
3014
3015 JUMPHERE(jump);
3016 if (common->mark_ptr != 0)
3017 {
3018 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3019 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3020 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3021 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3022 JUMPTO(SLJIT_JUMP, mainloop);
3023
3024 JUMPHERE(jump);
3025 }
3026
3027 /* Unknown command. */
3028 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3029 JUMPTO(SLJIT_JUMP, mainloop);
3030 }
3031
3032 static void check_wordboundary(compiler_common *common)
3033 {
3034 DEFINE_COMPILER;
3035 struct sljit_jump *skipread;
3036 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3037 struct sljit_jump *jump;
3038 #endif
3039
3040 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3041
3042 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3043 /* Get type of the previous char, and put it to LOCALS1. */
3044 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3045 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3047 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3048 skip_char_back(common);
3049 check_start_used_ptr(common);
3050 read_char(common);
3051
3052 /* Testing char type. */
3053 #ifdef SUPPORT_UCP
3054 if (common->use_ucp)
3055 {
3056 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3057 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3058 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3059 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3060 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3061 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3062 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3063 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3064 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3065 JUMPHERE(jump);
3066 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3067 }
3068 else
3069 #endif
3070 {
3071 #ifndef COMPILE_PCRE8
3072 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3073 #elif defined SUPPORT_UTF
3074 /* Here LOCALS1 has already been zeroed. */
3075 jump = NULL;
3076 if (common->utf)
3077 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3078 #endif /* COMPILE_PCRE8 */
3079 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3080 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3081 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3082 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3083 #ifndef COMPILE_PCRE8
3084 JUMPHERE(jump);
3085 #elif defined SUPPORT_UTF
3086 if (jump != NULL)
3087 JUMPHERE(jump);
3088 #endif /* COMPILE_PCRE8 */
3089 }
3090 JUMPHERE(skipread);
3091
3092 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3093 skipread = check_str_end(common);
3094 peek_char(common);
3095
3096 /* Testing char type. This is a code duplication. */
3097 #ifdef SUPPORT_UCP
3098 if (common->use_ucp)
3099 {
3100 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3101 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3102 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3103 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3104 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3105 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3106 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3107 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3108 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3109 JUMPHERE(jump);
3110 }
3111 else
3112 #endif
3113 {
3114 #ifndef COMPILE_PCRE8
3115 /* TMP2 may be destroyed by peek_char. */
3116 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3117 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3118 #elif defined SUPPORT_UTF
3119 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3120 jump = NULL;
3121 if (common->utf)
3122 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3123 #endif
3124 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3125 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3126 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3127 #ifndef COMPILE_PCRE8
3128 JUMPHERE(jump);
3129 #elif defined SUPPORT_UTF
3130 if (jump != NULL)
3131 JUMPHERE(jump);
3132 #endif /* COMPILE_PCRE8 */
3133 }
3134 JUMPHERE(skipread);
3135
3136 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3137 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3138 }
3139
3140 /*
3141 range format:
3142
3143 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3144 ranges[1] = first bit (0 or 1)
3145 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3146 */
3147
3148 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3149 {
3150 DEFINE_COMPILER;
3151 struct sljit_jump *jump;
3152
3153 if (ranges[0] < 0)
3154 return FALSE;
3155
3156 switch(ranges[0])
3157 {
3158 case 1:
3159 if (readch)
3160 read_char(common);
3161 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3162 return TRUE;
3163
3164 case 2:
3165 if (readch)
3166 read_char(common);
3167 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3168 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3169 return TRUE;
3170
3171 case 4:
3172 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3173 {
3174 if (readch)
3175 read_char(common);
3176 if (ranges[1] != 0)
3177 {
3178 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3179 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3180 }
3181 else
3182 {
3183 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3184 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3185 JUMPHERE(jump);
3186 }
3187 return TRUE;
3188 }
3189 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && ispowerof2(ranges[4] - ranges[2]))
3190 {
3191 if (readch)
3192 read_char(common);
3193 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3194 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3195 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3196 return TRUE;
3197 }
3198 return FALSE;
3199
3200 default:
3201 return FALSE;
3202 }
3203 }
3204
3205 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3206 {
3207 int i, bit, length;
3208 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3209
3210 bit = ctypes[0] & flag;
3211 ranges[0] = -1;
3212 ranges[1] = bit != 0 ? 1 : 0;
3213 length = 0;
3214
3215 for (i = 1; i < 256; i++)
3216 if ((ctypes[i] & flag) != bit)
3217 {
3218 if (length >= MAX_RANGE_SIZE)
3219 return;
3220 ranges[2 + length] = i;
3221 length++;
3222 bit ^= flag;
3223 }
3224
3225 if (bit != 0)
3226 {
3227 if (length >= MAX_RANGE_SIZE)
3228 return;
3229 ranges[2 + length] = 256;
3230 length++;
3231 }
3232 ranges[0] = length;
3233 }
3234
3235 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3236 {
3237 int ranges[2 + MAX_RANGE_SIZE];
3238 pcre_uint8 bit, cbit, all;
3239 int i, byte, length = 0;
3240
3241 bit = bits[0] & 0x1;
3242 ranges[1] = bit;
3243 /* Can be 0 or 255. */
3244 all = -bit;
3245
3246 for (i = 0; i < 256; )
3247 {
3248 byte = i >> 3;
3249 if ((i & 0x7) == 0 && bits[byte] == all)
3250 i += 8;
3251 else
3252 {
3253 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3254 if (cbit != bit)
3255 {
3256 if (length >= MAX_RANGE_SIZE)
3257 return FALSE;
3258 ranges[2 + length] = i;
3259 length++;
3260 bit = cbit;
3261 all = -cbit;
3262 }
3263 i++;
3264 }
3265 }
3266
3267 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3268 {
3269 if (length >= MAX_RANGE_SIZE)
3270 return FALSE;
3271 ranges[2 + length] = 256;
3272 length++;
3273 }
3274 ranges[0] = length;
3275
3276 return check_ranges(common, ranges, backtracks, FALSE);
3277 }
3278
3279 static void check_anynewline(compiler_common *common)
3280 {
3281 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3282 DEFINE_COMPILER;
3283
3284 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3285
3286 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3287 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3288 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3289 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3290 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3291 #ifdef COMPILE_PCRE8
3292 if (common->utf)
3293 {
3294 #endif
3295 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3296 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3297 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3298 #ifdef COMPILE_PCRE8
3299 }
3300 #endif
3301 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
3302 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3303 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3304 }
3305
3306 static void check_hspace(compiler_common *common)
3307 {
3308 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3309 DEFINE_COMPILER;
3310
3311 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3312
3313 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3314 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3315 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3316 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3317 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3318 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3319 #ifdef COMPILE_PCRE8
3320 if (common->utf)
3321 {
3322 #endif
3323 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3324 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3325 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3326 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3327 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3328 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3329 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3330 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3331 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3332 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3333 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3334 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3335 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3336 #ifdef COMPILE_PCRE8
3337 }
3338 #endif
3339 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
3340 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3341
3342 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3343 }
3344
3345 static void check_vspace(compiler_common *common)
3346 {
3347 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3348 DEFINE_COMPILER;
3349
3350 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3351
3352 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3353 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3354 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3355 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3356 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3357 #ifdef COMPILE_PCRE8
3358 if (common->utf)
3359 {
3360 #endif
3361 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3362 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3363 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3364 #ifdef COMPILE_PCRE8
3365 }
3366 #endif
3367 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
3368 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3369
3370 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3371 }
3372
3373 #define CHAR1 STR_END
3374 #define CHAR2 STACK_TOP
3375
3376 static void do_casefulcmp(compiler_common *common)
3377 {
3378 DEFINE_COMPILER;
3379 struct sljit_jump *jump;
3380 struct sljit_label *label;
3381
3382 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3383 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3384 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3385 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3386 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3387 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3388
3389 label = LABEL();
3390 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3391 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3392 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3393 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3394 JUMPTO(SLJIT_C_NOT_ZERO, label);
3395
3396 JUMPHERE(jump);
3397 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3398 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3399 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3400 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3401 }
3402
3403 #define LCC_TABLE STACK_LIMIT
3404
3405 static void do_caselesscmp(compiler_common *common)
3406 {
3407 DEFINE_COMPILER;
3408 struct sljit_jump *jump;
3409 struct sljit_label *label;
3410
3411 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3412 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3413
3414 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3415 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3416 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3417 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3418 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3419 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3420
3421 label = LABEL();
3422 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3423 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3424 #ifndef COMPILE_PCRE8
3425 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3426 #endif
3427 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3428 #ifndef COMPILE_PCRE8
3429 JUMPHERE(jump);
3430 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3431 #endif
3432 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3433 #ifndef COMPILE_PCRE8
3434 JUMPHERE(jump);
3435 #endif
3436 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3437 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3438 JUMPTO(SLJIT_C_NOT_ZERO, label);
3439
3440 JUMPHERE(jump);
3441 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3442 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3443 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3444 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3445 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3446 }
3447
3448 #undef LCC_TABLE
3449 #undef CHAR1
3450 #undef CHAR2
3451
3452 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3453
3454 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3455 {
3456 /* This function would be ineffective to do in JIT level. */
3457 int c1, c2;
3458 const pcre_uchar *src2 = args->uchar_ptr;
3459 const pcre_uchar *end2 = args->end;
3460
3461 while (src1 < end1)
3462 {
3463 if (src2 >= end2)
3464 return (pcre_uchar*)1;
3465 GETCHARINC(c1, src1);
3466 GETCHARINC(c2, src2);
3467 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
3468 }
3469 return src2;
3470 }
3471
3472 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3473
3474 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3475 compare_context* context, jump_list **backtracks)
3476 {
3477 DEFINE_COMPILER;
3478 unsigned int othercasebit = 0;
3479 pcre_uchar *othercasechar = NULL;
3480 #ifdef SUPPORT_UTF
3481 int utflength;
3482 #endif
3483
3484 if (caseless && char_has_othercase(common, cc))
3485 {
3486 othercasebit = char_get_othercase_bit(common, cc);
3487 SLJIT_ASSERT(othercasebit);
3488 /* Extracting bit difference info. */
3489 #ifdef COMPILE_PCRE8
3490 othercasechar = cc + (othercasebit >> 8);
3491 othercasebit &= 0xff;
3492 #else
3493 #ifdef COMPILE_PCRE16
3494 othercasechar = cc + (othercasebit >> 9);
3495 if ((othercasebit & 0x100) != 0)
3496 othercasebit = (othercasebit & 0xff) << 8;
3497 else
3498 othercasebit &= 0xff;
3499 #endif
3500 #endif
3501 }
3502
3503 if (context->sourcereg == -1)
3504 {
3505 #ifdef COMPILE_PCRE8
3506 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3507 if (context->length >= 4)
3508 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3509 else if (context->length >= 2)
3510 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3511 else
3512 #endif
3513 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3514 #else
3515 #ifdef COMPILE_PCRE16
3516 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3517 if (context->length >= 4)
3518 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3519 else
3520 #endif
3521 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3522 #endif
3523 #endif /* COMPILE_PCRE8 */
3524 context->sourcereg = TMP2;
3525 }
3526
3527 #ifdef SUPPORT_UTF
3528 utflength = 1;
3529 if (common->utf && HAS_EXTRALEN(*cc))
3530 utflength += GET_EXTRALEN(*cc);
3531
3532 do
3533 {
3534 #endif
3535
3536 context->length -= IN_UCHARS(1);
3537 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3538
3539 /* Unaligned read is supported. */
3540 if (othercasebit != 0 && othercasechar == cc)
3541 {
3542 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3543 context->oc.asuchars[context->ucharptr] = othercasebit;
3544 }
3545 else
3546 {
3547 context->c.asuchars[context->ucharptr] = *cc;
3548 context->oc.asuchars[context->ucharptr] = 0;
3549 }
3550 context->ucharptr++;
3551
3552 #ifdef COMPILE_PCRE8
3553 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3554 #else
3555 if (context->ucharptr >= 2 || context->length == 0)
3556 #endif
3557 {
3558 if (context->length >= 4)
3559 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3560 #ifdef COMPILE_PCRE8
3561 else if (context->length >= 2)
3562 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3563 else if (context->length >= 1)
3564 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3565 #else
3566 else if (context->length >= 2)
3567 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3568 #endif
3569 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3570
3571 switch(context->ucharptr)
3572 {
3573 case 4 / sizeof(pcre_uchar):
3574 if (context->oc.asint != 0)
3575 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3576 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3577 break;
3578
3579 case 2 / sizeof(pcre_uchar):
3580 if (context->oc.asushort != 0)
3581 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3582 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3583 break;
3584
3585 #ifdef COMPILE_PCRE8
3586 case 1:
3587 if (context->oc.asbyte != 0)
3588 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3589 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3590 break;
3591 #endif
3592
3593 default:
3594 SLJIT_ASSERT_STOP();
3595 break;
3596 }
3597 context->ucharptr = 0;
3598 }
3599
3600 #else
3601
3602 /* Unaligned read is unsupported. */
3603 #ifdef COMPILE_PCRE8
3604 if (context->length > 0)
3605 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3606 #else
3607 if (context->length > 0)
3608 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3609 #endif
3610 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3611
3612 if (othercasebit != 0 && othercasechar == cc)
3613 {
3614 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3615 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3616 }
3617 else
3618 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3619
3620 #endif
3621
3622 cc++;
3623 #ifdef SUPPORT_UTF
3624 utflength--;
3625 }
3626 while (utflength > 0);
3627 #endif
3628
3629 return cc;
3630 }
3631
3632 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3633
/* The character (TMP1) and the character type (typereg) are kept in a
rebased form: the register holds "real value - offset", where the offset is
tracked at compile time in charoffset / typeoffset. These macros emit the
instruction which moves the register to a new offset (nothing is emitted when
the offset is unchanged) and record the new offset. Note that (value) is
evaluated more than once. Both macros expand to a single statement, so they
are safe inside an unbraced if / else. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
3653
3654 static void compile_xclass_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3655 {
3656 DEFINE_COMPILER;
3657 jump_list *found = NULL;
3658 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3659 unsigned int c;
3660 int compares;
3661 struct sljit_jump *jump = NULL;
3662 pcre_uchar *ccbegin;
3663 #ifdef SUPPORT_UCP
3664 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3665 BOOL charsaved = FALSE;
3666 int typereg = TMP1, scriptreg = TMP1;
3667 unsigned int typeoffset;
3668 #endif
3669 int invertcmp, numberofcmps;
3670 unsigned int charoffset;
3671
3672 /* Although SUPPORT_UTF must be defined, we are
3673 not necessary in utf mode even in 8 bit mode. */
3674 detect_partial_match(common, backtracks);
3675 read_char(common);
3676
3677 if ((*cc++ & XCL_MAP) != 0)
3678 {
3679 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3680 #ifndef COMPILE_PCRE8
3681 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3682 #elif defined SUPPORT_UTF
3683 if (common->utf)
3684 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3685 #endif
3686
3687 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3688 {
3689 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3690 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3691 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3692 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3693 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3694 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3695 }
3696
3697 #ifndef COMPILE_PCRE8
3698 JUMPHERE(jump);
3699 #elif defined SUPPORT_UTF
3700 if (common->utf)
3701 JUMPHERE(jump);
3702 #endif
3703 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3704 #ifdef SUPPORT_UCP
3705 charsaved = TRUE;
3706 #endif
3707 cc += 32 / sizeof(pcre_uchar);
3708 }
3709
3710 /* Scanning the necessary info. */
3711 ccbegin = cc;
3712 compares = 0;
3713 while (*cc != XCL_END)
3714 {
3715 compares++;
3716 if (*cc == XCL_SINGLE)
3717 {
3718 cc += 2;
3719 #ifdef SUPPORT_UTF
3720 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3721 #endif
3722 #ifdef SUPPORT_UCP
3723 needschar = TRUE;
3724 #endif
3725 }
3726 else if (*cc == XCL_RANGE)
3727 {
3728 cc += 2;
3729 #ifdef SUPPORT_UTF
3730 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3731 #endif
3732 cc++;
3733 #ifdef SUPPORT_UTF
3734 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3735 #endif
3736 #ifdef SUPPORT_UCP
3737 needschar = TRUE;
3738 #endif
3739 }
3740 #ifdef SUPPORT_UCP
3741 else
3742 {
3743 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3744 cc++;
3745 switch(*cc)
3746 {
3747 case PT_ANY:
3748 break;
3749
3750 case PT_LAMP:
3751 case PT_GC:
3752 case PT_PC:
3753 case PT_ALNUM:
3754 needstype = TRUE;
3755 break;
3756
3757 case PT_SC:
3758 needsscript = TRUE;
3759 break;
3760
3761 case PT_SPACE:
3762 case PT_PXSPACE:
3763 case PT_WORD:
3764 needstype = TRUE;
3765 needschar = TRUE;
3766 break;
3767
3768 default:
3769 SLJIT_ASSERT_STOP();
3770 break;
3771 }
3772 cc += 2;
3773 }
3774 #endif
3775 }
3776
3777 #ifdef SUPPORT_UCP
3778 /* Simple register allocation. TMP1 is preferred if possible. */
3779 if (needstype || needsscript)
3780 {
3781 if (needschar && !charsaved)
3782 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3783 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3784 if (needschar)
3785 {
3786 if (needstype)
3787 {
3788 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3789 typereg = RETURN_ADDR;
3790 }
3791
3792 if (needsscript)
3793 scriptreg = TMP3;
3794 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3795 }
3796 else if (needstype && needsscript)
3797 scriptreg = TMP3;
3798 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3799
3800 if (needsscript)
3801 {
3802 if (scriptreg == TMP1)
3803 {
3804 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3805 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3806 }
3807 else
3808 {
3809 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3810 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3811 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3812 }
3813 }
3814 }
3815 #endif
3816
3817 /* Generating code. */
3818 cc = ccbegin;
3819 charoffset = 0;
3820 numberofcmps = 0;
3821 #ifdef SUPPORT_UCP
3822 typeoffset = 0;
3823 #endif
3824
3825 while (*cc != XCL_END)
3826 {
3827 compares--;
3828 invertcmp = (compares == 0 && list != backtracks);
3829 jump = NULL;
3830
3831 if (*cc == XCL_SINGLE)
3832 {
3833 cc ++;
3834 #ifdef SUPPORT_UTF
3835 if (common->utf)
3836 {
3837 GETCHARINC(c, cc);
3838 }
3839 else
3840 #endif
3841 c = *cc++;
3842
3843 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3844 {
3845 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3846 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3847 numberofcmps++;
3848 }
3849 else if (numberofcmps > 0)
3850 {
3851 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3852 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3853 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3854 numberofcmps = 0;
3855 }
3856 else
3857 {
3858 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3859 numberofcmps = 0;
3860 }
3861 }
3862 else if (*cc == XCL_RANGE)
3863 {
3864 cc ++;
3865 #ifdef SUPPORT_UTF
3866 if (common->utf)
3867 {
3868 GETCHARINC(c, cc);
3869 }
3870 else
3871 #endif
3872 c = *cc++;
3873 SET_CHAR_OFFSET(c);
3874 #ifdef SUPPORT_UTF
3875 if (common->utf)
3876 {
3877 GETCHARINC(c, cc);
3878 }
3879 else
3880 #endif
3881 c = *cc++;
3882 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3883 {
3884 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3885 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3886 numberofcmps++;
3887 }
3888 else if (numberofcmps > 0)
3889 {
3890 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3891 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3892 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3893 numberofcmps = 0;
3894 }
3895 else
3896 {
3897 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3898 numberofcmps = 0;
3899 }
3900 }
3901 #ifdef SUPPORT_UCP
3902 else
3903 {
3904 if (*cc == XCL_NOTPROP)
3905 invertcmp ^= 0x1;
3906 cc++;
3907 switch(*cc)
3908 {
3909 case PT_ANY:
3910 if (list != backtracks)
3911 {
3912 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3913 continue;
3914 }
3915 else if (cc[-1] == XCL_NOTPROP)
3916 continue;
3917 jump = JUMP(SLJIT_JUMP);
3918 break;
3919
3920 case PT_LAMP:
3921 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3922 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3923 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3924 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3925 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3926 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3927 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3928 break;
3929
3930 case PT_GC:
3931 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3932 SET_TYPE_OFFSET(c);
3933 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3934 break;
3935
3936 case PT_PC:
3937 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3938 break;
3939
3940 case PT_SC:
3941 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3942 break;
3943
3944 case PT_SPACE:
3945 case PT_PXSPACE:
3946 if (*cc == PT_SPACE)
3947 {
3948 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3949 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3950 }
3951 SET_CHAR_OFFSET(9);
3952 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3953 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3954 if (*cc == PT_SPACE)
3955 JUMPHERE(jump);
3956
3957 SET_TYPE_OFFSET(ucp_Zl);
3958 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3959 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3960 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3961 break;
3962
3963 case PT_WORD:
3964 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3965 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3966 /* ... fall through */
3967
3968 case PT_ALNUM:
3969 SET_TYPE_OFFSET(ucp_Ll);
3970 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3971 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3972 SET_TYPE_OFFSET(ucp_Nd);
3973 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3974 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3975 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3976 break;
3977 }
3978 cc += 2;
3979 }
3980 #endif
3981
3982 if (jump != NULL)
3983 add_jump(compiler, compares > 0 ? list : backtracks, jump);
3984 }
3985
3986 if (found != NULL)
3987 set_jumps(found, LABEL());
3988 }
3989
3990 #undef SET_TYPE_OFFSET
3991 #undef SET_CHAR_OFFSET
3992
3993 #endif
3994
3995 static pcre_uchar *compile_char1_trypath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
3996 {
3997 DEFINE_COMPILER;
3998 int length;
3999 unsigned int c, oc, bit;
4000 compare_context context;
4001 struct sljit_jump *jump[4];
4002 #ifdef SUPPORT_UTF
4003 struct sljit_label *label;
4004 #ifdef SUPPORT_UCP
4005 pcre_uchar propdata[5];
4006 #endif
4007 #endif
4008
4009 switch(type)
4010 {
4011 case OP_SOD:
4012 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4013 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4014 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4015 return cc;
4016
4017 case OP_SOM:
4018 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4019 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4020 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4021 return cc;
4022
4023 case OP_NOT_WORD_BOUNDARY:
4024 case OP_WORD_BOUNDARY:
4025 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4026 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4027 return cc;
4028
4029 case OP_NOT_DIGIT:
4030 case OP_DIGIT:
4031 /* Digits are usually 0-9, so it is worth to optimize them. */
4032 if (common->digits[0] == -2)
4033 get_ctype_ranges(common, ctype_digit, common->digits);
4034 detect_partial_match(common, backtracks);
4035 /* Flip the starting bit in the negative case. */
4036 if (type == OP_NOT_DIGIT)
4037 common->digits[1] ^= 1;
4038 if (!check_ranges(common, common->digits, backtracks, TRUE))
4039 {
4040 read_char8_type(common);
4041 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4042 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4043 }
4044 if (type == OP_NOT_DIGIT)
4045 common->digits[1] ^= 1;
4046 return cc;
4047
4048 case OP_NOT_WHITESPACE:
4049 case OP_WHITESPACE:
4050 detect_partial_match(common, backtracks);
4051 read_char8_type(common);
4052 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4053 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4054 return cc;
4055
4056 case OP_NOT_WORDCHAR:
4057 case OP_WORDCHAR:
4058 detect_partial_match(common, backtracks);
4059 read_char8_type(common);
4060 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4061 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4062 return cc;
4063
4064 case OP_ANY:
4065 detect_partial_match(common, backtracks);
4066 read_char(common);
4067 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4068 {
4069 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4070 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4071 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4072 else
4073 jump[1] = check_str_end(common);
4074
4075 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4076 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4077 if (jump[1] != NULL)
4078 JUMPHERE(jump[1]);
4079 JUMPHERE(jump[0]);
4080 }
4081 else
4082 check_newlinechar(common, common->nltype, backtracks, TRUE);
4083 return cc;
4084
4085 case OP_ALLANY:
4086 detect_partial_match(common, backtracks);
4087 #ifdef SUPPORT_UTF
4088 if (common->utf)
4089 {
4090 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4091 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4092 #ifdef COMPILE_PCRE8
4093 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4094 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4095 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4096 #else /* COMPILE_PCRE8 */
4097 #ifdef COMPILE_PCRE16
4098 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4099 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4100 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4101 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
4102 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4103 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4104 #endif /* COMPILE_PCRE16 */
4105 #endif /* COMPILE_PCRE8 */
4106 JUMPHERE(jump[0]);
4107 return cc;
4108 }
4109 #endif
4110 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4111 return cc;
4112
4113 case OP_ANYBYTE:
4114 detect_partial_match(common, backtracks);
4115 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4116 return cc;
4117
4118 #ifdef SUPPORT_UTF
4119 #ifdef SUPPORT_UCP
4120 case OP_NOTPROP:
4121 case OP_PROP:
4122 propdata[0] = 0;
4123 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4124 propdata[2] = cc[0];
4125 propdata[3] = cc[1];
4126 propdata[4] = XCL_END;
4127 compile_xclass_trypath(common, propdata, backtracks);
4128 return cc + 2;
4129 #endif
4130 #endif
4131
4132 case OP_ANYNL:
4133 detect_partial_match(common, backtracks);
4134 read_char(common);
4135 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4136 /* We don't need to handle soft partial matching case. */
4137 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4138 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4139 else
4140 jump[1] = check_str_end(common);
4141 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4142 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4143 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4144 jump[3] = JUMP(SLJIT_JUMP);
4145 JUMPHERE(jump[0]);
4146 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4147 JUMPHERE(jump[1]);
4148 JUMPHERE(jump[2]);
4149 JUMPHERE(jump[3]);
4150 return cc;
4151
4152 case OP_NOT_HSPACE:
4153 case OP_HSPACE:
4154 detect_partial_match(common, backtracks);
4155 read_char(common);
4156 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4157 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4158 return cc;
4159
4160 case OP_NOT_VSPACE:
4161 case OP_VSPACE:
4162 detect_partial_match(common, backtracks);
4163 read_char(common);
4164 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4165 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4166 return cc;
4167
4168 #ifdef SUPPORT_UCP
4169 case OP_EXTUNI:
4170 detect_partial_match(common, backtracks);
4171 read_char(common);
4172 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4173 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4174 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
4175
4176 label = LABEL();
4177 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4178 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4179 read_char(common);
4180 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4181 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4182 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
4183
4184 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4185 JUMPHERE(jump[0]);
4186 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4187 {
4188 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4189 /* Since we successfully read a char above, partial matching must occur. */
4190 check_partial(common, TRUE);
4191 JUMPHERE(jump[0]);
4192 }
4193 return cc;
4194 #endif
4195
4196 case OP_EODN:
4197 /* Requires rather complex checks. */
4198 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4199 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4200 {
4201 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4202 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4203 if (common->mode == JIT_COMPILE)
4204 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4205 else
4206 {
4207 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4208 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4209 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
4210 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4211 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
4212 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4213 check_partial(common, TRUE);
4214 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4215 JUMPHERE(jump[1]);
4216 }
4217 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4218 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4219 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4220 }
4221 else if (common->nltype == NLTYPE_FIXED)
4222 {
4223 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4224 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4225 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4226 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4227 }
4228 else
4229 {
4230 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4231 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4232 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4233 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4234 jump[2] = JUMP(SLJIT_C_GREATER);
4235 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4236 /* Equal. */
4237 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4238 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4239 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4240
4241 JUMPHERE(jump[1]);
4242 if (common->nltype == NLTYPE_ANYCRLF)
4243 {
4244 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4245 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4246 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4247 }
4248 else
4249 {
4250 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4251 read_char(common);
4252 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4253 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4254 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4255 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4256 }
4257 JUMPHERE(jump[2]);
4258 JUMPHERE(jump[3]);
4259 }
4260 JUMPHERE(jump[0]);
4261 check_partial(common, FALSE);
4262 return cc;
4263
4264 case OP_EOD:
4265 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4266 check_partial(common, FALSE);
4267 return cc;
4268
4269 case OP_CIRC:
4270 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4271 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4272 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4273 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4274 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4275 return cc;
4276
4277 case OP_CIRCM:
4278 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4279 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4280 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4281 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4282 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4283 jump[0] = JUMP(SLJIT_JUMP);
4284 JUMPHERE(jump[1]);
4285
4286 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4287 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4288 {
4289 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4290 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4291 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4292 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4293 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4294 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4295 }
4296 else
4297 {
4298 skip_char_back(common);
4299 read_char(common);
4300 check_newlinechar(common, common->nltype, backtracks, FALSE);
4301 }
4302 JUMPHERE(jump[0]);
4303 return cc;
4304
4305 case OP_DOLL:
4306 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4307 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4308 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4309
4310 if (!common->endonly)
4311 compile_char1_trypath(common, OP_EODN, cc, backtracks);
4312 else
4313 {
4314 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4315 check_partial(common, FALSE);
4316 }
4317 return cc;
4318
4319 case OP_DOLLM:
4320 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4321 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4322 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4323 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4324 check_partial(common, FALSE);
4325 jump[0] = JUMP(SLJIT_JUMP);
4326 JUMPHERE(jump[1]);
4327
4328 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4329 {
4330 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4331 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4332 if (common->mode == JIT_COMPILE)
4333 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4334 else
4335 {
4336 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4337 /* STR_PTR = STR_END - IN_UCHARS(1) */
4338 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4339 check_partial(common, TRUE);
4340 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4341 JUMPHERE(jump[1]);
4342 }
4343
4344 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4345 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4346 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4347 }
4348 else
4349 {
4350 peek_char(common);
4351 check_newlinechar(common, common->nltype, backtracks, FALSE);
4352 }
4353 JUMPHERE(jump[0]);
4354 return cc;
4355
4356 case OP_CHAR:
4357 case OP_CHARI:
4358 length = 1;
4359 #ifdef SUPPORT_UTF
4360 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4361 #endif
4362 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4363 {
4364 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4365 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4366
4367 context.length = IN_UCHARS(length);
4368 context.sourcereg = -1;
4369 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4370 context.ucharptr = 0;
4371 #endif
4372 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4373 }
4374 detect_partial_match(common, backtracks);
4375 read_char(common);
4376 #ifdef SUPPORT_UTF
4377 if (common->utf)
4378 {
4379 GETCHAR(c, cc);
4380 }
4381 else
4382 #endif
4383 c = *cc;
4384 if (type == OP_CHAR || !char_has_othercase(common, cc))
4385 {
4386 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4387 return cc + length;
4388 }
4389 oc = char_othercase(common, c);
4390 bit = c ^ oc;
4391 if (ispowerof2(bit))
4392 {
4393 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4394 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4395 return cc + length;
4396 }
4397 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4398 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4399 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4400 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4401 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4402 return cc + length;
4403
4404 case OP_NOT:
4405 case OP_NOTI:
4406 detect_partial_match(common, backtracks);
4407 length = 1;
4408 #ifdef SUPPORT_UTF
4409 if (common->utf)
4410 {
4411 #ifdef COMPILE_PCRE8
4412 c = *cc;
4413 if (c < 128)
4414 {
4415 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4416 if (type == OP_NOT || !char_has_othercase(common, cc))
4417 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4418 else
4419 {
4420 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4421 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4422 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4423 }
4424 /* Skip the variable-length character. */
4425 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4426 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4427 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4428 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4429 JUMPHERE(jump[0]);
4430 return cc + 1;
4431 }
4432 else
4433 #endif /* COMPILE_PCRE8 */
4434 {
4435 GETCHARLEN(c, cc, length);
4436 read_char(common);
4437 }
4438 }
4439 else
4440 #endif /* SUPPORT_UTF */
4441 {
4442 read_char(common);
4443 c = *cc;
4444 }
4445
4446 if (type == OP_NOT || !char_has_othercase(common, cc))
4447 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4448 else
4449 {
4450 oc = char_othercase(common, c);
4451 bit = c ^ oc;
4452 if (ispowerof2(bit))
4453 {
4454 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4455 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4456 }
4457 else
4458 {
4459 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4460 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4461 }
4462 }
4463 return cc + length;
4464
4465 case OP_CLASS:
4466 case OP_NCLASS:
4467 detect_partial_match(common, backtracks);
4468 read_char(common);
4469 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4470 return cc + 32 / sizeof(pcre_uchar);
4471
4472 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4473 jump[0] = NULL;
4474 #ifdef COMPILE_PCRE8
4475 /* This check only affects 8 bit mode. In other modes, we
4476 always need to compare the value with 255. */
4477 if (common->utf)
4478 #endif /* COMPILE_PCRE8 */
4479 {
4480 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4481 if (type == OP_CLASS)
4482 {
4483 add_jump(compiler, backtracks, jump[0]);
4484 jump[0] = NULL;
4485 }
4486 }
4487 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4488 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4489 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4490 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
4491 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4492 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4493 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4494 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4495 if (jump[0] != NULL)
4496 JUMPHERE(jump[0]);
4497 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4498 return cc + 32 / sizeof(pcre_uchar);
4499
4500 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
4501 case OP_XCLASS:
4502 compile_xclass_trypath(common, cc + LINK_SIZE, backtracks);
4503 return cc + GET(cc, 0) - 1;
4504 #endif
4505
4506 case OP_REVERSE:
4507 length = GET(cc, 0);
4508 if (length == 0)
4509 return cc + LINK_SIZE;
4510 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4511 #ifdef SUPPORT_UTF
4512 if (common->utf)
4513 {
4514 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4515 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4516 label = LABEL();
4517 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4518 skip_char_back(common);
4519 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4520 JUMPTO(SLJIT_C_NOT_ZERO, label);
4521 }
4522 else
4523 #endif
4524 {
4525 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4526 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4527 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4528 }
4529 check_start_used_ptr(common);
4530 return cc + LINK_SIZE;
4531 }
4532 SLJIT_ASSERT_STOP();
4533 return cc;
4534 }
4535
4536 static SLJIT_INLINE pcre_uchar *compile_charn_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4537 {
4538 /* This function consumes at least one input character. */
4539 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4540 DEFINE_COMPILER;
4541 pcre_uchar *ccbegin = cc;
4542 compare_context context;
4543 int size;
4544
4545 context.length = 0;
4546 do
4547 {
4548 if (cc >= ccend)
4549 break;
4550
4551 if (*cc == OP_CHAR)
4552 {
4553 size = 1;
4554 #ifdef SUPPORT_UTF
4555 if (common->utf && HAS_EXTRALEN(cc[1]))
4556 size += GET_EXTRALEN(cc[1]);
4557 #endif
4558 }
4559 else if (*cc == OP_CHARI)
4560 {
4561 size = 1;
4562 #ifdef SUPPORT_UTF
4563 if (common->utf)
4564 {
4565 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4566 size = 0;
4567 else if (HAS_EXTRALEN(cc[1]))
4568 size += GET_EXTRALEN(cc[1]);
4569 }
4570 else
4571 #endif
4572 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4573 size = 0;
4574 }
4575 else
4576 size = 0;
4577
4578 cc += 1 + size;
4579 context.length += IN_UCHARS(size);
4580 }
4581 while (size > 0 && context.length <= 128);
4582
4583 cc = ccbegin;
4584 if (context.length > 0)
4585 {
4586 /* We have a fixed-length byte sequence. */
4587 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4588 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4589
4590 context.sourcereg = -1;
4591 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4592 context.ucharptr = 0;
4593 #endif
4594 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4595 return cc;
4596 }
4597
4598 /* A non-fixed length character will be checked if length == 0. */
4599 return compile_char1_trypath(common, *cc, cc + 1, backtracks);
4600 }
4601
4602 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4603 {
4604 DEFINE_COMPILER;
4605 int offset = GET2(cc, 1) << 1;
4606
4607 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4608 if (!common->jscript_compat)
4609 {
4610 if (backtracks == NULL)
4611 {
4612 /* OVECTOR(1) contains the "string begin - 1" constant. */
4613 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4614 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4615 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4616 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4617 return JUMP(SLJIT_C_NOT_ZERO);
4618 }
4619 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4620 }
4621 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4622 }
4623
4624 /* Forward definitions. */
4625 static void compile_trypath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4626 static void compile_backtrackpath(compiler_common *, struct backtrack_common *);
4627
/* Allocates a zero-filled backtrack record of (size) bytes and pushes it on
the top of the backtrack list of the parent.

The macro relies on three variables of the expanding function: "compiler"
(used for the allocation and the error query), "parent" (whose top member is
updated) and "backtrack" (which receives the new record). If the compiler
reports an error after the allocation, the expanding function returns (error).

Every macro argument is parenthesized where it is used. Note that size is
evaluated twice, so it must not have side effects. */

#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return (error); \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
4640
/* Allocates a zero-filled backtrack record of (size) bytes and pushes it on
the top of the backtrack list of the parent. This variant is for functions
returning void: if the compiler reports an error after the allocation, the
expanding function simply returns.

The macro relies on three variables of the expanding function: "compiler"
(used for the allocation and the error query), "parent" (whose top member is
updated) and "backtrack" (which receives the new record).

Every macro argument is parenthesized where it is used. Note that size is
evaluated twice, so it must not have side effects. */

#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
4653
/* Casts the "backtrack" variable of the expanding function to a pointer to
the given type. */
#define BACKTRACK_AS(backtrack_type) ((backtrack_type *)backtrack)
4655
4656 static pcre_uchar *compile_ref_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4657 {
4658 DEFINE_COMPILER;
4659 int offset = GET2(cc, 1) << 1;
4660 struct sljit_jump *jump = NULL;
4661 struct sljit_jump *partial;
4662 struct sljit_jump *nopartial;
4663
4664 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4665 /* OVECTOR(1) contains the "string begin - 1" constant. */
4666 if (withchecks && !common->jscript_compat)
4667 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4668
4669 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4670 if (common->utf && *cc == OP_REFI)
4671 {
4672 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
4673 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4674 if (withchecks)
4675 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4676
4677 /* Needed to save important temporary registers. */
4678 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4679 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
4680 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4681 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4682 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4683 if (common->mode == JIT_COMPILE)
4684 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4685 else
4686 {
4687 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4688 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4689 check_partial(common, FALSE);
4690 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4691 JUMPHERE(nopartial);
4692 }
4693 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4694 }
4695 else
4696 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4697 {
4698 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4699 if (withchecks)
4700 jump = JUMP(SLJIT_C_ZERO);
4701
4702 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4703 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4704 if (common->mode == JIT_COMPILE)
4705 add_jump(compiler, backtracks, partial);
4706
4707 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4708 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4709
4710 if (common->mode != JIT_COMPILE)
4711 {
4712 nopartial = JUMP(SLJIT_JUMP);
4713 JUMPHERE(partial);
4714 /* TMP2 -= STR_END - STR_PTR */
4715 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4716 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
4717 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4718 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4719 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4720 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4721 JUMPHERE(partial);
4722 check_partial(common, FALSE);
4723 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4724 JUMPHERE(nopartial);
4725 }
4726 }
4727
4728 if (jump != NULL)
4729 {
4730 if (emptyfail)
4731 add_jump(compiler, backtracks, jump);
4732 else
4733 JUMPHERE(jump);
4734 }
4735 return cc + 1 + IMM2_SIZE;
4736 }
4737
4738 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4739 {
4740 DEFINE_COMPILER;
4741 backtrack_common *backtrack;
4742 pcre_uchar type;
4743 struct sljit_label *label;
4744 struct sljit_jump *zerolength;
4745 struct sljit_jump *jump = NULL;
4746 pcre_uchar *ccbegin = cc;
4747 int min = 0, max = 0;
4748 BOOL minimize;
4749
4750 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4751
4752 type = cc[1 + IMM2_SIZE];
4753 minimize = (type & 0x1) != 0;
4754 switch(type)
4755 {
4756 case OP_CRSTAR:
4757 case OP_CRMINSTAR:
4758 min = 0;
4759 max = 0;
4760 cc += 1 + IMM2_SIZE + 1;
4761 break;
4762 case OP_CRPLUS:
4763 case OP_CRMINPLUS:
4764 min = 1;
4765 max = 0;
4766 cc += 1 + IMM2_SIZE + 1;
4767 break;
4768 case OP_CRQUERY:
4769 case OP_CRMINQUERY:
4770 min = 0;
4771 max = 1;
4772 cc += 1 + IMM2_SIZE + 1;
4773 break;
4774 case OP_CRRANGE:
4775 case OP_CRMINRANGE:
4776 min = GET2(cc, 1 + IMM2_SIZE + 1);
4777 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4778 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4779 break;
4780 default:
4781 SLJIT_ASSERT_STOP();
4782 break;
4783 }
4784
4785 if (!minimize)
4786 {
4787 if (min == 0)
4788 {
4789 allocate_stack(common, 2);
4790 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4791 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4792 /* Temporary release of STR_PTR. */
4793 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4794 zerolength = compile_ref_checks(common, ccbegin, NULL);
4795 /* Restore if not zero length. */
4796 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4797 }
4798 else
4799 {
4800 allocate_stack(common, 1);
4801 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4802 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4803 }
4804
4805 if (min > 1 || max > 1)
4806 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4807
4808 label = LABEL();
4809 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4810
4811 if (min > 1 || max > 1)
4812 {
4813 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4814 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4815 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4816 if (min > 1)
4817 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4818 if (max > 1)
4819 {
4820 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4821 allocate_stack(common, 1);
4822 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4823 JUMPTO(SLJIT_JUMP, label);
4824 JUMPHERE(jump);
4825 }
4826 }
4827
4828 if (max == 0)
4829 {
4830 /* Includes min > 1 case as well. */
4831 allocate_stack(common, 1);
4832 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4833 JUMPTO(SLJIT_JUMP, label);
4834 }
4835
4836 JUMPHERE(zerolength);
4837 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4838
4839 decrease_call_count(common);
4840 return cc;
4841 }
4842
4843 allocate_stack(common, 2);
4844 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4845 if (type != OP_CRMINSTAR)
4846 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4847
4848 if (min == 0)
4849 {
4850 zerolength = compile_ref_checks(common, ccbegin, NULL);
4851 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4852 jump = JUMP(SLJIT_JUMP);
4853 }
4854 else
4855 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4856
4857 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4858 if (max > 0)
4859 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4860
4861 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4862 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4863
4864 if (min > 1)
4865 {
4866 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4867 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4868 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4869 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->trypath);
4870 }
4871 else if (max > 0)
4872 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4873
4874 if (jump != NULL)
4875 JUMPHERE(jump);
4876 JUMPHERE(zerolength);
4877
4878 decrease_call_count(common);
4879 return cc;
4880 }
4881
4882 static SLJIT_INLINE pcre_uchar *compile_recurse_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4883 {
4884 DEFINE_COMPILER;
4885 backtrack_common *backtrack;
4886 recurse_entry *entry = common->entries;
4887 recurse_entry *prev = NULL;
4888 int start = GET(cc, 1);
4889
4890 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
4891 while (entry != NULL)
4892 {
4893 if (entry->start == start)
4894 break;
4895 prev = entry;
4896 entry = entry->next;
4897 }
4898
4899 if (entry == NULL)
4900 {
4901 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4902 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4903 return NULL;
4904 entry->next = NULL;
4905 entry->entry = NULL;
4906 entry->calls = NULL;
4907 entry->start = start;
4908
4909 if (prev != NULL)
4910 prev->next = entry;
4911 else
4912 common->entries = entry;
4913 }
4914
4915 if (common->has_set_som && common->mark_ptr != 0)
4916 {
4917 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4918 allocate_stack(common, 2);
4919 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
4920 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4921 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4922 }
4923 else if (common->has_set_som || common->mark_ptr != 0)
4924 {
4925 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
4926 allocate_stack(common, 1);
4927 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4928 }
4929
4930 if (entry->entry == NULL)
4931 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4932 else
4933 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4934 /* Leave if the match is failed. */
4935 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4936 return cc + 1 + LINK_SIZE;
4937 }
4938
4939 static pcre_uchar *compile_assert_trypath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
4940 {
4941 DEFINE_COMPILER;
4942 int framesize;
4943 int localptr;
4944 backtrack_common altbacktrack;
4945 pcre_uchar *ccbegin;
4946 pcre_uchar opcode;
4947 pcre_uchar bra = OP_BRA;
4948 jump_list *tmp = NULL;
4949 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
4950 jump_list **found;
4951 /* Saving previous accept variables. */
4952 struct sljit_label *save_quitlabel = common->quitlabel;
4953 struct sljit_label *save_acceptlabel = common->acceptlabel;
4954 jump_list *save_quit = common->quit;
4955 jump_list *save_accept = common->accept;
4956 struct sljit_jump *jump;
4957 struct sljit_jump *brajump = NULL;
4958
4959 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4960 {
4961 SLJIT_ASSERT(!conditional);
4962 bra = *cc;
4963 cc++;
4964 }
4965 localptr = PRIV_DATA(cc);
4966 SLJIT_ASSERT(localptr != 0);
4967 framesize = get_framesize(common, cc, FALSE);
4968 backtrack->framesize = framesize;
4969 backtrack->localptr = localptr;
4970 opcode = *cc;
4971 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
4972 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
4973 ccbegin = cc;
4974 cc += GET(cc, 1);
4975
4976 if (bra == OP_BRAMINZERO)
4977 {
4978 /* This is a braminzero backtrack path. */
4979 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4980 free_stack(common, 1);
4981 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4982 }
4983
4984 if (framesize < 0)
4985 {
4986 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4987 allocate_stack(common, 1);
4988 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4989 }
4990 else
4991 {
4992 allocate_stack(common, framesize + 2);
4993 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4994 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
4995 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4996 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4997 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4998 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
4999 }
5000
5001 memset(&altbacktrack, 0, sizeof(backtrack_common));
5002 common->quitlabel = NULL;
5003 common->quit = NULL;
5004 while (1)
5005 {
5006 common->acceptlabel = NULL;
5007 common->accept = NULL;
5008 altbacktrack.top = NULL;
5009 altbacktrack.topbacktracks = NULL;
5010
5011 if (*ccbegin == OP_ALT)
5012 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5013
5014 altbacktrack.cc = ccbegin;
5015 compile_trypath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5016 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5017 {
5018 common->quitlabel = save_quitlabel;
5019 common->acceptlabel = save_acceptlabel;
5020 common->quit = save_quit;
5021 common->accept = save_accept;
5022 return NULL;
5023 }
5024 common->acceptlabel = LABEL();
5025 if (common->accept != NULL)
5026 set_jumps(common->accept, common->acceptlabel);
5027
5028 /* Reset stack. */
5029 if (framesize < 0)
5030 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5031 else {
5032 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5033 {
5034 /* We don't need to keep the STR_PTR, only the previous localptr. */
5035 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5036 }
5037 else
5038 {
5039 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5040 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5041 }
5042 }
5043
5044 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5045 {
5046 /* We know that STR_PTR was stored on the top of the stack. */
5047 if (conditional)
5048 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5049 else if (bra == OP_BRAZERO)
5050 {
5051 if (framesize < 0)
5052 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5053 else
5054 {
5055 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5056 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
5057 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5058 }
5059 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5060 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5061 }
5062 else if (framesize >= 0)
5063 {
5064 /* For OP_BRA and OP_BRAMINZERO. */
5065 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5066 }
5067 }
5068 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5069
5070 compile_backtrackpath(common, altbacktrack.top);
5071 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5072 {
5073 common->quitlabel = save_quitlabel;
5074 common->acceptlabel = save_acceptlabel;
5075 common->quit = save_quit;
5076 common->accept = save_accept;
5077 return NULL;
5078 }
5079 set_jumps(altbacktrack.topbacktracks, LABEL());
5080
5081 if (*cc != OP_ALT)
5082 break;
5083
5084 ccbegin = cc;
5085 cc += GET(cc, 1);
5086 }
5087 /* None of them matched. */
5088 if (common->quit != NULL)
5089 set_jumps(common->quit, LABEL());
5090
5091 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5092 {
5093 /* Assert is failed. */
5094 if (conditional || bra == OP_BRAZERO)
5095 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5096
5097 if (framesize < 0)
5098 {
5099 /* The topmost item should be 0. */
5100 if (bra == OP_BRAZERO)
5101 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5102 else
5103 free_stack(common, 1);
5104 }
5105 else
5106 {
5107 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5108 /* The topmost item should be 0. */
5109 if (bra == OP_BRAZERO)
5110 {
5111 free_stack(common, framesize + 1);
5112 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5113 }
5114 else
5115 free_stack(common, framesize + 2);
5116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5117 }
5118 jump = JUMP(SLJIT_JUMP);
5119 if (bra != OP_BRAZERO)
5120 add_jump(compiler, target, jump);
5121
5122 /* Assert is successful. */
5123 set_jumps(tmp, LABEL());
5124 if (framesize < 0)
5125 {
5126 /* We know that STR_PTR was stored on the top of the stack. */
5127 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5128 /* Keep the STR_PTR on the top of the stack. */
5129 if (bra == OP_BRAZERO)
5130 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5131 else if (bra == OP_BRAMINZERO)
5132 {
5133 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5134 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5135 }
5136 }
5137 else
5138 {
5139 if (bra == OP_BRA)
5140 {
5141 /* We don't need to keep the STR_PTR, only the previous localptr. */
5142 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5143 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5144 }
5145 else
5146 {
5147 /* We don't need to keep the STR_PTR, only the previous localptr. */
5148 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
5149 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5150 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5151 }
5152 }
5153
5154 if (bra == OP_BRAZERO)
5155 {
5156 backtrack->trypath = LABEL();
5157 sljit_set_label(jump, backtrack->trypath);
5158 }
5159 else if (bra == OP_BRAMINZERO)
5160 {
5161 JUMPTO(SLJIT_JUMP, backtrack->trypath);
5162 JUMPHERE(brajump);
5163 if (framesize >= 0)
5164 {
5165 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5166 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5167 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5168 }
5169 set_jumps(backtrack->common.topbacktracks, LABEL());
5170 }
5171 }
5172 else
5173 {
5174 /* AssertNot is successful. */
5175 if (framesize < 0)
5176 {
5177 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5178 if (bra != OP_BRA)
5179 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5180 else
5181 free_stack(common, 1);
5182 }
5183 else
5184 {
5185 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5186 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5187 /* The topmost item should be 0. */
5188 if (bra != OP_BRA)
5189 {
5190 free_stack(common, framesize + 1);
5191 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5192 }
5193 else
5194 free_stack(common, framesize + 2);
5195 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5196 }
5197
5198 if (bra == OP_BRAZERO)
5199 backtrack->trypath = LABEL();
5200 else if (bra == OP_BRAMINZERO)
5201 {
5202 JUMPTO(SLJIT_JUMP, backtrack->trypath);
5203 JUMPHERE(brajump);
5204 }
5205
5206 if (bra != OP_BRA)
5207 {
5208 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5209 set_jumps(backtrack->common.topbacktracks, LABEL());
5210 backtrack->common.topbacktracks = NULL;
5211 }
5212 }
5213
5214 common->quitlabel = save_quitlabel;
5215 common->acceptlabel = save_acceptlabel;
5216 common->quit = save_quit;
5217 common->accept = save_accept;
5218 return cc + 1 + LINK_SIZE;
5219 }
5220
5221 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
5222 {
5223 int condition = FALSE;
5224 pcre_uchar *slotA = name_table;
5225 pcre_uchar *slotB;
5226 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5227 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5228 sljit_w no_capture;
5229 int i;
5230
5231 locals += refno & 0xff;
5232 refno >>= 8;
5233 no_capture = locals[1];
5234
5235 for (i = 0; i < name_count; i++)
5236 {
5237 if (GET2(slotA, 0) == refno) break;
5238 slotA += name_entry_size;
5239 }
5240
5241 if (i < name_count)
5242 {
5243 /* Found a name for the number - there can be only one; duplicate names
5244 for different numbers are allowed, but not vice versa. First scan down
5245 for duplicates. */
5246
5247 slotB = slotA;
5248 while (slotB > name_table)
5249 {
5250 slotB -= name_entry_size;
5251 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5252 {
5253 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5254 if (condition) break;
5255 }
5256 else break;
5257 }
5258
5259 /* Scan up for duplicates */
5260 if (!condition)
5261 {
5262 slotB = slotA;
5263 for (i++; i < name_count; i++)
5264 {
5265 slotB += name_entry_size;
5266 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5267 {
5268 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5269 if (condition) break;
5270 }
5271 else break;
5272 }
5273 }
5274 }
5275 return condition;
5276 }
5277
5278 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
5279 {
5280 int condition = FALSE;
5281 pcre_uchar *slotA = name_table;
5282 pcre_uchar *slotB;
5283 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5284 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5285 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
5286 int i;
5287
5288 for (i = 0; i < name_count; i++)
5289 {
5290 if (GET2(slotA, 0) == recno) break;
5291 slotA += name_entry_size;
5292 }
5293
5294 if (i < name_count)
5295 {
5296 /* Found a name for the number - there can be only one; duplicate
5297 names for different numbers are allowed, but not vice versa. First
5298 scan down for duplicates. */
5299
5300 slotB = slotA;
5301 while (slotB > name_table)
5302 {
5303 slotB -= name_entry_size;
5304 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5305 {
5306 condition = GET2(slotB, 0) == group_num;
5307 if (condition) break;
5308 }
5309 else break;
5310 }
5311
5312 /* Scan up for duplicates */
5313 if (!condition)
5314 {
5315 slotB = slotA;
5316 for (i++; i < name_count; i++)
5317 {
5318 slotB += name_entry_size;
5319 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5320 {
5321 condition = GET2(slotB, 0) == group_num;
5322 if (condition) break;
5323 }
5324 else break;
5325 }
5326 }
5327 }
5328 return condition;
5329 }
5330
5331 /*
5332 Handling bracketed expressions is probably the most complex part.
5333
5334 Stack layout naming characters:
5335 S - Push the current STR_PTR
5336 0 - Push a 0 (NULL)
5337 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5338 before the next alternative. Not pushed if there are no alternatives.
5339 M - Any values pushed by the current alternative. Can be empty, or anything.
5340 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5341 L - Push the previous local (pointed by localptr) to the stack
5342 () - optional values stored on the stack
5343 ()* - optional, can be stored multiple times
5344
5345 The following list shows the regular expression templates, their PCRE byte codes
5346 and stack layout supported by pcre-sljit.
5347
5348 (?:) OP_BRA | OP_KET A M
5349 () OP_CBRA | OP_KET C M
5350 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5351 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5352 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5353 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5354 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5355 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5356 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5357 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5358 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5359 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5360 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5361 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5362 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5363 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5364 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5365 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5366 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5367 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5368 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5369 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5370
5371
5372 Stack layout naming characters:
5373 A - Push the alternative index (starting from 0) on the stack.
5374 Not pushed if there are no alternatives.
5375 M - Any values pushed by the current alternative. Can be empty, or anything.
5376
5377 The next list shows the possible content of a bracket:
5378 (|) OP_*BRA | OP_ALT ... M A
5379 (?()|) OP_*COND | OP_ALT M A
5380 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5381 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5382 Or nothing, if trace is unnecessary
5383 */
5384
/* Generates the try-path (forward matching) code for one bracketed group.

   The group opcode at cc may be preceded by OP_BRAZERO or OP_BRAMINZERO. The
   body special-cases OP_CBRA/OP_SCBRA (capturing), OP_SBRA, OP_ONCE
   (OP_ONCE_NC is handled as OP_ONCE) and OP_COND/OP_SCOND; every other
   opcode takes the plain paths. The stack layout produced for each
   bracket/ket combination is listed in the comment in front of this
   function.

   Arguments:
     common   the compiler state
     cc       points to the bracket opcode, or to the OP_BRAZERO or
              OP_BRAMINZERO in front of it
     parent   the backtrack node this bracket is pushed under; for
              OP_BRAMINZERO it is cast to braminzero_backtrack to reach
              its trypath label

   Returns:   the position 1 + LINK_SIZE past the closing ket,
              bracketend(cc) for a conditional group that starts with
              OP_DEF, or NULL when the compiler reported an error

   NOTE(review): PUSH_BACKTRACK and BACKTRACK_AS are macros defined
   elsewhere; PUSH_BACKTRACK presumably allocates the bracket_backtrack
   node, links it under parent and assigns `backtrack` - confirm. */
5385 static pcre_uchar *compile_bracket_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5386 {
5387 DEFINE_COMPILER;
5388 backtrack_common *backtrack;
5389 pcre_uchar opcode;
5390 int localptr = 0;
5391 int offset = 0;
/* stacksize is also reused below as a scratch value for reference numbers. */
5392 int stacksize;
5393 pcre_uchar *ccbegin;
5394 pcre_uchar *trypath;
5395 pcre_uchar bra = OP_BRA;
5396 pcre_uchar ket;
5397 assert_backtrack *assert;
5398 BOOL has_alternatives;
5399 struct sljit_jump *jump;
5400 struct sljit_jump *skip;
5401 struct sljit_label *rmaxlabel = NULL;
5402 struct sljit_jump *braminzerojump = NULL;
5403
5404 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5405
/* Remember an optional zero-repeat prefix in bra and step over it.
   NOTE(review): the assignment to opcode inside this block is repeated
   unconditionally right after it, so the inner one is redundant. */
5406 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5407 {
5408 bra = *cc;
5409 cc++;
5410 opcode = *cc;
5411 }
5412
/* ccbegin keeps the start of the bracket; trypath points just past the
   opcode and its LINK_SIZE link. */
5413 opcode = *cc;
5414 ccbegin = cc;
5415 trypath = ccbegin + 1 + LINK_SIZE;
5416
/* A conditional group whose condition is OP_DEF emits no code at all:
   the node pushed above is unlinked from parent and the group is skipped. */
5417 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5418 {
5419 /* Drop this bracket_backtrack. */
5420 parent->top = backtrack->prev;
5421 return bracketend(cc);
5422 }
5423
/* ket is the code unit found 1 + LINK_SIZE before the end of the group. */
5424 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5425 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5426 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Advance cc by the link stored after the opcode; the code below expects
   to find either OP_ALT or the ket there. */
5427 cc += GET(cc, 1);
5428
5429 has_alternatives = *cc == OP_ALT;
/* Conditional groups recompute has_alternatives from the condition type.
   It is FALSE for OP_RREF. For OP_NRREF it is FALSE when no recursion entry
   is being compiled (currententry == NULL) or the reference is RREF_ANY;
   otherwise it tells whether the referenced number differs from the number
   of the current entry (0 when currententry->start is 0). */
5430 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5431 {
5432 has_alternatives = (*trypath == OP_RREF) ? FALSE : TRUE;
5433 if (*trypath == OP_NRREF)
5434 {
5435 stacksize = GET2(trypath, 1);
5436 if (common->currententry == NULL || stacksize == RREF_ANY)
5437 has_alternatives = FALSE;
5438 else if (common->currententry->start == 0)
5439 has_alternatives = stacksize != 0;
5440 else
5441 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5442 }
5443 }
5444
/* From here on an OP_COND closed by a repeating ket is treated as
   OP_SCOND, and OP_ONCE_NC as OP_ONCE. */
5445 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5446 opcode = OP_SCOND;
5447 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5448 opcode = OP_ONCE;
5449
/* Select the local slot (localptr) used by this group and store it in the
   backtrack node. offset != 0 later identifies a capturing group. */
5450 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5451 {
5452 /* Capturing brackets has a pre-allocated space. */
5453 offset = GET2(ccbegin, 1 + LINK_SIZE);
5454 localptr = OVECTOR_PRIV(offset);
5455 offset <<= 1;
5456 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
5457 trypath += IMM2_SIZE;
5458 }
5459 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5460 {
5461 /* Other brackets simply allocate the next entry. */
5462 localptr = PRIV_DATA(ccbegin);
5463 SLJIT_ASSERT(localptr != 0);
5464 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
5465 if (opcode == OP_ONCE)
5466 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5467 }
5468
5469 /* Instructions before the first alternative. */
/* First count the words to allocate, then fill them: a 0 for repeated
   groups (except OP_BRAMINZERO closed by OP_KETRMIN) and the current
   STR_PTR for OP_BRAZERO. */
5470 stacksize = 0;
5471 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5472 stacksize++;
5473 if (bra == OP_BRAZERO)
5474 stacksize++;
5475
5476 if (stacksize > 0)
5477 allocate_stack(common, stacksize);
5478
5479 stacksize = 0;
5480 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5481 {
5482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5483 stacksize++;
5484 }
5485
5486 if (bra == OP_BRAZERO)
5487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5488
5489 if (bra == OP_BRAMINZERO)
5490 {
5491 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5492 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* With a non-repeating ket the loaded word is popped, and a zero value
   takes braminzerojump (bound near the end of this function). */
5493 if (ket != OP_KETRMIN)
5494 {
5495 free_stack(common, 1);
5496 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5497 }
5498 else
5499 {
5500 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5501 {
/* A zero top word: reload STR_PTR from the next stack word and skip the
   zero-length check. */
5502 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5503 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5504 /* Nothing stored during the first run. */
5505 skip = JUMP(SLJIT_JUMP);
5506 JUMPHERE(jump);
5507 /* Checking zero-length iteration. */
5508 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5509 {
5510 /* When we come from outside, localptr contains the previous STR_PTR. */
5511 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5512 }
5513 else
5514 {
5515 /* Except when the whole stack frame must be saved. */
5516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5517 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
5518 }
5519 JUMPHERE(skip);
5520 }
5521 else
5522 {
5523 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5524 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5525 JUMPHERE(jump);
5526 }
5527 }
5528 }
5529
/* Loop entry labels of the repeated forms. */
5530 if (ket == OP_KETRMIN)
5531 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
5532
5533 if (ket == OP_KETRMAX)
5534 {
5535 rmaxlabel = LABEL();
5536 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5537 BACKTRACK_AS(bracket_backtrack)->alttrypath = rmaxlabel;
5538 }
5539
5540 /* Handling capturing brackets and alternatives. */
5541 if (opcode == OP_ONCE)
5542 {
5543 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5544 {
5545 /* Neither capturing brackets nor recursions are found in the block. */
5546 if (ket == OP_KETRMIN)
5547 {
5548 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5549 allocate_stack(common, 2);
5550 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5551 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5552 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5553 }
5554 else if (ket == OP_KETRMAX || has_alternatives)
5555 {
5556 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5557 allocate_stack(common, 1);
5558 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5559 }
5560 else
5561 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5562 }
5563 else
5564 {
/* framesize >= 0: room for the frame is allocated together with the
   previous local value (and STR_PTR when the group repeats or has
   alternatives), then init_frame fills the frame. */
5565 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5566 {
5567 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5568 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5569 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5570 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5572 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5573 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5574 }
5575 else
5576 {
5577 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5578 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5579 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5580 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5581 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5582 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5583 }
5584 }
5585 }
5586 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5587 {
5588 /* Saving the previous values. */
/* The two ovector words and the previous local value are pushed; the
   local slot then receives the current STR_PTR. */
5589 allocate_stack(common, 3);
5590 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5591 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5592 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5593 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5594 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5595 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
5596 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5597 }
5598 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5599 {
5600 /* Saving the previous value. */
5601 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5602 allocate_stack(common, 1);
5603 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
5604 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5605 }
5606 else if (has_alternatives)
5607 {
5608 /* Pushing the starting string pointer. */
5609 allocate_stack(common, 1);
5610 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5611 }
5612
5613 /* Generating code for the first alternative. */
5614 if (opcode == OP_COND || opcode == OP_SCOND)
5615 {
5616 if (*trypath == OP_CREF)
5617 {
/* The condition fails (jump added to u.condfailed) when the start word
   of the referenced group equals OVECTOR(1). */
5618 SLJIT_ASSERT(has_alternatives);
5619 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5620 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(trypath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5621 trypath += 1 + IMM2_SIZE;
5622 }
5623 else if (*trypath == OP_NCREF)
5624 {
/* When the start word of the referenced group differs from OVECTOR(1)
   the condition holds immediately. Otherwise do_searchovector is called
   at run time with the name table parameters placed in LOCALS0/LOCALS1,
   and a zero result fails the condition. STACK_TOP is kept in POSSESSIVE1
   around the call. */
5625 SLJIT_ASSERT(has_alternatives);
5626 stacksize = GET2(trypath, 1);
5627 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5628
5629 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5630 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5631 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5632 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
5633 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5634 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5635 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5636 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5637 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5638
5639 JUMPHERE(jump);
5640 trypath += 1 + IMM2_SIZE;
5641 }
5642 else if (*trypath == OP_RREF || *trypath == OP_NRREF)
5643 {
5644 /* Never has other case. */
5645 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5646
/* stacksize is turned into the compile-time truth value of the
   condition: 0 outside of any recursion entry, 1 for RREF_ANY, otherwise
   whether the referenced number equals the number of the current entry. */
5647 stacksize = GET2(trypath, 1);
5648 if (common->currententry == NULL)
5649 stacksize = 0;
5650 else if (stacksize == RREF_ANY)
5651 stacksize = 1;
5652 else if (common->currententry->start == 0)
5653 stacksize = stacksize == 0;
5654 else
5655 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5656
5657 if (*trypath == OP_RREF || stacksize || common->currententry == NULL)
5658 {
/* Decided at compile time: either the first branch is compiled (true),
   or the branch following OP_ALT, or nothing when there is no OP_ALT. */
5659 SLJIT_ASSERT(!has_alternatives);
5660 if (stacksize != 0)
5661 trypath += 1 + IMM2_SIZE;
5662 else
5663 {
5664 if (*cc == OP_ALT)
5665 {
5666 trypath = cc + 1 + LINK_SIZE;
5667 cc += GET(cc, 1);
5668 }
5669 else
5670 trypath = cc;
5671 }
5672 }
5673 else
5674 {
/* OP_NRREF that cannot be decided here: do_searchgroups is called at
   run time with the number of the current entry placed in POSSESSIVE0,
   and a zero result fails the condition. */
5675 SLJIT_ASSERT(has_alternatives);
5676
5677 stacksize = GET2(trypath, 1);
5678 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5679 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5680 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5681 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5682 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
5683 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5684 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5685 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5686 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5687 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5688 trypath += 1 + IMM2_SIZE;
5689 }
5690 }
5691 else
5692 {
/* The condition is an assertion: a separate assert_backtrack node is
   allocated by hand and compiled in conditional mode (last argument). */
5693 SLJIT_ASSERT(has_alternatives && *trypath >= OP_ASSERT && *trypath <= OP_ASSERTBACK_NOT);
5694 /* Similar code as PUSH_BACKTRACK macro. */
5695 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5696 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5697 return NULL;
5698 memset(assert, 0, sizeof(assert_backtrack));
5699 assert->common.cc = trypath;
5700 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5701 trypath = compile_assert_trypath(common, trypath, assert, TRUE);
5702 }
5703 }
5704
/* Compile the selected alternative, from trypath up to cc. */
5705 compile_trypath(common, trypath, cc, backtrack);
5706 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5707 return NULL;
5708
/* After an atomic group matched, STACK_TOP is recomputed from the value
   kept in the local slot. */
5709 if (opcode == OP_ONCE)
5710 {
5711 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5712 {
5713 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5714 /* TMP2 which is set here used by OP_KETRMAX below. */
5715 if (ket == OP_KETRMAX)
5716 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5717 else if (ket == OP_KETRMIN)
5718 {
5719 /* Move the STR_PTR to the localptr. */
5720 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
5721 }
5722 }
5723 else
5724 {
5725 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5726 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
5727 if (ket == OP_KETRMAX)
5728 {
5729 /* TMP2 which is set here used by OP_KETRMAX below. */
5730 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5731 }
5732 }
5733 }
5734
/* Words pushed after the alternative: STR_PTR for a repeating ket, or 0
   for a zero-repeat prefix closed by plain OP_KET, followed by a 0 when
   the group has alternatives and is not OP_ONCE. */
5735 stacksize = 0;
5736 if (ket != OP_KET || bra != OP_BRA)
5737 stacksize++;
5738 if (has_alternatives && opcode != OP_ONCE)
5739 stacksize++;
5740
5741 if (stacksize > 0)
5742 allocate_stack(common, stacksize);
5743
5744 stacksize = 0;
5745 if (ket != OP_KET)
5746 {
5747 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5748 stacksize++;
5749 }
5750 else if (bra != OP_BRA)
5751 {
5752 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5753 stacksize++;
5754 }
5755
5756 if (has_alternatives)
5757 {
5758 if (opcode != OP_ONCE)
5759 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5760 if (ket != OP_KETRMAX)
5761 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5762 }
5763
5764 /* Must be after the trypath label. */
/* Capturing group: the start saved in the local slot and the current
   STR_PTR are written to the two ovector words of the group. */
5765 if (offset != 0)
5766 {
5767 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5768 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5769 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5770 }
5771
/* OP_KETRMAX: jump back to rmaxlabel. For OP_ONCE and opcodes >= OP_SBRA
   the jump is taken only when STR_PTR differs from the iteration start. */
5772 if (ket == OP_KETRMAX)
5773 {
5774 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5775 {
5776 if (has_alternatives)
5777 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5778 /* Checking zero-length iteration. */
5779 if (opcode != OP_ONCE)
5780 {
5781 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
5782 /* Drop STR_PTR for greedy plus quantifier. */
5783 if (bra != OP_BRAZERO)
5784 free_stack(common, 1);
5785 }
5786 else
5787 /* TMP2 must contain the starting STR_PTR. */
5788 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5789 }
5790 else
5791 JUMPTO(SLJIT_JUMP, rmaxlabel);
5792 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
5793 }
5794
5795 if (bra == OP_BRAZERO)
5796 BACKTRACK_AS(bracket_backtrack)->zerotrypath = LABEL();
5797
5798 if (bra == OP_BRAMINZERO)
5799 {
5800 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5801 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->trypath);
5802 if (braminzerojump != NULL)
5803 {
5804 JUMPHERE(braminzerojump);
5805 /* We need to release the end pointer to perform the
5806 backtrack for the zero-length iteration. When
5807 framesize is < 0, OP_ONCE will do the release itself. */
5808 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5809 {
5810 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5811 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5812 }
5813 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5814 free_stack(common, 1);
5815 }
5816 /* Continue to the normal backtrack. */
5817 }
5818
5819 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5820 decrease_call_count(common);
5821
5822 /* Skip the other alternatives. */
5823 while (*cc == OP_ALT)
5824 cc += GET(cc, 1);
5825 cc += 1 + LINK_SIZE;
5826 return cc;
5827 }
5828
5829 static pcre_uchar *compile_bracketpos_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5830 {
5831 DEFINE_COMPILER;
5832 backtrack_common *backtrack;
5833 pcre_uchar opcode;
5834 int localptr;
5835 int cbraprivptr = 0;
5836 int framesize;
5837 int stacksize;
5838 int offset = 0;
5839 BOOL zero = FALSE;
5840 pcre_uchar *ccbegin = NULL;
5841 int stack;
5842 struct sljit_label *loop = NULL;
5843 struct jump_list *emptymatch = NULL;
5844
5845 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
5846 if (*cc == OP_BRAPOSZERO)
5847 {
5848 zero = TRUE;
5849 cc++;
5850 }
5851
5852 opcode = *cc;
5853 localptr = PRIV_DATA(cc);
5854 SLJIT_ASSERT(localptr != 0);
5855 BACKTRACK_AS(bracketpos_backtrack)->localptr = localptr;
5856 switch(opcode)
5857 {
5858 case OP_BRAPOS:
5859 case OP_SBRAPOS:
5860 ccbegin = cc + 1 + LINK_SIZE;
5861 break;
5862
5863 case OP_CBRAPOS:
5864 case OP_SCBRAPOS:
5865 offset = GET2(cc, 1 + LINK_SIZE);
5866 cbraprivptr = OVECTOR_PRIV(offset);
5867 offset <<= 1;
5868 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5869 break;
5870
5871 default:
5872 SLJIT_ASSERT_STOP();
5873 break;
5874 }
5875
5876 framesize = get_framesize(common, cc, FALSE);
5877 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
5878 if (framesize < 0)
5879 {
5880 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
5881 if (!zero)
5882 stacksize++;
5883 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5884 allocate_stack(common, stacksize);
5885 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5886
5887 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5888 {
5889 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5890 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5891 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5892 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5893 }
5894 else
5895 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5896
5897 if (!zero)
5898 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
5899 }
5900 else
5901 {
5902 stacksize = framesize + 1;
5903 if (!zero)
5904 stacksize++;
5905 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5906 stacksize++;
5907 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5908 allocate_stack(common, stacksize);
5909
5910 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5911 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5912 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5913 stack = 0;
5914 if (!zero)
5915 {
5916 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5917 stack++;
5918 }
5919 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5920 {
5921 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
5922 stack++;
5923 }
5924 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
5925 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
5926 }
5927
5928 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5929 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5930
5931 loop = LABEL();
5932 while (*cc != OP_KETRPOS)
5933 {
5934 backtrack->top = NULL;
5935 backtrack->topbacktracks = NULL;
5936 cc += GET(cc, 1);
5937
5938 compile_trypath(common, ccbegin, cc, backtrack);
5939 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5940 return NULL;
5941
5942 if (framesize < 0)
5943 {
5944 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5945
5946 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5947 {
5948 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5949 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5950 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5951 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5952 }
5953 else
5954 {
5955 if (opcode == OP_SBRAPOS)
5956 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5957 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5958 }
5959
5960 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5961 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5962
5963 if (!zero)
5964 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5965 }
5966 else
5967 {
5968 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5969 {
5970 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
5971 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5972 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5973 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5974 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5975 }
5976 else
5977 {
5978 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5979 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
5980 if (opcode == OP_SBRAPOS)
5981 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5982 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
5983 }
5984
5985 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5986 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5987
5988 if (!zero)
5989 {
5990 if (framesize < 0)
5991 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5992 else
5993 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5994 }
5995 }
5996 JUMPTO(SLJIT_JUMP, loop);
5997 flush_stubs(common);
5998
5999 compile_backtrackpath(common, backtrack->top);
6000 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6001 return NULL;
6002 set_jumps(backtrack->topbacktracks, LABEL());
6003
6004 if (framesize < 0)
6005 {
6006 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6007 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6008 else
6009 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6010 }
6011 else
6012 {
6013 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6014 {
6015 /* Last alternative. */
6016 if (*cc == OP_KETRPOS)
6017 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6018 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6019 }
6020 else
6021 {
6022 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6023 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6024 }
6025 }
6026
6027 if (*cc == OP_KETRPOS)
6028 break;
6029 ccbegin = cc + 1 + LINK_SIZE;
6030 }
6031
6032 backtrack->topbacktracks = NULL;
6033 if (!zero)
6034 {
6035 if (framesize < 0)
6036 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6037 else /* TMP2 is set to [localptr] above. */
6038 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
6039 }
6040
6041 /* None of them matched. */
6042 set_jumps(emptymatch, LABEL());
6043 decrease_call_count(common);
6044 return cc + 1 + LINK_SIZE;
6045 }
6046
6047 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6048 {
6049 int class_len;
6050
6051 *opcode = *cc;
6052 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6053 {
6054 cc++;
6055 *type = OP_CHAR;
6056 }
6057 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6058 {
6059 cc++;
6060 *type = OP_CHARI;
6061 *opcode -= OP_STARI - OP_STAR;
6062 }
6063 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6064 {
6065 cc++;
6066 *type = OP_NOT;
6067 *opcode -= OP_NOTSTAR - OP_STAR;
6068 }
6069 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6070 {
6071 cc++;
6072 *type = OP_NOTI;
6073 *opcode -= OP_NOTSTARI - OP_STAR;
6074 }
6075 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6076 {
6077 cc++;
6078 *opcode -= OP_TYPESTAR - OP_STAR;
6079 *type = 0;
6080 }
6081 else
6082 {
6083 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6084 *type = *opcode;
6085 cc++;
6086 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6087 *opcode = cc[class_len - 1];
6088 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6089 {
6090 *opcode -= OP_CRSTAR - OP_STAR;
6091 if (end != NULL)
6092 *end = cc + class_len;
6093 }
6094 else
6095 {
6096 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6097 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6098 *arg2 = GET2(cc, class_len);
6099
6100 if (*arg2 == 0)
6101 {
6102 SLJIT_ASSERT(*arg1 != 0);
6103 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6104 }
6105 if (*arg1 == *arg2)
6106 *opcode = OP_EXACT;
6107
6108 if (end != NULL)
6109 *end = cc + class_len + 2 * IMM2_SIZE;
6110 }
6111 return cc;
6112 }
6113
6114 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6115 {
6116 *arg1 = GET2(cc, 0);
6117 cc += IMM2_SIZE;
6118 }
6119
6120 if (*type == 0)
6121 {
6122 *type = *cc;
6123 if (end != NULL)
6124 *end = next_opcode(common, cc);
6125 cc++;
6126 return cc;
6127 }
6128
6129 if (end != NULL)
6130 {
6131 *end = cc + 1;
6132 #ifdef SUPPORT_UTF
6133 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6134 #endif
6135 }
6136 return cc;
6137 }
6138
6139 static pcre_uchar *compile_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6140 {
6141 DEFINE_COMPILER;
6142 backtrack_common *backtrack;
6143 pcre_uchar opcode;
6144 pcre_uchar type;
6145 int arg1 = -1, arg2 = -1;
6146 pcre_uchar* end;
6147 jump_list *nomatch = NULL;
6148 struct sljit_jump *jump = NULL;
6149 struct sljit_label *label;
6150 int localptr = PRIV_DATA(cc);
6151 int base = (localptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6152 int offset0 = (localptr == 0) ? STACK(0) : localptr;
6153 int offset1 = (localptr == 0) ? STACK(1) : localptr + (int)sizeof(sljit_w);
6154 int tmp_base, tmp_offset;
6155
6156 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6157
6158 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6159
6160 switch (type)
6161 {
6162 case OP_NOT_DIGIT:
6163 case OP_DIGIT:
6164 case OP_NOT_WHITESPACE:
6165 case OP_WHITESPACE:
6166 case OP_NOT_WORDCHAR:
6167 case OP_WORDCHAR:
6168 case OP_ANY:
6169 case OP_ALLANY:
6170 case OP_ANYBYTE:
6171 case OP_ANYNL:
6172 case OP_NOT_HSPACE:
6173 case OP_HSPACE:
6174 case OP_NOT_VSPACE:
6175 case OP_VSPACE:
6176 case OP_CHAR:
6177 case OP_CHARI:
6178 case OP_NOT:
6179 case OP_NOTI:
6180 case OP_CLASS:
6181 case OP_NCLASS:
6182 tmp_base = TMP3;
6183 tmp_offset = 0;
6184 break;
6185
6186 default:
6187 SLJIT_ASSERT_STOP();
6188 /* Fall through. */
6189
6190 case OP_EXTUNI:
6191 case OP_XCLASS:
6192 case OP_NOTPROP:
6193 case OP_PROP:
6194 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6195 tmp_offset = POSSESSIVE0;
6196 break;
6197 }
6198
6199 switch(opcode)
6200 {
6201 case OP_STAR:
6202 case OP_PLUS:
6203 case OP_UPTO:
6204 case OP_CRRANGE:
6205 if (type == OP_ANYNL || type == OP_EXTUNI)
6206 {
6207 SLJIT_ASSERT(localptr == 0);
6208 if (opcode == OP_STAR || opcode == OP_UPTO)
6209 {
6210 allocate_stack(common, 2);
6211 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6212 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6213 }
6214 else
6215 {
6216 allocate_stack(common, 1);
6217 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6218 }
6219
6220 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6221 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6222
6223 label = LABEL();
6224 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6225 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6226 {
6227 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6228 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6229 if (opcode == OP_CRRANGE && arg2 > 0)
6230 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6231 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6232 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6233 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6234 }
6235
6236 /* We cannot use TMP3 because of this allocate_stack. */
6237 allocate_stack(common, 1);
6238 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6239 JUMPTO(SLJIT_JUMP, label);
6240 if (jump != NULL)
6241 JUMPHERE(jump);
6242 }
6243 else
6244 {
6245 if (opcode == OP_PLUS)
6246 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6247 if (localptr == 0)
6248 allocate_stack(common, 2);
6249 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6250 if (opcode <= OP_PLUS)
6251 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6252 else
6253 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6254 label = LABEL();
6255 compile_char1_trypath(common, type, cc, &nomatch);
6256 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6257 if (opcode <= OP_PLUS)
6258 JUMPTO(SLJIT_JUMP, label);
6259 else if (opcode == OP_CRRANGE && arg1 == 0)
6260 {
6261 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6262 JUMPTO(SLJIT_JUMP, label);
6263 }
6264 else
6265 {
6266 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6267 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6268 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6269 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6270 }
6271 set_jumps(nomatch, LABEL());
6272 if (opcode == OP_CRRANGE)
6273 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6274 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6275 }
6276 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6277 break;
6278
6279 case OP_MINSTAR:
6280 case OP_MINPLUS:
6281 if (opcode == OP_MINPLUS)
6282 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6283 if (localptr == 0)
6284 allocate_stack(common, 1);
6285 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6286 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6287 break;
6288
6289 case OP_MINUPTO:
6290 case OP_CRMINRANGE:
6291 if (localptr == 0)
6292 allocate_stack(common, 2);
6293 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6294 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6295 if (opcode == OP_CRMINRANGE)
6296 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6297 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6298 break;
6299
6300 case OP_QUERY:
6301 case OP_MINQUERY:
6302 if (localptr == 0)
6303 allocate_stack(common, 1);
6304 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6305 if (opcode == OP_QUERY)
6306 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6307 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6308 break;
6309
6310 case OP_EXACT:
6311 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6312 label = LABEL();
6313 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6314 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6315 JUMPTO(SLJIT_C_NOT_ZERO, label);
6316 break;
6317
6318 case OP_POSSTAR:
6319 case OP_POSPLUS:
6320 case OP_POSUPTO:
6321 if (opcode == OP_POSPLUS)
6322 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6323 if (opcode == OP_POSUPTO)
6324 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6325 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6326 label = LABEL();
6327 compile_char1_trypath(common, type, cc, &nomatch);
6328 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6329 if (opcode != OP_POSUPTO)
6330 JUMPTO(SLJIT_JUMP, label);
6331 else
6332 {
6333 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6334 JUMPTO(SLJIT_C_NOT_ZERO, label);
6335 }
6336 set_jumps(nomatch, LABEL());
6337 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6338 break;
6339
6340 case OP_POSQUERY:
6341 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6342 compile_char1_trypath(common, type, cc, &nomatch);
6343 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6344 set_jumps(nomatch, LABEL());
6345 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6346 break;
6347
6348 default:
6349 SLJIT_ASSERT_STOP();
6350 break;
6351 }
6352
6353 decrease_call_count(common);
6354 return end;
6355 }
6356
6357 static SLJIT_INLINE pcre_uchar *compile_fail_accept_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6358 {
6359 DEFINE_COMPILER;
6360 backtrack_common *backtrack;
6361
6362 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6363
6364 if (*cc == OP_FAIL)
6365 {
6366 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6367 return cc + 1;
6368 }
6369
6370 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6371 {
6372 /* No need to check notempty conditions. */
6373 if (common->acceptlabel == NULL)
6374 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6375 else
6376 JUMPTO(SLJIT_JUMP, common->acceptlabel);
6377 return cc + 1;
6378 }
6379
6380 if (common->acceptlabel == NULL)
6381 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6382 else
6383 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
6384 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6385 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6386 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6387 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6388 if (common->acceptlabel == NULL)
6389 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6390 else
6391 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
6392 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6393 if (common->acceptlabel == NULL)
6394 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6395 else
6396 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
6397 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6398 return cc + 1;
6399 }
6400
6401 static SLJIT_INLINE pcre_uchar *compile_close_trypath(compiler_common *common, pcre_uchar *cc)
6402 {
6403 DEFINE_COMPILER;
6404 int offset = GET2(cc, 1);
6405
6406 /* Data will be discarded anyway... */
6407 if (common->currententry != NULL)
6408 return cc + 1 + IMM2_SIZE;
6409
6410 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6411 offset <<= 1;
6412 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6413 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6414 return cc + 1 + IMM2_SIZE;
6415 }
6416
6417 static void compile_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6418 {
6419 DEFINE_COMPILER;
6420 backtrack_common *backtrack;
6421
6422 while (cc < ccend)
6423 {
6424 switch(*cc)
6425 {
6426 case OP_SOD:
6427 case OP_SOM:
6428 case OP_NOT_WORD_BOUNDARY:
6429 case OP_WORD_BOUNDARY:
6430 case OP_NOT_DIGIT:
6431 case OP_DIGIT:
6432 case OP_NOT_WHITESPACE:
6433 case OP_WHITESPACE:
6434 case OP_NOT_WORDCHAR:
6435 case OP_WORDCHAR:
6436 case OP_ANY:
6437 case OP_ALLANY:
6438 case OP_ANYBYTE:
6439 case OP_NOTPROP:
6440 case OP_PROP:
6441 case OP_ANYNL:
6442 case OP_NOT_HSPACE:
6443 case OP_HSPACE:
6444 case OP_NOT_VSPACE:
6445 case OP_VSPACE:
6446 case OP_EXTUNI:
6447 case OP_EODN:
6448 case OP_EOD:
6449 case OP_CIRC:
6450 case OP_CIRCM:
6451 case OP_DOLL:
6452 case OP_DOLLM:
6453 case OP_NOT:
6454 case OP_NOTI:
6455 case OP_REVERSE:
6456 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6457 break;
6458
6459 case OP_SET_SOM:
6460 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6461 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6462 allocate_stack(common, 1);
6463 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6464 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6465 cc++;
6466 break;
6467
6468 case OP_CHAR:
6469 case OP_CHARI:
6470 if (common->mode == JIT_COMPILE)
6471 cc = compile_charn_trypath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6472 else
6473 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6474 break;
6475
6476 case OP_STAR:
6477 case OP_MINSTAR:
6478 case OP_PLUS:
6479 case OP_MINPLUS:
6480 case OP_QUERY:
6481 case OP_MINQUERY:
6482 case OP_UPTO:
6483 case OP_MINUPTO:
6484 case OP_EXACT:
6485 case OP_POSSTAR:
6486 case OP_POSPLUS:
6487 case OP_POSQUERY:
6488 case OP_POSUPTO:
6489 case OP_STARI:
6490 case OP_MINSTARI:
6491 case OP_PLUSI:
6492 case OP_MINPLUSI:
6493 case OP_QUERYI:
6494 case OP_MINQUERYI:
6495 case OP_UPTOI:
6496 case OP_MINUPTOI:
6497 case OP_EXACTI:
6498 case OP_POSSTARI:
6499 case OP_POSPLUSI:
6500 case OP_POSQUERYI:
6501 case OP_POSUPTOI:
6502 case OP_NOTSTAR:
6503 case OP_NOTMINSTAR:
6504 case OP_NOTPLUS:
6505 case OP_NOTMINPLUS:
6506 case OP_NOTQUERY:
6507 case OP_NOTMINQUERY:
6508 case OP_NOTUPTO:
6509 case OP_NOTMINUPTO:
6510 case OP_NOTEXACT:
6511 case OP_NOTPOSSTAR:
6512 case OP_NOTPOSPLUS:
6513 case OP_NOTPOSQUERY:
6514 case OP_NOTPOSUPTO:
6515 case OP_NOTSTARI:
6516 case OP_NOTMINSTARI:
6517 case OP_NOTPLUSI:
6518 case OP_NOTMINPLUSI:
6519 case OP_NOTQUERYI:
6520 case OP_NOTMINQUERYI:
6521 case OP_NOTUPTOI:
6522 case OP_NOTMINUPTOI:
6523 case OP_NOTEXACTI:
6524 case OP_NOTPOSSTARI:
6525 case OP_NOTPOSPLUSI:
6526 case OP_NOTPOSQUERYI:
6527 case OP_NOTPOSUPTOI:
6528 case OP_TYPESTAR:
6529 case OP_TYPEMINSTAR:
6530 case OP_TYPEPLUS:
6531 case OP_TYPEMINPLUS:
6532 case OP_TYPEQUERY:
6533 case OP_TYPEMINQUERY:
6534 case OP_TYPEUPTO:
6535 case OP_TYPEMINUPTO:
6536 case OP_TYPEEXACT:
6537 case OP_TYPEPOSSTAR:
6538 case OP_TYPEPOSPLUS:
6539 case OP_TYPEPOSQUERY:
6540 case OP_TYPEPOSUPTO:
6541 cc = compile_iterator_trypath(common, cc, parent);
6542 break;
6543
6544 case OP_CLASS:
6545 case OP_NCLASS:
6546 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
6547 cc = compile_iterator_trypath(common, cc, parent);
6548 else
6549 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6550 break;
6551
6552 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
6553 case OP_XCLASS:
6554 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
6555 cc = compile_iterator_trypath(common, cc, parent);
6556 else
6557 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6558 break;
6559 #endif
6560
6561 case OP_REF:
6562 case OP_REFI:
6563 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
6564 cc = compile_ref_iterator_trypath(common, cc, parent);
6565 else
6566 cc = compile_ref_trypath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
6567 break;
6568
6569 case OP_RECURSE:
6570 cc = compile_recurse_trypath(common, cc, parent);
6571 break;
6572
6573 case OP_ASSERT:
6574 case OP_ASSERT_NOT:
6575 case OP_ASSERTBACK:
6576 case OP_ASSERTBACK_NOT:
6577 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6578 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6579 break;
6580
6581 case OP_BRAMINZERO:
6582 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
6583 cc = bracketend(cc + 1);
6584 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
6585 {
6586 allocate_stack(common, 1);
6587 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6588 }
6589 else
6590 {
6591 allocate_stack(common, 2);
6592 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6593 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
6594 }
6595 BACKTRACK_AS(braminzero_backtrack)->trypath = LABEL();
6596 if (cc[1] > OP_ASSERTBACK_NOT)
6597 decrease_call_count(common);
6598 break;
6599
6600 case OP_ONCE:
6601 case OP_ONCE_NC:
6602 case OP_BRA:
6603 case OP_CBRA:
6604 case OP_COND:
6605 case OP_SBRA:
6606 case OP_SCBRA:
6607 case OP_SCOND:
6608 cc = compile_bracket_trypath(common, cc, parent);
6609 break;
6610
6611 case OP_BRAZERO:
6612 if (cc[1] > OP_ASSERTBACK_NOT)
6613 cc = compile_bracket_trypath(common, cc, parent);
6614 else
6615 {
6616 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6617 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6618 }
6619 break;
6620
6621 case OP_BRAPOS:
6622 case OP_CBRAPOS:
6623 case OP_SBRAPOS:
6624 case OP_SCBRAPOS:
6625 case OP_BRAPOSZERO:
6626 cc = compile_bracketpos_trypath(common, cc, parent);
6627 break;
6628
6629 case OP_MARK:
6630 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6631 SLJIT_ASSERT(common->mark_ptr != 0);
6632 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6633 allocate_stack(common, 1);
6634 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6635 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6636 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
6637 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
6638 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
6639 cc += 1 + 2 + cc[1];
6640 break;
6641
6642 case OP_COMMIT:
6643 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6644 cc += 1;
6645 break;
6646
6647 case OP_FAIL:
6648 case OP_ACCEPT:
6649 case OP_ASSERT_ACCEPT:
6650 cc = compile_fail_accept_trypath(common, cc, parent);
6651 break;
6652
6653 case OP_CLOSE:
6654 cc = compile_close_trypath(common, cc);
6655 break;
6656
6657 case OP_SKIPZERO:
6658 cc = bracketend(cc + 1);
6659 break;
6660
6661 default:
6662 SLJIT_ASSERT_STOP();
6663 return;
6664 }
6665 if (cc == NULL)
6666 return;
6667 }
6668 SLJIT_ASSERT(cc == ccend);
6669 }
6670
/* The try-path helper macros are not needed past this point. */
#undef BACKTRACK_AS
#undef PUSH_BACKTRACK_NOVALUE
#undef PUSH_BACKTRACK

/* Compiles the backtrack path of "current" and leaves the calling (void)
function when the compiler reports an error. Expects "common" and "compiler"
to be in scope. */
#define COMPILE_BACKTRACKPATH(current) \
  do \
    { \
    compile_backtrackpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Views the local variable "current" as a pointer to the given record type. */
#define CURRENT_AS(type) ((type *)current)
6685
6686 static void compile_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current)
6687 {
6688 DEFINE_COMPILER;
6689 pcre_uchar *cc = current->cc;
6690 pcre_uchar opcode;
6691 pcre_uchar type;
6692 int arg1 = -1, arg2 = -1;
6693 struct sljit_label *label = NULL;
6694 struct sljit_jump *jump = NULL;
6695 jump_list *jumplist = NULL;
6696 int localptr = PRIV_DATA(cc);
6697 int base = (localptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6698 int offset0 = (localptr == 0) ? STACK(0) : localptr;
6699 int offset1 = (localptr == 0) ? STACK(1) : localptr + (int)sizeof(sljit_w);
6700
6701 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
6702
6703 switch(opcode)
6704 {
6705 case OP_STAR:
6706 case OP_PLUS:
6707 case OP_UPTO:
6708 case OP_CRRANGE:
6709 if (type == OP_ANYNL || type == OP_EXTUNI)
6710 {
6711 SLJIT_ASSERT(localptr == 0);
6712 set_jumps(current->topbacktracks, LABEL());
6713 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6714 free_stack(common, 1);
6715 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6716 }
6717 else
6718 {
6719 if (opcode == OP_UPTO)
6720 arg2 = 0;
6721 if (opcode <= OP_PLUS)
6722 {
6723 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6724 jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1);
6725 }
6726 else
6727 {
6728 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6729 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6730 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);
6731 OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
6732 }
6733 skip_char_back(common);
6734 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6735 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6736 if (opcode == OP_CRRANGE)
6737 set_jumps(current->topbacktracks, LABEL());
6738 JUMPHERE(jump);
6739 if (localptr == 0)
6740 free_stack(common, 2);
6741 if (opcode == OP_PLUS)
6742 set_jumps(current->topbacktracks, LABEL());
6743 }
6744 break;
6745
6746 case OP_MINSTAR:
6747 case OP_MINPLUS:
6748 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6749 compile_char1_trypath(common, type, cc, &jumplist);
6750 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6751 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6752 set_jumps(jumplist, LABEL());
6753 if (localptr == 0)
6754 free_stack(common, 1);
6755 if (opcode == OP_MINPLUS)
6756 set_jumps(current->topbacktracks, LABEL());
6757 break;
6758
6759 case OP_MINUPTO:
6760 case OP_CRMINRANGE:
6761 if (opcode == OP_CRMINRANGE)
6762 {
6763 label = LABEL();
6764 set_jumps(current->topbacktracks, label);
6765 }
6766 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6767 compile_char1_trypath(common, type, cc, &jumplist);
6768
6769 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6770 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6771 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6772 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6773
6774 if (opcode == OP_CRMINRANGE)
6775 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
6776
6777 if (opcode == OP_CRMINRANGE && arg1 == 0)
6778 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6779 else
6780 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->trypath);
6781
6782 set_jumps(jumplist, LABEL());
6783 if (localptr == 0)
6784 free_stack(common, 2);
6785 break;
6786
6787 case OP_QUERY:
6788 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6789 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
6790 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6791 jump = JUMP(SLJIT_JUMP);
6792 set_jumps(current->topbacktracks, LABEL());
6793 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6794 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
6795 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6796 JUMPHERE(jump);
6797 if (localptr == 0)
6798 free_stack(common,