/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 991 - (show annotations)
Sun Jul 8 16:44:39 2012 UTC (7 years, 4 months ago) by zherczeg
File MIME type: text/plain
File size: 249046 byte(s)
Rename the leave variable names to quit to improve WinCE compatibility.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Memory allocated on the stack: fast, but limited in size. */
#define LOCAL_SPACE_SIZE 32768

/* NOTE(review): presumably the step used when the backtracking stack has to
   grow; the users of this constant are outside this section - confirm. */
#define STACK_GROWTH_RATE 8192

/* In sljit debug builds, enable the check that an allocation could
   destroy the temporaries. */
#if defined(SLJIT_DEBUG) && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
77
78 /*
79 Short summary about the backtracking mechanism employed by the jit code generator:
80
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct backtrack_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called the try path (expected path), and the other is called
93 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the try path.
95
96 Greedy star operator (*) :
97 Try path: match happens.
98 Backtrack path: match failed.
99 Non-greedy star operator (*?) :
100 Try path: no need to perform a match.
101 Backtrack path: match is required.
102
103 The following example shows the code generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A try path
112 '(' try path (pushing arguments to the stack)
113 B try path
114 ')' try path (pushing arguments to the stack)
115 D try path
116 return with successful match
117
118 D backtrack path
119 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
120 B backtrack path
121 C try path
122 jump to D try path
123 C backtrack path
124 A backtrack path
125
126 Notice that the order of the backtrack code paths is the opposite of the try
127 code paths. In this way the topmost value on the stack always belongs
128 to the current backtrack code path. The backtrack path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the try path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the backtrack code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when the backtrack mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *uchar_ptr;
156 pcre_uchar *mark_ptr;
157 /* Everything else after. */
158 int offsetcount;
159 int calllimit;
160 pcre_uint8 notbol;
161 pcre_uint8 noteol;
162 pcre_uint8 notempty;
163 pcre_uint8 notempty_atstart;
164 } jit_arguments;
165
166 typedef struct executable_functions {
167 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
168 PUBL(jit_callback) callback;
169 void *userdata;
170 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
171 } executable_functions;
172
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;   /* The jump stored in this node. */
  struct jump_list *next;    /* Next node of the list. */
} jump_list;
177
/* Kinds of stubs; stack_alloc is the only one defined. */
enum stub_types { stack_alloc };

/* Node of a singly linked list of stubs. */
typedef struct stub_list {
  enum stub_types type;
  int data;
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
187
188 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
189
190 /* The following structure is the key data type for the recursive
191 code generator. It is allocated by compile_trypath, and contains
192 the aguments for compile_backtrackpath. Must be the first member
193 of its descendants. */
194 typedef struct backtrack_common {
195 /* Concatenation stack. */
196 struct backtrack_common *prev;
197 jump_list *nextbacktracks;
198 /* Internal stack (for component operators). */
199 struct backtrack_common *top;
200 jump_list *topbacktracks;
201 /* Opcode pointer. */
202 pcre_uchar *cc;
203 } backtrack_common;
204
205 typedef struct assert_backtrack {
206 backtrack_common common;
207 jump_list *condfailed;
208 /* Less than 0 (-1) if a frame is not needed. */
209 int framesize;
210 /* Points to our private memory word on the stack. */
211 int localptr;
212 /* For iterators. */
213 struct sljit_label *trypath;
214 } assert_backtrack;
215
216 typedef struct bracket_backtrack {
217 backtrack_common common;
218 /* Where to coninue if an alternative is successfully matched. */
219 struct sljit_label *alttrypath;
220 /* For rmin and rmax iterators. */
221 struct sljit_label *recursivetrypath;
222 /* For greedy ? operator. */
223 struct sljit_label *zerotrypath;
224 /* Contains the branches of a failed condition. */
225 union {
226 /* Both for OP_COND, OP_SCOND. */
227 jump_list *condfailed;
228 assert_backtrack *assert;
229 /* For OP_ONCE. -1 if not needed. */
230 int framesize;
231 } u;
232 /* Points to our private memory word on the stack. */
233 int localptr;
234 } bracket_backtrack;
235
236 typedef struct bracketpos_backtrack {
237 backtrack_common common;
238 /* Points to our private memory word on the stack. */
239 int localptr;
240 /* Reverting stack is needed. */
241 int framesize;
242 /* Allocated stack size. */
243 int stacksize;
244 } bracketpos_backtrack;
245
246 typedef struct braminzero_backtrack {
247 backtrack_common common;
248 struct sljit_label *trypath;
249 } braminzero_backtrack;
250
251 typedef struct iterator_backtrack {
252 backtrack_common common;
253 /* Next iteration. */
254 struct sljit_label *trypath;
255 } iterator_backtrack;
256
257 typedef struct recurse_entry {
258 struct recurse_entry *next;
259 /* Contains the function entry. */
260 struct sljit_label *entry;
261 /* Collects the calls until the function is not created. */
262 jump_list *calls;
263 /* Points to the starting opcode. */
264 int start;
265 } recurse_entry;
266
267 typedef struct recurse_backtrack {
268 backtrack_common common;
269 } recurse_backtrack;
270
271 #define MAX_RANGE_SIZE 6
272
273 typedef struct compiler_common {
274 struct sljit_compiler *compiler;
275 pcre_uchar *start;
276
277 /* Opcode local area direct map. */
278 int *localptrs;
279 int cbraptr;
280 /* OVector starting point. Must be divisible by 2. */
281 int ovector_start;
282 /* Last known position of the requested byte. */
283 int req_char_ptr;
284 /* Head of the last recursion. */
285 int recursive_head;
286 /* First inspected character for partial matching. */
287 int start_used_ptr;
288 /* Starting pointer for partial soft matches. */
289 int hit_start;
290 /* End pointer of the first line. */
291 int first_line_end;
292 /* Points to the marked string. */
293 int mark_ptr;
294
295 /* Flipped and lower case tables. */
296 const pcre_uint8 *fcc;
297 sljit_w lcc;
298 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
299 int mode;
300 /* Newline control. */
301 int nltype;
302 int newline;
303 int bsr_nltype;
304 /* Dollar endonly. */
305 int endonly;
306 BOOL has_set_som;
307 /* Tables. */
308 sljit_w ctypes;
309 int digits[2 + MAX_RANGE_SIZE];
310 /* Named capturing brackets. */
311 sljit_uw name_table;
312 sljit_w name_count;
313 sljit_w name_entry_size;
314
315 /* Labels and jump lists. */
316 struct sljit_label *partialmatchlabel;
317 struct sljit_label *quitlabel;
318 struct sljit_label *acceptlabel;
319 stub_list *stubs;
320 recurse_entry *entries;
321 recurse_entry *currententry;
322 jump_list *partialmatch;
323 jump_list *quit;
324 jump_list *accept;
325 jump_list *calllimit;
326 jump_list *stackalloc;
327 jump_list *revertframes;
328 jump_list *wordboundary;
329 jump_list *anynewline;
330 jump_list *hspace;
331 jump_list *vspace;
332 jump_list *casefulcmp;
333 jump_list *caselesscmp;
334 BOOL jscript_compat;
335 #ifdef SUPPORT_UTF
336 BOOL utf;
337 #ifdef SUPPORT_UCP
338 BOOL use_ucp;
339 #endif
340 jump_list *utfreadchar;
341 #ifdef COMPILE_PCRE8
342 jump_list *utfreadtype8;
343 #endif
344 #endif /* SUPPORT_UTF */
345 #ifdef SUPPORT_UCP
346 jump_list *getucd;
347 #endif
348 } compiler_common;
349
/* Context used by byte_sequence_compare. The character buffers exist only
   when SLJIT_UNALIGNED is defined and non-zero. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined(SLJIT_UNALIGNED) && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } c;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } oc;
#endif
} compare_context;
383
/* Marker values written into saved stack frames by init_frame. */
enum {
  frame_end         =  0,  /* Terminates a frame. */
  frame_setstrbegin = -1,  /* Followed by the saved OVECTOR(0) value. */
  frame_setmark     = -2   /* Followed by the saved mark_ptr local. */
};
389
/* Undefine the sljit macro; CMP is redefined among the shortcuts. */
#undef CMP

/* Byte offset of the i-th element of the stack: element 0 is at
   -sizeof(sljit_w), element 1 one word below it, and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))

/* Names given to the sljit registers. */
#define TMP1          SLJIT_TEMPORARY_REG1
#define TMP2          SLJIT_TEMPORARY_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_TEMPORARY_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1

/* Layout of the locals. */
/* The current opcode may use these two locals. */
#define LOCALS0       (0 * sizeof(sljit_w))
#define LOCALS1       (1 * sizeof(sljit_w))
/* Two locals for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0   (2 * sizeof(sljit_w))
#define POSSESSIVE1   (3 * sizeof(sljit_w))
/* Maximum limit of recursions. */
#define CALL_LIMIT    (4 * sizeof(sljit_w))
/* The output vector lives on the stack and holds character pointers. Its
   data is split into two groups: the first holds the start / end character
   pointers, the second holds the start pointers of capturing groups whose
   end has not been reached yet. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))
#define PRIV_DATA(cc)    (common->localptrs[(cc) - common->start])
424
425 #ifdef COMPILE_PCRE8
426 #define MOV_UCHAR SLJIT_MOV_UB
427 #define MOVU_UCHAR SLJIT_MOVU_UB
428 #else
429 #ifdef COMPILE_PCRE16
430 #define MOV_UCHAR SLJIT_MOV_UH
431 #define MOVU_UCHAR SLJIT_MOVU_UH
432 #else
433 #error Unsupported compiling mode
434 #endif
435 #endif
436
/* Shortcuts for the sljit emitter calls. Every macro except DEFINE_COMPILER
   expects a local named 'compiler', which DEFINE_COMPILER declares from
   common->compiler. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
/* Emit a one / two source operand instruction. */
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
/* Emit a label at the current position. */
#define LABEL() \
  sljit_emit_label(compiler)
/* Emit a jump; JUMPTO also binds it to an existing label. */
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an earlier jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
/* Emit a compare; CMPTO also binds it to an existing label. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define COND_VALUE(op, dst, dstw, type) \
  sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
460
461 static pcre_uchar* bracketend(pcre_uchar* cc)
462 {
463 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
464 do cc += GET(cc, 1); while (*cc == OP_ALT);
465 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
466 cc += 1 + LINK_SIZE;
467 return cc;
468 }
469
470 /* Functions which might need modification for all new supported opcodes:
471 next_opcode
472 get_localspace
473 set_localptrs
474 get_framesize
475 init_frame
476 get_localsize
477 copy_locals
478 compile_trypath
479 compile_backtrackpath
480 */
481
482 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
483 {
484 SLJIT_UNUSED_ARG(common);
485 switch(*cc)
486 {
487 case OP_SOD:
488 case OP_SOM:
489 case OP_SET_SOM:
490 case OP_NOT_WORD_BOUNDARY:
491 case OP_WORD_BOUNDARY:
492 case OP_NOT_DIGIT:
493 case OP_DIGIT:
494 case OP_NOT_WHITESPACE:
495 case OP_WHITESPACE:
496 case OP_NOT_WORDCHAR:
497 case OP_WORDCHAR:
498 case OP_ANY:
499 case OP_ALLANY:
500 case OP_ANYNL:
501 case OP_NOT_HSPACE:
502 case OP_HSPACE:
503 case OP_NOT_VSPACE:
504 case OP_VSPACE:
505 case OP_EXTUNI:
506 case OP_EODN:
507 case OP_EOD:
508 case OP_CIRC:
509 case OP_CIRCM:
510 case OP_DOLL:
511 case OP_DOLLM:
512 case OP_TYPESTAR:
513 case OP_TYPEMINSTAR:
514 case OP_TYPEPLUS:
515 case OP_TYPEMINPLUS:
516 case OP_TYPEQUERY:
517 case OP_TYPEMINQUERY:
518 case OP_TYPEPOSSTAR:
519 case OP_TYPEPOSPLUS:
520 case OP_TYPEPOSQUERY:
521 case OP_CRSTAR:
522 case OP_CRMINSTAR:
523 case OP_CRPLUS:
524 case OP_CRMINPLUS:
525 case OP_CRQUERY:
526 case OP_CRMINQUERY:
527 case OP_DEF:
528 case OP_BRAZERO:
529 case OP_BRAMINZERO:
530 case OP_BRAPOSZERO:
531 case OP_COMMIT:
532 case OP_FAIL:
533 case OP_ACCEPT:
534 case OP_ASSERT_ACCEPT:
535 case OP_SKIPZERO:
536 return cc + 1;
537
538 case OP_ANYBYTE:
539 #ifdef SUPPORT_UTF
540 if (common->utf) return NULL;
541 #endif
542 return cc + 1;
543
544 case OP_CHAR:
545 case OP_CHARI:
546 case OP_NOT:
547 case OP_NOTI:
548 case OP_STAR:
549 case OP_MINSTAR:
550 case OP_PLUS:
551 case OP_MINPLUS:
552 case OP_QUERY:
553 case OP_MINQUERY:
554 case OP_POSSTAR:
555 case OP_POSPLUS:
556 case OP_POSQUERY:
557 case OP_STARI:
558 case OP_MINSTARI:
559 case OP_PLUSI:
560 case OP_MINPLUSI:
561 case OP_QUERYI:
562 case OP_MINQUERYI:
563 case OP_POSSTARI:
564 case OP_POSPLUSI:
565 case OP_POSQUERYI:
566 case OP_NOTSTAR:
567 case OP_NOTMINSTAR:
568 case OP_NOTPLUS:
569 case OP_NOTMINPLUS:
570 case OP_NOTQUERY:
571 case OP_NOTMINQUERY:
572 case OP_NOTPOSSTAR:
573 case OP_NOTPOSPLUS:
574 case OP_NOTPOSQUERY:
575 case OP_NOTSTARI:
576 case OP_NOTMINSTARI:
577 case OP_NOTPLUSI:
578 case OP_NOTMINPLUSI:
579 case OP_NOTQUERYI:
580 case OP_NOTMINQUERYI:
581 case OP_NOTPOSSTARI:
582 case OP_NOTPOSPLUSI:
583 case OP_NOTPOSQUERYI:
584 cc += 2;
585 #ifdef SUPPORT_UTF
586 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
587 #endif
588 return cc;
589
590 case OP_UPTO:
591 case OP_MINUPTO:
592 case OP_EXACT:
593 case OP_POSUPTO:
594 case OP_UPTOI:
595 case OP_MINUPTOI:
596 case OP_EXACTI:
597 case OP_POSUPTOI:
598 case OP_NOTUPTO:
599 case OP_NOTMINUPTO:
600 case OP_NOTEXACT:
601 case OP_NOTPOSUPTO:
602 case OP_NOTUPTOI:
603 case OP_NOTMINUPTOI:
604 case OP_NOTEXACTI:
605 case OP_NOTPOSUPTOI:
606 cc += 2 + IMM2_SIZE;
607 #ifdef SUPPORT_UTF
608 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
609 #endif
610 return cc;
611
612 case OP_NOTPROP:
613 case OP_PROP:
614 return cc + 1 + 2;
615
616 case OP_TYPEUPTO:
617 case OP_TYPEMINUPTO:
618 case OP_TYPEEXACT:
619 case OP_TYPEPOSUPTO:
620 case OP_REF:
621 case OP_REFI:
622 case OP_CREF:
623 case OP_NCREF:
624 case OP_RREF:
625 case OP_NRREF:
626 case OP_CLOSE:
627 cc += 1 + IMM2_SIZE;
628 return cc;
629
630 case OP_CRRANGE:
631 case OP_CRMINRANGE:
632 return cc + 1 + 2 * IMM2_SIZE;
633
634 case OP_CLASS:
635 case OP_NCLASS:
636 return cc + 1 + 32 / sizeof(pcre_uchar);
637
638 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
639 case OP_XCLASS:
640 return cc + GET(cc, 1);
641 #endif
642
643 case OP_RECURSE:
644 case OP_ASSERT:
645 case OP_ASSERT_NOT:
646 case OP_ASSERTBACK:
647 case OP_ASSERTBACK_NOT:
648 case OP_REVERSE:
649 case OP_ONCE:
650 case OP_ONCE_NC:
651 case OP_BRA:
652 case OP_BRAPOS:
653 case OP_COND:
654 case OP_SBRA:
655 case OP_SBRAPOS:
656 case OP_SCOND:
657 case OP_ALT:
658 case OP_KET:
659 case OP_KETRMAX:
660 case OP_KETRMIN:
661 case OP_KETRPOS:
662 return cc + 1 + LINK_SIZE;
663
664 case OP_CBRA:
665 case OP_CBRAPOS:
666 case OP_SCBRA:
667 case OP_SCBRAPOS:
668 return cc + 1 + LINK_SIZE + IMM2_SIZE;
669
670 case OP_MARK:
671 return cc + 1 + 2 + cc[1];
672
673 default:
674 return NULL;
675 }
676 }
677
/* Case label groups shared by the switch statements that assign local
   space to iterators. */

/* Character iterators handled with one local word. */
#define CASE_ITERATOR_LOCAL1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Character iterators handled with two local words. */
#define CASE_ITERATOR_LOCAL2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Character iterators with an IMM2 operand, handled with two local words. */
#define CASE_ITERATOR_LOCAL2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Type iterators handled with one local word. */
#define CASE_ITERATOR_TYPE_LOCAL1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Type iterators handled with up to two local words. */
#define CASE_ITERATOR_TYPE_LOCAL2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Type iterators with an IMM2 operand, handled with up to two local words. */
#define CASE_ITERATOR_TYPE_LOCAL2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
729
730 static int get_class_iterator_size(pcre_uchar *cc)
731 {
732 switch(*cc)
733 {
734 case OP_CRSTAR:
735 case OP_CRPLUS:
736 return 2;
737
738 case OP_CRMINSTAR:
739 case OP_CRMINPLUS:
740 case OP_CRQUERY:
741 case OP_CRMINQUERY:
742 return 1;
743
744 case OP_CRRANGE:
745 case OP_CRMINRANGE:
746 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
747 return 0;
748 return 2;
749
750 default:
751 return 0;
752 }
753 }
754
755 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
756 {
757 int localspace = 0;
758 pcre_uchar *alternative;
759 pcre_uchar *end = NULL;
760 int space, size, bracketlen;
761
762 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
763 while (cc < ccend)
764 {
765 space = 0;
766 size = 0;
767 bracketlen = 0;
768 switch(*cc)
769 {
770 case OP_SET_SOM:
771 common->has_set_som = TRUE;
772 cc += 1;
773 break;
774
775 case OP_ASSERT:
776 case OP_ASSERT_NOT:
777 case OP_ASSERTBACK:
778 case OP_ASSERTBACK_NOT:
779 case OP_ONCE:
780 case OP_ONCE_NC:
781 case OP_BRAPOS:
782 case OP_SBRA:
783 case OP_SBRAPOS:
784 case OP_SCOND:
785 localspace += sizeof(sljit_w);
786 bracketlen = 1 + LINK_SIZE;
787 break;
788
789 case OP_CBRAPOS:
790 case OP_SCBRAPOS:
791 localspace += sizeof(sljit_w);
792 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
793 break;
794
795 case OP_COND:
796 /* Might be a hidden SCOND. */
797 alternative = cc + GET(cc, 1);
798 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
799 localspace += sizeof(sljit_w);
800 bracketlen = 1 + LINK_SIZE;
801 break;
802
803 case OP_BRA:
804 bracketlen = 1 + LINK_SIZE;
805 break;
806
807 case OP_CBRA:
808 case OP_SCBRA:
809 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
810 break;
811
812 CASE_ITERATOR_LOCAL1
813 space = 1;
814 size = -2;
815 break;
816
817 CASE_ITERATOR_LOCAL2A
818 space = 2;
819 size = -2;
820 break;
821
822 CASE_ITERATOR_LOCAL2B
823 space = 2;
824 size = -(2 + IMM2_SIZE);
825 break;
826
827 CASE_ITERATOR_TYPE_LOCAL1
828 space = 1;
829 size = 1;
830 break;
831
832 CASE_ITERATOR_TYPE_LOCAL2A
833 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
834 space = 2;
835 size = 1;
836 break;
837
838 CASE_ITERATOR_TYPE_LOCAL2B
839 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
840 space = 2;
841 size = 1 + IMM2_SIZE;
842 break;
843
844 case OP_CLASS:
845 case OP_NCLASS:
846 size += 1 + 32 / sizeof(pcre_uchar);
847 space = get_class_iterator_size(cc + size);
848 break;
849
850 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
851 case OP_XCLASS:
852 size = GET(cc, 1);
853 space = get_class_iterator_size(cc + size);
854 break;
855 #endif
856
857 case OP_RECURSE:
858 /* Set its value only once. */
859 if (common->recursive_head == 0)
860 {
861 common->recursive_head = common->ovector_start;
862 common->ovector_start += sizeof(sljit_w);
863 }
864 cc += 1 + LINK_SIZE;
865 break;
866
867 case OP_MARK:
868 if (common->mark_ptr == 0)
869 {
870 common->mark_ptr = common->ovector_start;
871 common->ovector_start += sizeof(sljit_w);
872 }
873 cc += 1 + 2 + cc[1];
874 break;
875
876 default:
877 cc = next_opcode(common, cc);
878 if (cc == NULL)
879 return -1;
880 break;
881 }
882
883 if (space > 0 && cc >= end)
884 localspace += sizeof(sljit_w) * space;
885
886 if (size != 0)
887 {
888 if (size < 0)
889 {
890 cc += -size;
891 #ifdef SUPPORT_UTF
892 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
893 #endif
894 }
895 else
896 cc += size;
897 }
898
899 if (bracketlen > 0)
900 {
901 if (cc >= end)
902 {
903 end = bracketend(cc);
904 if (end[-1 - LINK_SIZE] == OP_KET)
905 end = NULL;
906 }
907 cc += bracketlen;
908 }
909 }
910 return localspace;
911 }
912
913 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
914 {
915 pcre_uchar *cc = common->start;
916 pcre_uchar *alternative;
917 pcre_uchar *end = NULL;
918 int space, size, bracketlen;
919
920 while (cc < ccend)
921 {
922 space = 0;
923 size = 0;
924 bracketlen = 0;
925 switch(*cc)
926 {
927 case OP_ASSERT:
928 case OP_ASSERT_NOT:
929 case OP_ASSERTBACK:
930 case OP_ASSERTBACK_NOT:
931 case OP_ONCE:
932 case OP_ONCE_NC:
933 case OP_BRAPOS:
934 case OP_SBRA:
935 case OP_SBRAPOS:
936 case OP_SCOND:
937 common->localptrs[cc - common->start] = localptr;
938 localptr += sizeof(sljit_w);
939 bracketlen = 1 + LINK_SIZE;
940 break;
941
942 case OP_CBRAPOS:
943 case OP_SCBRAPOS:
944 common->localptrs[cc - common->start] = localptr;
945 localptr += sizeof(sljit_w);
946 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
947 break;
948
949 case OP_COND:
950 /* Might be a hidden SCOND. */
951 alternative = cc + GET(cc, 1);
952 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
953 {
954 common->localptrs[cc - common->start] = localptr;
955 localptr += sizeof(sljit_w);
956 }
957 bracketlen = 1 + LINK_SIZE;
958 break;
959
960 case OP_BRA:
961 bracketlen = 1 + LINK_SIZE;
962 break;
963
964 case OP_CBRA:
965 case OP_SCBRA:
966 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
967 break;
968
969 CASE_ITERATOR_LOCAL1
970 space = 1;
971 size = -2;
972 break;
973
974 CASE_ITERATOR_LOCAL2A
975 space = 2;
976 size = -2;
977 break;
978
979 CASE_ITERATOR_LOCAL2B
980 space = 2;
981 size = -(2 + IMM2_SIZE);
982 break;
983
984 CASE_ITERATOR_TYPE_LOCAL1
985 space = 1;
986 size = 1;
987 break;
988
989 CASE_ITERATOR_TYPE_LOCAL2A
990 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
991 space = 2;
992 size = 1;
993 break;
994
995 CASE_ITERATOR_TYPE_LOCAL2B
996 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
997 space = 2;
998 size = 1 + IMM2_SIZE;
999 break;
1000
1001 case OP_CLASS:
1002 case OP_NCLASS:
1003 size += 1 + 32 / sizeof(pcre_uchar);
1004 space = get_class_iterator_size(cc + size);
1005 break;
1006
1007 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1008 case OP_XCLASS:
1009 size = GET(cc, 1);
1010 space = get_class_iterator_size(cc + size);
1011 break;
1012 #endif
1013
1014 default:
1015 cc = next_opcode(common, cc);
1016 SLJIT_ASSERT(cc != NULL);
1017 break;
1018 }
1019
1020 if (space > 0 && cc >= end)
1021 {
1022 common->localptrs[cc - common->start] = localptr;
1023 localptr += sizeof(sljit_w) * space;
1024 }
1025
1026 if (size != 0)
1027 {
1028 if (size < 0)
1029 {
1030 cc += -size;
1031 #ifdef SUPPORT_UTF
1032 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1033 #endif
1034 }
1035 else
1036 cc += size;
1037 }
1038
1039 if (bracketlen > 0)
1040 {
1041 if (cc >= end)
1042 {
1043 end = bracketend(cc);
1044 if (end[-1 - LINK_SIZE] == OP_KET)
1045 end = NULL;
1046 }
1047 cc += bracketlen;
1048 }
1049 }
1050 }
1051
1052 /* Returns with -1 if no need for frame. */
1053 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1054 {
1055 pcre_uchar *ccend = bracketend(cc);
1056 int length = 0;
1057 BOOL possessive = FALSE;
1058 BOOL setsom_found = recursive;
1059 BOOL setmark_found = recursive;
1060
1061 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1062 {
1063 length = 3;
1064 possessive = TRUE;
1065 }
1066
1067 cc = next_opcode(common, cc);
1068 SLJIT_ASSERT(cc != NULL);
1069 while (cc < ccend)
1070 switch(*cc)
1071 {
1072 case OP_SET_SOM:
1073 SLJIT_ASSERT(common->has_set_som);
1074 if (!setsom_found)
1075 {
1076 length += 2;
1077 setsom_found = TRUE;
1078 }
1079 cc += 1;
1080 break;
1081
1082 case OP_MARK:
1083 SLJIT_ASSERT(common->mark_ptr != 0);
1084 if (!setmark_found)
1085 {
1086 length += 2;
1087 setmark_found = TRUE;
1088 }
1089 cc += 1 + 2 + cc[1];
1090 break;
1091
1092 case OP_RECURSE:
1093 if (common->has_set_som && !setsom_found)
1094 {
1095 length += 2;
1096 setsom_found = TRUE;
1097 }
1098 if (common->mark_ptr != 0 && !setmark_found)
1099 {
1100 length += 2;
1101 setmark_found = TRUE;
1102 }
1103 cc += 1 + LINK_SIZE;
1104 break;
1105
1106 case OP_CBRA:
1107 case OP_CBRAPOS:
1108 case OP_SCBRA:
1109 case OP_SCBRAPOS:
1110 length += 3;
1111 cc += 1 + LINK_SIZE + IMM2_SIZE;
1112 break;
1113
1114 default:
1115 cc = next_opcode(common, cc);
1116 SLJIT_ASSERT(cc != NULL);
1117 break;
1118 }
1119
1120 /* Possessive quantifiers can use a special case. */
1121 if (SLJIT_UNLIKELY(possessive) && length == 3)
1122 return -1;
1123
1124 if (length > 0)
1125 return length + 1;
1126 return -1;
1127 }
1128
1129 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1130 {
1131 DEFINE_COMPILER;
1132 pcre_uchar *ccend = bracketend(cc);
1133 BOOL setsom_found = recursive;
1134 BOOL setmark_found = recursive;
1135 int offset;
1136
1137 /* >= 1 + shortest item size (2) */
1138 SLJIT_UNUSED_ARG(stacktop);
1139 SLJIT_ASSERT(stackpos >= stacktop + 2);
1140
1141 stackpos = STACK(stackpos);
1142 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1143 cc = next_opcode(common, cc);
1144 SLJIT_ASSERT(cc != NULL);
1145 while (cc < ccend)
1146 switch(*cc)
1147 {
1148 case OP_SET_SOM:
1149 SLJIT_ASSERT(common->has_set_som);
1150 if (!setsom_found)
1151 {
1152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1153 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1154 stackpos += (int)sizeof(sljit_w);
1155 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1156 stackpos += (int)sizeof(sljit_w);
1157 setsom_found = TRUE;
1158 }
1159 cc += 1;
1160 break;
1161
1162 case OP_MARK:
1163 SLJIT_ASSERT(common->mark_ptr != 0);
1164 if (!setmark_found)
1165 {
1166 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1167 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1168 stackpos += (int)sizeof(sljit_w);
1169 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1170 stackpos += (int)sizeof(sljit_w);
1171 setmark_found = TRUE;
1172 }
1173 cc += 1 + 2 + cc[1];
1174 break;
1175
1176 case OP_RECURSE:
1177 if (common->has_set_som && !setsom_found)
1178 {
1179 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1180 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1181 stackpos += (int)sizeof(sljit_w);
1182 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1183 stackpos += (int)sizeof(sljit_w);
1184 setsom_found = TRUE;
1185 }
1186 if (common->mark_ptr != 0 && !setmark_found)
1187 {
1188 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1189 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1190 stackpos += (int)sizeof(sljit_w);
1191 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1192 stackpos += (int)sizeof(sljit_w);
1193 setmark_found = TRUE;
1194 }
1195 cc += 1 + LINK_SIZE;
1196 break;
1197
1198 case OP_CBRA:
1199 case OP_CBRAPOS:
1200 case OP_SCBRA:
1201 case OP_SCBRAPOS:
1202 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1204 stackpos += (int)sizeof(sljit_w);
1205 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1206 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1208 stackpos += (int)sizeof(sljit_w);
1209 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1210 stackpos += (int)sizeof(sljit_w);
1211
1212 cc += 1 + LINK_SIZE + IMM2_SIZE;
1213 break;
1214
1215 default:
1216 cc = next_opcode(common, cc);
1217 SLJIT_ASSERT(cc != NULL);
1218 break;
1219 }
1220
1221 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1222 SLJIT_ASSERT(stackpos == STACK(stacktop));
1223 }
1224
1225 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1226 {
1227 int localsize = 2;
1228 int size;
1229 pcre_uchar *alternative;
1230 /* Calculate the sum of the local variables. */
1231 while (cc < ccend)
1232 {
1233 size = 0;
1234 switch(*cc)
1235 {
1236 case OP_ASSERT:
1237 case OP_ASSERT_NOT:
1238 case OP_ASSERTBACK:
1239 case OP_ASSERTBACK_NOT:
1240 case OP_ONCE:
1241 case OP_ONCE_NC:
1242 case OP_BRAPOS:
1243 case OP_SBRA:
1244 case OP_SBRAPOS:
1245 case OP_SCOND:
1246 localsize++;
1247 cc += 1 + LINK_SIZE;
1248 break;
1249
1250 case OP_CBRA:
1251 case OP_SCBRA:
1252 localsize++;
1253 cc += 1 + LINK_SIZE + IMM2_SIZE;
1254 break;
1255
1256 case OP_CBRAPOS:
1257 case OP_SCBRAPOS:
1258 localsize += 2;
1259 cc += 1 + LINK_SIZE + IMM2_SIZE;
1260 break;
1261
1262 case OP_COND:
1263 /* Might be a hidden SCOND. */
1264 alternative = cc + GET(cc, 1);
1265 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1266 localsize++;
1267 cc += 1 + LINK_SIZE;
1268 break;
1269
1270 CASE_ITERATOR_LOCAL1
1271 if (PRIV_DATA(cc))
1272 localsize++;
1273 cc += 2;
1274 #ifdef SUPPORT_UTF
1275 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1276 #endif
1277 break;
1278
1279 CASE_ITERATOR_LOCAL2A
1280 if (PRIV_DATA(cc))
1281 localsize += 2;
1282 cc += 2;
1283 #ifdef SUPPORT_UTF
1284 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1285 #endif
1286 break;
1287
1288 CASE_ITERATOR_LOCAL2B
1289 if (PRIV_DATA(cc))
1290 localsize += 2;
1291 cc += 2 + IMM2_SIZE;
1292 #ifdef SUPPORT_UTF
1293 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1294 #endif
1295 break;
1296
1297 CASE_ITERATOR_TYPE_LOCAL1
1298 if (PRIV_DATA(cc))
1299 localsize++;
1300 cc += 1;
1301 break;
1302
1303 CASE_ITERATOR_TYPE_LOCAL2A
1304 if (PRIV_DATA(cc))
1305 localsize += 2;
1306 cc += 1;
1307 break;
1308
1309 CASE_ITERATOR_TYPE_LOCAL2B
1310 if (PRIV_DATA(cc))
1311 localsize += 2;
1312 cc += 1 + IMM2_SIZE;
1313 break;
1314
1315 case OP_CLASS:
1316 case OP_NCLASS:
1317 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1318 case OP_XCLASS:
1319 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1320 #else
1321 size = 1 + 32 / (int)sizeof(pcre_uchar);
1322 #endif
1323 if (PRIV_DATA(cc))
1324 localsize += get_class_iterator_size(cc + size);
1325 cc += size;
1326 break;
1327
1328 default:
1329 cc = next_opcode(common, cc);
1330 SLJIT_ASSERT(cc != NULL);
1331 break;
1332 }
1333 }
1334 SLJIT_ASSERT(cc == ccend);
1335 return localsize;
1336 }
1337
/* Emits code that copies the local variables owned by the opcodes in
[cc, ccend) between the locals area (addressed through SLJIT_LOCALS_REG) and
the machine stack (addressed through STACK_TOP).

  save != 0: locals are written to the stack. The first value saved is
             common->recursive_head (the "start" state), followed by the
             locals of each opcode.
  save == 0: the values are read back from the stack into the same locals,
             in the same order.

The opcode walk mirrors get_localsize(): every opcode yields zero, one or two
local offsets in srcw[]. TMP1 and TMP2 are used alternately as staging
registers (tmp1next selects which one is used next), so each emitted load can
be paired with a store of the other register. stackptr and stacktop are
converted with STACK() on entry; the final assertion checks that exactly the
region between them has been consumed. */
1338 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1339 BOOL save, int stackptr, int stacktop)
1340 {
1341 DEFINE_COMPILER;
1342 int srcw[2];
1343 int count, size;
1344 BOOL tmp1next = TRUE;
1345 BOOL tmp1empty = TRUE;
1346 BOOL tmp2empty = TRUE;
1347 pcre_uchar *alternative;
1348 enum {
1349 start,
1350 loop,
1351 end
1352 } status;
1353
/* Only the save direction has the extra "start" step for recursive_head. */
1354 status = save ? start : loop;
1355 stackptr = STACK(stackptr - 2);
1356 stacktop = STACK(stacktop - 1);
1357
1358 if (!save)
1359 {
/* Restore: step over the first slot, then preload up to two values into
TMP1 and TMP2 so that the loop below always finds its register filled. */
1360 stackptr += sizeof(sljit_w);
1361 if (stackptr < stacktop)
1362 {
1363 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1364 stackptr += sizeof(sljit_w);
1365 tmp1empty = FALSE;
1366 }
1367 if (stackptr < stacktop)
1368 {
1369 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1370 stackptr += sizeof(sljit_w);
1371 tmp2empty = FALSE;
1372 }
1373 /* The tmp1next must be TRUE in either way. */
1374 }
1375
1376 while (status != end)
1377 {
/* count is the number of valid entries in srcw[] for this step. */
1378 count = 0;
1379 switch(status)
1380 {
1381 case start:
1382 SLJIT_ASSERT(save && common->recursive_head != 0);
1383 count = 1;
1384 srcw[0] = common->recursive_head;
1385 status = loop;
1386 break;
1387
1388 case loop:
1389 if (cc >= ccend)
1390 {
1391 status = end;
1392 break;
1393 }
1394
1395 switch(*cc)
1396 {
1397 case OP_ASSERT:
1398 case OP_ASSERT_NOT:
1399 case OP_ASSERTBACK:
1400 case OP_ASSERTBACK_NOT:
1401 case OP_ONCE:
1402 case OP_ONCE_NC:
1403 case OP_BRAPOS:
1404 case OP_SBRA:
1405 case OP_SBRAPOS:
1406 case OP_SCOND:
1407 count = 1;
1408 srcw[0] = PRIV_DATA(cc);
1409 SLJIT_ASSERT(srcw[0] != 0);
1410 cc += 1 + LINK_SIZE;
1411 break;
1412
1413 case OP_CBRA:
1414 case OP_SCBRA:
1415 count = 1;
1416 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1417 cc += 1 + LINK_SIZE + IMM2_SIZE;
1418 break;
1419
1420 case OP_CBRAPOS:
1421 case OP_SCBRAPOS:
1422 count = 2;
1423 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1424 srcw[1] = PRIV_DATA(cc);
1425 SLJIT_ASSERT(srcw[0] != 0);
1426 cc += 1 + LINK_SIZE + IMM2_SIZE;
1427 break;
1428
1429 case OP_COND:
1430 /* Might be a hidden SCOND. */
1431 alternative = cc + GET(cc, 1);
1432 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1433 {
1434 count = 1;
1435 srcw[0] = PRIV_DATA(cc);
1436 SLJIT_ASSERT(srcw[0] != 0);
1437 }
1438 cc += 1 + LINK_SIZE;
1439 break;
1440
1441 CASE_ITERATOR_LOCAL1
1442 if (PRIV_DATA(cc))
1443 {
1444 count = 1;
1445 srcw[0] = PRIV_DATA(cc);
1446 }
1447 cc += 2;
1448 #ifdef SUPPORT_UTF
1449 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1450 #endif
1451 break;
1452
1453 CASE_ITERATOR_LOCAL2A
1454 if (PRIV_DATA(cc))
1455 {
1456 count = 2;
1457 srcw[0] = PRIV_DATA(cc);
1458 srcw[1] = PRIV_DATA(cc) + sizeof(sljit_w);
1459 }
1460 cc += 2;
1461 #ifdef SUPPORT_UTF
1462 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1463 #endif
1464 break;
1465
1466 CASE_ITERATOR_LOCAL2B
1467 if (PRIV_DATA(cc))
1468 {
1469 count = 2;
1470 srcw[0] = PRIV_DATA(cc);
1471 srcw[1] = PRIV_DATA(cc) + sizeof(sljit_w);
1472 }
1473 cc += 2 + IMM2_SIZE;
1474 #ifdef SUPPORT_UTF
1475 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1476 #endif
1477 break;
1478
1479 CASE_ITERATOR_TYPE_LOCAL1
1480 if (PRIV_DATA(cc))
1481 {
1482 count = 1;
1483 srcw[0] = PRIV_DATA(cc);
1484 }
1485 cc += 1;
1486 break;
1487
1488 CASE_ITERATOR_TYPE_LOCAL2A
1489 if (PRIV_DATA(cc))
1490 {
1491 count = 2;
1492 srcw[0] = PRIV_DATA(cc);
1493 srcw[1] = srcw[0] + sizeof(sljit_w);
1494 }
1495 cc += 1;
1496 break;
1497
1498 CASE_ITERATOR_TYPE_LOCAL2B
1499 if (PRIV_DATA(cc))
1500 {
1501 count = 2;
1502 srcw[0] = PRIV_DATA(cc);
1503 srcw[1] = srcw[0] + sizeof(sljit_w);
1504 }
1505 cc += 1 + IMM2_SIZE;
1506 break;
1507
1508 case OP_CLASS:
1509 case OP_NCLASS:
1510 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1511 case OP_XCLASS:
1512 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1513 #else
1514 size = 1 + 32 / (int)sizeof(pcre_uchar);
1515 #endif
/* The opcode following the class selects a one- or two-word iterator. */
1516 if (PRIV_DATA(cc))
1517 switch(get_class_iterator_size(cc + size))
1518 {
1519 case 1:
1520 count = 1;
1521 srcw[0] = PRIV_DATA(cc);
1522 break;
1523
1524 case 2:
1525 count = 2;
1526 srcw[0] = PRIV_DATA(cc);
1527 srcw[1] = srcw[0] + sizeof(sljit_w);
1528 break;
1529
1530 default:
1531 SLJIT_ASSERT_STOP();
1532 break;
1533 }
1534 cc += size;
1535 break;
1536
1537 default:
1538 cc = next_opcode(common, cc);
1539 SLJIT_ASSERT(cc != NULL);
1540 break;
1541 }
1542 break;
1543
1544 case end:
1545 SLJIT_ASSERT_STOP();
1546 break;
1547 }
1548
/* Transfer the collected locals. count is decremented before it is used as
an index, so srcw[1] (when present) is handled before srcw[0]. */
1549 while (count > 0)
1550 {
1551 count--;
1552 if (save)
1553 {
/* Save: write out the register's previous contents (if any), then reload
it from the next local; the store of that value is deferred. */
1554 if (tmp1next)
1555 {
1556 if (!tmp1empty)
1557 {
1558 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1559 stackptr += sizeof(sljit_w);
1560 }
1561 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1562 tmp1empty = FALSE;
1563 tmp1next = FALSE;
1564 }
1565 else
1566 {
1567 if (!tmp2empty)
1568 {
1569 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1570 stackptr += sizeof(sljit_w);
1571 }
1572 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1573 tmp2empty = FALSE;
1574 tmp1next = TRUE;
1575 }
1576 }
1577 else
1578 {
/* Restore: store the preloaded register into the local, then refill the
register from the stack while unread slots remain. */
1579 if (tmp1next)
1580 {
1581 SLJIT_ASSERT(!tmp1empty);
1582 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1583 tmp1empty = stackptr >= stacktop;
1584 if (!tmp1empty)
1585 {
1586 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1587 stackptr += sizeof(sljit_w);
1588 }
1589 tmp1next = FALSE;
1590 }
1591 else
1592 {
1593 SLJIT_ASSERT(!tmp2empty);
1594 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1595 tmp2empty = stackptr >= stacktop;
1596 if (!tmp2empty)
1597 {
1598 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1599 stackptr += sizeof(sljit_w);
1600 }
1601 tmp1next = TRUE;
1602 }
1603 }
1604 }
1605 }
1606
1607 if (save)
1608 {
/* Flush the values still held in the staging registers. The register that
would have been reused next holds the older value, so it is written first. */
1609 if (tmp1next)
1610 {
1611 if (!tmp1empty)
1612 {
1613 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1614 stackptr += sizeof(sljit_w);
1615 }
1616 if (!tmp2empty)
1617 {
1618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1619 stackptr += sizeof(sljit_w);
1620 }
1621 }
1622 else
1623 {
1624 if (!tmp2empty)
1625 {
1626 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1627 stackptr += sizeof(sljit_w);
1628 }
1629 if (!tmp1empty)
1630 {
1631 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1632 stackptr += sizeof(sljit_w);
1633 }
1634 }
1635 }
1636 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1637 }
1638
1639 #undef CASE_ITERATOR_LOCAL1
1640 #undef CASE_ITERATOR_LOCAL2A
1641 #undef CASE_ITERATOR_LOCAL2B
1642 #undef CASE_ITERATOR_TYPE_LOCAL1
1643 #undef CASE_ITERATOR_TYPE_LOCAL2A
1644 #undef CASE_ITERATOR_TYPE_LOCAL2B
1645
1646 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1647 {
1648 return (value & (value - 1)) == 0;
1649 }
1650
1651 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1652 {
1653 while (list)
1654 {
1655 /* sljit_set_label is clever enough to do nothing
1656 if either the jump or the label is NULL */
1657 sljit_set_label(list->jump, label);
1658 list = list->next;
1659 }
1660 }
1661
1662 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1663 {
1664 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1665 if (list_item)
1666 {
1667 list_item->next = *list;
1668 list_item->jump = jump;
1669 *list = list_item;
1670 }
1671 }
1672
1673 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1674 {
1675 DEFINE_COMPILER;
1676 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1677
1678 if (list_item)
1679 {
1680 list_item->type = type;
1681 list_item->data = data;
1682 list_item->start = start;
1683 list_item->quit = LABEL();
1684 list_item->next = common->stubs;
1685 common->stubs = list_item;
1686 }
1687 }
1688
1689 static void flush_stubs(compiler_common *common)
1690 {
1691 DEFINE_COMPILER;
1692 stub_list* list_item = common->stubs;
1693
1694 while (list_item)
1695 {
1696 JUMPHERE(list_item->start);
1697 switch(list_item->type)
1698 {
1699 case stack_alloc:
1700 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1701 break;
1702 }
1703 JUMPTO(SLJIT_JUMP, list_item->quit);
1704 list_item = list_item->next;
1705 }
1706 common->stubs = NULL;
1707 }
1708
1709 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1710 {
1711 DEFINE_COMPILER;
1712
1713 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1714 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1715 }
1716
1717 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1718 {
1719 /* May destroy all locals and registers except TMP2. */
1720 DEFINE_COMPILER;
1721
1722 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1723 #ifdef DESTROY_REGISTERS
1724 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1725 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1726 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1727 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1728 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1729 #endif
1730 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1731 }
1732
1733 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1734 {
1735 DEFINE_COMPILER;
1736 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1737 }
1738
1739 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1740 {
1741 DEFINE_COMPILER;
1742 struct sljit_label *loop;
1743 int i;
1744 /* At this point we can freely use all temporary registers. */
1745 /* TMP1 returns with begin - 1. */
1746 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1747 if (length < 8)
1748 {
1749 for (i = 0; i < length; i++)
1750 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1751 }
1752 else
1753 {
1754 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1755 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1756 loop = LABEL();
1757 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1758 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1759 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1760 }
1761 }
1762
/* Emits the code that publishes a successful match to the caller:
  - OVECTOR(1) is set to the current STR_PTR (its previous value is kept in
    SLJIT_SAVED_REG3 for the return-value scan below);
  - when the pattern uses a mark (common->mark_ptr != 0), the mark local is
    copied to jit_arguments.mark_ptr;
  - offsetcount entries of the internal ovector are converted to offsets
    relative to jit_arguments.begin and stored as ints in jit_arguments.offsets;
  - SLJIT_RETURN_REG receives the match return value: 1 when topbracket <= 1,
    otherwise the count produced by the downward scan at the end.
The registers used here are overwritten freely; see the comment below. */
1763 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1764 {
1765 DEFINE_COMPILER;
1766 struct sljit_label *loop;
1767 struct sljit_jump *earlyexit;
1768
1769 /* At this point we can freely use all registers. */
1770 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1772
/* TEMPORARY_REG1 = arguments, TEMPORARY_REG2 = offsetcount,
TEMPORARY_REG3 = mark value first, then the output pointer. */
1773 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1774 if (common->mark_ptr != 0)
1775 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1776 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1777 if (common->mark_ptr != 0)
1778 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
/* The output pointer starts one int before offsets because each store in
the loop addresses pointer + sizeof(int). */
1779 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
/* From here on TEMPORARY_REG1 holds begin instead of the arguments pointer. */
1780 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1781 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1782 /* Unlikely, but possible */
1783 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1784 loop = LABEL();
1785 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1786 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1787 /* Copy the integer value to the output buffer */
1788 #ifdef COMPILE_PCRE16
/* The pointer difference is halved in the 16-bit build before it is stored. */
1789 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1790 #endif
1791 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1792 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1793 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1794 JUMPHERE(earlyexit);
1795
1796 /* Calculate the return value, which is the maximum ovector value. */
1797 if (topbracket > 1)
1798 {
/* Scan the start slots from the highest bracket downwards, decrementing the
candidate count while a slot still equals the value saved in SAVED_REG3. */
1799 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1800 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1801
1802 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1803 loop = LABEL();
1804 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1805 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1806 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1807 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1808 }
1809 else
1810 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1811 }
1812
/* Emits the code that returns PCRE_ERROR_PARTIAL. SLJIT_RETURN_REG is set to
PCRE_ERROR_PARTIAL; when jit_arguments.offsetcount is at least 2, the first
two entries of jit_arguments.offsets are filled in as well:
  offsets[0] = partial match start - begin
  offsets[1] = STR_END - begin
The start is read from common->start_used_ptr in hard partial mode and from
common->hit_start otherwise. Control always continues at the quit label. */
1813 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1814 {
1815 DEFINE_COMPILER;
1816
/* STR_END is overwritten below through its SLJIT_SAVED_REG2 alias. */
1817 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1818 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1819
1820 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1821 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1822 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
/* Without room for two offsets only the return code is produced. */
1823 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, quit);
1824
1825 /* Store match begin and end. */
1826 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
/* TEMPORARY_REG2 switches from the arguments pointer to the offsets array. */
1827 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1828 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1829 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1830 #ifdef COMPILE_PCRE16
/* The pointer difference is halved in the 16-bit build before it is stored. */
1831 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1832 #endif
1833 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1834
1835 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1836 #ifdef COMPILE_PCRE16
1837 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1838 #endif
1839 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1840
1841 JUMPTO(SLJIT_JUMP, quit);
1842 }
1843
1844 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1845 {
1846 /* May destroy TMP1. */
1847 DEFINE_COMPILER;
1848 struct sljit_jump *jump;
1849
1850 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1851 {
1852 /* The value of -1 must be kept for start_used_ptr! */
1853 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1854 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1855 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1856 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1857 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1858 JUMPHERE(jump);
1859 }
1860 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1861 {
1862 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1863 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1864 JUMPHERE(jump);
1865 }
1866 }
1867
1868 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1869 {
1870 /* Detects if the character has an othercase. */
1871 unsigned int c;
1872
1873 #ifdef SUPPORT_UTF
1874 if (common->utf)
1875 {
1876 GETCHAR(c, cc);
1877 if (c > 127)
1878 {
1879 #ifdef SUPPORT_UCP
1880 return c != UCD_OTHERCASE(c);
1881 #else
1882 return FALSE;
1883 #endif
1884 }
1885 #ifndef COMPILE_PCRE8
1886 return common->fcc[c] != c;
1887 #endif
1888 }
1889 else
1890 #endif
1891 c = *cc;
1892 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1893 }
1894
1895 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1896 {
1897 /* Returns with the othercase. */
1898 #ifdef SUPPORT_UTF
1899 if (common->utf && c > 127)
1900 {
1901 #ifdef SUPPORT_UCP
1902 return UCD_OTHERCASE(c);
1903 #else
1904 return c;
1905 #endif
1906 }
1907 #endif
1908 return TABLE_GET(c, common->fcc, c);
1909 }
1910
1911 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1912 {
1913 /* Detects if the character and its othercase has only 1 bit difference. */
1914 unsigned int c, oc, bit;
1915 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1916 int n;
1917 #endif
1918
1919 #ifdef SUPPORT_UTF
1920 if (common->utf)
1921 {
1922 GETCHAR(c, cc);
1923 if (c <= 127)
1924 oc = common->fcc[c];
1925 else
1926 {
1927 #ifdef SUPPORT_UCP
1928 oc = UCD_OTHERCASE(c);
1929 #else
1930 oc = c;
1931 #endif
1932 }
1933 }
1934 else
1935 {
1936 c = *cc;
1937 oc = TABLE_GET(c, common->fcc, c);
1938 }
1939 #else
1940 c = *cc;
1941 oc = TABLE_GET(c, common->fcc, c);
1942 #endif
1943
1944 SLJIT_ASSERT(c != oc);
1945
1946 bit = c ^ oc;
1947 /* Optimized for English alphabet. */
1948 if (c <= 127 && bit == 0x20)
1949 return (0 << 8) | 0x20;
1950
1951 /* Since c != oc, they must have at least 1 bit difference. */
1952 if (!ispowerof2(bit))
1953 return 0;
1954
1955 #ifdef COMPILE_PCRE8
1956
1957 #ifdef SUPPORT_UTF
1958 if (common->utf && c > 127)
1959 {
1960 n = GET_EXTRALEN(*cc);
1961 while ((bit & 0x3f) == 0)
1962 {
1963 n--;
1964 bit >>= 6;
1965 }
1966 return (n << 8) | bit;
1967 }
1968 #endif /* SUPPORT_UTF */
1969 return (0 << 8) | bit;
1970
1971 #else /* COMPILE_PCRE8 */
1972
1973 #ifdef COMPILE_PCRE16
1974 #ifdef SUPPORT_UTF
1975 if (common->utf && c > 65535)
1976 {
1977 if (bit >= (1 << 10))
1978 bit >>= 10;
1979 else
1980 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1981 }
1982 #endif /* SUPPORT_UTF */
1983 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1984 #endif /* COMPILE_PCRE16 */
1985
1986 #endif /* COMPILE_PCRE8 */
1987 }
1988
1989 static void check_partial(compiler_common *common, BOOL force)
1990 {
1991 /* Checks whether a partial matching is occured. Does not modify registers. */
1992 DEFINE_COMPILER;
1993 struct sljit_jump *jump = NULL;
1994
1995 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1996
1997 if (common->mode == JIT_COMPILE)
1998 return;
1999
2000 if (!force)
2001 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2002 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2003 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2004
2005 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2006 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2007 else
2008 {
2009 if (common->partialmatchlabel != NULL)
2010 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2011 else
2012 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2013 }
2014
2015 if (jump != NULL)
2016 JUMPHERE(jump);
2017 }
2018
2019 static struct sljit_jump *check_str_end(compiler_common *common)
2020 {
2021 /* Does not affect registers. Usually used in a tight spot. */
2022 DEFINE_COMPILER;
2023 struct sljit_jump *jump;
2024 struct sljit_jump *nohit;
2025 struct sljit_jump *return_value;
2026
2027 if (common->mode == JIT_COMPILE)
2028 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2029
2030 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2031 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2032 {
2033 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2034 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2035 JUMPHERE(nohit);
2036 return_value = JUMP(SLJIT_JUMP);
2037 }
2038 else
2039 {
2040 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2041 if (common->partialmatchlabel != NULL)
2042 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2043 else
2044 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2045 }
2046 JUMPHERE(jump);
2047 return return_value;
2048 }
2049
2050 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2051 {
2052 DEFINE_COMPILER;
2053 struct sljit_jump *jump;
2054
2055 if (common->mode == JIT_COMPILE)
2056 {
2057 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2058 return;
2059 }
2060
2061 /* Partial matching mode. */
2062 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2063 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2064 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2065 {
2066 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2067 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2068 }
2069 else
2070 {
2071 if (common->partialmatchlabel != NULL)
2072 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2073 else
2074 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2075 }
2076 JUMPHERE(jump);
2077 }
2078
2079 static void read_char(compiler_common *common)
2080 {
2081 /* Reads the character into TMP1, updates STR_PTR.
2082 Does not check STR_END. TMP2 Destroyed. */
2083 DEFINE_COMPILER;
2084 #ifdef SUPPORT_UTF
2085 struct sljit_jump *jump;
2086 #endif
2087
2088 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2089 #ifdef SUPPORT_UTF
2090 if (common->utf)
2091 {
2092 #ifdef COMPILE_PCRE8
2093 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2094 #else
2095 #ifdef COMPILE_PCRE16
2096 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2097 #endif
2098 #endif /* COMPILE_PCRE8 */
2099 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2100 JUMPHERE(jump);
2101 }
2102 #endif
2103 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2104 }
2105
2106 static void peek_char(compiler_common *common)
2107 {
2108 /* Reads the character into TMP1, keeps STR_PTR.
2109 Does not check STR_END. TMP2 Destroyed. */
2110 DEFINE_COMPILER;
2111 #ifdef SUPPORT_UTF
2112 struct sljit_jump *jump;
2113 #endif
2114
2115 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2116 #ifdef SUPPORT_UTF
2117 if (common->utf)
2118 {
2119 #ifdef COMPILE_PCRE8
2120 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2121 #else
2122 #ifdef COMPILE_PCRE16
2123 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2124 #endif
2125 #endif /* COMPILE_PCRE8 */
2126 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2127 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2128 JUMPHERE(jump);
2129 }
2130 #endif
2131 }
2132
2133 static void read_char8_type(compiler_common *common)
2134 {
2135 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2136 DEFINE_COMPILER;
2137 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2138 struct sljit_jump *jump;
2139 #endif
2140
2141 #ifdef SUPPORT_UTF
2142 if (common->utf)
2143 {
2144 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2145 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2146 #ifdef COMPILE_PCRE8
2147 /* This can be an extra read in some situations, but hopefully
2148 it is needed in most cases. */
2149 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2150 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2151 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2152 JUMPHERE(jump);
2153 #else
2154 #ifdef COMPILE_PCRE16
2155 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2156 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2157 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2158 JUMPHERE(jump);
2159 /* Skip low surrogate if necessary. */
2160 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2161 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2162 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2163 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2164 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2165 #endif
2166 #endif /* COMPILE_PCRE8 */
2167 return;
2168 }
2169 #endif
2170 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2171 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2172 #ifdef COMPILE_PCRE16
2173 /* The ctypes array contains only 256 values. */
2174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2175 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2176 #endif
2177 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2178 #ifdef COMPILE_PCRE16
2179 JUMPHERE(jump);
2180 #endif
2181 }
2182
2183 static void skip_char_back(compiler_common *common)
2184 {
2185 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2186 DEFINE_COMPILER;
2187 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2188 struct sljit_label *label;
2189
2190 if (common->utf)
2191 {
2192 label = LABEL();
2193 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2194 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2195 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2196 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2197 return;
2198 }
2199 #endif
2200 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2201 if (common->utf)
2202 {
2203 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2204 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2205 /* Skip low surrogate if necessary. */
2206 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2207 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2208 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2209 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2210 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2211 return;
2212 }
2213 #endif
2214 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2215 }
2216
2217 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2218 {
2219 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2220 DEFINE_COMPILER;
2221
2222 if (nltype == NLTYPE_ANY)
2223 {
2224 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2225 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2226 }
2227 else if (nltype == NLTYPE_ANYCRLF)
2228 {
2229 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2230 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2231 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2232 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2233 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2234 }
2235 else
2236 {
2237 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2238 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2239 }
2240 }
2241
2242 #ifdef SUPPORT_UTF
2243
2244 #ifdef COMPILE_PCRE8
2245 static void do_utfreadchar(compiler_common *common)
2246 {
2247 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2248 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
/* The code is emitted as a fast-call routine (fast_enter / fast_return on
RETURN_ADDR). The continuation bytes are loaded from STR_PTR + 1 onwards, and
STR_PTR is advanced by the number of continuation bytes consumed. Continuation
bytes are only masked with 0x3f; nothing here validates them. */
2249 DEFINE_COMPILER;
2250 struct sljit_jump *jump;
2251
2252 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2253 /* Searching for the first zero. */
/* Bit 0x20 of the lead byte set: the sequence is longer than two bytes. */
2254 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2255 jump = JUMP(SLJIT_C_NOT_ZERO);
2256 /* Two byte sequence. */
/* value = ((lead & 0x1f) << 6) | (byte1 & 0x3f) */
2257 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2258 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2259 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2260 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2261 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2262 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2263 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2264 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2265 JUMPHERE(jump);
2266
/* Bit 0x10 of the lead byte set: the sequence is decoded as four bytes. */
2267 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2268 jump = JUMP(SLJIT_C_NOT_ZERO);
2269 /* Three byte sequence. */
/* value = ((lead & 0x0f) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f) */
2270 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2271 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2272 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2273 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2274 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2275 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2276 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2277 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2278 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2279 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2280 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2281 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2282 JUMPHERE(jump);
2283
2284 /* Four byte sequence. */
/* value = ((lead & 0x07) << 18) | ((byte1 & 0x3f) << 12)
         | ((byte2 & 0x3f) << 6) | (byte3 & 0x3f) */
2285 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2286 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2287 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2288 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2289 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2290 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2291 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2292 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2293 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2294 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2295 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2296 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2297 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2298 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2299 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2300 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2301 }
2302
2303 static void do_utfreadtype8(compiler_common *common)
2304 {
2305 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2306 of the character (>= 0xc0). Return value in TMP1. */
/* The code is emitted as a fast-call routine. The second byte is loaded from
STR_PTR + 0, so STR_PTR is presumably already past the lead byte on entry
(NOTE(review): confirm against the caller). STR_PTR is advanced over the
remaining bytes of the character. TMP1 is loaded from the ctypes table when
the decoded value is at most 255 and is set to 0 otherwise. */
2307 DEFINE_COMPILER;
2308 struct sljit_jump *jump;
2309 struct sljit_jump *compare;
2310
2311 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2312
/* Bit 0x20 of the lead byte set: the sequence is longer than two bytes. */
2313 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2314 jump = JUMP(SLJIT_C_NOT_ZERO);
2315 /* Two byte sequence. */
/* TMP2 = ((lead & 0x1f) << 6) | (next & 0x3f) */
2316 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2317 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2318 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2319 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2320 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2321 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* Values above 255 have no entry in the ctypes table. */
2322 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2323 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2324 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2325
2326 JUMPHERE(compare);
2327 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2328 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2329 JUMPHERE(jump);
2330
2331 /* We only have types for characters less than 256. */
/* Longer sequences are not decoded: STR_PTR is advanced by the byte read from
utf8_table4[lead - 0xc0] and a zero type is returned. */
2332 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
2333 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2334 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2335 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2336 }
2337
2338 #else /* COMPILE_PCRE8 */
2339
2340 #ifdef COMPILE_PCRE16
2341 static void do_utfreadchar(compiler_common *common)
2342 {
2343 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2344 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
/* The code is emitted as a fast-call routine. The second code unit is loaded
from STR_PTR + 1 unit, and STR_PTR is advanced by one unit when a pair is
combined. */
2345 DEFINE_COMPILER;
2346 struct sljit_jump *jump;
2347
2348 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Values below 0xdc00 are combined with the following unit. */
2349 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2350 /* Do nothing, only return. */
/* NOTE(review): TMP1, TMP2 and STR_PTR are left untouched on this path, so
TMP2 does not receive a length here. Confirm that no caller relies on it. */
2351 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2352
2353 JUMPHERE(jump);
2354 /* Combine two 16 bit characters. */
/* value = (((first & 0x3ff) << 10) | (second & 0x3ff)) + 0x10000 */
2355 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2356 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2357 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2358 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2359 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2360 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2361 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2362 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2363 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2364 }
2365 #endif /* COMPILE_PCRE16 */
2366
2367 #endif /* COMPILE_PCRE8 */
2368
2369 #endif /* SUPPORT_UTF */
2370
2371 #ifdef SUPPORT_UCP
2372
2373 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift that split a character value into a block index (upper bits)
and an offset within the 128 entry block (lower 7 bits). */
2374 #define UCD_BLOCK_MASK 127
2375 #define UCD_BLOCK_SHIFT 7
2376
2377 static void do_getucd(compiler_common *common)
2378 {
2379 /* Search the UCD record for the character comes in TMP1.
2380 Returns chartype in TMP1 and UCD offset in TMP2. */
/* The code is emitted as a fast-call routine performing a two stage lookup:
  block  = ucd_stage1[ch >> UCD_BLOCK_SHIFT]                       (byte load)
  record = ucd_stage2[(block << UCD_BLOCK_SHIFT) + (ch & UCD_BLOCK_MASK)]
                                                                (16 bit load)
  type   = chartype byte of ucd_records[record]
The value left in TMP2 is the stage 2 entry, i.e. the record index. */
2381 DEFINE_COMPILER;
2382
/* The shift amounts used below depend on these two sizes. */
2383 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2384
2385 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2386 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2387 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
2388 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2389 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2390 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2391 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
/* Index shifted by 1: the stage 2 entries are read as 16 bit values. */
2392 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2393 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
/* Index shifted by 3: each ucd_record is 8 bytes (asserted above). */
2394 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2395 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2396 }
2397 #endif
2398
/* Emits the code that moves STR_PTR to the next starting position and returns
the label of that code. The emitted layout is:
  - optional first line scan (firstline), which stores the end of the first
    line in the first_line_end local;
  - an unconditional jump (start) that skips the advancing code below;
  - optional handling of a two unit newline (newlinecheck);
  - the returned label (mainloop), followed by code that advances STR_PTR by
    one character (all of its code units in UTF mode).
common    - compiler state.
hascrorlf - when TRUE the two unit newline skipping is not emitted.
firstline - when TRUE the first line end is computed first.
Returns the label of the advancing code. */
2399 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2400 {
2401 DEFINE_COMPILER;
2402 struct sljit_label *mainloop;
2403 struct sljit_label *newlinelabel = NULL;
2404 struct sljit_jump *start;
2405 struct sljit_jump *end = NULL;
2406 struct sljit_jump *nl = NULL;
2407 #ifdef SUPPORT_UTF
2408 struct sljit_jump *singlechar;
2409 #endif
2410 jump_list *newline = NULL;
2411 BOOL newlinecheck = FALSE;
2412 BOOL readuchar = FALSE;
2413
/* The newline pair skipping is emitted only for newline types that can span
two code units, and only when neither hascrorlf nor firstline is set. */
2414 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2415 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2416 newlinecheck = TRUE;
2417
2418 if (firstline)
2419 {
2420 /* Search for the end of the first line. */
2421 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is saved in LOCALS0 and restored after the scan; first_line_end
starts out as STR_END. */
2422 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
2423 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0);
2424
2425 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2426 {
/* Two unit fixed newline: compare the unit pair at STR_PTR - 1 / STR_PTR
with the high and low byte of common->newline. On a match first_line_end
is set to the position of the first unit of the pair. */
2427 mainloop = LABEL();
2428 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2429 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2430 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2431 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2432 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2433 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2434 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2435 }
2436 else
2437 {
2438 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2439 mainloop = LABEL();
2440 /* Continual stores does not cause data dependency. */
/* first_line_end is rewritten before every character, so when the newline
test below leaves the loop it holds the position of the newline character. */
2441 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2442 read_char(common);
2443 check_newlinechar(common, common->nltype, &newline, TRUE);
2444 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
/* No newline was found before STR_END. */
2445 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2446 set_jumps(newline, LABEL());
2447 }
2448
2449 JUMPHERE(end);
2450 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2451 }
2452
/* This jump is bound after the advancing code, so execution that falls in
from above does not move STR_PTR. */
2453 start = JUMP(SLJIT_JUMP);
2454
2455 if (newlinecheck)
2456 {
/* Reached from the main loop when the current unit equals the high byte of
common->newline. STR_PTR is advanced by one unit, and by one more when the
next unit (if any) equals the low byte of common->newline. */
2457 newlinelabel = LABEL();
2458 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2459 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2460 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2461 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2462 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2463 #ifdef COMPILE_PCRE16
/* Convert the 0 / 1 flag to a byte offset of one 16 bit unit. */
2464 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2465 #endif
2466 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2467 nl = JUMP(SLJIT_JUMP);
2468 }
2469
/* The label returned to the caller. */
2470 mainloop = LABEL();
2471
2472 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit has to be loaded when it is inspected below: for the UTF
length computation or for the newline test. */
2473 #ifdef SUPPORT_UTF
2474 if (common->utf) readuchar = TRUE;
2475 #endif
2476 if (newlinecheck) readuchar = TRUE;
2477
2478 if (readuchar)
2479 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2480
2481 if (newlinecheck)
2482 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2483
2484 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2485 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2486 if (common->utf)
2487 {
/* Lead bytes >= 0xc0: also skip utf8_table4[lead - 0xc0] further bytes. */
2488 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2489 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2490 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2491 JUMPHERE(singlechar);
2492 }
2493 #endif
2494 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2495 if (common->utf)
2496 {
/* Units whose top six bits equal those of 0xd800: skip one more unit
(flag shifted by 1 to get a byte offset). */
2497 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2498 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2499 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2500 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2501 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2502 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2503 JUMPHERE(singlechar);
2504 }
2505 #endif
2506 JUMPHERE(start);
2507
2508 if (newlinecheck)
2509 {
2510 JUMPHERE(end);
2511 JUMPHERE(nl);
2512 }
2513
2514 return mainloop;
2515 }
2516
2517 static SLJIT_INLINE BOOL fast_forward_first_two_chars(compiler_common *common, BOOL firstline)
2518 {
2519 DEFINE_COMPILER;
2520 struct sljit_label *start;
2521 struct sljit_jump *quit;
2522 struct sljit_jump *found;
2523 pcre_int32 chars[4];
2524 pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
2525 int location = 0;
2526 pcre_int32 len, c, bit, caseless;
2527 BOOL must_end;
2528
2529 #ifdef COMPILE_PCRE8
2530 union {
2531 sljit_uh ascombined;
2532 sljit_ub asuchars[2];
2533 } pair;
2534 #else
2535 union {
2536 sljit_ui ascombined;
2537 sljit_uh asuchars[2];
2538 } pair;
2539 #endif
2540
2541 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2542 return FALSE;
2543
2544 while (TRUE)
2545 {
2546 caseless = 0;
2547 must_end = TRUE;
2548 switch(*cc)
2549 {
2550 case OP_CHAR:
2551 must_end = FALSE;
2552 cc++;
2553 break;
2554
2555 case OP_CHARI:
2556 caseless = 1;
2557 must_end = FALSE;
2558 cc++;
2559 break;
2560
2561 case OP_SOD:
2562 case OP_SOM:
2563 case OP_SET_SOM:
2564 case OP_NOT_WORD_BOUNDARY:
2565 case OP_WORD_BOUNDARY:
2566 case OP_EODN:
2567 case OP_EOD:
2568 case OP_CIRC:
2569 case OP_CIRCM:
2570 case OP_DOLL:
2571 case OP_DOLLM:
2572 /* Zero width assertions. */
2573 cc++;
2574 continue;
2575
2576 case OP_PLUS:
2577 case OP_MINPLUS:
2578 case OP_POSPLUS:
2579 cc++;
2580 break;
2581
2582 case OP_EXACT:
2583 cc += 1 + IMM2_SIZE;
2584 break;
2585
2586 case OP_PLUSI:
2587 case OP_MINPLUSI:
2588 case OP_POSPLUSI:
2589 caseless = 1;
2590 cc++;
2591 break;
2592
2593 case OP_EXACTI:
2594 caseless = 1;
2595 cc += 1 + IMM2_SIZE;
2596 break;
2597
2598 default:
2599 return FALSE;
2600 }
2601
2602 len = 1;
2603 #ifdef SUPPORT_UTF
2604 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2605 #endif
2606
2607 if (caseless && char_has_othercase(common, cc))
2608 {
2609 caseless = char_get_othercase_bit(common, cc);
2610 if (caseless == 0)
2611 return FALSE;
2612 #ifdef COMPILE_PCRE8
2613 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2614 #else
2615 if ((caseless & 0x100) != 0)
2616 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2617 else
2618 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2619 #endif
2620 }
2621 else
2622 caseless = 0;
2623
2624 while (len > 0 && location < 2 * 2)
2625 {
2626 c = *cc;
2627 bit = 0;
2628 if (len == (caseless & 0xff))
2629 {
2630 bit = caseless >> 8;
2631 c |= bit;
2632 }
2633
2634 chars[location] = c;
2635 chars[location + 1] = bit;
2636
2637 len--;
2638 location += 2;
2639 cc++;
2640 }
2641
2642 if (location == 2 * 2)
2643 break;
2644 else if (must_end)
2645 return FALSE;
2646 }
2647
2648 if (firstline)
2649 {
2650 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2651 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, 1);
2652 }
2653 else
2654 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2655
2656 start = LABEL();
2657 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2658 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2659 #ifdef COMPILE_PCRE8
2660 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2661 #else /* COMPILE_PCRE8 */
2662 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2663 #endif
2664
2665 #else /* SLJIT_UNALIGNED */
2666
2667 #if defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN
2668 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2669 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2670 #else /* SLJIT_BIG_ENDIAN */
2671 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2672 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2673 #endif /* SLJIT_BIG_ENDIAN */
2674
2675 #ifdef COMPILE_PCRE8
2676 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 8);
2677 #else /* COMPILE_PCRE8 */
2678 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
2679 #endif
2680 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2681
2682 #endif
2683
2684 if (chars[1] != 0 || chars[3] != 0)
2685 {
2686 pair.asuchars[0] = chars[1];
2687 pair.asuchars[1] = chars[3];
2688 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, pair.ascombined);
2689 }
2690
2691 pair.asuchars[0] = chars[0];
2692 pair.asuchars[1] = chars[2];
2693 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, pair.ascombined);
2694
2695 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2696 JUMPTO(SLJIT_JUMP, start);
2697 JUMPHERE(found);
2698 JUMPHERE(quit);
2699
2700 if (firstline)
2701 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2702 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2703 return TRUE;
2704 }
2705
/* Emits a loop that moves STR_PTR forward, one code unit at a time, until the
code unit at STR_PTR equals first_char (or its other case when caseless is
set) or STR_PTR reaches STR_END.
common     - compiler state.
first_char - the code unit to search for.
caseless   - when TRUE the other case of first_char is accepted as well.
firstline  - when TRUE the search is limited by first_line_end: STR_END is
             temporarily replaced and its original value is kept in the
             POSSESSIVE0 local. */
2706 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2707 {
2708 DEFINE_COMPILER;
2709 struct sljit_label *start;
2710 struct sljit_jump *quit;
2711 struct sljit_jump *found;
2712 pcre_uchar oc, bit;
2713
2714 if (firstline)
2715 {
2716 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2717 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2718 }
2719
2720 start = LABEL();
2721 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2722 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2723
/* oc is the other case of first_char, or first_char itself when there is
none (or the match is case sensitive). */
2724 oc = first_char;
2725 if (caseless)
2726 {
2727 oc = TABLE_GET(first_char, common->fcc, first_char);
2728 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2729 if (first_char > 127 && common->utf)
2730 oc = UCD_OTHERCASE(first_char);
2731 #endif
2732 }
2733 if (first_char == oc)
2734 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2735 else
2736 {
2737 bit = first_char ^ oc;
2738 if (ispowerof2(bit))
2739 {
/* The two cases differ in a single bit: force that bit on and compare once. */
2740 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2741 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2742 }
2743 else
2744 {
/* TMP2 = (TMP1 == first_char) | (TMP1 == oc); the OR also sets the zero flag. */
2745 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2746 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2747 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2748 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2749 found = JUMP(SLJIT_C_NOT_ZERO);
2750 }
2751 }
2752
/* No match at this position: step one code unit and try again. */
2753 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2754 JUMPTO(SLJIT_JUMP, start);
2755 JUMPHERE(found);
2756 JUMPHERE(quit);
2757
2758 if (firstline)
2759 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2760 }
2761
/* Emits code that moves STR_PTR forward to the position that follows the next
newline. Nothing is skipped when STR_PTR is not above the str field of
jit_arguments. The scan starts one character before STR_PTR, so a newline that
ends right at STR_PTR leaves STR_PTR unchanged.
common    - compiler state.
firstline - when TRUE the search is limited by first_line_end: STR_END is
            temporarily replaced and its original value is kept in the
            POSSESSIVE0 local. */
2762 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2763 {
2764 DEFINE_COMPILER;
2765 struct sljit_label *loop;
2766 struct sljit_jump *lastchar;
2767 struct sljit_jump *firstchar;
2768 struct sljit_jump *quit;
2769 struct sljit_jump *foundcr = NULL;
2770 struct sljit_jump *notfoundnl;
2771 jump_list *newline = NULL;
2772
2773 if (firstline)
2774 {
2775 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2776 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2777 }
2778
2779 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2780 {
/* Fixed two unit newline. TMP2 = arguments->str, TMP1 = arguments->begin. */
2781 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2782 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2783 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2784 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2785 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2786
/* Step back one unit when at least two units precede STR_PTR (measured
from begin), so that the first pair tested below ends right before the
original STR_PTR. */
2787 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2788 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2789 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2790 #ifdef COMPILE_PCRE16
2791 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2792 #endif
2793 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2794
/* Advance until the two units before STR_PTR are the newline pair. */
2795 loop = LABEL();
2796 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2797 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2798 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2799 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2800 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2801 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2802
2803 JUMPHERE(quit);
2804 JUMPHERE(firstchar);
2805 JUMPHERE(lastchar);
2806
2807 if (firstline)
2808 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2809 return;
2810 }
2811
/* Other newline types: step back one character, then read characters until
one of them is a newline or STR_END is reached. */
2812 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2813 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2814 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2815 skip_char_back(common);
2816
2817 loop = LABEL();
2818 read_char(common);
2819 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* CR is handled separately because it may be followed by NL. */
2820 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2821 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* jumpiftrue is FALSE: the collected jumps are taken when the character is
not a newline, and they continue the loop. */
2822 check_newlinechar(common, common->nltype, &newline, FALSE);
2823 set_jumps(newline, loop);
2824
2825 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2826 {
2827 quit = JUMP(SLJIT_JUMP);
2828 JUMPHERE(foundcr);
/* After a CR: skip one more unit when the next unit is NL. */
2829 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2830 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2831 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2832 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2833 #ifdef COMPILE_PCRE16
2834 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2835 #endif
2836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2837 JUMPHERE(notfoundnl);
2838 JUMPHERE(quit);
2839 }
2840 JUMPHERE(lastchar);
2841 JUMPHERE(firstchar);
2842
2843 if (firstline)
2844 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2845 }
2846
/* Emits a loop that moves STR_PTR forward, one character at a time, until the
first code unit of the character at STR_PTR has its bit set in a bitmap, or
STR_PTR reaches STR_END.
common     - compiler state.
start_bits - used as the base address of the bitmap: bit (c & 7) of the byte
             at start_bits + (c >> 3) is tested.
firstline  - when TRUE the search is limited by first_line_end: STR_END is
             temporarily replaced and its original value is kept in the
             POSSESSIVE0 local. */
2847 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2848 {
2849 DEFINE_COMPILER;
2850 struct sljit_label *start;
2851 struct sljit_jump *quit;
2852 struct sljit_jump *found;
2853 #ifndef COMPILE_PCRE8
2854 struct sljit_jump *jump;
2855 #endif
2856
2857 if (firstline)
2858 {
2859 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2860 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2861 }
2862
2863 start = LABEL();
2864 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2865 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2866 #ifdef SUPPORT_UTF
/* TMP1 is overwritten by the bit test; keep the code unit in TMP3 for the
character length computation below. */
2867 if (common->utf)
2868 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2869 #endif
2870 #ifndef COMPILE_PCRE8
/* Code units that are not below 255 are tested as 255. */
2871 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2872 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2873 JUMPHERE(jump);
2874 #endif
/* TMP2 = 1 << (c & 7), TMP1 = byte at start_bits + (c >> 3). */
2875 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2876 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2877 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2878 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2879 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2880 found = JUMP(SLJIT_C_NOT_ZERO);
2881
2882 #ifdef SUPPORT_UTF
2883 if (common->utf)
2884 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2885 #endif
2886 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2887 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2888 if (common->utf)
2889 {
/* Lead bytes >= 0xc0: also skip utf8_table4[lead - 0xc0] further bytes. */
2890 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2891 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2892 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2893 }
2894 #endif
2895 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2896 if (common->utf)
2897 {
/* Units whose top six bits equal those of 0xd800: skip one more unit. */
2898 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2899 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2900 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2901 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2902 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2903 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2904 }
2905 #endif
2906 JUMPTO(SLJIT_JUMP, start);
2907 JUMPHERE(found);
2908 JUMPHERE(quit);
2909
2910 if (firstline)
2911 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2912 }
2913
/* Emits code that searches the subject for req_char, starting at STR_PTR (or
one code unit after it when has_firstchar is set) and ending at STR_END. The
position of a hit is stored in the req_char_ptr local. The search is skipped
when STR_PTR + REQ_BYTE_MAX is still below STR_END, or when STR_PTR is below
the position stored in req_char_ptr.
common        - compiler state; common->req_char_ptr must be non-zero.
req_char      - the code unit to search for.
caseless      - when TRUE the other case of req_char is accepted as well.
has_firstchar - when TRUE the search starts one code unit after STR_PTR.
Returns the jump that is taken when the end of the subject is reached without
a hit; this jump is not bound here, the caller has to bind it. */
2914 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2915 {
2916 DEFINE_COMPILER;
2917 struct sljit_label *loop;
2918 struct sljit_jump *toolong;
2919 struct sljit_jump *alreadyfound;
2920 struct sljit_jump *found;
2921 struct sljit_jump *foundoc = NULL;
2922 struct sljit_jump *notfound;
2923 pcre_uchar oc, bit;
2924
2925 SLJIT_ASSERT(common->req_char_ptr != 0);
2926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2927 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2928 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2929 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2930
/* TMP1 is the scan pointer from here on. */
2931 if (has_firstchar)
2932 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2933 else
2934 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2935
2936 loop = LABEL();
2937 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2938
2939 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char, or req_char itself when there is none
(or the match is case sensitive). */
2940 oc = req_char;
2941 if (caseless)
2942 {
2943 oc = TABLE_GET(req_char, common->fcc, req_char);
2944 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2945 if (req_char > 127 && common->utf)
2946 oc = UCD_OTHERCASE(req_char);
2947 #endif
2948 }
2949 if (req_char == oc)
2950 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2951 else
2952 {
2953 bit = req_char ^ oc;
2954 if (ispowerof2(bit))
2955 {
/* The two cases differ in a single bit: force that bit on and compare once. */
2956 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2957 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2958 }
2959 else
2960 {
2961 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2962 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2963 }
2964 }
2965 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2966 JUMPTO(SLJIT_JUMP, loop);
2967
2968 JUMPHERE(found);
2969 if (foundoc)
2970 JUMPHERE(foundoc);
/* Remember where the character was found. */
2971 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2972 JUMPHERE(alreadyfound);
2973 JUMPHERE(toolong);
2974 return notfound;
2975 }
2976
/* Emits a fast-call routine that walks the entries stored from STACK_TOP
upwards and applies them until a frame_end marker is read. TMP1 is the read
pointer, TMP2 the first word of the current entry and TMP3 the base address of
the local area. Entry kinds, selected by the first word:
  - greater than frame_end (signed): the word is an offset into the local
    area; the next two words are copied there (entry size: 3 words);
  - frame_end: the routine returns;
  - frame_setstrbegin: the next word is stored in OVECTOR(0) (2 words);
  - frame_setmark (only when mark_ptr is used): the next word is stored in
    the mark_ptr local (2 words);
  - anything else: skipped as a 2 word entry. */
2977 static void do_revertframes(compiler_common *common)
2978 {
2979 DEFINE_COMPILER;
2980 struct sljit_jump *jump;
2981 struct sljit_label *mainloop;
2982
2983 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2984 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2985 GET_LOCAL_BASE(TMP3, 0, 0);
2986
2987 /* Drop frames until we reach STACK_TOP. */
2988 mainloop = LABEL();
2989 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2990 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Restore two consecutive words of the local area. */
2991 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
2992 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2993 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2994 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2995 JUMPTO(SLJIT_JUMP, mainloop);
2996
2997 JUMPHERE(jump);
2998 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2999 /* End of dropping frames. */
3000 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3001
3002 JUMPHERE(jump);
3003 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
3004 /* Set string begin. */
3005 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3006 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3007 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3008 JUMPTO(SLJIT_JUMP, mainloop);
3009
3010 JUMPHERE(jump);
3011 if (common->mark_ptr != 0)
3012 {
/* Set mark. */
3013 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3014 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3015 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3016 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3017 JUMPTO(SLJIT_JUMP, mainloop);
3018
3019 JUMPHERE(jump);
3020 }
3021
3022 /* Unknown command. */
3023 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3024 JUMPTO(SLJIT_JUMP, mainloop);
3025 }
3026
/* Emits a fast-call routine that compares the "word character" property of
the character before STR_PTR with that of the character at STR_PTR. The return
address is kept in the LOCALS0 local and the property of the previous
character in LOCALS1. The last emitted operation XORs the two properties with
the equal / zero flag updated, so the flag tells whether they are the same.
STR_PTR has the same value on return as on entry: the step back is undone by
the read, and the second character is only peeked. */
3027 static void check_wordboundary(compiler_common *common)
3028 {
3029 DEFINE_COMPILER;
3030 struct sljit_jump *skipread;
3031 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3032 struct sljit_jump *jump;
3033 #endif
3034
/* The shift by 4 used below relies on this value. */
3035 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3036
3037 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3038 /* Get type of the previous char, and put it to LOCALS1. */
/* LOCALS1 stays 0 when STR_PTR is not above the begin field of jit_arguments. */
3039 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3040 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3042 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3043 skip_char_back(common);
3044 check_start_used_ptr(common);
3045 read_char(common);
3046
3047 /* Testing char type. */
3048 #ifdef SUPPORT_UCP
3049 if (common->use_ucp)
3050 {
/* TMP2 = 1 for an underscore; otherwise 1 when the chartype returned by
getucd lies in [ucp_Ll, ucp_Lu] or in [ucp_Nd, ucp_No] (unsigned range
checks after subtracting the lower bound), else 0. */
3051 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3052 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3053 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3054 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3055 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3056 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3057 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3058 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3059 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3060 JUMPHERE(jump);
3061 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3062 }
3063 else
3064 #endif
3065 {
/* Table based test: (ctypes[c] >> 4) & 1. Characters above 255 have no
table entry; the lookup is jumped over and LOCALS1 keeps its zero. */
3066 #ifndef COMPILE_PCRE8
3067 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3068 #elif defined SUPPORT_UTF
3069 /* Here LOCALS1 has already been zeroed. */
3070 jump = NULL;
3071 if (common->utf)
3072 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3073 #endif /* COMPILE_PCRE8 */
3074 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3075 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3076 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3077 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3078 #ifndef COMPILE_PCRE8
3079 JUMPHERE(jump);
3080 #elif defined SUPPORT_UTF
3081 if (jump != NULL)
3082 JUMPHERE(jump);
3083 #endif /* COMPILE_PCRE8 */
3084 }
3085 JUMPHERE(skipread);
3086
/* Property of the current character goes to TMP2. It stays 0 when the jump
returned by check_str_end is taken (presumably at the end of the subject;
check_str_end is defined elsewhere). */
3087 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3088 skipread = check_str_end(common);
3089 peek_char(common);
3090
3091 /* Testing char type. This is a code duplication. */
3092 #ifdef SUPPORT_UCP
3093 if (common->use_ucp)
3094 {
3095 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3096 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3097 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3098 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3099 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3100 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3101 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3102 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3103 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3104 JUMPHERE(jump);
3105 }
3106 else
3107 #endif
3108 {
3109 #ifndef COMPILE_PCRE8
3110 /* TMP2 may be destroyed by peek_char. */
3111 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3112 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3113 #elif defined SUPPORT_UTF
3114 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3115 jump = NULL;
3116 if (common->utf)
3117 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3118 #endif
3119 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3120 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3121 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3122 #ifndef COMPILE_PCRE8
3123 JUMPHERE(jump);
3124 #elif defined SUPPORT_UTF
3125 if (jump != NULL)
3126 JUMPHERE(jump);
3127 #endif /* COMPILE_PCRE8 */
3128 }
3129 JUMPHERE(skipread);
3130
/* Only the flag of the XOR is produced; the result itself is discarded. */
3131 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3132 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3133 }
3134
3135 /*
3136 range format:
3137
3138 ranges[0] = number of bit change positions stored (max MAX_RANGE_SIZE, -1 means invalid range).
3139 ranges[1] = first bit (0 or 1), i.e. the bit of character 0
3140 ranges[2 .. ranges[0] + 1] = positions of the bit changes (each entry is the first character whose bit differs from the bit of the previous character)
3141 */
3142
3143 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3144 {
3145 DEFINE_COMPILER;
3146 struct sljit_jump *jump;
3147
3148 if (ranges[0] < 0)
3149 return FALSE;
3150
3151 switch(ranges[0])
3152 {
3153 case 1:
3154 if (readch)
3155 read_char(common);
3156 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3157 return TRUE;
3158
3159 case 2:
3160 if (readch)
3161 read_char(common);
3162 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3163 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3164 return TRUE;
3165
3166 case 4:
3167 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3168 {
3169 if (readch)
3170 read_char(common);
3171 if (ranges[1] != 0)
3172 {
3173 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3174 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3175 }
3176 else
3177 {
3178 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3179 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3180 JUMPHERE(jump);
3181 }
3182 return TRUE;
3183 }
3184 return FALSE;
3185
3186 default:
3187 return FALSE;
3188 }
3189 }
3190
3191 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3192 {
3193 int i, bit, length;
3194 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3195
3196 bit = ctypes[0] & flag;
3197 ranges[1] = bit != 0 ? 1 : 0;
3198 length = 0;
3199
3200 for (i = 1; i < 256; i++)
3201 if ((ctypes[i] & flag) != bit)
3202 {
3203 if (length >= MAX_RANGE_SIZE)
3204 {
3205 ranges[0] = -1;
3206 return;
3207 }
3208 ranges[2 + length] = i;
3209 length++;
3210 bit ^= flag;
3211 }
3212
3213 if (bit != 0)
3214 {
3215 if (length >= MAX_RANGE_SIZE)
3216 {
3217 ranges[0] = -1;
3218 return;
3219 }
3220 ranges[2 + length] = 256;
3221 length++;
3222 }
3223 ranges[0] = length;
3224 }
3225
3226 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3227 {
3228 int ranges[2 + MAX_RANGE_SIZE];
3229 pcre_uint8 bit, cbit, all;
3230 int i, byte, length = 0;
3231
3232 bit = bits[0] & 0x1;
3233 ranges[1] = bit;
3234 /* Can be 0 or 255. */
3235 all = -bit;
3236
3237 for (i = 0; i < 256; )
3238 {
3239 byte = i >> 3;
3240 if ((i & 0x7) == 0 && bits[byte] == all)
3241 i += 8;
3242 else
3243 {
3244 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3245 if (cbit != bit)
3246 {
3247 if (length >= MAX_RANGE_SIZE)
3248 return FALSE;
3249 ranges[2 + length] = i;
3250 length++;
3251 bit = cbit;
3252 all = -cbit;
3253 }
3254 i++;
3255 }
3256 }
3257
3258 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3259 {
3260 if (length >= MAX_RANGE_SIZE)
3261 return FALSE;
3262 ranges[2 + length] = 256;
3263 length++;
3264 }
3265 ranges[0] = length;
3266
3267 return check_ranges(common, ranges, backtracks, FALSE);
3268 }
3269
3270 static void check_anynewline(compiler_common *common)
3271 {
3272 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* Generates a fast-call helper (entered with sljit_emit_fast_enter and left
with sljit_emit_fast_return through RETURN_ADDR). The partial results of the
individual tests are collected in TMP2 by the COND_VALUE operations, and the
last COND_VALUE also requests the equal flag (SLJIT_SET_E) - presumably this
is what the caller branches on; confirm at the call sites. TMP1 is modified
as well (0x0a is subtracted and, in the wide character path, bit 0 is set). */
3273 DEFINE_COMPILER;
3274
3275 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3276
/* Rebase to 0x0a so the range 0x0a - 0x0d becomes one unsigned comparison. */
3277 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3278 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3279 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
/* 0x85; its result is consumed by the next COND_VALUE, whichever path is compiled. */
3280 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3281 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3282 #ifdef COMPILE_PCRE8
/* In the 8 bit library the characters above 255 are tested only in UTF mode. */
3283 if (common->utf)
3284 {
3285 #endif
3286 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so one comparison tests both. */
3287 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3288 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3289 #ifdef COMPILE_PCRE8
3290 }
3291 #endif
3292 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
3293 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3294 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3295 }
3296
3297 static void check_hspace(compiler_common *common)
3298 {
3299 /* Check whether TMP1 contains a horizontal white space character. TMP2 destroyed. */
/* Generates a fast-call helper which compares TMP1 with 0x09, 0x20, 0xa0 and,
when wide characters are possible, with 0x1680, 0x180e, 0x2000 - 0x200a,
0x202f, 0x205f and 0x3000. The partial results are collected in TMP2 by the
COND_VALUE operations, and the last one also requests the equal flag
(SLJIT_SET_E) - presumably this is what the caller branches on; confirm at
the call sites. TMP1 is modified in the wide character path (0x2000 is
subtracted). */
3300 DEFINE_COMPILER;
3301
3302 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3303
3304 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3305 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3306 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3307 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* The result of this test is consumed by the next COND_VALUE, whichever path is compiled. */
3308 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3309 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3310 #ifdef COMPILE_PCRE8
/* In the 8 bit library the characters above 255 are tested only in UTF mode. */
3311 if (common->utf)
3312 {
3313 #endif
3314 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3315 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3316 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3317 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3318 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase to 0x2000: the range 0x2000 - 0x200a becomes one unsigned comparison,
and the remaining constants are expressed relative to 0x2000. */
3319 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3320 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3321 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3322 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3323 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3324 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3325 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3326 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3327 #ifdef COMPILE_PCRE8
3328 }
3329 #endif
3330 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
3331 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3332
3333 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3334 }
3335
3336 static void check_vspace(compiler_common *common)
3337 {
3338 /* Check whether TMP1 contains a vertical white space character. TMP2 destroyed. */
/* Generates a fast-call helper which compares TMP1 with 0x0a - 0x0d, 0x85
and, when wide characters are possible, with 0x2028 and 0x2029. The partial
results are collected in TMP2 by the COND_VALUE operations, and the last one
also requests the equal flag (SLJIT_SET_E) - presumably this is what the
caller branches on; confirm at the call sites. TMP1 is modified as well
(0x0a is subtracted and, in the wide character path, bit 0 is set). */
3339 DEFINE_COMPILER;
3340
3341 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3342
/* Rebase to 0x0a so the range 0x0a - 0x0d becomes one unsigned comparison. */
3343 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3344 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3345 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
/* 0x85; its result is consumed by the next COND_VALUE, whichever path is compiled. */
3346 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3347 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3348 #ifdef COMPILE_PCRE8
/* In the 8 bit library the characters above 255 are tested only in UTF mode. */
3349 if (common->utf)
3350 {
3351 #endif
/* NOTE(review): check_anynewline() emits the same sequence without
SLJIT_SET_E on this COND_VALUE; the flags are requested again by the
comparison below, so the extra flag looks redundant - confirm. */
3352 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 maps 0x2028 - 0x0a onto 0x2029 - 0x0a, so one comparison tests both. */
3353 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3354 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3355 #ifdef COMPILE_PCRE8
3356 }
3357 #endif
3358 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
3359 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3360
3361 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3362 }
3363
/* The comparison helpers below borrow STR_END and STACK_TOP as character
holders; both are saved on entry and restored before returning. */
3364 #define CHAR1 STR_END
3365 #define CHAR2 STACK_TOP
3366
/* Generates a fast-call helper which compares two code unit sequences
exactly. As used by the generated code: TMP1 addresses the first sequence,
TMP2 holds the length in bytes (it is decreased by IN_UCHARS(1) per step),
and STR_PTR is first moved back by TMP2 to address the second sequence.
The loop ends at the first differing pair or when TMP2 reaches zero.
NOTE(review): the caller presumably inspects the flags / TMP2 left by the
loop to learn the outcome - confirm at the call sites. */
3367 static void do_casefulcmp(compiler_common *common)
3368 {
3369 DEFINE_COMPILER;
3370 struct sljit_jump *jump;
3371 struct sljit_label *label;
3372
3373 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3374 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the registers aliased by CHAR1 and CHAR2. */
3375 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3376 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Step both pointers back by one code unit; presumably MOVU_UCHAR is a load
with pointer update which advances them again before each read - confirm. */
3377 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3378 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3379
3380 label = LABEL();
3381 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3382 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Leave the loop at the first difference. */
3383 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3384 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3385 JUMPTO(SLJIT_C_NOT_ZERO, label);
3386
3387 JUMPHERE(jump);
/* Undo the initial one code unit adjustment of STR_PTR and restore the saved registers. */
3388 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3389 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3390 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3391 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3392 }
3393
/* STACK_LIMIT is borrowed to hold the address of the lower casing table. */
3394 #define LCC_TABLE STACK_LIMIT
3395
/* Generates a fast-call helper which compares two code unit sequences after
mapping every code unit through the common->lcc table. The register usage
follows do_casefulcmp(): TMP1 addresses the first sequence, TMP2 holds the
length in bytes, and STR_PTR is first moved back by TMP2. In the non 8 bit
libraries the code units above 255 are compared without table lookup.
NOTE(review): the caller presumably inspects the flags / TMP2 left by the
loop to learn the outcome - confirm at the call sites. */
3396 static void do_caselesscmp(compiler_common *common)
3397 {
3398 DEFINE_COMPILER;
3399 struct sljit_jump *jump;
3400 struct sljit_label *label;
3401
3402 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3403 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3404
/* Save the three borrowed registers, then load the table address. */
3405 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3406 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3407 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3408 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Step both pointers back by one code unit; presumably MOVU_UCHAR is a load
with pointer update which advances them again before each read - confirm. */
3409 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3410 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3411
3412 label = LABEL();
3413 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3414 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is skipped for code units above 255. */
3415 #ifndef COMPILE_PCRE8
3416 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3417 #endif
3418 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3419 #ifndef COMPILE_PCRE8
3420 JUMPHERE(jump);
3421 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3422 #endif
3423 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3424 #ifndef COMPILE_PCRE8
3425 JUMPHERE(jump);
3426 #endif
/* Leave the loop at the first difference. */
3427 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3428 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3429 JUMPTO(SLJIT_C_NOT_ZERO, label);
3430
3431 JUMPHERE(jump);
/* Undo the initial one code unit adjustment of STR_PTR and restore the saved registers. */
3432 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3433 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3434 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3435 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3436 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3437 }
3438
3439 #undef LCC_TABLE
3440 #undef CHAR1
3441 #undef CHAR2
3442
#if defined SUPPORT_UTF && defined SUPPORT_UCP

static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
/* Caseless comparison of two UTF strings, done in C because generating the
equivalent machine code would be inefficient. The first string is
[src1, end1); the second one starts at args->uchar_ptr and is limited by
args->end.

Returns NULL at the first pair of characters which differ even after taking
the other case of the second character, (pcre_uchar*)1 when the second
string ends before the first one is consumed, and otherwise the position
reached in the second string. */
const pcre_uchar *cur2 = args->uchar_ptr;
const pcre_uchar *limit2 = args->end;
int ch1, ch2;

for (;;)
  {
  if (src1 >= end1)
    return cur2;
  if (cur2 >= limit2)
    return (pcre_uchar*)1;

  GETCHARINC(ch1, src1);
  GETCHARINC(ch2, cur2);

  if (ch1 == ch2)
    continue;
  if (ch1 != UCD_OTHERCASE(ch2))
    return NULL;
  }
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
3464
/* Generates the comparison of one pattern character (all of its code units
in UTF mode) starting at cc against the subject, as part of a longer fixed
sequence described by context. The subject data is addressed relative to
STR_PTR using the negative of context->length (the number of bytes still to
be compared), and a jump is added to backtracks for every mismatch. When
caseless is set and the character has an other case, the single differing
bit is forced on both sides before comparing. Returns cc advanced past the
processed character. */
3465 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3466 compare_context* context, jump_list **backtracks)
3467 {
3468 DEFINE_COMPILER;
3469 unsigned int othercasebit = 0;
3470 pcre_uchar *othercasechar = NULL;
3471 #ifdef SUPPORT_UTF
3472 int utflength;
3473 #endif
3474
3475 if (caseless && char_has_othercase(common, cc))
3476 {
3477 othercasebit = char_get_othercase_bit(common, cc);
3478 SLJIT_ASSERT(othercasebit);
3479 /* Extracting bit difference info. */
/* The upper part of the value selects the code unit which carries the
differing bit (as an offset from cc), the low 8 bits are the bit mask. In
the 16 bit library bit 0x100 indicates that the mask belongs to the high
byte of the code unit. */
3480 #ifdef COMPILE_PCRE8
3481 othercasechar = cc + (othercasebit >> 8);
3482 othercasebit &= 0xff;
3483 #else
3484 #ifdef COMPILE_PCRE16
3485 othercasechar = cc + (othercasebit >> 9);
3486 if ((othercasebit & 0x100) != 0)
3487 othercasebit = (othercasebit & 0xff) << 8;
3488 else
3489 othercasebit &= 0xff;
3490 #endif
3491 #endif
3492 }
3493
/* sourcereg == -1 marks the first call for this sequence: the first chunk
is loaded into TMP1 (using the widest load allowed by the remaining length
when unaligned access is available), and TMP2 becomes the target of the
next load. */
3494 if (context->sourcereg == -1)
3495 {
3496 #ifdef COMPILE_PCRE8
3497 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3498 if (context->length >= 4)
3499 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3500 else if (context->length >= 2)
3501 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3502 else
3503 #endif
3504 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3505 #else
3506 #ifdef COMPILE_PCRE16
3507 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3508 if (context->length >= 4)
3509 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3510 else
3511 #endif
3512 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3513 #endif
3514 #endif /* COMPILE_PCRE8 */
3515 context->sourcereg = TMP2;
3516 }
3517
/* In UTF mode all code units of the character are processed by the loop
below; without SUPPORT_UTF the body is compiled as straight line code and
handles exactly one code unit. */
3518 #ifdef SUPPORT_UTF
3519 utflength = 1;
3520 if (common->utf && HAS_EXTRALEN(*cc))
3521 utflength += GET_EXTRALEN(*cc);
3522
3523 do
3524 {
3525 #endif
3526
3527 context->length -= IN_UCHARS(1);
3528 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3529
3530 /* Unaligned read is supported. */
/* The expected code units are collected in context->c and the case masks in
context->oc until a whole chunk can be compared at once. */
3531 if (othercasebit != 0 && othercasechar == cc)
3532 {
3533 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3534 context->oc.asuchars[context->ucharptr] = othercasebit;
3535 }
3536 else
3537 {
3538 context->c.asuchars[context->ucharptr] = *cc;
3539 context->oc.asuchars[context->ucharptr] = 0;
3540 }
3541 context->ucharptr++;
3542
/* A chunk is complete when 4 bytes were collected, when the sequence ends,
or (8 bit library only) when 2 bytes were collected and a single byte
remains. */
3543 #ifdef COMPILE_PCRE8
3544 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3545 #else
3546 if (context->ucharptr >= 2 || context->length == 0)
3547 #endif
3548 {
/* Start loading the next chunk into the spare register before the current
one is compared, then swap the roles of TMP1 and TMP2. */
3549 if (context->length >= 4)
3550 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3551 #ifdef COMPILE_PCRE8
3552 else if (context->length >= 2)
3553 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3554 else if (context->length >= 1)
3555 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3556 #else
3557 else if (context->length >= 2)
3558 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3559 #endif
3560 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3561
/* The number of collected code units selects the comparison width. The case
mask is OR-ed into the subject data only when it is not zero. */
3562 switch(context->ucharptr)
3563 {
3564 case 4 / sizeof(pcre_uchar):
3565 if (context->oc.asint != 0)
3566 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3567 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3568 break;
3569
3570 case 2 / sizeof(pcre_uchar):
3571 if (context->oc.asushort != 0)
3572 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3573 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3574 break;
3575
3576 #ifdef COMPILE_PCRE8
3577 case 1:
3578 if (context->oc.asbyte != 0)
3579 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3580 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3581 break;
3582 #endif
3583
3584 default:
3585 SLJIT_ASSERT_STOP();
3586 break;
3587 }
3588 context->ucharptr = 0;
3589 }
3590
3591 #else
3592
3593 /* Unaligned read is unsupported. */
/* One code unit is compared per step: the next code unit is loaded into the
spare register (if any is left), then the previously loaded one is
compared. */
3594 #ifdef COMPILE_PCRE8
3595 if (context->length > 0)
3596 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3597 #else
3598 if (context->length > 0)
3599 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3600 #endif
3601 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3602
3603 if (othercasebit != 0 && othercasechar == cc)
3604 {
3605 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3606 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3607 }
3608 else
3609 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3610
3611 #endif
3612
3613 cc++;
3614 #ifdef SUPPORT_UTF
3615 utflength--;
3616 }
3617 while (utflength > 0);
3618 #endif
3619
3620 return cc;
3621 }
3622
3623 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3624
3625 #define SET_TYPE_OFFSET(value) \
3626 if ((value) != typeoffset) \
3627 { \
3628 if ((value) > typeoffset) \
3629 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3630 else \
3631 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3632 } \
3633 typeoffset = (value);
3634
3635 #define SET_CHAR_OFFSET(value) \
3636 if ((value) != charoffset) \
3637 { \
3638 if ((value) > charoffset) \
3639 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
3640 else \
3641 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
3642 } \
3643 charoffset = (value);
3644
3645 static void compile_xclass_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3646 {
3647 DEFINE_COMPILER;
3648 jump_list *found = NULL;
3649 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3650 unsigned int c;
3651 int compares;
3652 struct sljit_jump *jump = NULL;
3653 pcre_uchar *ccbegin;
3654 #ifdef SUPPORT_UCP
3655 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3656 BOOL charsaved = FALSE;
3657 int typereg = TMP1, scriptreg = TMP1;
3658 unsigned int typeoffset;
3659 #endif
3660 int invertcmp, numberofcmps;
3661 unsigned int charoffset;
3662
3663 /* Although SUPPORT_UTF must be defined, we are
3664 not necessary in utf mode even in 8 bit mode. */
3665 detect_partial_match(common, backtracks);
3666 read_char(common);
3667
3668 if ((*cc++ & XCL_MAP) != 0)
3669 {
3670 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3671 #ifndef COMPILE_PCRE8
3672 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3673 #elif defined SUPPORT_UTF
3674 if (common->utf)
3675 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3676 #endif
3677
3678 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3679 {
3680 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3681 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3682 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3683 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3684 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3685 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3686 }
3687
3688 #ifndef COMPILE_PCRE8
3689 JUMPHERE(jump);
3690 #elif defined SUPPORT_UTF
3691 if (common->utf)
3692 JUMPHERE(jump);
3693 #endif
3694 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3695 #ifdef SUPPORT_UCP
3696 charsaved = TRUE;
3697 #endif
3698 cc += 32 / sizeof(pcre_uchar);
3699 }
3700
3701 /* Scanning the necessary info. */
3702 ccbegin = cc;
3703 compares = 0;
3704 while (*cc != XCL_END)
3705 {
3706 compares++;
3707 if (*cc == XCL_SINGLE)
3708 {
3709 cc += 2;
3710 #ifdef SUPPORT_UTF
3711 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3712 #endif
3713 #ifdef SUPPORT_UCP
3714 needschar = TRUE;
3715 #endif
3716 }
3717 else if (*cc == XCL_RANGE)
3718 {
3719 cc += 2;
3720 #ifdef SUPPORT_UTF
3721 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3722 #endif
3723 cc++;
3724 #ifdef SUPPORT_UTF
3725 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3726 #endif
3727 #ifdef SUPPORT_UCP
3728 needschar = TRUE;
3729 #endif
3730 }
3731 #ifdef SUPPORT_UCP
3732 else
3733 {
3734 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3735 cc++;
3736 switch(*cc)
3737 {
3738 case PT_ANY:
3739 break;
3740
3741 case PT_LAMP:
3742 case PT_GC:
3743 case PT_PC:
3744 case PT_ALNUM:
3745 needstype = TRUE;
3746 break;
3747
3748 case PT_SC:
3749 needsscript = TRUE;
3750 break;
3751
3752 case PT_SPACE:
3753 case PT_PXSPACE:
3754 case PT_WORD:
3755 needstype = TRUE;
3756 needschar = TRUE;
3757 break;
3758
3759 default:
3760 SLJIT_ASSERT_STOP();
3761 break;
3762 }
3763 cc += 2;
3764 }
3765 #endif
3766 }
3767
3768 #ifdef SUPPORT_UCP
3769 /* Simple register allocation. TMP1 is preferred if possible. */
3770 if (needstype || needsscript)
3771 {
3772 if (needschar && !charsaved)
3773 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3774 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3775 if (needschar)
3776 {
3777 if (needstype)
3778 {
3779 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3780 typereg = RETURN_ADDR;
3781 }
3782
3783 if (needsscript)
3784 scriptreg = TMP3;
3785 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3786 }
3787 else if (needstype && needsscript)
3788 scriptreg = TMP3;
3789 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3790
3791 if (needsscript)
3792 {
3793 if (scriptreg == TMP1)
3794 {
3795 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3796 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3797 }
3798 else
3799 {
3800 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3801 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3802 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3803 }
3804 }
3805 }
3806 #endif
3807
3808 /* Generating code. */
3809 cc = ccbegin;
3810 charoffset = 0;
3811 numberofcmps = 0;
3812 #ifdef SUPPORT_UCP
3813 typeoffset = 0;
3814 #endif
3815
3816 while (*cc != XCL_END)
3817 {
3818 compares--;
3819 invertcmp = (compares == 0 && list != backtracks);
3820 jump = NULL;
3821
3822 if (*cc == XCL_SINGLE)
3823 {
3824 cc ++;
3825 #ifdef SUPPORT_UTF
3826 if (common->utf)
3827 {
3828 GETCHARINC(c, cc);
3829 }
3830 else
3831 #endif
3832 c = *cc++;
3833
3834 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3835 {
3836 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3837 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3838 numberofcmps++;
3839 }
3840 else if (numberofcmps > 0)
3841 {
3842 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3843 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3844 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3845 numberofcmps = 0;
3846 }
3847 else
3848 {
3849 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3850 numberofcmps = 0;
3851 }
3852 }
3853 else if (*cc == XCL_RANGE)
3854 {
3855 cc ++;
3856 #ifdef SUPPORT_UTF
3857 if (common->utf)
3858 {
3859 GETCHARINC(c, cc);
3860 }
3861 else
3862 #endif
3863 c = *cc++;
3864 SET_CHAR_OFFSET(c);
3865 #ifdef SUPPORT_UTF
3866 if (common->utf)
3867 {
3868 GETCHARINC(c, cc);
3869 }
3870 else
3871 #endif
3872 c = *cc++;
3873 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3874 {
3875 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3876 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3877 numberofcmps++;
3878 }
3879 else if (numberofcmps > 0)
3880 {
3881 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3882 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3883 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3884 numberofcmps = 0;
3885 }
3886 else
3887 {
3888 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3889 numberofcmps = 0;
3890 }
3891 }
3892 #ifdef SUPPORT_UCP
3893 else
3894 {
3895 if (*cc == XCL_NOTPROP)
3896 invertcmp ^= 0x1;
3897 cc++;
3898 switch(*cc)
3899 {
3900 case PT_ANY:
3901 if (list != backtracks)
3902 {
3903 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3904 continue;
3905 }
3906 else if (cc[-1] == XCL_NOTPROP)
3907 continue;
3908 jump = JUMP(SLJIT_JUMP);
3909 break;
3910
3911 case PT_LAMP:
3912 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3913 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3914 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3915 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3916 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3917 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3918 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3919 break;
3920
3921 case PT_GC:
3922 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3923 SET_TYPE_OFFSET(c);
3924 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3925 break;
3926
3927 case PT_PC:
3928 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3929 break;
3930
3931 case PT_SC:
3932 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3933 break;
3934
3935 case PT_SPACE:
3936 case PT_PXSPACE:
3937 if (*cc == PT_SPACE)
3938 {
3939 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3940 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3941 }
3942 SET_CHAR_OFFSET(9);
3943 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3944 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3945 if (*cc == PT_SPACE)
3946 JUMPHERE(jump);
3947
3948 SET_TYPE_OFFSET(ucp_Zl);
3949 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3950 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3951 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3952 break;
3953
3954 case PT_WORD:
3955 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3956 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3957 /* ... fall through */
3958
3959 case PT_ALNUM:
3960 SET_TYPE_OFFSET(ucp_Ll);
3961 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3962 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3963 SET_TYPE_OFFSET(ucp_Nd);
3964 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3965 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3966 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3967 break;
3968 }
3969 cc += 2;
3970 }
3971 #endif
3972
3973 if (jump != NULL)
3974 add_jump(compiler, compares > 0 ? list : backtracks, jump);
3975 }
3976
3977 if (found != NULL)
3978 set_jumps(found, LABEL());
3979 }
3980
3981 #undef SET_TYPE_OFFSET
3982 #undef SET_CHAR_OFFSET
3983
3984 #endif
3985
3986 static pcre_uchar *compile_char1_trypath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
3987 {
3988 DEFINE_COMPILER;
3989 int length;
3990 unsigned int c, oc, bit;
3991 compare_context context;
3992 struct sljit_jump *jump[4];
3993 #ifdef SUPPORT_UTF
3994 struct sljit_label *label;
3995 #ifdef SUPPORT_UCP
3996 pcre_uchar propdata[5];
3997 #endif
3998 #endif
3999
4000 switch(type)
4001 {
4002 case OP_SOD:
4003 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4004 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4005 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4006 return cc;
4007
4008 case OP_SOM:
4009 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4010 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4011 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4012 return cc;
4013
4014 case OP_NOT_WORD_BOUNDARY:
4015 case OP_WORD_BOUNDARY:
4016 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4017 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4018 return cc;
4019
4020 case OP_NOT_DIGIT:
4021 case OP_DIGIT:
4022 /* Digits are usually 0-9, so it is worth to optimize them. */
4023 if (common->digits[0] == -2)
4024 get_ctype_ranges(common, ctype_digit, common->digits);
4025 detect_partial_match(common, backtracks);
4026 /* Flip the starting bit in the negative case. */
4027 if (type == OP_NOT_DIGIT)
4028 common->digits[1] ^= 1;
4029 if (!check_ranges(common, common->digits, backtracks, TRUE))
4030 {
4031 read_char8_type(common);
4032 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4033 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4034 }
4035 if (type == OP_NOT_DIGIT)
4036 common->digits[1] ^= 1;
4037 return cc;
4038
4039 case OP_NOT_WHITESPACE:
4040 case OP_WHITESPACE:
4041 detect_partial_match(common, backtracks);
4042 read_char8_type(common);
4043 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4044 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4045 return cc;
4046
4047 case OP_NOT_WORDCHAR:
4048 case OP_WORDCHAR:
4049 detect_partial_match(common, backtracks);
4050 read_char8_type(common);
4051 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4052 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4053 return cc;
4054
4055 case OP_ANY:
4056 detect_partial_match(common, backtracks);
4057 read_char(common);
4058 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4059 {
4060 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4061 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4062 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4063 else
4064 jump[1] = check_str_end(common);
4065
4066 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4067 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4068 if (jump[1] != NULL)
4069 JUMPHERE(jump[1]);
4070 JUMPHERE(jump[0]);
4071 }
4072 else
4073 check_newlinechar(common, common->nltype, backtracks, TRUE);
4074 return cc;
4075
4076 case OP_ALLANY:
4077 detect_partial_match(common, backtracks);
4078 #ifdef SUPPORT_UTF
4079 if (common->utf)
4080 {
4081 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4082 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4083 #ifdef COMPILE_PCRE8
4084 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4085 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4086 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4087 #else /* COMPILE_PCRE8 */
4088 #ifdef COMPILE_PCRE16
4089 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4090 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4091 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4092 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
4093 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4094 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4095 #endif /* COMPILE_PCRE16 */
4096 #endif /* COMPILE_PCRE8 */
4097 JUMPHERE(jump[0]);
4098 return cc;
4099 }
4100 #endif
4101 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4102 return cc;
4103
4104 case OP_ANYBYTE:
4105 detect_partial_match(common, backtracks);
4106 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4107 return cc;
4108
4109 #ifdef SUPPORT_UTF
4110 #ifdef SUPPORT_UCP
4111 case OP_NOTPROP:
4112 case OP_PROP:
4113 propdata[0] = 0;
4114 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4115 propdata[2] = cc[0];
4116 propdata[3] = cc[1];
4117 propdata[4] = XCL_END;
4118 compile_xclass_trypath(common, propdata, backtracks);
4119 return cc + 2;
4120 #endif
4121 #endif
4122
4123 case OP_ANYNL:
4124 detect_partial_match(common, backtracks);
4125 read_char(common);
4126 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4127 /* We don't need to handle soft partial matching case. */
4128 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4129 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4130 else
4131 jump[1] = check_str_end(common);
4132 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4133 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4134 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4135 jump[3] = JUMP(SLJIT_JUMP);
4136 JUMPHERE(jump[0]);
4137 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4138 JUMPHERE(jump[1]);
4139 JUMPHERE(jump[2]);
4140 JUMPHERE(jump[3]);
4141 return cc;
4142
4143 case OP_NOT_HSPACE:
4144 case OP_HSPACE:
4145 detect_partial_match(common, backtracks);
4146 read_char(common);
4147 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4148 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4149 return cc;
4150
4151 case OP_NOT_VSPACE:
4152 case OP_VSPACE:
4153 detect_partial_match(common, backtracks);
4154 read_char(common);
4155 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4156 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4157 return cc;
4158
4159 #ifdef SUPPORT_UCP
4160 case OP_EXTUNI:
4161 detect_partial_match(common, backtracks);
4162 read_char(common);
4163 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4164 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4165 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
4166
4167 label = LABEL();
4168 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4169 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4170 read_char(common);
4171 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4172 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4173 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
4174
4175 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4176 JUMPHERE(jump[0]);
4177 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4178 {
4179 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4180 /* Since we successfully read a char above, partial matching must occur. */
4181 check_partial(common, TRUE);
4182 JUMPHERE(jump[0]);
4183 }
4184 return cc;
4185 #endif
4186
4187 case OP_EODN:
4188 /* Requires rather complex checks. */
4189 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4190 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4191 {
4192 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4193 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4194 if (common->mode == JIT_COMPILE)
4195 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4196 else
4197 {
4198 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4199 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4200 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
4201 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4202 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
4203 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4204 check_partial(common, TRUE);
4205 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4206 JUMPHERE(jump[1]);
4207 }
4208 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4209 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4210 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4211 }
4212 else if (common->nltype == NLTYPE_FIXED)
4213 {
4214 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4215 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4216 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4217 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4218 }
4219 else
4220 {
4221 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4222 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4223 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4224 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4225 jump[2] = JUMP(SLJIT_C_GREATER);
4226 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4227 /* Equal. */
4228 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4229 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4230 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4231
4232 JUMPHERE(jump[1]);
4233 if (common->nltype == NLTYPE_ANYCRLF)
4234 {
4235 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4236 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4237 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4238 }
4239 else
4240 {
4241 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4242 read_char(common);
4243 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4244 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4245 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4246 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4247 }
4248 JUMPHERE(jump[2]);
4249 JUMPHERE(jump[3]);
4250 }
4251 JUMPHERE(jump[0]);
4252 check_partial(common, FALSE);
4253 return cc;
4254
4255 case OP_EOD:
4256 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4257 check_partial(common, FALSE);
4258 return cc;
4259
4260 case OP_CIRC:
4261 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4262 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4263 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4264 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4265 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4266 return cc;
4267
4268 case OP_CIRCM:
4269 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4270 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4271 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4272 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4273 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4274 jump[0] = JUMP(SLJIT_JUMP);
4275 JUMPHERE(jump[1]);
4276
4277 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4278 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4279 {
4280 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4281 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4282 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4283 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4284 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4285 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4286 }
4287 else
4288 {
4289 skip_char_back(common);
4290 read_char(common);
4291 check_newlinechar(common, common->nltype, backtracks, FALSE);
4292 }
4293 JUMPHERE(jump[0]);
4294 return cc;
4295
4296 case OP_DOLL:
4297 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4298 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4299 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4300
4301 if (!common->endonly)
4302 compile_char1_trypath(common, OP_EODN, cc, backtracks);
4303 else
4304 {
4305 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4306 check_partial(common, FALSE);
4307 }
4308 return cc;
4309
4310 case OP_DOLLM:
4311 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4312 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4313 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4314 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4315 check_partial(common, FALSE);
4316 jump[0] = JUMP(SLJIT_JUMP);
4317 JUMPHERE(jump[1]);
4318
4319 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4320 {
4321 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4322 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4323 if (common->mode == JIT_COMPILE)
4324 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4325 else
4326 {
4327 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4328 /* STR_PTR = STR_END - IN_UCHARS(1) */
4329 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4330 check_partial(common, TRUE);
4331 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4332 JUMPHERE(jump[1]);
4333 }
4334
4335 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4336 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4337 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4338 }
4339 else
4340 {
4341 peek_char(common);
4342 check_newlinechar(common, common->nltype, backtracks, FALSE);
4343 }
4344 JUMPHERE(jump[0]);
4345 return cc;
4346
4347 case OP_CHAR:
4348 case OP_CHARI:
4349 length = 1;
4350 #ifdef SUPPORT_UTF
4351 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4352 #endif
4353 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4354 {
4355 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4356 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4357
4358 context.length = IN_UCHARS(length);
4359 context.sourcereg = -1;
4360 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4361 context.ucharptr = 0;
4362 #endif
4363 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4364 }
4365 detect_partial_match(common, backtracks);
4366 read_char(common);
4367 #ifdef SUPPORT_UTF
4368 if (common->utf)
4369 {
4370 GETCHAR(c, cc);
4371 }
4372 else
4373 #endif
4374 c = *cc;
4375 if (type == OP_CHAR || !char_has_othercase(common, cc))
4376 {
4377 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4378 return cc + length;
4379 }
4380 oc = char_othercase(common, c);
4381 bit = c ^ oc;
4382 if (ispowerof2(bit))
4383 {
4384 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4385 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4386 return cc + length;
4387 }
4388 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4389 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4390 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
4391 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4392 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4393 return cc + length;
4394
4395 case OP_NOT:
4396 case OP_NOTI:
4397 detect_partial_match(common, backtracks);
4398 length = 1;
4399 #ifdef SUPPORT_UTF
4400 if (common->utf)
4401 {
4402 #ifdef COMPILE_PCRE8
4403 c = *cc;
4404 if (c < 128)
4405 {
4406 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4407 if (type == OP_NOT || !char_has_othercase(common, cc))
4408 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4409 else
4410 {
4411 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4412 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4413 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4414 }
4415 /* Skip the variable-length character. */
4416 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4417 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4418 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4419 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4420 JUMPHERE(jump[0]);
4421 return cc + 1;
4422 }
4423 else
4424 #endif /* COMPILE_PCRE8 */
4425 {
4426 GETCHARLEN(c, cc, length);
4427 read_char(common);
4428 }
4429 }
4430 else
4431 #endif /* SUPPORT_UTF */
4432 {
4433 read_char(common);
4434 c = *cc;
4435 }
4436
4437 if (type == OP_NOT || !char_has_othercase(common, cc))
4438 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4439 else
4440 {
4441 oc = char_othercase(common, c);
4442 bit = c ^ oc;
4443 if (ispowerof2(bit))
4444 {
4445 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4446 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4447 }
4448 else
4449 {
4450 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4451 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4452 }
4453 }
4454 return cc + length;
4455
4456 case OP_CLASS:
4457 case OP_NCLASS:
4458 detect_partial_match(common, backtracks);
4459 read_char(common);
4460 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4461 return cc + 32 / sizeof(pcre_uchar);
4462
4463 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4464 jump[0] = NULL;
4465 #ifdef COMPILE_PCRE8
4466 /* This check only affects 8 bit mode. In other modes, we
4467 always need to compare the value with 255. */
4468 if (common->utf)
4469 #endif /* COMPILE_PCRE8 */
4470 {
4471 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4472 if (type == OP_CLASS)
4473 {
4474 add_jump(compiler, backtracks, jump[0]);
4475 jump[0] = NULL;
4476 }
4477 }
4478 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4479 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4480 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4481 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
4482 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4483 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4484 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4485 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4486 if (jump[0] != NULL)
4487 JUMPHERE(jump[0]);
4488 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4489 return cc + 32 / sizeof(pcre_uchar);
4490
4491 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
4492 case OP_XCLASS:
4493 compile_xclass_trypath(common, cc + LINK_SIZE, backtracks);
4494 return cc + GET(cc, 0) - 1;
4495 #endif
4496
4497 case OP_REVERSE:
4498 length = GET(cc, 0);
4499 if (length == 0)
4500 return cc + LINK_SIZE;
4501 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4502 #ifdef SUPPORT_UTF
4503 if (common->utf)
4504 {
4505 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4506 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4507 label = LABEL();
4508 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4509 skip_char_back(common);
4510 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4511 JUMPTO(SLJIT_C_NOT_ZERO, label);
4512 }
4513 else
4514 #endif
4515 {
4516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4517 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4518 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4519 }
4520 check_start_used_ptr(common);
4521 return cc + LINK_SIZE;
4522 }
4523 SLJIT_ASSERT_STOP();
4524 return cc;
4525 }
4526
4527 static SLJIT_INLINE pcre_uchar *compile_charn_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4528 {
4529 /* This function consumes at least one input character. */
4530 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4531 DEFINE_COMPILER;
4532 pcre_uchar *ccbegin = cc;
4533 compare_context context;
4534 int size;
4535
4536 context.length = 0;
4537 do
4538 {
4539 if (cc >= ccend)
4540 break;
4541
4542 if (*cc == OP_CHAR)
4543 {
4544 size = 1;
4545 #ifdef SUPPORT_UTF
4546 if (common->utf && HAS_EXTRALEN(cc[1]))
4547 size += GET_EXTRALEN(cc[1]);
4548 #endif
4549 }
4550 else if (*cc == OP_CHARI)
4551 {
4552 size = 1;
4553 #ifdef SUPPORT_UTF
4554 if (common->utf)
4555 {
4556 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4557 size = 0;
4558 else if (HAS_EXTRALEN(cc[1]))
4559 size += GET_EXTRALEN(cc[1]);
4560 }
4561 else
4562 #endif
4563 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4564 size = 0;
4565 }
4566 else
4567 size = 0;
4568
4569 cc += 1 + size;
4570 context.length += IN_UCHARS(size);
4571 }
4572 while (size > 0 && context.length <= 128);
4573
4574 cc = ccbegin;
4575 if (context.length > 0)
4576 {
4577 /* We have a fixed-length byte sequence. */
4578 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4579 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4580
4581 context.sourcereg = -1;
4582 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4583 context.ucharptr = 0;
4584 #endif
4585 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4586 return cc;
4587 }
4588
4589 /* A non-fixed length character will be checked if length == 0. */
4590 return compile_char1_trypath(common, *cc, cc + 1, backtracks);
4591 }
4592
4593 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4594 {
4595 DEFINE_COMPILER;
4596 int offset = GET2(cc, 1) << 1;
4597
4598 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4599 if (!common->jscript_compat)
4600 {
4601 if (backtracks == NULL)
4602 {
4603 /* OVECTOR(1) contains the "string begin - 1" constant. */
4604 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4605 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4606 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4607 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4608 return JUMP(SLJIT_C_NOT_ZERO);
4609 }
4610 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4611 }
4612 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4613 }
4614
4615 /* Forward definitions. */
4616 static void compile_trypath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4617 static void compile_backtrackpath(compiler_common *, struct backtrack_common *);
4618
/* Allocates a zero-filled backtrack record of `size` bytes from the compiler's
memory pool and pushes it onto the parent's list of backtracks.

Expects `compiler`, `parent` and `backtrack` to be in scope at the expansion
site. On allocation failure (reported through sljit_get_compiler_error) the
enclosing function returns `error`.

  size     number of bytes to allocate; evaluated exactly once
  ccstart  pattern position stored in backtrack->cc
  error    value returned from the enclosing function on failure */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    size_t push_backtrack_size_ = (size); \
    backtrack = sljit_alloc_memory(compiler, push_backtrack_size_); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, push_backtrack_size_); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
4631
/* Variant of PUSH_BACKTRACK for functions returning void: allocates a
zero-filled backtrack record of `size` bytes and pushes it onto the parent's
list of backtracks.

Expects `compiler`, `parent` and `backtrack` to be in scope at the expansion
site. On allocation failure (reported through sljit_get_compiler_error) the
enclosing function simply returns.

  size     number of bytes to allocate; evaluated exactly once
  ccstart  pattern position stored in backtrack->cc */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    size_t push_backtrack_size_ = (size); \
    backtrack = sljit_alloc_memory(compiler, push_backtrack_size_); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, push_backtrack_size_); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
4644
/* Casts the `backtrack` variable that is in scope at the expansion site to a
pointer to `type`, e.g. BACKTRACK_AS(iterator_backtrack)->trypath. */
#define BACKTRACK_AS(type) \
  ((type *)backtrack)
4646
4647 static pcre_uchar *compile_ref_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4648 {
4649 DEFINE_COMPILER;
4650 int offset = GET2(cc, 1) << 1;
4651 struct sljit_jump *jump = NULL;
4652 struct sljit_jump *partial;
4653 struct sljit_jump *nopartial;
4654
4655 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4656 /* OVECTOR(1) contains the "string begin - 1" constant. */
4657 if (withchecks && !common->jscript_compat)
4658 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4659
4660 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4661 if (common->utf && *cc == OP_REFI)
4662 {
4663 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
4664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4665 if (withchecks)
4666 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4667
4668 /* Needed to save important temporary registers. */
4669 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4670 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
4671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4672 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4673 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4674 if (common->mode == JIT_COMPILE)
4675 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4676 else
4677 {
4678 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4679 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4680 check_partial(common, FALSE);
4681 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4682 JUMPHERE(nopartial);
4683 }
4684 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4685 }
4686 else
4687 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4688 {
4689 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4690 if (withchecks)
4691 jump = JUMP(SLJIT_C_ZERO);
4692
4693 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4694 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4695 if (common->mode == JIT_COMPILE)
4696 add_jump(compiler, backtracks, partial);
4697
4698 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4699 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4700
4701 if (common->mode != JIT_COMPILE)
4702 {
4703 nopartial = JUMP(SLJIT_JUMP);
4704 JUMPHERE(partial);
4705 /* TMP2 -= STR_END - STR_PTR */
4706 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4707 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
4708 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4709 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4710 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4711 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4712 JUMPHERE(partial);
4713 check_partial(common, FALSE);
4714 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4715 JUMPHERE(nopartial);
4716 }
4717 }
4718
4719 if (jump != NULL)
4720 {
4721 if (emptyfail)
4722 add_jump(compiler, backtracks, jump);
4723 else
4724 JUMPHERE(jump);
4725 }
4726 return cc + 1 + IMM2_SIZE;
4727 }
4728
4729 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4730 {
4731 DEFINE_COMPILER;
4732 backtrack_common *backtrack;
4733 pcre_uchar type;
4734 struct sljit_label *label;
4735 struct sljit_jump *zerolength;
4736 struct sljit_jump *jump = NULL;
4737 pcre_uchar *ccbegin = cc;
4738 int min = 0, max = 0;
4739 BOOL minimize;
4740
4741 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4742
4743 type = cc[1 + IMM2_SIZE];
4744 minimize = (type & 0x1) != 0;
4745 switch(type)
4746 {
4747 case OP_CRSTAR:
4748 case OP_CRMINSTAR:
4749 min = 0;
4750 max = 0;
4751 cc += 1 + IMM2_SIZE + 1;
4752 break;
4753 case OP_CRPLUS:
4754 case OP_CRMINPLUS:
4755 min = 1;
4756 max = 0;
4757 cc += 1 + IMM2_SIZE + 1;
4758 break;
4759 case OP_CRQUERY:
4760 case OP_CRMINQUERY:
4761 min = 0;
4762 max = 1;
4763 cc += 1 + IMM2_SIZE + 1;
4764 break;
4765 case OP_CRRANGE:
4766 case OP_CRMINRANGE:
4767 min = GET2(cc, 1 + IMM2_SIZE + 1);
4768 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4769 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4770 break;
4771 default:
4772 SLJIT_ASSERT_STOP();
4773 break;
4774 }
4775
4776 if (!minimize)
4777 {
4778 if (min == 0)
4779 {
4780 allocate_stack(common, 2);
4781 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4782 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4783 /* Temporary release of STR_PTR. */
4784 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4785 zerolength = compile_ref_checks(common, ccbegin, NULL);
4786 /* Restore if not zero length. */
4787 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4788 }
4789 else
4790 {
4791 allocate_stack(common, 1);
4792 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4793 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4794 }
4795
4796 if (min > 1 || max > 1)
4797 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4798
4799 label = LABEL();
4800 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4801
4802 if (min > 1 || max > 1)
4803 {
4804 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4805 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4806 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4807 if (min > 1)
4808 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4809 if (max > 1)
4810 {
4811 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4812 allocate_stack(common, 1);
4813 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4814 JUMPTO(SLJIT_JUMP, label);
4815 JUMPHERE(jump);
4816 }
4817 }
4818
4819 if (max == 0)
4820 {
4821 /* Includes min > 1 case as well. */
4822 allocate_stack(common, 1);
4823 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4824 JUMPTO(SLJIT_JUMP, label);
4825 }
4826
4827 JUMPHERE(zerolength);
4828 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4829
4830 decrease_call_count(common);
4831 return cc;
4832 }
4833
4834 allocate_stack(common, 2);
4835 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4836 if (type != OP_CRMINSTAR)
4837 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4838
4839 if (min == 0)
4840 {
4841 zerolength = compile_ref_checks(common, ccbegin, NULL);
4842 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4843 jump = JUMP(SLJIT_JUMP);
4844 }
4845 else
4846 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4847
4848 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4849 if (max > 0)
4850 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4851
4852 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4853 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4854
4855 if (min > 1)
4856 {
4857 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4858 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4859 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4860 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->trypath);
4861 }
4862 else if (max > 0)
4863 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4864
4865 if (jump != NULL)
4866 JUMPHERE(jump);
4867 JUMPHERE(zerolength);
4868
4869 decrease_call_count(common);
4870 return cc;
4871 }
4872
4873 static SLJIT_INLINE pcre_uchar *compile_recurse_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4874 {
4875 DEFINE_COMPILER;
4876 backtrack_common *backtrack;
4877 recurse_entry *entry = common->entries;
4878 recurse_entry *prev = NULL;
4879 int start = GET(cc, 1);
4880
4881 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
4882 while (entry != NULL)
4883 {
4884 if (entry->start == start)
4885 break;
4886 prev = entry;
4887 entry = entry->next;
4888 }
4889
4890 if (entry == NULL)
4891 {
4892 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4893 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4894 return NULL;
4895 entry->next = NULL;
4896 entry->entry = NULL;
4897 entry->calls = NULL;
4898 entry->start = start;
4899
4900 if (prev != NULL)
4901 prev->next = entry;
4902 else
4903 common->entries = entry;
4904 }
4905
4906 if (common->has_set_som && common->mark_ptr != 0)
4907 {
4908 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4909 allocate_stack(common, 2);
4910 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
4911 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4912 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4913 }
4914 else if (common->has_set_som || common->mark_ptr != 0)
4915 {
4916 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
4917 allocate_stack(common, 1);
4918 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4919 }
4920
4921 if (entry->entry == NULL)
4922 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4923 else
4924 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4925 /* Leave if the match is failed. */
4926 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4927 return cc + 1 + LINK_SIZE;
4928 }
4929
4930 static pcre_uchar *compile_assert_trypath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
4931 {
4932 DEFINE_COMPILER;
4933 int framesize;
4934 int localptr;
4935 backtrack_common altbacktrack;
4936 pcre_uchar *ccbegin;
4937 pcre_uchar opcode;
4938 pcre_uchar bra = OP_BRA;
4939 jump_list *tmp = NULL;
4940 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
4941 jump_list **found;
4942 /* Saving previous accept variables. */
4943 struct sljit_label *save_quitlabel = common->quitlabel;
4944 struct sljit_label *save_acceptlabel = common->acceptlabel;
4945 jump_list *save_quit = common->quit;
4946 jump_list *save_accept = common->accept;
4947 struct sljit_jump *jump;
4948 struct sljit_jump *brajump = NULL;
4949
4950 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4951 {
4952 SLJIT_ASSERT(!conditional);
4953 bra = *cc;
4954 cc++;
4955 }
4956 localptr = PRIV_DATA(cc);
4957 SLJIT_ASSERT(localptr != 0);
4958 framesize = get_framesize(common, cc, FALSE);
4959 backtrack->framesize = framesize;
4960 backtrack->localptr = localptr;
4961 opcode = *cc;
4962 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
4963 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
4964 ccbegin = cc;
4965 cc += GET(cc, 1);
4966
4967 if (bra == OP_BRAMINZERO)
4968 {
4969 /* This is a braminzero backtrack path. */
4970 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4971 free_stack(common, 1);
4972 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4973 }
4974
4975 if (framesize < 0)
4976 {
4977 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4978 allocate_stack(common, 1);
4979 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4980 }
4981 else
4982 {
4983 allocate_stack(common, framesize + 2);
4984 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4985 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
4986 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4987 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4988 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4989 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
4990 }
4991
4992 memset(&altbacktrack, 0, sizeof(backtrack_common));
4993 common->quitlabel = NULL;
4994 common->quit = NULL;
4995 while (1)
4996 {
4997 common->acceptlabel = NULL;
4998 common->accept = NULL;
4999 altbacktrack.top = NULL;
5000 altbacktrack.topbacktracks = NULL;
5001
5002 if (*ccbegin == OP_ALT)
5003 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5004
5005 altbacktrack.cc = ccbegin;
5006 compile_trypath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5007 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5008 {
5009 common->quitlabel = save_quitlabel;
5010 common->acceptlabel = save_acceptlabel;
5011 common->quit = save_quit;
5012 common->accept = save_accept;
5013 return NULL;
5014 }
5015 common->acceptlabel = LABEL();
5016 if (common->accept != NULL)
5017 set_jumps(common->accept, common->acceptlabel);
5018
5019 /* Reset stack. */
5020 if (framesize < 0)
5021 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5022 else {
5023 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5024 {
5025 /* We don't need to keep the STR_PTR, only the previous localptr. */
5026 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5027 }
5028 else
5029 {
5030 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5031 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5032 }
5033 }
5034
5035 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5036 {
5037 /* We know that STR_PTR was stored on the top of the stack. */
5038 if (conditional)
5039 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5040 else if (bra == OP_BRAZERO)
5041 {
5042 if (framesize < 0)
5043 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5044 else
5045 {
5046 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5047 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
5048 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5049 }
5050 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5051 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5052 }
5053 else if (framesize >= 0)
5054 {
5055 /* For OP_BRA and OP_BRAMINZERO. */
5056 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5057 }
5058 }
5059 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5060
5061 compile_backtrackpath(common, altbacktrack.top);
5062 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5063 {
5064 common->quitlabel = save_quitlabel;
5065 common->acceptlabel = save_acceptlabel;
5066 common->quit = save_quit;
5067 common->accept = save_accept;
5068 return NULL;
5069 }
5070 set_jumps(altbacktrack.topbacktracks, LABEL());
5071
5072 if (*cc != OP_ALT)
5073 break;
5074
5075 ccbegin = cc;
5076 cc += GET(cc, 1);
5077 }
5078 /* None of them matched. */
5079 if (common->quit != NULL)
5080 set_jumps(common->quit, LABEL());
5081
5082 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5083 {
5084 /* Assert is failed. */
5085 if (conditional || bra == OP_BRAZERO)
5086 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5087
5088 if (framesize < 0)
5089 {
5090 /* The topmost item should be 0. */
5091 if (bra == OP_BRAZERO)
5092 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5093 else
5094 free_stack(common, 1);
5095 }
5096 else
5097 {
5098 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5099 /* The topmost item should be 0. */
5100 if (bra == OP_BRAZERO)
5101 {
5102 free_stack(common, framesize + 1);
5103 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5104 }
5105 else
5106 free_stack(common, framesize + 2);
5107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5108 }
5109 jump = JUMP(SLJIT_JUMP);
5110 if (bra != OP_BRAZERO)
5111 add_jump(compiler, target, jump);
5112
5113 /* Assert is successful. */
5114 set_jumps(tmp, LABEL());
5115 if (framesize < 0)
5116 {
5117 /* We know that STR_PTR was stored on the top of the stack. */
5118 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5119 /* Keep the STR_PTR on the top of the stack. */
5120 if (bra == OP_BRAZERO)
5121 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5122 else if (bra == OP_BRAMINZERO)
5123 {
5124 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5125 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5126 }
5127 }
5128 else
5129 {
5130 if (bra == OP_BRA)
5131 {
5132 /* We don't need to keep the STR_PTR, only the previous localptr. */
5133 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5134 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5135 }
5136 else
5137 {
5138 /* We don't need to keep the STR_PTR, only the previous localptr. */
5139 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
5140 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5141 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5142 }
5143 }
5144
5145 if (bra == OP_BRAZERO)
5146 {
5147 backtrack->trypath = LABEL();
5148 sljit_set_label(jump, backtrack->trypath);
5149 }
5150 else if (bra == OP_BRAMINZERO)
5151 {
5152 JUMPTO(SLJIT_JUMP, backtrack->trypath);
5153 JUMPHERE(brajump);
5154 if (framesize >= 0)
5155 {
5156 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5157 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5158 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5159 }
5160 set_jumps(backtrack->common.topbacktracks, LABEL());
5161 }
5162 }
5163 else
5164 {
5165 /* AssertNot is successful. */
5166 if (framesize < 0)
5167 {
5168 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5169 if (bra != OP_BRA)
5170 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5171 else
5172 free_stack(common, 1);
5173 }
5174 else
5175 {
5176 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5177 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5178 /* The topmost item should be 0. */
5179 if (bra != OP_BRA)
5180 {
5181 free_stack(common, framesize + 1);
5182 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5183 }
5184 else
5185 free_stack(common, framesize + 2);
5186 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5187 }
5188
5189 if (bra == OP_BRAZERO)
5190 backtrack->trypath = LABEL();
5191 else if (bra == OP_BRAMINZERO)
5192 {
5193 JUMPTO(SLJIT_JUMP, backtrack->trypath);
5194 JUMPHERE(brajump);
5195 }
5196
5197 if (bra != OP_BRA)
5198 {
5199 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5200 set_jumps(backtrack->common.topbacktracks, LABEL());
5201 backtrack->common.topbacktracks = NULL;
5202 }
5203 }
5204
5205 common->quitlabel = save_quitlabel;
5206 common->acceptlabel = save_acceptlabel;
5207 common->quit = save_quit;
5208 common->accept = save_accept;
5209 return cc + 1 + LINK_SIZE;
5210 }
5211
5212 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
5213 {
5214 int condition = FALSE;
5215 pcre_uchar *slotA = name_table;
5216 pcre_uchar *slotB;
5217 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5218 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5219 sljit_w no_capture;
5220 int i;
5221
5222 locals += refno & 0xff;
5223 refno >>= 8;
5224 no_capture = locals[1];
5225
5226 for (i = 0; i < name_count; i++)
5227 {
5228 if (GET2(slotA, 0) == refno) break;
5229 slotA += name_entry_size;
5230 }
5231
5232 if (i < name_count)
5233 {
5234 /* Found a name for the number - there can be only one; duplicate names
5235 for different numbers are allowed, but not vice versa. First scan down
5236 for duplicates. */
5237
5238 slotB = slotA;
5239 while (slotB > name_table)
5240 {
5241 slotB -= name_entry_size;
5242 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5243 {
5244 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5245 if (condition) break;
5246 }
5247 else break;
5248 }
5249
5250 /* Scan up for duplicates */
5251 if (!condition)
5252 {
5253 slotB = slotA;
5254 for (i++; i < name_count; i++)
5255 {
5256 slotB += name_entry_size;
5257 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5258 {
5259 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5260 if (condition) break;
5261 }
5262 else break;
5263 }
5264 }
5265 }
5266 return condition;
5267 }
5268
5269 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
5270 {
5271 int condition = FALSE;
5272 pcre_uchar *slotA = name_table;
5273 pcre_uchar *slotB;
5274 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5275 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5276 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
5277 int i;
5278
5279 for (i = 0; i < name_count; i++)
5280 {
5281 if (GET2(slotA, 0) == recno) break;
5282 slotA += name_entry_size;
5283 }
5284
5285 if (i < name_count)
5286 {
5287 /* Found a name for the number - there can be only one; duplicate
5288 names for different numbers are allowed, but not vice versa. First
5289 scan down for duplicates. */
5290
5291 slotB = slotA;
5292 while (slotB > name_table)
5293 {
5294 slotB -= name_entry_size;
5295 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5296 {
5297 condition = GET2(slotB, 0) == group_num;
5298 if (condition) break;
5299 }
5300 else break;
5301 }
5302
5303 /* Scan up for duplicates */
5304 if (!condition)
5305 {
5306 slotB = slotA;
5307 for (i++; i < name_count; i++)
5308 {
5309 slotB += name_entry_size;
5310 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5311 {
5312 condition = GET2(slotB, 0) == group_num;
5313 if (condition) break;
5314 }
5315 else break;
5316 }
5317 }
5318 }
5319 return condition;
5320 }
5321
5322 /*
5323 Handling bracketed expressions is probably the most complex part.
5324
5325 Stack layout naming characters:
5326 S - Push the current STR_PTR
5327 0 - Push a 0 (NULL)
5328 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5329 before the next alternative. Not pushed if there are no alternatives.
5330 M - Any values pushed by the current alternative. Can be empty, or anything.
5331 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5332 L - Push the previous local (pointed by localptr) to the stack
5333 () - optional values stored on the stack
5334 ()* - optional, can be stored multiple times
5335
5336 The following list shows the regular expression templates, their PCRE byte codes
5337 and stack layout supported by pcre-sljit.
5338
5339 (?:) OP_BRA | OP_KET A M
5340 () OP_CBRA | OP_KET C M
5341 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5342 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5343 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5344 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5345 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5346 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5347 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5348 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5349 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5350 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5351 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5352 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5353 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5354 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5355 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5356 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5357 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5358 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5359 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5360 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5361
5362
5363 Stack layout naming characters:
5364 A - Push the alternative index (starting from 0) on the stack.
5365 Not pushed if there are no alternatives.
5366 M - Any values pushed by the current alternative. Can be empty, or anything.
5367
5368 The next list shows the possible content of a bracket:
5369 (|) OP_*BRA | OP_ALT ... M A
5370 (?()|) OP_*COND | OP_ALT M A
5371 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5372 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5373 Or nothing, if trace is unnecessary
5374 */
5375
/* Generates the try-path (forward matching) code for a bracketed group.

The group may be preceded by OP_BRAZERO or OP_BRAMINZERO. The bracket opcodes
handled below are OP_BRA, OP_CBRA / OP_SCBRA, OP_SBRA, OP_ONCE / OP_ONCE_NC
and OP_COND / OP_SCOND. Only the first alternative is compiled here (through
compile_trypath); the remaining OP_ALT alternatives are merely skipped at the
end of this function. The stack layouts produced are listed in the comment
that precedes this function.

Arguments:
  common   the compiler state
  cc       points to the bracket opcode (or to its BRAZERO/BRAMINZERO prefix)
  parent   the enclosing backtrack record; for OP_BRAMINZERO it is cast to
           braminzero_backtrack to reach its trypath label

Returns:   pointer to the byte code following the closing KET, or NULL when
           sljit_get_compiler_error reports an error. A conditional group
           whose condition is OP_DEF emits nothing and returns bracketend(cc). */
5376 static pcre_uchar *compile_bracket_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5377 {
5378 DEFINE_COMPILER;
5379 backtrack_common *backtrack;
5380 pcre_uchar opcode;
5381 int localptr = 0;
5382 int offset = 0;
5383 int stacksize;
5384 pcre_uchar *ccbegin;
5385 pcre_uchar *trypath;
5386 pcre_uchar bra = OP_BRA;
5387 pcre_uchar ket;
5388 assert_backtrack *assert;
5389 BOOL has_alternatives;
5390 struct sljit_jump *jump;
5391 struct sljit_jump *skip;
5392 struct sljit_label *rmaxlabel = NULL;
5393 struct sljit_jump *braminzerojump = NULL;
5394
5395 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5396
/* An optional OP_BRAZERO / OP_BRAMINZERO prefix is remembered in bra and
skipped. NOTE(review): the store to opcode inside this block is repeated
unconditionally right after it, so it looks redundant. */
5397 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5398 {
5399 bra = *cc;
5400 cc++;
5401 opcode = *cc;
5402 }
5403
5404 opcode = *cc;
5405 ccbegin = cc;
5406 trypath = ccbegin + 1 + LINK_SIZE;
5407
/* A conditional group whose condition is OP_DEF generates no code: the
record pushed above is unlinked from the parent and the group is skipped. */
5408 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5409 {
5410 /* Drop this bracket_backtrack. */
5411 parent->top = backtrack->prev;
5412 return bracketend(cc);
5413 }
5414
/* ket is the closing opcode of the group, read from just before
bracketend(cc). */
5415 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5416 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5417 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Follow the link of the opening bracket: cc now points to the opcode that
ends the first alternative. */
5418 cc += GET(cc, 1);
5419
5420 has_alternatives = *cc == OP_ALT;
/* For conditional groups has_alternatives is recomputed from the kind of the
condition: FALSE for OP_RREF, and for OP_NRREF it depends on the entry that
is currently compiled (common->currententry). */
5421 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5422 {
5423 has_alternatives = (*trypath == OP_RREF) ? FALSE : TRUE;
5424 if (*trypath == OP_NRREF)
5425 {
5426 stacksize = GET2(trypath, 1);
5427 if (common->currententry == NULL || stacksize == RREF_ANY)
5428 has_alternatives = FALSE;
5429 else if (common->currententry->start == 0)
5430 has_alternatives = stacksize != 0;
5431 else
5432 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5433 }
5434 }
5435
/* From here on an OP_COND closed by KETRMAX/KETRMIN is handled as OP_SCOND,
and OP_ONCE_NC is handled as OP_ONCE. */
5436 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5437 opcode = OP_SCOND;
5438 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5439 opcode = OP_ONCE;
5440
5441 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5442 {
5443 /* Capturing brackets have a pre-allocated space. */
5444 offset = GET2(ccbegin, 1 + LINK_SIZE);
5445 localptr = OVECTOR_PRIV(offset);
5446 offset <<= 1;
5447 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
5448 trypath += IMM2_SIZE;
5449 }
5450 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5451 {
5452 /* Other brackets simply allocate the next entry. */
5453 localptr = PRIV_DATA(ccbegin);
5454 SLJIT_ASSERT(localptr != 0);
5455 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
5456 if (opcode == OP_ONCE)
5457 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5458 }
5459
5460 /* Instructions before the first alternative. */
/* First count the slots needed (a 0 marker for repeated groups, the current
STR_PTR for BRAZERO), allocate them in one step, then fill them in. */
5461 stacksize = 0;
5462 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5463 stacksize++;
5464 if (bra == OP_BRAZERO)
5465 stacksize++;
5466
5467 if (stacksize > 0)
5468 allocate_stack(common, stacksize);
5469
5470 stacksize = 0;
5471 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5472 {
5473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5474 stacksize++;
5475 }
5476
5477 if (bra == OP_BRAZERO)
5478 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5479
5480 if (bra == OP_BRAMINZERO)
5481 {
5482 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches the empty string) */
5483 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5484 if (ket != OP_KETRMIN)
5485 {
5486 free_stack(common, 1);
5487 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5488 }
5489 else
5490 {
5491 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5492 {
5493 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5494 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5495 /* Nothing stored during the first run. */
5496 skip = JUMP(SLJIT_JUMP);
5497 JUMPHERE(jump);
5498 /* Checking zero-length iteration. */
5499 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5500 {
5501 /* When we come from outside, localptr contains the previous STR_PTR. */
5502 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5503 }
5504 else
5505 {
5506 /* Except when the whole stack frame must be saved. */
5507 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5508 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
5509 }
5510 JUMPHERE(skip);
5511 }
5512 else
5513 {
5514 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5515 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5516 JUMPHERE(jump);
5517 }
5518 }
5519 }
5520
/* Labels that mark where a repeated group is entered again. */
5521 if (ket == OP_KETRMIN)
5522 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
5523
5524 if (ket == OP_KETRMAX)
5525 {
5526 rmaxlabel = LABEL();
5527 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5528 BACKTRACK_AS(bracket_backtrack)->alttrypath = rmaxlabel;
5529 }
5530
5531 /* Handling capturing brackets and alternatives. */
5532 if (opcode == OP_ONCE)
5533 {
5534 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5535 {
5536 /* Neither capturing brackets nor recursions are found in the block. */
5537 if (ket == OP_KETRMIN)
5538 {
5539 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5540 allocate_stack(common, 2);
5541 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5542 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5543 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5544 }
5545 else if (ket == OP_KETRMAX || has_alternatives)
5546 {
5547 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5548 allocate_stack(common, 1);
5549 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5550 }
5551 else
5552 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5553 }
5554 else
5555 {
/* A stack frame is required: besides the frame itself, the previous
localptr value is pushed, and also STR_PTR when the group is repeated or
has alternatives. */
5556 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5557 {
5558 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5559 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5560 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5561 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5562 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5563 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5564 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5565 }
5566 else
5567 {
5568 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5569 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5570 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5572 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5573 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5574 }
5575 }
5576 }
5577 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5578 {
5579 /* Saving the previous values. */
/* The old ovector pair and the old localptr value are pushed; localptr then
receives the current STR_PTR. */
5580 allocate_stack(common, 3);
5581 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5582 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5583 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5584 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5585 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5586 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
5587 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5588 }
5589 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5590 {
5591 /* Saving the previous value. */
5592 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5593 allocate_stack(common, 1);
5594 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
5595 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5596 }
5597 else if (has_alternatives)
5598 {
5599 /* Pushing the starting string pointer. */
5600 allocate_stack(common, 1);
5601 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5602 }
5603
5604 /* Generating code for the first alternative. */
/* For conditional groups the condition is evaluated first; a failed
condition jumps through the u.condfailed list of the backtrack record. */
5605 if (opcode == OP_COND || opcode == OP_SCOND)
5606 {
5607 if (*trypath == OP_CREF)
5608 {
5609 SLJIT_ASSERT(has_alternatives);
5610 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5611 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(trypath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5612 trypath += 1 + IMM2_SIZE;
5613 }
5614 else if (*trypath == OP_NCREF)
5615 {
5616 SLJIT_ASSERT(has_alternatives);
5617 stacksize = GET2(trypath, 1);
5618 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5619
/* The referenced group itself is unset: call do_searchovector at run time to
check the other groups with the same name. STACK_TOP is saved in
POSSESSIVE1 around the call and restored afterwards. */
5620 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5621 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5622 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5623 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
5624 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5625 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5626 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5627 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5628 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5629
5630 JUMPHERE(jump);
5631 trypath += 1 + IMM2_SIZE;
5632 }
5633 else if (*trypath == OP_RREF || *trypath == OP_NRREF)
5634 {
5635 /* Never has other case. */
5636 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5637
/* stacksize is reused here as the compile-time result of the recursion
test (non-zero means the condition holds). */
5638 stacksize = GET2(trypath, 1);
5639 if (common->currententry == NULL)
5640 stacksize = 0;
5641 else if (stacksize == RREF_ANY)
5642 stacksize = 1;
5643 else if (common->currententry->start == 0)
5644 stacksize = stacksize == 0;
5645 else
5646 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5647
5648 if (*trypath == OP_RREF || stacksize || common->currententry == NULL)
5649 {
5650 SLJIT_ASSERT(!has_alternatives);
/* The outcome is known at compile time: compile either the first branch
(condition true) or the second one, if any (condition false). */
5651 if (stacksize != 0)
5652 trypath += 1 + IMM2_SIZE;
5653 else
5654 {
5655 if (*cc == OP_ALT)
5656 {
5657 trypath = cc + 1 + LINK_SIZE;
5658 cc += GET(cc, 1);
5659 }
5660 else
5661 trypath = cc;
5662 }
5663 }
5664 else
5665 {
5666 SLJIT_ASSERT(has_alternatives);
5667
/* Named recursion test that cannot be decided here: call do_searchgroups at
run time, again saving STACK_TOP in POSSESSIVE1 around the call. */
5668 stacksize = GET2(trypath, 1);
5669 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5670 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5672 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5673 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
5674 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5675 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5676 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5677 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5678 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5679 trypath += 1 + IMM2_SIZE;
5680 }
5681 }
5682 else
5683 {
/* The condition is an assertion: it gets its own assert_backtrack record and
is compiled by compile_assert_trypath with its last argument set to TRUE. */
5684 SLJIT_ASSERT(has_alternatives && *trypath >= OP_ASSERT && *trypath <= OP_ASSERTBACK_NOT);
5685 /* Similar code as PUSH_BACKTRACK macro. */
5686 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5687 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5688 return NULL;
5689 memset(assert, 0, sizeof(assert_backtrack));
5690 assert->common.cc = trypath;
5691 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5692 trypath = compile_assert_trypath(common, trypath, assert, TRUE);
5693 }
5694 }
5695
/* Compile the body of the selected alternative: the byte code from trypath
up to cc. */
5696 compile_trypath(common, trypath, cc, backtrack);
5697 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5698 return NULL;
5699
/* An OP_ONCE group resets STACK_TOP relative to the value kept in localptr
once its body has matched. */
5700 if (opcode == OP_ONCE)
5701 {
5702 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5703 {
5704 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5705 /* TMP2 which is set here used by OP_KETRMAX below. */
5706 if (ket == OP_KETRMAX)
5707 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5708 else if (ket == OP_KETRMIN)
5709 {
5710 /* Move the STR_PTR to the localptr. */
5711 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
5712 }
5713 }
5714 else
5715 {
5716 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5717 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
5718 if (ket == OP_KETRMAX)
5719 {
5720 /* TMP2 which is set here used by OP_KETRMAX below. */
5721 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5722 }
5723 }
5724 }
5725
/* Trailing slots: the current STR_PTR for repeated groups (or a 0 for a
non-repeated BRAZERO/BRAMINZERO group), followed by the alternative index 0
unless this is an OP_ONCE group. */
5726 stacksize = 0;
5727 if (ket != OP_KET || bra != OP_BRA)
5728 stacksize++;
5729 if (has_alternatives && opcode != OP_ONCE)
5730 stacksize++;
5731
5732 if (stacksize > 0)
5733 allocate_stack(common, stacksize);
5734
5735 stacksize = 0;
5736 if (ket != OP_KET)
5737 {
5738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5739 stacksize++;
5740 }
5741 else if (bra != OP_BRA)
5742 {
5743 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5744 stacksize++;
5745 }
5746
5747 if (has_alternatives)
5748 {
5749 if (opcode != OP_ONCE)
5750 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5751 if (ket != OP_KETRMAX)
5752 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5753 }
5754
5755 /* Must be after the trypath label. */
/* Capturing groups only (offset is non-zero): the start saved in localptr
and the current STR_PTR become the new ovector pair. */
5756 if (offset != 0)
5757 {
5758 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5759 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5760 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5761 }
5762
/* Greedy repeat: jump back to rmaxlabel for the next iteration; for OP_ONCE
and the OP_SBRA-and-above opcodes only if the iteration was not empty. */
5763 if (ket == OP_KETRMAX)
5764 {
5765 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5766 {
5767 if (has_alternatives)
5768 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5769 /* Checking zero-length iteration. */
5770 if (opcode != OP_ONCE)
5771 {
5772 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
5773 /* Drop STR_PTR for greedy plus quantifier. */
5774 if (bra != OP_BRAZERO)
5775 free_stack(common, 1);
5776 }
5777 else
5778 /* TMP2 must contain the starting STR_PTR. */
5779 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5780 }
5781 else
5782 JUMPTO(SLJIT_JUMP, rmaxlabel);
5783 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
5784 }
5785
5786 if (bra == OP_BRAZERO)
5787 BACKTRACK_AS(bracket_backtrack)->zerotrypath = LABEL();
5788
5789 if (bra == OP_BRAMINZERO)
5790 {
5791 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5792 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->trypath);
5793 if (braminzerojump != NULL)
5794 {
5795 JUMPHERE(braminzerojump);
5796 /* We need to release the end pointer to perform the
5797 backtrack for the zero-length iteration. When
5798 framesize is < 0, OP_ONCE will do the release itself. */
5799 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5800 {
5801 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5802 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5803 }
5804 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5805 free_stack(common, 1);
5806 }
5807 /* Continue to the normal backtrack. */
5808 }
5809
5810 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5811 decrease_call_count(common);
5812
5813 /* Skip the other alternatives. */
5814 while (*cc == OP_ALT)
5815 cc += GET(cc, 1);
/* Step over the closing KET opcode and its link. */
5816 cc += 1 + LINK_SIZE;
5817 return cc;
5818 }
5819
5820 static pcre_uchar *compile_bracketpos_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5821 {
5822 DEFINE_COMPILER;
5823 backtrack_common *backtrack;
5824 pcre_uchar opcode;
5825 int localptr;
5826 int cbraprivptr = 0;
5827 int framesize;
5828 int stacksize;
5829 int offset = 0;
5830 BOOL zero = FALSE;
5831 pcre_uchar *ccbegin = NULL;
5832 int stack;
5833 struct sljit_label *loop = NULL;
5834 struct jump_list *emptymatch = NULL;
5835
5836 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
5837 if (*cc == OP_BRAPOSZERO)
5838 {
5839 zero = TRUE;
5840 cc++;
5841 }
5842
5843 opcode = *cc;
5844 localptr = PRIV_DATA(cc);
5845 SLJIT_ASSERT(localptr != 0);
5846 BACKTRACK_AS(bracketpos_backtrack)->localptr = localptr;
5847 switch(opcode)
5848 {
5849 case OP_BRAPOS:
5850 case OP_SBRAPOS:
5851 ccbegin = cc + 1 + LINK_SIZE;
5852 break;
5853
5854 case OP_CBRAPOS:
5855 case OP_SCBRAPOS:
5856 offset = GET2(cc, 1 + LINK_SIZE);
5857 cbraprivptr = OVECTOR_PRIV(offset);
5858 offset <<= 1;
5859 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5860 break;
5861
5862 default:
5863 SLJIT_ASSERT_STOP();
5864 break;
5865 }
5866
5867 framesize = get_framesize(common, cc, FALSE);
5868 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
5869 if (framesize < 0)
5870 {
5871 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
5872 if (!zero)
5873 stacksize++;
5874 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5875 allocate_stack(common, stacksize);
5876 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5877
5878 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5879 {
5880 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5881 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5882 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5883 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5884 }
5885 else
5886 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5887
5888 if (!zero)
5889 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
5890 }
5891 else
5892 {
5893 stacksize = framesize + 1;
5894 if (!zero)
5895 stacksize++;
5896 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5897 stacksize++;
5898 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5899 allocate_stack(common, stacksize);
5900
5901 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5902 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5903 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5904 stack = 0;
5905 if (!zero)
5906 {
5907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5908 stack++;
5909 }
5910 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5911 {
5912 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
5913 stack++;
5914 }
5915 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
5916 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
5917 }
5918
5919 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5921
5922 loop = LABEL();
5923 while (*cc != OP_KETRPOS)
5924 {
5925 backtrack->top = NULL;
5926 backtrack->topbacktracks = NULL;
5927 cc += GET(cc, 1);
5928
5929 compile_trypath(common, ccbegin, cc, backtrack);
5930 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5931 return NULL;
5932
5933 if (framesize < 0)
5934 {
5935 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5936
5937 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5938 {
5939 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5940 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5941 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5942 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5943 }
5944 else
5945 {
5946 if (opcode == OP_SBRAPOS)
5947 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5948 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5949 }
5950
5951 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5952 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5953
5954 if (!zero)
5955 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5956 }
5957 else
5958 {
5959 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5960 {
5961 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
5962 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5963 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5964 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5965 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5966 }
5967 else
5968 {
5969 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5970 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
5971 if (opcode == OP_SBRAPOS)
5972 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5973 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
5974 }
5975
5976 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5977 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5978
5979 if (!zero)
5980 {
5981 if (framesize < 0)
5982 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5983 else
5984 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5985 }
5986 }
5987 JUMPTO(SLJIT_JUMP, loop);
5988 flush_stubs(common);
5989
5990 compile_backtrackpath(common, backtrack->top);
5991 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5992 return NULL;
5993 set_jumps(backtrack->topbacktracks, LABEL());
5994
5995 if (framesize < 0)
5996 {
5997 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5998 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5999 else
6000 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6001 }
6002 else
6003 {
6004 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6005 {
6006 /* Last alternative. */
6007 if (*cc == OP_KETRPOS)
6008 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6009 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6010 }
6011 else
6012 {
6013 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6014 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6015 }
6016 }
6017
6018 if (*cc == OP_KETRPOS)
6019 break;
6020 ccbegin = cc + 1 + LINK_SIZE;
6021 }
6022
6023 backtrack->topbacktracks = NULL;
6024 if (!zero)
6025 {
6026 if (framesize < 0)
6027 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6028 else /* TMP2 is set to [localptr] above. */
6029 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
6030 }
6031
6032 /* None of them matched. */
6033 set_jumps(emptymatch, LABEL());
6034 decrease_call_count(common);
6035 return cc + 1 + LINK_SIZE;
6036 }
6037
6038 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6039 {
6040 int class_len;
6041
6042 *opcode = *cc;
6043 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6044 {
6045 cc++;
6046 *type = OP_CHAR;
6047 }
6048 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6049 {
6050 cc++;
6051 *type = OP_CHARI;
6052 *opcode -= OP_STARI - OP_STAR;
6053 }
6054 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6055 {
6056 cc++;
6057 *type = OP_NOT;
6058 *opcode -= OP_NOTSTAR - OP_STAR;
6059 }
6060 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6061 {
6062 cc++;
6063 *type = OP_NOTI;
6064 *opcode -= OP_NOTSTARI - OP_STAR;
6065 }
6066 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6067 {
6068 cc++;
6069 *opcode -= OP_TYPESTAR - OP_STAR;
6070 *type = 0;
6071 }
6072 else
6073 {
6074 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6075 *type = *opcode;
6076 cc++;
6077 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6078 *opcode = cc[class_len - 1];
6079 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6080 {
6081 *opcode -= OP_CRSTAR - OP_STAR;
6082 if (end != NULL)
6083 *end = cc + class_len;
6084 }
6085 else
6086 {
6087 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6088 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6089 *arg2 = GET2(cc, class_len);
6090
6091 if (*arg2 == 0)
6092 {
6093 SLJIT_ASSERT(*arg1 != 0);
6094 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6095 }
6096 if (*arg1 == *arg2)
6097 *opcode = OP_EXACT;
6098
6099 if (end != NULL)
6100 *end = cc + class_len + 2 * IMM2_SIZE;
6101 }
6102 return cc;
6103 }
6104
6105 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6106 {
6107 *arg1 = GET2(cc, 0);
6108 cc += IMM2_SIZE;
6109 }
6110
6111 if (*type == 0)
6112 {
6113 *type = *cc;
6114 if (end != NULL)
6115 *end = next_opcode(common, cc);
6116 cc++;
6117 return cc;
6118 }
6119
6120 if (end != NULL)
6121 {
6122 *end = cc + 1;
6123 #ifdef SUPPORT_UTF
6124 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6125 #endif
6126 }
6127 return cc;
6128 }
6129
/* Generates the try path (forward matching code) for a repeated single item:
a character, a negated character, a character type or a character class.

Arguments:
  common    compiler state
  cc        points at the repeat (or class) opcode
  parent    backtrack record that receives the new iterator_backtrack entry

Returns:    the "end" address reported by get_iterator_parameters(), i.e. the
            code following the repeated item. NULL is passed to PUSH_BACKTRACK
            as its third argument - presumably the value returned when the
            allocation fails (macro not visible here; confirm).
*/
6130 static pcre_uchar *compile_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6131 {
6132 DEFINE_COMPILER;
6133 backtrack_common *backtrack;
6134 pcre_uchar opcode;
6135 pcre_uchar type;
/* Filled in by get_iterator_parameters() when the repeat carries counts. */
6136 int arg1 = -1, arg2 = -1;
6137 pcre_uchar* end;
6138 jump_list *nomatch = NULL;
6139 struct sljit_jump *jump = NULL;
6140 struct sljit_label *label;
/* The iterator keeps up to two machine words of state. When PRIV_DATA(cc) is
zero they are the top two backtrack stack slots, otherwise they are two
consecutive words in the locals area starting at localptr. */
6141 int localptr = PRIV_DATA(cc);
6142 int base = (localptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6143 int offset0 = (localptr == 0) ? STACK(0) : localptr;
6144 int offset1 = (localptr == 0) ? STACK(1) : localptr + (int)sizeof(sljit_w);
6145 int tmp_base, tmp_offset;
6146
6147 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6148
/* From here on cc points at the data of the repeated item, and opcode is
expressed in the OP_STAR..OP_POSUPTO family (or OP_CRRANGE/OP_CRMINRANGE). */
6149 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6150
/* Select the scratch location used by the OP_EXACT and possessive cases
below: the TMP3 register for the first group of types, the POSSESSIVE0 local
slot for the second group.
NOTE(review): presumably the matching code of the second group may overwrite
TMP3 - confirm against compile_char1_trypath. */
6151 switch (type)
6152 {
6153 case OP_NOT_DIGIT:
6154 case OP_DIGIT:
6155 case OP_NOT_WHITESPACE:
6156 case OP_WHITESPACE:
6157 case OP_NOT_WORDCHAR:
6158 case OP_WORDCHAR:
6159 case OP_ANY:
6160 case OP_ALLANY:
6161 case OP_ANYBYTE:
6162 case OP_ANYNL:
6163 case OP_NOT_HSPACE:
6164 case OP_HSPACE:
6165 case OP_NOT_VSPACE:
6166 case OP_VSPACE:
6167 case OP_CHAR:
6168 case OP_CHARI:
6169 case OP_NOT:
6170 case OP_NOTI:
6171 case OP_CLASS:
6172 case OP_NCLASS:
6173 tmp_base = TMP3;
6174 tmp_offset = 0;
6175 break;
6176
6177 default:
6178 SLJIT_ASSERT_STOP();
6179 /* Fall through. */
6180
6181 case OP_EXTUNI:
6182 case OP_XCLASS:
6183 case OP_NOTPROP:
6184 case OP_PROP:
6185 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6186 tmp_offset = POSSESSIVE0;
6187 break;
6188 }
6189
6190 switch(opcode)
6191 {
/* Greedy repeats. */
6192 case OP_STAR:
6193 case OP_PLUS:
6194 case OP_UPTO:
6195 case OP_CRRANGE:
6196 if (type == OP_ANYNL || type == OP_EXTUNI)
6197 {
/* For these two types every matched position is pushed on the backtrack
stack. The initial frame ends with a zero word; the backtrack path of this
iterator pops positions and resumes at trypath while the popped value is
non-zero. */
6198 SLJIT_ASSERT(localptr == 0);
6199 if (opcode == OP_STAR || opcode == OP_UPTO)
6200 {
/* Zero repetitions are allowed: record the starting position as well. */
6201 allocate_stack(common, 2);
6202 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6204 }
6205 else
6206 {
6207 allocate_stack(common, 1);
6208 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6209 }
6210
/* Counted forms keep the number of matches so far in POSSESSIVE0. */
6211 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6212 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6213
6214 label = LABEL();
/* A failed match here goes to the backtrack list of this iterator. */
6215 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6216 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6217 {
6218 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6219 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
/* Below arg2: loop again without pushing the position. */
6220 if (opcode == OP_CRRANGE && arg2 > 0)
6221 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
/* arg1 reached: leave the loop ("jump" is resolved after the loop). */
6222 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6223 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6224 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6225 }
6226
6227 /* We cannot use TMP3 because of this allocate_stack. */
6228 allocate_stack(common, 1);
6229 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6230 JUMPTO(SLJIT_JUMP, label);
6231 if (jump != NULL)
6232 JUMPHERE(jump);
6233 }
6234 else
6235 {
/* All other types: only two words of state are kept. offset0 holds the
position after the last successful match. offset1 holds the starting position
for OP_STAR/OP_PLUS, or a counter starting at 1 for the counted forms. */
6236 if (opcode == OP_PLUS)
6237 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6238 if (localptr == 0)
6239 allocate_stack(common, 2);
6240 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
/* "opcode <= OP_PLUS" selects OP_STAR and OP_PLUS among the four opcodes of
this case (relies on the opcode numbering). */
6241 if (opcode <= OP_PLUS)
6242 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6243 else
6244 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6245 label = LABEL();
/* A failed match only ends the loop (nomatch), it is not a backtrack. */
6246 compile_char1_trypath(common, type, cc, &nomatch);
6247 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6248 if (opcode <= OP_PLUS)
6249 JUMPTO(SLJIT_JUMP, label);
6250 else if (opcode == OP_CRRANGE && arg1 == 0)
6251 {
/* No upper limit is tested here: count and loop unconditionally. */
6252 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6253 JUMPTO(SLJIT_JUMP, label);
6254 }
6255 else
6256 {
/* Count the match and loop while the counter is below arg1 + 1. */
6257 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6258 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6259 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6260 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6261 }
6262 set_jumps(nomatch, LABEL());
/* A class range fails when the counter is still below arg2 + 1. */
6263 if (opcode == OP_CRRANGE)
6264 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
/* Continue from the position after the last successful match. */
6265 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6266 }
6267 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6268 break;
6269
/* Lazy star/plus: only the current position is saved here (after the
mandatory first match for OP_MINPLUS). The trypath label recorded below is the
re-entry point for the backtrack path. */
6270 case OP_MINSTAR:
6271 case OP_MINPLUS:
6272 if (opcode == OP_MINPLUS)
6273 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6274 if (localptr == 0)
6275 allocate_stack(common, 1);
6276 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6277 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6278 break;
6279
/* Lazy counted forms: offset0 = position, offset1 = counter starting at 1.
OP_CRMINRANGE enters the backtrack path immediately. */
6280 case OP_MINUPTO:
6281 case OP_CRMINRANGE:
6282 if (localptr == 0)
6283 allocate_stack(common, 2);
6284 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6285 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6286 if (opcode == OP_CRMINRANGE)
6287 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6288 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6289 break;
6290
/* Optional item: save the position; the greedy form tries the item now and
a failure is routed to the backtrack list. */
6291 case OP_QUERY:
6292 case OP_MINQUERY:
6293 if (localptr == 0)
6294 allocate_stack(common, 1);
6295 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6296 if (opcode == OP_QUERY)
6297 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6298 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6299 break;
6300
/* Exact count: the scratch location counts down from arg1 to zero; any failed
match is a backtrack. */
6301 case OP_EXACT:
6302 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6303 label = LABEL();
6304 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6305 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6306 JUMPTO(SLJIT_C_NOT_ZERO, label);
6307 break;
6308
/* Possessive forms: no backtrack state is stored. The scratch location holds
the position after the last successful match; OP_POSUPTO additionally counts
down from arg1 in POSSESSIVE1. */
6309 case OP_POSSTAR:
6310 case OP_POSPLUS:
6311 case OP_POSUPTO:
6312 if (opcode == OP_POSPLUS)
6313 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6314 if (opcode == OP_POSUPTO)
6315 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6316 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6317 label = LABEL();
6318 compile_char1_trypath(common, type, cc, &nomatch);
6319 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6320 if (opcode != OP_POSUPTO)
6321 JUMPTO(SLJIT_JUMP, label);
6322 else
6323 {
6324 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6325 JUMPTO(SLJIT_C_NOT_ZERO, label);
6326 }
6327 set_jumps(nomatch, LABEL());
/* Both the loop exit and a failed match reload the last good position. */
6328 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6329 break;
6330
/* Possessive optional item: a single attempt, restoring the saved position
if it fails. */
6331 case OP_POSQUERY:
6332 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6333 compile_char1_trypath(common, type, cc, &nomatch);
6334 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6335 set_jumps(nomatch, LABEL());
6336 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6337 break;
6338
6339 default:
6340 SLJIT_ASSERT_STOP();
6341 break;
6342 }
6343
6344 decrease_call_count(common);
6345 return end;
6346 }
6347
6348 static SLJIT_INLINE pcre_uchar *compile_fail_accept_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6349 {
6350 DEFINE_COMPILER;
6351 backtrack_common *backtrack;
6352
6353 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6354
6355 if (*cc == OP_FAIL)
6356 {
6357 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6358 return cc + 1;
6359 }
6360
6361 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6362 {
6363 /* No need to check notempty conditions. */
6364 if (common->acceptlabel == NULL)
6365 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6366 else
6367 JUMPTO(SLJIT_JUMP, common->acceptlabel);
6368 return cc + 1;
6369 }
6370
6371 if (common->acceptlabel == NULL)
6372 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6373 else
6374 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
6375 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6376 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6377 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6378 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6379 if (common->acceptlabel == NULL)
6380 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6381 else
6382 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
6383 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6384 if (common->acceptlabel == NULL)
6385 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6386 else
6387 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
6388 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6389 return cc + 1;
6390 }
6391
6392 static SLJIT_INLINE pcre_uchar *compile_close_trypath(compiler_common *common, pcre_uchar *cc)
6393 {
6394 DEFINE_COMPILER;
6395 int offset = GET2(cc, 1);
6396
6397 /* Data will be discarded anyway... */
6398 if (common->currententry != NULL)
6399 return cc + 1 + IMM2_SIZE;
6400
6401 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6402 offset <<= 1;
6403 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6404 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6405 return cc + 1 + IMM2_SIZE;
6406 }
6407
/* Generates the try path for the opcodes in the range [cc, ccend).

Each opcode is handed to the generator responsible for it, and that generator
returns the address of the next opcode. Generation stops early when a
generator returns NULL or when an unknown opcode is met.

Simple items, which need no backtrack record of their own, attach their
failure jumps to the most recently pushed backtrack record of parent
(parent->top->nextbacktracks) or, when there is none yet, to
parent->topbacktracks.

Arguments:
  common    compiler state
  cc        first opcode to compile
  ccend     address where compilation must stop
  parent    backtrack record collecting the entries created here
*/
6408 static void compile_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6409 {
6410 DEFINE_COMPILER;
/* Assigned by the PUSH_BACKTRACK* macros and read by BACKTRACK_AS. */
6411 backtrack_common *backtrack;
6412
6413 while (cc < ccend)
6414 {
6415 switch(*cc)
6416 {
/* Single items and positional assertions handled by compile_char1_trypath. */
6417 case OP_SOD:
6418 case OP_SOM:
6419 case OP_NOT_WORD_BOUNDARY:
6420 case OP_WORD_BOUNDARY:
6421 case OP_NOT_DIGIT:
6422 case OP_DIGIT:
6423 case OP_NOT_WHITESPACE:
6424 case OP_WHITESPACE:
6425 case OP_NOT_WORDCHAR:
6426 case OP_WORDCHAR:
6427 case OP_ANY:
6428 case OP_ALLANY:
6429 case OP_ANYBYTE:
6430 case OP_NOTPROP:
6431 case OP_PROP:
6432 case OP_ANYNL:
6433 case OP_NOT_HSPACE:
6434 case OP_HSPACE:
6435 case OP_NOT_VSPACE:
6436 case OP_VSPACE:
6437 case OP_EXTUNI:
6438 case OP_EODN:
6439 case OP_EOD:
6440 case OP_CIRC:
6441 case OP_CIRCM:
6442 case OP_DOLL:
6443 case OP_DOLLM:
6444 case OP_NOT:
6445 case OP_NOTI:
6446 case OP_REVERSE:
6447 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6448 break;
6449
/* The previous OVECTOR(0) is saved on the stack, then OVECTOR(0) is set to
the current subject position. */
6450 case OP_SET_SOM:
6451 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6452 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6453 allocate_stack(common, 1);
6454 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6455 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6456 cc++;
6457 break;
6458
/* compile_charn_trypath is only used in JIT_COMPILE mode; it also receives
ccend (presumably so that it can handle a run of characters - confirm). */
6459 case OP_CHAR:
6460 case OP_CHARI:
6461 if (common->mode == JIT_COMPILE)
6462 cc = compile_charn_trypath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6463 else
6464 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6465 break;
6466
/* Every repeat of a character, negated character or character type. */
6467 case OP_STAR:
6468 case OP_MINSTAR:
6469 case OP_PLUS:
6470 case OP_MINPLUS:
6471 case OP_QUERY:
6472 case OP_MINQUERY:
6473 case OP_UPTO:
6474 case OP_MINUPTO:
6475 case OP_EXACT:
6476 case OP_POSSTAR:
6477 case OP_POSPLUS:
6478 case OP_POSQUERY:
6479 case OP_POSUPTO:
6480 case OP_STARI:
6481 case OP_MINSTARI:
6482 case OP_PLUSI:
6483 case OP_MINPLUSI:
6484 case OP_QUERYI:
6485 case OP_MINQUERYI:
6486 case OP_UPTOI:
6487 case OP_MINUPTOI:
6488 case OP_EXACTI:
6489 case OP_POSSTARI:
6490 case OP_POSPLUSI:
6491 case OP_POSQUERYI:
6492 case OP_POSUPTOI:
6493 case OP_NOTSTAR:
6494 case OP_NOTMINSTAR:
6495 case OP_NOTPLUS:
6496 case OP_NOTMINPLUS:
6497 case OP_NOTQUERY:
6498 case OP_NOTMINQUERY:
6499 case OP_NOTUPTO:
6500 case OP_NOTMINUPTO:
6501 case OP_NOTEXACT:
6502 case OP_NOTPOSSTAR:
6503 case OP_NOTPOSPLUS:
6504 case OP_NOTPOSQUERY:
6505 case OP_NOTPOSUPTO:
6506 case OP_NOTSTARI:
6507 case OP_NOTMINSTARI:
6508 case OP_NOTPLUSI:
6509 case OP_NOTMINPLUSI:
6510 case OP_NOTQUERYI:
6511 case OP_NOTMINQUERYI:
6512 case OP_NOTUPTOI:
6513 case OP_NOTMINUPTOI:
6514 case OP_NOTEXACTI:
6515 case OP_NOTPOSSTARI:
6516 case OP_NOTPOSPLUSI:
6517 case OP_NOTPOSQUERYI:
6518 case OP_NOTPOSUPTOI:
6519 case OP_TYPESTAR:
6520 case OP_TYPEMINSTAR:
6521 case OP_TYPEPLUS:
6522 case OP_TYPEMINPLUS:
6523 case OP_TYPEQUERY:
6524 case OP_TYPEMINQUERY:
6525 case OP_TYPEUPTO:
6526 case OP_TYPEMINUPTO:
6527 case OP_TYPEEXACT:
6528 case OP_TYPEPOSSTAR:
6529 case OP_TYPEPOSPLUS:
6530 case OP_TYPEPOSQUERY:
6531 case OP_TYPEPOSUPTO:
6532 cc = compile_iterator_trypath(common, cc, parent);
6533 break;
6534
/* A class is an iterator when the code unit following its 32 byte bitmap is
a repeat opcode (OP_CRSTAR..OP_CRMINRANGE); otherwise it matches once. */
6535 case OP_CLASS:
6536 case OP_NCLASS:
6537 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
6538 cc = compile_iterator_trypath(common, cc, parent);
6539 else
6540 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6541 break;
6542
6543 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
/* Same test for an extended class; the value read by GET(cc, 1) is the
distance from cc to the code unit that follows the class. */
6544 case OP_XCLASS:
6545 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
6546 cc = compile_iterator_trypath(common, cc, parent);
6547 else
6548 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6549 break;
6550 #endif
6551
/* Back references: repeated when a repeat opcode follows the reference
number, single otherwise. */
6552 case OP_REF:
6553 case OP_REFI:
6554 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
6555 cc = compile_ref_iterator_trypath(common, cc, parent);
6556 else
6557 cc = compile_ref_trypath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
6558 break;
6559
6560 case OP_RECURSE:
6561 cc = compile_recurse_trypath(common, cc, parent);
6562 break;
6563
6564 case OP_ASSERT:
6565 case OP_ASSERT_NOT:
6566 case OP_ASSERTBACK:
6567 case OP_ASSERTBACK_NOT:
6568 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6569 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6570 break;
6571
/* Lazy optional bracket: the bracket itself is skipped on the try path and
only the state its backtrack path needs is stored. One stack slot (the
position) is used unless the bracket ends with OP_KETRMIN, in which case two
slots (zero and the position) are used. */
6572 case OP_BRAMINZERO:
6573 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
6574 cc = bracketend(cc + 1);
6575 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
6576 {
6577 allocate_stack(common, 1);
6578 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6579 }
6580 else
6581 {
6582 allocate_stack(common, 2);
6583 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6584 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
6585 }
6586 BACKTRACK_AS(braminzero_backtrack)->trypath = LABEL();
/* NOTE(review): cc was moved past the bracket above, so cc[1] inspects the
code that follows the bracket, not the opcode after OP_BRAMINZERO as the
OP_BRAZERO case below does - confirm that this is intended. */
6587 if (cc[1] > OP_ASSERTBACK_NOT)
6588 decrease_call_count(common);
6589 break;
6590
6591 case OP_ONCE:
6592 case OP_ONCE_NC:
6593 case OP_BRA:
6594 case OP_CBRA:
6595 case OP_COND:
6596 case OP_SBRA:
6597 case OP_SCBRA:
6598 case OP_SCOND:
6599 cc = compile_bracket_trypath(common, cc, parent);
6600 break;
6601
/* OP_BRAZERO is followed either by a bracket (opcode above
OP_ASSERTBACK_NOT) or by an assertion. */
6602 case OP_BRAZERO:
6603 if (cc[1] > OP_ASSERTBACK_NOT)
6604 cc = compile_bracket_trypath(common, cc, parent);
6605 else
6606 {
6607 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6608 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6609 }
6610 break;
6611
6612 case OP_BRAPOS:
6613 case OP_CBRAPOS:
6614 case OP_SBRAPOS:
6615 case OP_SCBRAPOS:
6616 case OP_BRAPOSZERO:
6617 cc = compile_bracketpos_trypath(common, cc, parent);
6618 break;
6619
/* The previous mark pointer is saved on the stack; the address cc + 2 becomes
the new mark, stored both in the local slot and in the mark_ptr field of the
arguments. */
6620 case OP_MARK:
6621 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6622 SLJIT_ASSERT(common->mark_ptr != 0);
6623 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6624 allocate_stack(common, 1);
6625 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6626 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6627 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
6628 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
6629 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
/* Skip the opcode, two further code units and the cc[1] units of the name
(presumably the length unit and a terminator - confirm). */
6630 cc += 1 + 2 + cc[1];
6631 break;
6632
/* Nothing is generated on the try path; only a backtrack record is pushed. */
6633 case OP_COMMIT:
6634 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6635 cc += 1;
6636 break;
6637
6638 case OP_FAIL:
6639 case OP_ACCEPT:
6640 case OP_ASSERT_ACCEPT:
6641 cc = compile_fail_accept_trypath(common, cc, parent);
6642 break;
6643
6644 case OP_CLOSE:
6645 cc = compile_close_trypath(common, cc);
6646 break;
6647
/* The bracket following OP_SKIPZERO is skipped without generating code. */
6648 case OP_SKIPZERO:
6649 cc = bracketend(cc + 1);
6650 break;
6651
6652 default:
6653 SLJIT_ASSERT_STOP();
6654 return;
6655 }
/* A generator signals failure by returning NULL. */
6656 if (cc == NULL)
6657 return;
6658 }
6659 SLJIT_ASSERT(cc == ccend);
6660 }
6661
/* The helper macros of the try path generators are dropped here. */
6662 #undef PUSH_BACKTRACK
6663 #undef PUSH_BACKTRACK_NOVALUE
6664 #undef BACKTRACK_AS
6665
/* Helper macros for the backtrack path generators.

COMPILE_BACKTRACKPATH compiles the backtrack path of "current" and returns
from the enclosing function when the compiler reports an error. It expects
the names "common" and "compiler" to be in scope, and since it uses a plain
"return;" it can only be used inside functions returning void. */
6666 #define COMPILE_BACKTRACKPATH(current) \
6667 do \
6668 { \
6669 compile_backtrackpath(common, (current)); \
6670 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6671 return; \
6672 } \
6673 while (0)
6674
/* Casts the variable named "current" of the enclosing function to a pointer
to the given backtrack structure type. */
6675 #define CURRENT_AS(type) ((type *)current)
6676
6677 static void compile_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current)
6678 {
6679 DEFINE_COMPILER;
6680 pcre_uchar *cc = current->cc;
6681 pcre_uchar opcode;
6682 pcre_uchar type;
6683 int arg1 = -1, arg2 = -1;
6684 struct sljit_label *label = NULL;
6685 struct sljit_jump *jump = NULL;
6686 jump_list *jumplist = NULL;
6687 int localptr = PRIV_DATA(cc);
6688 int base = (localptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6689 int offset0 = (localptr == 0) ? STACK(0) : localptr;
6690 int offset1 = (localptr == 0) ? STACK(1) : localptr + (int)sizeof(sljit_w);
6691
6692 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
6693
6694 switch(opcode)
6695 {
6696 case OP_STAR:
6697 case OP_PLUS:
6698 case OP_UPTO:
6699 case OP_CRRANGE:
6700 if (type == OP_ANYNL || type == OP_EXTUNI)
6701 {
6702 SLJIT_ASSERT(localptr == 0);
6703 set_jumps(current->topbacktracks, LABEL());
6704 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6705 free_stack(common, 1);
6706 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6707 }
6708 else
6709 {
6710 if (opcode == OP_UPTO)
6711 arg2 = 0;
6712 if (opcode <= OP_PLUS)
6713 {
6714 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6715 jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1);
6716 }
6717 else
6718 {
6719 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6720 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6721 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);
6722 OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
6723 }
6724 skip_char_back(common);
6725 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6726 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6727 if (opcode == OP_CRRANGE)
6728 set_jumps(current->topbacktracks, LABEL());
6729 JUMPHERE(jump);
6730 if (localptr == 0)
6731 free_stack(common, 2);
6732 if (opcode == OP_PLUS)
6733 set_jumps(current->topbacktracks, LABEL());
6734 }
6735 break;
6736
6737 case OP_MINSTAR:
6738 case OP_MINPLUS:
6739 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6740 compile_char1_trypath(common, type, cc, &jumplist);
6741 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6742 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6743 set_jumps(jumplist, LABEL());
6744 if (localptr == 0)
6745 free_stack(common, 1);
6746 if (opcode == OP_MINPLUS)
6747 set_jumps(current->topbacktracks, LABEL());
6748 break;
6749
6750 case OP_MINUPTO:
6751 case OP_CRMINRANGE:
6752 if (opcode == OP_CRMINRANGE)
6753 {
6754 label = LABEL();
6755 set_jumps(current->topbacktracks, label);
6756 }
6757 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6758 compile_char1_trypath(common, type, cc, &jumplist);
6759
6760 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6761 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6762 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6763 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6764
6765 if (opcode == OP_CRMINRANGE)
6766 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
6767
6768 if (opcode == OP_CRMINRANGE && arg1 == 0)
6769 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6770 else
6771 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->trypath);
6772
6773 set_jumps(jumplist, LABEL());
6774 if (localptr == 0)
6775 free_stack(common, 2);
6776 break;
6777
6778 case OP_QUERY:
6779 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6780 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
6781 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6782 jump = JUMP(SLJIT_JUMP);
6783 set_jumps(current->topbacktracks, LABEL());
6784 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6785 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
6786 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6787 JUMPHERE(jump);
6788 if (localptr == 0)
6789 free_stack(common, 1);
6790 break;
6791
6792 case OP_MINQUERY:
6793 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6794 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
6795 jump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6796 compile_char1_trypath(common, type, cc, &jumplist);
6797 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtra