/[pcre]/code/tags/pcre-8.38/pcre_jit_compile.c
ViewVC logotype

Contents of /code/tags/pcre-8.38/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 992 - (show annotations)
Sun Jul 8 17:10:07 2012 UTC (7 years, 4 months ago) by zherczeg
Original Path: code/trunk/pcre_jit_compile.c
File MIME type: text/plain
File size: 249427 byte(s)
Error occurred while calculating annotation data.
One more range optimization in the JIT compiler for case insensitive, English letter ranges
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Allocate memory on the stack. Fast, but limited size. */
/* NOTE(review): presumably a byte count; the use site is outside this chunk. */
69 #define LOCAL_SPACE_SIZE 32768
70
/* NOTE(review): presumably the step (in bytes) by which the backtrack stack
grows; the use site is outside this chunk - confirm. */
71 #define STACK_GROWTH_RATE 8192
72
73 /* Enable to check that the allocation could destroy temporaries. */
/* DESTROY_REGISTERS is only defined when SLJIT_DEBUG is defined and non-zero. */
74 #if defined SLJIT_DEBUG && SLJIT_DEBUG
75 #define DESTROY_REGISTERS 1
76 #endif
77
78 /*
79 Short summary about the backtracking mechanism employed by the jit code generator:
80
81 The code generator follows the recursive nature of the PERL compatible regular
82 expressions. The basic blocks of regular expressions are condition checkers
83 which execute different commands depending on the result of the condition check.
84 The relationship between the operators can be horizontal (concatenation) and
85 vertical (sub-expression) (See struct backtrack_common for more details).
86
87 'ab' - 'a' and 'b' regexps are concatenated
88 'a+' - 'a' is the sub-expression of the '+' operator
89
90 The condition checkers are boolean (true/false) checkers. Machine code is generated
91 for the checker itself and for the actions depending on the result of the checker.
92 The 'true' case is called as the try path (expected path), and the other is called as
93 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
94 branches on the try path.
95
96 Greedy star operator (*) :
97 Try path: match happens.
98 Backtrack path: match failed.
99 Non-greedy star operator (*?) :
100 Try path: no need to perform a match.
101 Backtrack path: match is required.
102
103 The following example shows the code generated for a capturing bracket
104 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
105 we have the following regular expression:
106
107 A(B|C)D
108
109 The generated code will be the following:
110
111 A try path
112 '(' try path (pushing arguments to the stack)
113 B try path
114 ')' try path (pushing arguments to the stack)
115 D try path
116 return with successful match
117
118 D backtrack path
119 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
120 B backtrack path
121 C try path
122 jump to D try path
123 C backtrack path
124 A backtrack path
125
126 Notice that the order of the backtrack code paths is the opposite of the fast
127 code paths. In this way the topmost value on the stack always belongs
128 to the current backtrack code path. The backtrack path must check
129 whether there is a next alternative. If so, it needs to jump back to
130 the try path eventually. Otherwise it needs to clear out its own stack
131 frame and continue the execution on the backtrack code paths.
132 */
133
134 /*
135 Saved stack frames:
136
137 Atomic blocks and asserts require reloading the values of local variables
138 when the backtrack mechanism is performed. Because of OP_RECURSE, the locals
139 are not necessarily known at compile time, thus we need a dynamic restore
140 mechanism.
141
142 The stack frames are stored in a chain list, and have the following format:
143 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
144
145 Thus we can restore the locals to a particular point in the stack.
146 */
147
/* Argument block for the generated code: the jit_function type declared in
this file takes a pointer to one of these as its only parameter. */
148 typedef struct jit_arguments {
149 /* Pointers first. */
150 struct sljit_stack *stack;
151 const pcre_uchar *str;
152 const pcre_uchar *begin;
153 const pcre_uchar *end;
154 int *offsets;
155 pcre_uchar *uchar_ptr;
156 pcre_uchar *mark_ptr;
157 /* Everything else after. */
158 int offsetcount;
159 int calllimit;
/* NOTE(review): the four flags below presumably mirror the corresponding
PCRE exec options - confirm against the caller that fills this struct. */
160 pcre_uint8 notbol;
161 pcre_uint8 noteol;
162 pcre_uint8 notempty;
163 pcre_uint8 notempty_atstart;
164 } jit_arguments;
165
/* Holds one executable_funcs pointer and one executable_sizes value for each
of the JIT_NUMBER_OF_COMPILE_MODES compile modes, plus a callback and the
user data stored with it. */
166 typedef struct executable_functions {
167 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
168 PUBL(jit_callback) callback;
169 void *userdata;
170 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
171 } executable_functions;
172
/* Node of a singly linked list of sljit jumps. */
173 typedef struct jump_list {
174 struct sljit_jump *jump;
175 struct jump_list *next;
176 } jump_list;
177
/* Kinds of stub; stack_alloc is the only one declared. */
178 enum stub_types { stack_alloc };
179
/* Node of a singly linked list of stubs. Each node records the stub type, an
integer payload, the jump that enters the stub and the label to continue at.
NOTE(review): the meaning of 'data' depends on the code that creates the
stubs, which is outside this chunk. */
180 typedef struct stub_list {
181 enum stub_types type;
182 int data;
183 struct sljit_jump *start;
184 struct sljit_label *quit;
185 struct stub_list *next;
186 } stub_list;
187
/* Function pointer type (SLJIT_CALL calling convention) taking a
jit_arguments pointer and returning int. */
188 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
189
190 /* The following structure is the key data type for the recursive
191 code generator. It is allocated by compile_trypath, and contains
192 the arguments for compile_backtrackpath. Must be the first member
193 of its descendants. */
194 typedef struct backtrack_common {
195 /* Concatenation stack. */
196 struct backtrack_common *prev;
197 jump_list *nextbacktracks;
198 /* Internal stack (for component operators). */
199 struct backtrack_common *top;
200 jump_list *topbacktracks;
201 /* Opcode pointer. */
202 pcre_uchar *cc;
203 } backtrack_common;
204
/* Backtrack record for assertions; starts with the shared backtrack_common
header. */
205 typedef struct assert_backtrack {
206 backtrack_common common;
207 jump_list *condfailed;
208 /* Less than 0 (-1) if a frame is not needed. */
209 int framesize;
210 /* Points to our private memory word on the stack. */
211 int localptr;
212 /* For iterators. */
213 struct sljit_label *trypath;
214 } assert_backtrack;
215
/* Backtrack record for brackets; starts with the shared backtrack_common
header. */
216 typedef struct bracket_backtrack {
217 backtrack_common common;
218 /* Where to continue if an alternative is successfully matched. */
219 struct sljit_label *alttrypath;
220 /* For rmin and rmax iterators. */
221 struct sljit_label *recursivetrypath;
222 /* For greedy ? operator. */
223 struct sljit_label *zerotrypath;
224 /* Contains the branches of a failed condition. */
/* Only one member of the union is meaningful for a given bracket opcode. */
225 union {
226 /* Both for OP_COND, OP_SCOND. */
227 jump_list *condfailed;
228 assert_backtrack *assert;
229 /* For OP_ONCE. -1 if not needed. */
230 int framesize;
231 } u;
232 /* Points to our private memory word on the stack. */
233 int localptr;
234 } bracket_backtrack;
235
/* Backtrack record with a private local word, a frame size and a stack
size; starts with the shared backtrack_common header.
NOTE(review): by its name this serves the possessive bracket opcodes
(OP_BRAPOS and friends) - confirm at the use site. */
236 typedef struct bracketpos_backtrack {
237 backtrack_common common;
238 /* Points to our private memory word on the stack. */
239 int localptr;
240 /* Reverting stack is needed. */
241 int framesize;
242 /* Allocated stack size. */
243 int stacksize;
244 } bracketpos_backtrack;
245
/* Backtrack record holding a single try-path label; starts with the shared
backtrack_common header.
NOTE(review): by its name this serves OP_BRAMINZERO - confirm at the use site. */
246 typedef struct braminzero_backtrack {
247 backtrack_common common;
248 struct sljit_label *trypath;
249 } braminzero_backtrack;
250
/* Backtrack record for iterators; starts with the shared backtrack_common
header. */
251 typedef struct iterator_backtrack {
252 backtrack_common common;
253 /* Next iteration. */
254 struct sljit_label *trypath;
255 } iterator_backtrack;
256
/* Node of a singly linked list describing one recursion target. */
257 typedef struct recurse_entry {
258 struct recurse_entry *next;
259 /* Contains the function entry. */
260 struct sljit_label *entry;
261 /* Collects the calls made before the function is created. */
262 jump_list *calls;
263 /* Points to the starting opcode. */
264 int start;
265 } recurse_entry;
266
/* Backtrack record that carries nothing beyond the shared header.
NOTE(review): by its name this serves OP_RECURSE - confirm at the use site. */
267 typedef struct recurse_backtrack {
268 backtrack_common common;
269 } recurse_backtrack;
270
/* Used below to size the digits[] array. */
271 #define MAX_RANGE_SIZE 6
272
/* State shared by the code generator functions of this file; they receive it
through their 'common' parameter. */
273 typedef struct compiler_common {
274 struct sljit_compiler *compiler;
/* First opcode of the pattern; localptrs is indexed relative to it. */
275 pcre_uchar *start;
276
277 /* Opcode local area direct map. */
/* Indexed by (opcode address - start), see PRIV_DATA. */
278 int *localptrs;
279 int cbraptr;
280 /* OVector starting point. Must be divisible by 2. */
281 int ovector_start;
282 /* Last known position of the requested byte. */
283 int req_char_ptr;
284 /* Head of the last recursion. */
/* Stays 0 until get_localspace meets the first OP_RECURSE. */
285 int recursive_head;
286 /* First inspected character for partial matching. */
287 int start_used_ptr;
288 /* Starting pointer for partial soft matches. */
289 int hit_start;
290 /* End pointer of the first line. */
291 int first_line_end;
292 /* Points to the marked string. */
/* Stays 0 until get_localspace meets the first OP_MARK. */
293 int mark_ptr;
294
295 /* Flipped and lower case tables. */
296 const pcre_uint8 *fcc;
297 sljit_w lcc;
298 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
299 int mode;
300 /* Newline control. */
301 int nltype;
302 int newline;
303 int bsr_nltype;
304 /* Dollar endonly. */
305 int endonly;
/* Set to TRUE by get_localspace when the pattern contains OP_SET_SOM. */
306 BOOL has_set_som;
307 /* Tables. */
308 sljit_w ctypes;
309 int digits[2 + MAX_RANGE_SIZE];
310 /* Named capturing brackets. */
311 sljit_uw name_table;
312 sljit_w name_count;
313 sljit_w name_entry_size;
314
315 /* Labels and jump lists. */
316 struct sljit_label *partialmatchlabel;
317 struct sljit_label *quitlabel;
318 struct sljit_label *acceptlabel;
319 stub_list *stubs;
320 recurse_entry *entries;
321 recurse_entry *currententry;
322 jump_list *partialmatch;
323 jump_list *quit;
324 jump_list *accept;
325 jump_list *calllimit;
326 jump_list *stackalloc;
327 jump_list *revertframes;
328 jump_list *wordboundary;
329 jump_list *anynewline;
330 jump_list *hspace;
331 jump_list *vspace;
332 jump_list *casefulcmp;
333 jump_list *caselesscmp;
334 BOOL jscript_compat;
/* The remaining members exist only in UTF and/or UCP builds. */
335 #ifdef SUPPORT_UTF
336 BOOL utf;
337 #ifdef SUPPORT_UCP
338 BOOL use_ucp;
339 #endif
340 jump_list *utfreadchar;
341 #ifdef COMPILE_PCRE8
342 jump_list *utfreadtype8;
343 #endif
344 #endif /* SUPPORT_UTF */
345 #ifdef SUPPORT_UCP
346 jump_list *getucd;
347 #endif
348 } compiler_common;
349
350 /* For byte_sequence_compare. */
351
/* The ucharptr, c and oc members exist only when SLJIT_UNALIGNED is defined
and non-zero. The two unions have identical layouts: the same storage can be
read as an int, as an unsigned short, or as individual character units (four
bytes in the 8-bit library, two 16-bit units in the 16-bit library). */
352 typedef struct compare_context {
353 int length;
354 int sourcereg;
355 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
356 int ucharptr;
357 union {
358 sljit_i asint;
359 sljit_uh asushort;
360 #ifdef COMPILE_PCRE8
361 sljit_ub asbyte;
362 sljit_ub asuchars[4];
363 #else
364 #ifdef COMPILE_PCRE16
365 sljit_uh asuchars[2];
366 #endif
367 #endif
368 } c;
369 union {
370 sljit_i asint;
371 sljit_uh asushort;
372 #ifdef COMPILE_PCRE8
373 sljit_ub asbyte;
374 sljit_ub asuchars[4];
375 #else
376 #ifdef COMPILE_PCRE16
377 sljit_uh asuchars[2];
378 #endif
379 #endif
380 } oc;
381 #endif
382 } compare_context;
383
/* Marker words written into a saved stack frame by init_frame:
frame_end terminates the frame, frame_setstrbegin precedes a saved copy of
OVECTOR(0), and frame_setmark precedes a saved copy of the mark_ptr local. */
384 enum {
385 frame_end = 0,
386 frame_setstrbegin = -1,
387 frame_setmark = -2
388 };
389
390 /* Undefine sljit macros. */
/* CMP is defined again further down as a shortcut for sljit_emit_cmp. */
391 #undef CMP
392
393 /* Used for accessing the elements of the stack. */
/* Yields a negative byte offset: STACK(0) is -sizeof(sljit_w) and every
further index lies one sljit_w lower. */
394 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
395
/* Symbolic names for the sljit registers used by the generated code. */
396 #define TMP1 SLJIT_TEMPORARY_REG1
397 #define TMP2 SLJIT_TEMPORARY_REG3
398 #define TMP3 SLJIT_TEMPORARY_EREG2
399 #define STR_PTR SLJIT_SAVED_REG1
400 #define STR_END SLJIT_SAVED_REG2
401 #define STACK_TOP SLJIT_TEMPORARY_REG2
402 #define STACK_LIMIT SLJIT_SAVED_REG3
403 #define ARGUMENTS SLJIT_SAVED_EREG1
404 #define CALL_COUNT SLJIT_SAVED_EREG2
405 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
406
407 /* Locals layout. */
/* Each macro below is a byte offset, a multiple of sizeof(sljit_w). */
408 /* These two locals can be used by the current opcode. */
409 #define LOCALS0 (0 * sizeof(sljit_w))
410 #define LOCALS1 (1 * sizeof(sljit_w))
411 /* Two local variables for possessive quantifiers (char1 cannot use them). */
412 #define POSSESSIVE0 (2 * sizeof(sljit_w))
413 #define POSSESSIVE1 (3 * sizeof(sljit_w))
414 /* Max limit of recursions. */
415 #define CALL_LIMIT (4 * sizeof(sljit_w))
416 /* The output vector is stored on the stack, and contains pointers
417 to characters. The vector data is divided into two groups: the first
418 group contains the start / end character pointers, and the second is
419 the start pointers when the end of the capturing group has not yet been reached. */
/* The four macros below expect a variable named 'common' (a compiler_common
pointer) to be in scope. */
420 #define OVECTOR_START (common->ovector_start)
421 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
422 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
/* Looks up the local-area offset recorded for the opcode at cc. */
423 #define PRIV_DATA(cc) (common->localptrs[(cc) - common->start])
424
/* Move opcodes for one character unit: the unsigned-byte forms in the 8-bit
library, the unsigned-halfword forms in the 16-bit library. Any other
configuration is rejected at compile time. */
425 #ifdef COMPILE_PCRE8
426 #define MOV_UCHAR SLJIT_MOV_UB
427 #define MOVU_UCHAR SLJIT_MOVU_UB
428 #else
429 #ifdef COMPILE_PCRE16
430 #define MOV_UCHAR SLJIT_MOV_UH
431 #define MOVU_UCHAR SLJIT_MOVU_UH
432 #else
433 #error Unsupported compiling mode
434 #endif
435 #endif
436
437 /* Shortcuts. */
/* DEFINE_COMPILER declares a local 'compiler' taken from common->compiler.
Every other shortcut below passes that 'compiler' variable as the first
argument of the sljit function it wraps, so DEFINE_COMPILER (or an equivalent
declaration) must come first in any function using them. */
438 #define DEFINE_COMPILER \
439 struct sljit_compiler *compiler = common->compiler
440 #define OP1(op, dst, dstw, src, srcw) \
441 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
442 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
443 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
444 #define LABEL() \
445 sljit_emit_label(compiler)
446 #define JUMP(type) \
447 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
448 #define JUMPTO(type, label) \
449 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds an existing jump to it. */
450 #define JUMPHERE(jump) \
451 sljit_set_label((jump), sljit_emit_label(compiler))
452 #define CMP(type, src1, src1w, src2, src2w) \
453 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
454 #define CMPTO(type, src1, src1w, src2, src2w, label) \
455 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
456 #define COND_VALUE(op, dst, dstw, type) \
457 sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
458 #define GET_LOCAL_BASE(dst, dstw, offset) \
459 sljit_get_local_base(compiler, (dst), (dstw), (offset))
460
461 static pcre_uchar* bracketend(pcre_uchar* cc)
462 {
463 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
464 do cc += GET(cc, 1); while (*cc == OP_ALT);
465 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
466 cc += 1 + LINK_SIZE;
467 return cc;
468 }
469
470 /* Functions which might need modification for every newly supported opcode:
471 next_opcode
472 get_localspace
473 set_localptrs
474 get_framesize
475 init_frame
476 get_localsize
477 copy_locals
478 compile_trypath
479 compile_backtrackpath
480 */
481
482 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
483 {
484 SLJIT_UNUSED_ARG(common);
485 switch(*cc)
486 {
487 case OP_SOD:
488 case OP_SOM:
489 case OP_SET_SOM:
490 case OP_NOT_WORD_BOUNDARY:
491 case OP_WORD_BOUNDARY:
492 case OP_NOT_DIGIT:
493 case OP_DIGIT:
494 case OP_NOT_WHITESPACE:
495 case OP_WHITESPACE:
496 case OP_NOT_WORDCHAR:
497 case OP_WORDCHAR:
498 case OP_ANY:
499 case OP_ALLANY:
500 case OP_ANYNL:
501 case OP_NOT_HSPACE:
502 case OP_HSPACE:
503 case OP_NOT_VSPACE:
504 case OP_VSPACE:
505 case OP_EXTUNI:
506 case OP_EODN:
507 case OP_EOD:
508 case OP_CIRC:
509 case OP_CIRCM:
510 case OP_DOLL:
511 case OP_DOLLM:
512 case OP_TYPESTAR:
513 case OP_TYPEMINSTAR:
514 case OP_TYPEPLUS:
515 case OP_TYPEMINPLUS:
516 case OP_TYPEQUERY:
517 case OP_TYPEMINQUERY:
518 case OP_TYPEPOSSTAR:
519 case OP_TYPEPOSPLUS:
520 case OP_TYPEPOSQUERY:
521 case OP_CRSTAR:
522 case OP_CRMINSTAR:
523 case OP_CRPLUS:
524 case OP_CRMINPLUS:
525 case OP_CRQUERY:
526 case OP_CRMINQUERY:
527 case OP_DEF:
528 case OP_BRAZERO:
529 case OP_BRAMINZERO:
530 case OP_BRAPOSZERO:
531 case OP_COMMIT:
532 case OP_FAIL:
533 case OP_ACCEPT:
534 case OP_ASSERT_ACCEPT:
535 case OP_SKIPZERO:
536 return cc + 1;
537
538 case OP_ANYBYTE:
539 #ifdef SUPPORT_UTF
540 if (common->utf) return NULL;
541 #endif
542 return cc + 1;
543
544 case OP_CHAR:
545 case OP_CHARI:
546 case OP_NOT:
547 case OP_NOTI:
548 case OP_STAR:
549 case OP_MINSTAR:
550 case OP_PLUS:
551 case OP_MINPLUS:
552 case OP_QUERY:
553 case OP_MINQUERY:
554 case OP_POSSTAR:
555 case OP_POSPLUS:
556 case OP_POSQUERY:
557 case OP_STARI:
558 case OP_MINSTARI:
559 case OP_PLUSI:
560 case OP_MINPLUSI:
561 case OP_QUERYI:
562 case OP_MINQUERYI:
563 case OP_POSSTARI:
564 case OP_POSPLUSI:
565 case OP_POSQUERYI:
566 case OP_NOTSTAR:
567 case OP_NOTMINSTAR:
568 case OP_NOTPLUS:
569 case OP_NOTMINPLUS:
570 case OP_NOTQUERY:
571 case OP_NOTMINQUERY:
572 case OP_NOTPOSSTAR:
573 case OP_NOTPOSPLUS:
574 case OP_NOTPOSQUERY:
575 case OP_NOTSTARI:
576 case OP_NOTMINSTARI:
577 case OP_NOTPLUSI:
578 case OP_NOTMINPLUSI:
579 case OP_NOTQUERYI:
580 case OP_NOTMINQUERYI:
581 case OP_NOTPOSSTARI:
582 case OP_NOTPOSPLUSI:
583 case OP_NOTPOSQUERYI:
584 cc += 2;
585 #ifdef SUPPORT_UTF
586 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
587 #endif
588 return cc;
589
590 case OP_UPTO:
591 case OP_MINUPTO:
592 case OP_EXACT:
593 case OP_POSUPTO:
594 case OP_UPTOI:
595 case OP_MINUPTOI:
596 case OP_EXACTI:
597 case OP_POSUPTOI:
598 case OP_NOTUPTO:
599 case OP_NOTMINUPTO:
600 case OP_NOTEXACT:
601 case OP_NOTPOSUPTO:
602 case OP_NOTUPTOI:
603 case OP_NOTMINUPTOI:
604 case OP_NOTEXACTI:
605 case OP_NOTPOSUPTOI:
606 cc += 2 + IMM2_SIZE;
607 #ifdef SUPPORT_UTF
608 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
609 #endif
610 return cc;
611
612 case OP_NOTPROP:
613 case OP_PROP:
614 return cc + 1 + 2;
615
616 case OP_TYPEUPTO:
617 case OP_TYPEMINUPTO:
618 case OP_TYPEEXACT:
619 case OP_TYPEPOSUPTO:
620 case OP_REF:
621 case OP_REFI:
622 case OP_CREF:
623 case OP_NCREF:
624 case OP_RREF:
625 case OP_NRREF:
626 case OP_CLOSE:
627 cc += 1 + IMM2_SIZE;
628 return cc;
629
630 case OP_CRRANGE:
631 case OP_CRMINRANGE:
632 return cc + 1 + 2 * IMM2_SIZE;
633
634 case OP_CLASS:
635 case OP_NCLASS:
636 return cc + 1 + 32 / sizeof(pcre_uchar);
637
638 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
639 case OP_XCLASS:
640 return cc + GET(cc, 1);
641 #endif
642
643 case OP_RECURSE:
644 case OP_ASSERT:
645 case OP_ASSERT_NOT:
646 case OP_ASSERTBACK:
647 case OP_ASSERTBACK_NOT:
648 case OP_REVERSE:
649 case OP_ONCE:
650 case OP_ONCE_NC:
651 case OP_BRA:
652 case OP_BRAPOS:
653 case OP_COND:
654 case OP_SBRA:
655 case OP_SBRAPOS:
656 case OP_SCOND:
657 case OP_ALT:
658 case OP_KET:
659 case OP_KETRMAX:
660 case OP_KETRMIN:
661 case OP_KETRPOS:
662 return cc + 1 + LINK_SIZE;
663
664 case OP_CBRA:
665 case OP_CBRAPOS:
666 case OP_SCBRA:
667 case OP_SCBRAPOS:
668 return cc + 1 + LINK_SIZE + IMM2_SIZE;
669
670 case OP_MARK:
671 return cc + 1 + 2 + cc[1];
672
673 default:
674 return NULL;
675 }
676 }
677
/* Groups of case labels shared by get_localspace and set_localptrs.
In both functions the *_LOCAL1 groups request one local word. The
CASE_ITERATOR_LOCAL2A/2B groups request two; the CASE_ITERATOR_TYPE_LOCAL2A/2B
groups request two unless the repeated type is OP_ANYNL or OP_EXTUNI. */

/* Single character iterators: one local word, no repeat count operand. */
678 #define CASE_ITERATOR_LOCAL1 \
679 case OP_MINSTAR: \
680 case OP_MINPLUS: \
681 case OP_QUERY: \
682 case OP_MINQUERY: \
683 case OP_MINSTARI: \
684 case OP_MINPLUSI: \
685 case OP_QUERYI: \
686 case OP_MINQUERYI: \
687 case OP_NOTMINSTAR: \
688 case OP_NOTMINPLUS: \
689 case OP_NOTQUERY: \
690 case OP_NOTMINQUERY: \
691 case OP_NOTMINSTARI: \
692 case OP_NOTMINPLUSI: \
693 case OP_NOTQUERYI: \
694 case OP_NOTMINQUERYI:
695
/* Single character iterators: two local words, no repeat count operand. */
696 #define CASE_ITERATOR_LOCAL2A \
697 case OP_STAR: \
698 case OP_PLUS: \
699 case OP_STARI: \
700 case OP_PLUSI: \
701 case OP_NOTSTAR: \
702 case OP_NOTPLUS: \
703 case OP_NOTSTARI: \
704 case OP_NOTPLUSI:
705
/* Single character iterators: two local words, with an IMM2 repeat count. */
706 #define CASE_ITERATOR_LOCAL2B \
707 case OP_UPTO: \
708 case OP_MINUPTO: \
709 case OP_UPTOI: \
710 case OP_MINUPTOI: \
711 case OP_NOTUPTO: \
712 case OP_NOTMINUPTO: \
713 case OP_NOTUPTOI: \
714 case OP_NOTMINUPTOI:
715
/* Character type iterators: one local word, no repeat count operand. */
716 #define CASE_ITERATOR_TYPE_LOCAL1 \
717 case OP_TYPEMINSTAR: \
718 case OP_TYPEMINPLUS: \
719 case OP_TYPEQUERY: \
720 case OP_TYPEMINQUERY:
721
/* Character type iterators: up to two local words, no repeat count operand. */
722 #define CASE_ITERATOR_TYPE_LOCAL2A \
723 case OP_TYPESTAR: \
724 case OP_TYPEPLUS:
725
/* Character type iterators: up to two local words, with an IMM2 repeat count. */
726 #define CASE_ITERATOR_TYPE_LOCAL2B \
727 case OP_TYPEUPTO: \
728 case OP_TYPEMINUPTO:
729
730 static int get_class_iterator_size(pcre_uchar *cc)
731 {
732 switch(*cc)
733 {
734 case OP_CRSTAR:
735 case OP_CRPLUS:
736 return 2;
737
738 case OP_CRMINSTAR:
739 case OP_CRMINPLUS:
740 case OP_CRQUERY:
741 case OP_CRMINQUERY:
742 return 1;
743
744 case OP_CRRANGE:
745 case OP_CRMINRANGE:
746 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
747 return 0;
748 return 2;
749
750 default:
751 return 0;
752 }
753 }
754
755 static int get_localspace(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
756 {
757 int localspace = 0;
758 pcre_uchar *alternative;
759 pcre_uchar *end = NULL;
760 int space, size, bracketlen;
761
762 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
763 while (cc < ccend)
764 {
765 space = 0;
766 size = 0;
767 bracketlen = 0;
768 switch(*cc)
769 {
770 case OP_SET_SOM:
771 common->has_set_som = TRUE;
772 cc += 1;
773 break;
774
775 case OP_ASSERT:
776 case OP_ASSERT_NOT:
777 case OP_ASSERTBACK:
778 case OP_ASSERTBACK_NOT:
779 case OP_ONCE:
780 case OP_ONCE_NC:
781 case OP_BRAPOS:
782 case OP_SBRA:
783 case OP_SBRAPOS:
784 case OP_SCOND:
785 localspace += sizeof(sljit_w);
786 bracketlen = 1 + LINK_SIZE;
787 break;
788
789 case OP_CBRAPOS:
790 case OP_SCBRAPOS:
791 localspace += sizeof(sljit_w);
792 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
793 break;
794
795 case OP_COND:
796 /* Might be a hidden SCOND. */
797 alternative = cc + GET(cc, 1);
798 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
799 localspace += sizeof(sljit_w);
800 bracketlen = 1 + LINK_SIZE;
801 break;
802
803 case OP_BRA:
804 bracketlen = 1 + LINK_SIZE;
805 break;
806
807 case OP_CBRA:
808 case OP_SCBRA:
809 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
810 break;
811
812 CASE_ITERATOR_LOCAL1
813 space = 1;
814 size = -2;
815 break;
816
817 CASE_ITERATOR_LOCAL2A
818 space = 2;
819 size = -2;
820 break;
821
822 CASE_ITERATOR_LOCAL2B
823 space = 2;
824 size = -(2 + IMM2_SIZE);
825 break;
826
827 CASE_ITERATOR_TYPE_LOCAL1
828 space = 1;
829 size = 1;
830 break;
831
832 CASE_ITERATOR_TYPE_LOCAL2A
833 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
834 space = 2;
835 size = 1;
836 break;
837
838 CASE_ITERATOR_TYPE_LOCAL2B
839 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
840 space = 2;
841 size = 1 + IMM2_SIZE;
842 break;
843
844 case OP_CLASS:
845 case OP_NCLASS:
846 size += 1 + 32 / sizeof(pcre_uchar);
847 space = get_class_iterator_size(cc + size);
848 break;
849
850 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
851 case OP_XCLASS:
852 size = GET(cc, 1);
853 space = get_class_iterator_size(cc + size);
854 break;
855 #endif
856
857 case OP_RECURSE:
858 /* Set its value only once. */
859 if (common->recursive_head == 0)
860 {
861 common->recursive_head = common->ovector_start;
862 common->ovector_start += sizeof(sljit_w);
863 }
864 cc += 1 + LINK_SIZE;
865 break;
866
867 case OP_MARK:
868 if (common->mark_ptr == 0)
869 {
870 common->mark_ptr = common->ovector_start;
871 common->ovector_start += sizeof(sljit_w);
872 }
873 cc += 1 + 2 + cc[1];
874 break;
875
876 default:
877 cc = next_opcode(common, cc);
878 if (cc == NULL)
879 return -1;
880 break;
881 }
882
883 if (space > 0 && cc >= end)
884 localspace += sizeof(sljit_w) * space;
885
886 if (size != 0)
887 {
888 if (size < 0)
889 {
890 cc += -size;
891 #ifdef SUPPORT_UTF
892 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
893 #endif
894 }
895 else
896 cc += size;
897 }
898
899 if (bracketlen > 0)
900 {
901 if (cc >= end)
902 {
903 end = bracketend(cc);
904 if (end[-1 - LINK_SIZE] == OP_KET)
905 end = NULL;
906 }
907 cc += bracketlen;
908 }
909 }
910 return localspace;
911 }
912
913 static void set_localptrs(compiler_common *common, int localptr, pcre_uchar *ccend)
914 {
915 pcre_uchar *cc = common->start;
916 pcre_uchar *alternative;
917 pcre_uchar *end = NULL;
918 int space, size, bracketlen;
919
920 while (cc < ccend)
921 {
922 space = 0;
923 size = 0;
924 bracketlen = 0;
925 switch(*cc)
926 {
927 case OP_ASSERT:
928 case OP_ASSERT_NOT:
929 case OP_ASSERTBACK:
930 case OP_ASSERTBACK_NOT:
931 case OP_ONCE:
932 case OP_ONCE_NC:
933 case OP_BRAPOS:
934 case OP_SBRA:
935 case OP_SBRAPOS:
936 case OP_SCOND:
937 common->localptrs[cc - common->start] = localptr;
938 localptr += sizeof(sljit_w);
939 bracketlen = 1 + LINK_SIZE;
940 break;
941
942 case OP_CBRAPOS:
943 case OP_SCBRAPOS:
944 common->localptrs[cc - common->start] = localptr;
945 localptr += sizeof(sljit_w);
946 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
947 break;
948
949 case OP_COND:
950 /* Might be a hidden SCOND. */
951 alternative = cc + GET(cc, 1);
952 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
953 {
954 common->localptrs[cc - common->start] = localptr;
955 localptr += sizeof(sljit_w);
956 }
957 bracketlen = 1 + LINK_SIZE;
958 break;
959
960 case OP_BRA:
961 bracketlen = 1 + LINK_SIZE;
962 break;
963
964 case OP_CBRA:
965 case OP_SCBRA:
966 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
967 break;
968
969 CASE_ITERATOR_LOCAL1
970 space = 1;
971 size = -2;
972 break;
973
974 CASE_ITERATOR_LOCAL2A
975 space = 2;
976 size = -2;
977 break;
978
979 CASE_ITERATOR_LOCAL2B
980 space = 2;
981 size = -(2 + IMM2_SIZE);
982 break;
983
984 CASE_ITERATOR_TYPE_LOCAL1
985 space = 1;
986 size = 1;
987 break;
988
989 CASE_ITERATOR_TYPE_LOCAL2A
990 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
991 space = 2;
992 size = 1;
993 break;
994
995 CASE_ITERATOR_TYPE_LOCAL2B
996 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
997 space = 2;
998 size = 1 + IMM2_SIZE;
999 break;
1000
1001 case OP_CLASS:
1002 case OP_NCLASS:
1003 size += 1 + 32 / sizeof(pcre_uchar);
1004 space = get_class_iterator_size(cc + size);
1005 break;
1006
1007 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1008 case OP_XCLASS:
1009 size = GET(cc, 1);
1010 space = get_class_iterator_size(cc + size);
1011 break;
1012 #endif
1013
1014 default:
1015 cc = next_opcode(common, cc);
1016 SLJIT_ASSERT(cc != NULL);
1017 break;
1018 }
1019
1020 if (space > 0 && cc >= end)
1021 {
1022 common->localptrs[cc - common->start] = localptr;
1023 localptr += sizeof(sljit_w) * space;
1024 }
1025
1026 if (size != 0)
1027 {
1028 if (size < 0)
1029 {
1030 cc += -size;
1031 #ifdef SUPPORT_UTF
1032 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1033 #endif
1034 }
1035 else
1036 cc += size;
1037 }
1038
1039 if (bracketlen > 0)
1040 {
1041 if (cc >= end)
1042 {
1043 end = bracketend(cc);
1044 if (end[-1 - LINK_SIZE] == OP_KET)
1045 end = NULL;
1046 }
1047 cc += bracketlen;
1048 }
1049 }
1050 }
1051
1052 /* Returns with -1 if no need for frame. */
1053 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1054 {
1055 pcre_uchar *ccend = bracketend(cc);
1056 int length = 0;
1057 BOOL possessive = FALSE;
1058 BOOL setsom_found = recursive;
1059 BOOL setmark_found = recursive;
1060
1061 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1062 {
1063 length = 3;
1064 possessive = TRUE;
1065 }
1066
1067 cc = next_opcode(common, cc);
1068 SLJIT_ASSERT(cc != NULL);
1069 while (cc < ccend)
1070 switch(*cc)
1071 {
1072 case OP_SET_SOM:
1073 SLJIT_ASSERT(common->has_set_som);
1074 if (!setsom_found)
1075 {
1076 length += 2;
1077 setsom_found = TRUE;
1078 }
1079 cc += 1;
1080 break;
1081
1082 case OP_MARK:
1083 SLJIT_ASSERT(common->mark_ptr != 0);
1084 if (!setmark_found)
1085 {
1086 length += 2;
1087 setmark_found = TRUE;
1088 }
1089 cc += 1 + 2 + cc[1];
1090 break;
1091
1092 case OP_RECURSE:
1093 if (common->has_set_som && !setsom_found)
1094 {
1095 length += 2;
1096 setsom_found = TRUE;
1097 }
1098 if (common->mark_ptr != 0 && !setmark_found)
1099 {
1100 length += 2;
1101 setmark_found = TRUE;
1102 }
1103 cc += 1 + LINK_SIZE;
1104 break;
1105
1106 case OP_CBRA:
1107 case OP_CBRAPOS:
1108 case OP_SCBRA:
1109 case OP_SCBRAPOS:
1110 length += 3;
1111 cc += 1 + LINK_SIZE + IMM2_SIZE;
1112 break;
1113
1114 default:
1115 cc = next_opcode(common, cc);
1116 SLJIT_ASSERT(cc != NULL);
1117 break;
1118 }
1119
1120 /* Possessive quantifiers can use a special case. */
1121 if (SLJIT_UNLIKELY(possessive) && length == 3)
1122 return -1;
1123
1124 if (length > 0)
1125 return length + 1;
1126 return -1;
1127 }
1128
1129 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1130 {
1131 DEFINE_COMPILER;
1132 pcre_uchar *ccend = bracketend(cc);
1133 BOOL setsom_found = recursive;
1134 BOOL setmark_found = recursive;
1135 int offset;
1136
1137 /* >= 1 + shortest item size (2) */
1138 SLJIT_UNUSED_ARG(stacktop);
1139 SLJIT_ASSERT(stackpos >= stacktop + 2);
1140
1141 stackpos = STACK(stackpos);
1142 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1143 cc = next_opcode(common, cc);
1144 SLJIT_ASSERT(cc != NULL);
1145 while (cc < ccend)
1146 switch(*cc)
1147 {
1148 case OP_SET_SOM:
1149 SLJIT_ASSERT(common->has_set_som);
1150 if (!setsom_found)
1151 {
1152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1153 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1154 stackpos += (int)sizeof(sljit_w);
1155 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1156 stackpos += (int)sizeof(sljit_w);
1157 setsom_found = TRUE;
1158 }
1159 cc += 1;
1160 break;
1161
1162 case OP_MARK:
1163 SLJIT_ASSERT(common->mark_ptr != 0);
1164 if (!setmark_found)
1165 {
1166 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1167 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1168 stackpos += (int)sizeof(sljit_w);
1169 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1170 stackpos += (int)sizeof(sljit_w);
1171 setmark_found = TRUE;
1172 }
1173 cc += 1 + 2 + cc[1];
1174 break;
1175
1176 case OP_RECURSE:
1177 if (common->has_set_som && !setsom_found)
1178 {
1179 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1180 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1181 stackpos += (int)sizeof(sljit_w);
1182 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1183 stackpos += (int)sizeof(sljit_w);
1184 setsom_found = TRUE;
1185 }
1186 if (common->mark_ptr != 0 && !setmark_found)
1187 {
1188 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1189 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1190 stackpos += (int)sizeof(sljit_w);
1191 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1192 stackpos += (int)sizeof(sljit_w);
1193 setmark_found = TRUE;
1194 }
1195 cc += 1 + LINK_SIZE;
1196 break;
1197
1198 case OP_CBRA:
1199 case OP_CBRAPOS:
1200 case OP_SCBRA:
1201 case OP_SCBRAPOS:
1202 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1204 stackpos += (int)sizeof(sljit_w);
1205 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1206 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1208 stackpos += (int)sizeof(sljit_w);
1209 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1210 stackpos += (int)sizeof(sljit_w);
1211
1212 cc += 1 + LINK_SIZE + IMM2_SIZE;
1213 break;
1214
1215 default:
1216 cc = next_opcode(common, cc);
1217 SLJIT_ASSERT(cc != NULL);
1218 break;
1219 }
1220
1221 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1222 SLJIT_ASSERT(stackpos == STACK(stacktop));
1223 }
1224
1225 static SLJIT_INLINE int get_localsize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1226 {
1227 int localsize = 2;
1228 int size;
1229 pcre_uchar *alternative;
1230 /* Calculate the sum of the local variables. */
1231 while (cc < ccend)
1232 {
1233 size = 0;
1234 switch(*cc)
1235 {
1236 case OP_ASSERT:
1237 case OP_ASSERT_NOT:
1238 case OP_ASSERTBACK:
1239 case OP_ASSERTBACK_NOT:
1240 case OP_ONCE:
1241 case OP_ONCE_NC:
1242 case OP_BRAPOS:
1243 case OP_SBRA:
1244 case OP_SBRAPOS:
1245 case OP_SCOND:
1246 localsize++;
1247 cc += 1 + LINK_SIZE;
1248 break;
1249
1250 case OP_CBRA:
1251 case OP_SCBRA:
1252 localsize++;
1253 cc += 1 + LINK_SIZE + IMM2_SIZE;
1254 break;
1255
1256 case OP_CBRAPOS:
1257 case OP_SCBRAPOS:
1258 localsize += 2;
1259 cc += 1 + LINK_SIZE + IMM2_SIZE;
1260 break;
1261
1262 case OP_COND:
1263 /* Might be a hidden SCOND. */
1264 alternative = cc + GET(cc, 1);
1265 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1266 localsize++;
1267 cc += 1 + LINK_SIZE;
1268 break;
1269
1270 CASE_ITERATOR_LOCAL1
1271 if (PRIV_DATA(cc))
1272 localsize++;
1273 cc += 2;
1274 #ifdef SUPPORT_UTF
1275 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1276 #endif
1277 break;
1278
1279 CASE_ITERATOR_LOCAL2A
1280 if (PRIV_DATA(cc))
1281 localsize += 2;
1282 cc += 2;
1283 #ifdef SUPPORT_UTF
1284 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1285 #endif
1286 break;
1287
1288 CASE_ITERATOR_LOCAL2B
1289 if (PRIV_DATA(cc))
1290 localsize += 2;
1291 cc += 2 + IMM2_SIZE;
1292 #ifdef SUPPORT_UTF
1293 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1294 #endif
1295 break;
1296
1297 CASE_ITERATOR_TYPE_LOCAL1
1298 if (PRIV_DATA(cc))
1299 localsize++;
1300 cc += 1;
1301 break;
1302
1303 CASE_ITERATOR_TYPE_LOCAL2A
1304 if (PRIV_DATA(cc))
1305 localsize += 2;
1306 cc += 1;
1307 break;
1308
1309 CASE_ITERATOR_TYPE_LOCAL2B
1310 if (PRIV_DATA(cc))
1311 localsize += 2;
1312 cc += 1 + IMM2_SIZE;
1313 break;
1314
1315 case OP_CLASS:
1316 case OP_NCLASS:
1317 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1318 case OP_XCLASS:
1319 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1320 #else
1321 size = 1 + 32 / (int)sizeof(pcre_uchar);
1322 #endif
1323 if (PRIV_DATA(cc))
1324 localsize += get_class_iterator_size(cc + size);
1325 cc += size;
1326 break;
1327
1328 default:
1329 cc = next_opcode(common, cc);
1330 SLJIT_ASSERT(cc != NULL);
1331 break;
1332 }
1333 }
1334 SLJIT_ASSERT(cc == ccend);
1335 return localsize;
1336 }
1337
1338 static void copy_locals(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1339 BOOL save, int stackptr, int stacktop)
1340 {
1341 DEFINE_COMPILER;
1342 int srcw[2];
1343 int count, size;
1344 BOOL tmp1next = TRUE;
1345 BOOL tmp1empty = TRUE;
1346 BOOL tmp2empty = TRUE;
1347 pcre_uchar *alternative;
1348 enum {
1349 start,
1350 loop,
1351 end
1352 } status;
1353
1354 status = save ? start : loop;
1355 stackptr = STACK(stackptr - 2);
1356 stacktop = STACK(stacktop - 1);
1357
1358 if (!save)
1359 {
1360 stackptr += sizeof(sljit_w);
1361 if (stackptr < stacktop)
1362 {
1363 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1364 stackptr += sizeof(sljit_w);
1365 tmp1empty = FALSE;
1366 }
1367 if (stackptr < stacktop)
1368 {
1369 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1370 stackptr += sizeof(sljit_w);
1371 tmp2empty = FALSE;
1372 }
1373 /* The tmp1next must be TRUE in either way. */
1374 }
1375
1376 while (status != end)
1377 {
1378 count = 0;
1379 switch(status)
1380 {
1381 case start:
1382 SLJIT_ASSERT(save && common->recursive_head != 0);
1383 count = 1;
1384 srcw[0] = common->recursive_head;
1385 status = loop;
1386 break;
1387
1388 case loop:
1389 if (cc >= ccend)
1390 {
1391 status = end;
1392 break;
1393 }
1394
1395 switch(*cc)
1396 {
1397 case OP_ASSERT:
1398 case OP_ASSERT_NOT:
1399 case OP_ASSERTBACK:
1400 case OP_ASSERTBACK_NOT:
1401 case OP_ONCE:
1402 case OP_ONCE_NC:
1403 case OP_BRAPOS:
1404 case OP_SBRA:
1405 case OP_SBRAPOS:
1406 case OP_SCOND:
1407 count = 1;
1408 srcw[0] = PRIV_DATA(cc);
1409 SLJIT_ASSERT(srcw[0] != 0);
1410 cc += 1 + LINK_SIZE;
1411 break;
1412
1413 case OP_CBRA:
1414 case OP_SCBRA:
1415 count = 1;
1416 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1417 cc += 1 + LINK_SIZE + IMM2_SIZE;
1418 break;
1419
1420 case OP_CBRAPOS:
1421 case OP_SCBRAPOS:
1422 count = 2;
1423 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1424 srcw[1] = PRIV_DATA(cc);
1425 SLJIT_ASSERT(srcw[0] != 0);
1426 cc += 1 + LINK_SIZE + IMM2_SIZE;
1427 break;
1428
1429 case OP_COND:
1430 /* Might be a hidden SCOND. */
1431 alternative = cc + GET(cc, 1);
1432 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1433 {
1434 count = 1;
1435 srcw[0] = PRIV_DATA(cc);
1436 SLJIT_ASSERT(srcw[0] != 0);
1437 }
1438 cc += 1 + LINK_SIZE;
1439 break;
1440
1441 CASE_ITERATOR_LOCAL1
1442 if (PRIV_DATA(cc))
1443 {
1444 count = 1;
1445 srcw[0] = PRIV_DATA(cc);
1446 }
1447 cc += 2;
1448 #ifdef SUPPORT_UTF
1449 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1450 #endif
1451 break;
1452
1453 CASE_ITERATOR_LOCAL2A
1454 if (PRIV_DATA(cc))
1455 {
1456 count = 2;
1457 srcw[0] = PRIV_DATA(cc);
1458 srcw[1] = PRIV_DATA(cc) + sizeof(sljit_w);
1459 }
1460 cc += 2;
1461 #ifdef SUPPORT_UTF
1462 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1463 #endif
1464 break;
1465
1466 CASE_ITERATOR_LOCAL2B
1467 if (PRIV_DATA(cc))
1468 {
1469 count = 2;
1470 srcw[0] = PRIV_DATA(cc);
1471 srcw[1] = PRIV_DATA(cc) + sizeof(sljit_w);
1472 }
1473 cc += 2 + IMM2_SIZE;
1474 #ifdef SUPPORT_UTF
1475 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1476 #endif
1477 break;
1478
1479 CASE_ITERATOR_TYPE_LOCAL1
1480 if (PRIV_DATA(cc))
1481 {
1482 count = 1;
1483 srcw[0] = PRIV_DATA(cc);
1484 }
1485 cc += 1;
1486 break;
1487
1488 CASE_ITERATOR_TYPE_LOCAL2A
1489 if (PRIV_DATA(cc))
1490 {
1491 count = 2;
1492 srcw[0] = PRIV_DATA(cc);
1493 srcw[1] = srcw[0] + sizeof(sljit_w);
1494 }
1495 cc += 1;
1496 break;
1497
1498 CASE_ITERATOR_TYPE_LOCAL2B
1499 if (PRIV_DATA(cc))
1500 {
1501 count = 2;
1502 srcw[0] = PRIV_DATA(cc);
1503 srcw[1] = srcw[0] + sizeof(sljit_w);
1504 }
1505 cc += 1 + IMM2_SIZE;
1506 break;
1507
1508 case OP_CLASS:
1509 case OP_NCLASS:
1510 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1511 case OP_XCLASS:
1512 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1513 #else
1514 size = 1 + 32 / (int)sizeof(pcre_uchar);
1515 #endif
1516 if (PRIV_DATA(cc))
1517 switch(get_class_iterator_size(cc + size))
1518 {
1519 case 1:
1520 count = 1;
1521 srcw[0] = PRIV_DATA(cc);
1522 break;
1523
1524 case 2:
1525 count = 2;
1526 srcw[0] = PRIV_DATA(cc);
1527 srcw[1] = srcw[0] + sizeof(sljit_w);
1528 break;
1529
1530 default:
1531 SLJIT_ASSERT_STOP();
1532 break;
1533 }
1534 cc += size;
1535 break;
1536
1537 default:
1538 cc = next_opcode(common, cc);
1539 SLJIT_ASSERT(cc != NULL);
1540 break;
1541 }
1542 break;
1543
1544 case end:
1545 SLJIT_ASSERT_STOP();
1546 break;
1547 }
1548
1549 while (count > 0)
1550 {
1551 count--;
1552 if (save)
1553 {
1554 if (tmp1next)
1555 {
1556 if (!tmp1empty)
1557 {
1558 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1559 stackptr += sizeof(sljit_w);
1560 }
1561 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1562 tmp1empty = FALSE;
1563 tmp1next = FALSE;
1564 }
1565 else
1566 {
1567 if (!tmp2empty)
1568 {
1569 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1570 stackptr += sizeof(sljit_w);
1571 }
1572 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1573 tmp2empty = FALSE;
1574 tmp1next = TRUE;
1575 }
1576 }
1577 else
1578 {
1579 if (tmp1next)
1580 {
1581 SLJIT_ASSERT(!tmp1empty);
1582 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1583 tmp1empty = stackptr >= stacktop;
1584 if (!tmp1empty)
1585 {
1586 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1587 stackptr += sizeof(sljit_w);
1588 }
1589 tmp1next = FALSE;
1590 }
1591 else
1592 {
1593 SLJIT_ASSERT(!tmp2empty);
1594 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1595 tmp2empty = stackptr >= stacktop;
1596 if (!tmp2empty)
1597 {
1598 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1599 stackptr += sizeof(sljit_w);
1600 }
1601 tmp1next = TRUE;
1602 }
1603 }
1604 }
1605 }
1606
1607 if (save)
1608 {
1609 if (tmp1next)
1610 {
1611 if (!tmp1empty)
1612 {
1613 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1614 stackptr += sizeof(sljit_w);
1615 }
1616 if (!tmp2empty)
1617 {
1618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1619 stackptr += sizeof(sljit_w);
1620 }
1621 }
1622 else
1623 {
1624 if (!tmp2empty)
1625 {
1626 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1627 stackptr += sizeof(sljit_w);
1628 }
1629 if (!tmp1empty)
1630 {
1631 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1632 stackptr += sizeof(sljit_w);
1633 }
1634 }
1635 }
1636 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1637 }
1638
1639 #undef CASE_ITERATOR_LOCAL1
1640 #undef CASE_ITERATOR_LOCAL2A
1641 #undef CASE_ITERATOR_LOCAL2B
1642 #undef CASE_ITERATOR_TYPE_LOCAL1
1643 #undef CASE_ITERATOR_TYPE_LOCAL2A
1644 #undef CASE_ITERATOR_TYPE_LOCAL2B
1645
1646 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1647 {
1648 return (value & (value - 1)) == 0;
1649 }
1650
1651 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1652 {
1653 while (list)
1654 {
1655 /* sljit_set_label is clever enough to do nothing
1656 if either the jump or the label is NULL */
1657 sljit_set_label(list->jump, label);
1658 list = list->next;
1659 }
1660 }
1661
1662 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1663 {
1664 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1665 if (list_item)
1666 {
1667 list_item->next = *list;
1668 list_item->jump = jump;
1669 *list = list_item;
1670 }
1671 }
1672
1673 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1674 {
1675 DEFINE_COMPILER;
1676 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1677
1678 if (list_item)
1679 {
1680 list_item->type = type;
1681 list_item->data = data;
1682 list_item->start = start;
1683 list_item->quit = LABEL();
1684 list_item->next = common->stubs;
1685 common->stubs = list_item;
1686 }
1687 }
1688
1689 static void flush_stubs(compiler_common *common)
1690 {
1691 DEFINE_COMPILER;
1692 stub_list* list_item = common->stubs;
1693
1694 while (list_item)
1695 {
1696 JUMPHERE(list_item->start);
1697 switch(list_item->type)
1698 {
1699 case stack_alloc:
1700 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1701 break;
1702 }
1703 JUMPTO(SLJIT_JUMP, list_item->quit);
1704 list_item = list_item->next;
1705 }
1706 common->stubs = NULL;
1707 }
1708
1709 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1710 {
1711 DEFINE_COMPILER;
1712
1713 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1714 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1715 }
1716
1717 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1718 {
1719 /* May destroy all locals and registers except TMP2. */
1720 DEFINE_COMPILER;
1721
1722 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1723 #ifdef DESTROY_REGISTERS
1724 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1725 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1726 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1727 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1728 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1729 #endif
1730 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1731 }
1732
1733 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1734 {
1735 DEFINE_COMPILER;
1736 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1737 }
1738
1739 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1740 {
1741 DEFINE_COMPILER;
1742 struct sljit_label *loop;
1743 int i;
1744 /* At this point we can freely use all temporary registers. */
1745 /* TMP1 returns with begin - 1. */
1746 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1747 if (length < 8)
1748 {
1749 for (i = 0; i < length; i++)
1750 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1751 }
1752 else
1753 {
1754 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1755 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1756 loop = LABEL();
1757 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1758 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1759 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1760 }
1761 }
1762
1763 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1764 {
1765 DEFINE_COMPILER;
1766 struct sljit_label *loop;
1767 struct sljit_jump *earlyexit;
1768
1769 /* At this point we can freely use all registers. */
1770 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1772
1773 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1774 if (common->mark_ptr != 0)
1775 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1776 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1777 if (common->mark_ptr != 0)
1778 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1779 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1780 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1781 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1782 /* Unlikely, but possible */
1783 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1784 loop = LABEL();
1785 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1786 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1787 /* Copy the integer value to the output buffer */
1788 #ifdef COMPILE_PCRE16
1789 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1790 #endif
1791 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1792 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1793 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1794 JUMPHERE(earlyexit);
1795
1796 /* Calculate the return value, which is the maximum ovector value. */
1797 if (topbracket > 1)
1798 {
1799 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1800 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1801
1802 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1803 loop = LABEL();
1804 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1805 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1806 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1807 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1808 }
1809 else
1810 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1811 }
1812
1813 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1814 {
1815 DEFINE_COMPILER;
1816
1817 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1818 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1819
1820 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1821 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1822 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1823 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, quit);
1824
1825 /* Store match begin and end. */
1826 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1827 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1828 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1829 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1830 #ifdef COMPILE_PCRE16
1831 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1832 #endif
1833 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1834
1835 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1836 #ifdef COMPILE_PCRE16
1837 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1838 #endif
1839 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1840
1841 JUMPTO(SLJIT_JUMP, quit);
1842 }
1843
1844 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1845 {
1846 /* May destroy TMP1. */
1847 DEFINE_COMPILER;
1848 struct sljit_jump *jump;
1849
1850 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1851 {
1852 /* The value of -1 must be kept for start_used_ptr! */
1853 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1854 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1855 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1856 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1857 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1858 JUMPHERE(jump);
1859 }
1860 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1861 {
1862 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1863 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1864 JUMPHERE(jump);
1865 }
1866 }
1867
1868 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1869 {
1870 /* Detects if the character has an othercase. */
1871 unsigned int c;
1872
1873 #ifdef SUPPORT_UTF
1874 if (common->utf)
1875 {
1876 GETCHAR(c, cc);
1877 if (c > 127)
1878 {
1879 #ifdef SUPPORT_UCP
1880 return c != UCD_OTHERCASE(c);
1881 #else
1882 return FALSE;
1883 #endif
1884 }
1885 #ifndef COMPILE_PCRE8
1886 return common->fcc[c] != c;
1887 #endif
1888 }
1889 else
1890 #endif
1891 c = *cc;
1892 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1893 }
1894
1895 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1896 {
1897 /* Returns with the othercase. */
1898 #ifdef SUPPORT_UTF
1899 if (common->utf && c > 127)
1900 {
1901 #ifdef SUPPORT_UCP
1902 return UCD_OTHERCASE(c);
1903 #else
1904 return c;
1905 #endif
1906 }
1907 #endif
1908 return TABLE_GET(c, common->fcc, c);
1909 }
1910
1911 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1912 {
1913 /* Detects if the character and its othercase has only 1 bit difference. */
1914 unsigned int c, oc, bit;
1915 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1916 int n;
1917 #endif
1918
1919 #ifdef SUPPORT_UTF
1920 if (common->utf)
1921 {
1922 GETCHAR(c, cc);
1923 if (c <= 127)
1924 oc = common->fcc[c];
1925 else
1926 {
1927 #ifdef SUPPORT_UCP
1928 oc = UCD_OTHERCASE(c);
1929 #else
1930 oc = c;
1931 #endif
1932 }
1933 }
1934 else
1935 {
1936 c = *cc;
1937 oc = TABLE_GET(c, common->fcc, c);
1938 }
1939 #else
1940 c = *cc;
1941 oc = TABLE_GET(c, common->fcc, c);
1942 #endif
1943
1944 SLJIT_ASSERT(c != oc);
1945
1946 bit = c ^ oc;
1947 /* Optimized for English alphabet. */
1948 if (c <= 127 && bit == 0x20)
1949 return (0 << 8) | 0x20;
1950
1951 /* Since c != oc, they must have at least 1 bit difference. */
1952 if (!ispowerof2(bit))
1953 return 0;
1954
1955 #ifdef COMPILE_PCRE8
1956
1957 #ifdef SUPPORT_UTF
1958 if (common->utf && c > 127)
1959 {
1960 n = GET_EXTRALEN(*cc);
1961 while ((bit & 0x3f) == 0)
1962 {
1963 n--;
1964 bit >>= 6;
1965 }
1966 return (n << 8) | bit;
1967 }
1968 #endif /* SUPPORT_UTF */
1969 return (0 << 8) | bit;
1970
1971 #else /* COMPILE_PCRE8 */
1972
1973 #ifdef COMPILE_PCRE16
1974 #ifdef SUPPORT_UTF
1975 if (common->utf && c > 65535)
1976 {
1977 if (bit >= (1 << 10))
1978 bit >>= 10;
1979 else
1980 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1981 }
1982 #endif /* SUPPORT_UTF */
1983 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1984 #endif /* COMPILE_PCRE16 */
1985
1986 #endif /* COMPILE_PCRE8 */
1987 }
1988
1989 static void check_partial(compiler_common *common, BOOL force)
1990 {
1991 /* Checks whether a partial matching is occured. Does not modify registers. */
1992 DEFINE_COMPILER;
1993 struct sljit_jump *jump = NULL;
1994
1995 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1996
1997 if (common->mode == JIT_COMPILE)
1998 return;
1999
2000 if (!force)
2001 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2002 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2003 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2004
2005 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2006 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2007 else
2008 {
2009 if (common->partialmatchlabel != NULL)
2010 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2011 else
2012 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2013 }
2014
2015 if (jump != NULL)
2016 JUMPHERE(jump);
2017 }
2018
2019 static struct sljit_jump *check_str_end(compiler_common *common)
2020 {
2021 /* Does not affect registers. Usually used in a tight spot. */
2022 DEFINE_COMPILER;
2023 struct sljit_jump *jump;
2024 struct sljit_jump *nohit;
2025 struct sljit_jump *return_value;
2026
2027 if (common->mode == JIT_COMPILE)
2028 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2029
2030 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2031 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2032 {
2033 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2034 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2035 JUMPHERE(nohit);
2036 return_value = JUMP(SLJIT_JUMP);
2037 }
2038 else
2039 {
2040 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2041 if (common->partialmatchlabel != NULL)
2042 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2043 else
2044 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2045 }
2046 JUMPHERE(jump);
2047 return return_value;
2048 }
2049
2050 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2051 {
2052 DEFINE_COMPILER;
2053 struct sljit_jump *jump;
2054
2055 if (common->mode == JIT_COMPILE)
2056 {
2057 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2058 return;
2059 }
2060
2061 /* Partial matching mode. */
2062 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2063 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2064 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2065 {
2066 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2067 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2068 }
2069 else
2070 {
2071 if (common->partialmatchlabel != NULL)
2072 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2073 else
2074 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2075 }
2076 JUMPHERE(jump);
2077 }
2078
2079 static void read_char(compiler_common *common)
2080 {
2081 /* Reads the character into TMP1, updates STR_PTR.
2082 Does not check STR_END. TMP2 Destroyed. */
2083 DEFINE_COMPILER;
2084 #ifdef SUPPORT_UTF
2085 struct sljit_jump *jump;
2086 #endif
2087
2088 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2089 #ifdef SUPPORT_UTF
2090 if (common->utf)
2091 {
2092 #ifdef COMPILE_PCRE8
2093 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2094 #else
2095 #ifdef COMPILE_PCRE16
2096 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2097 #endif
2098 #endif /* COMPILE_PCRE8 */
2099 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2100 JUMPHERE(jump);
2101 }
2102 #endif
2103 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2104 }
2105
2106 static void peek_char(compiler_common *common)
2107 {
2108 /* Reads the character into TMP1, keeps STR_PTR.
2109 Does not check STR_END. TMP2 Destroyed. */
2110 DEFINE_COMPILER;
2111 #ifdef SUPPORT_UTF
2112 struct sljit_jump *jump;
2113 #endif
2114
2115 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2116 #ifdef SUPPORT_UTF
2117 if (common->utf)
2118 {
2119 #ifdef COMPILE_PCRE8
2120 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2121 #else
2122 #ifdef COMPILE_PCRE16
2123 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2124 #endif
2125 #endif /* COMPILE_PCRE8 */
2126 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2127 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2128 JUMPHERE(jump);
2129 }
2130 #endif
2131 }
2132
2133 static void read_char8_type(compiler_common *common)
2134 {
2135 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2136 DEFINE_COMPILER;
2137 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2138 struct sljit_jump *jump;
2139 #endif
2140
2141 #ifdef SUPPORT_UTF
2142 if (common->utf)
2143 {
2144 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2145 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2146 #ifdef COMPILE_PCRE8
2147 /* This can be an extra read in some situations, but hopefully
2148 it is needed in most cases. */
2149 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2150 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2151 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2152 JUMPHERE(jump);
2153 #else
2154 #ifdef COMPILE_PCRE16
2155 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2156 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2157 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2158 JUMPHERE(jump);
2159 /* Skip low surrogate if necessary. */
2160 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2161 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2162 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2163 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2164 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2165 #endif
2166 #endif /* COMPILE_PCRE8 */
2167 return;
2168 }
2169 #endif
2170 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2171 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2172 #ifdef COMPILE_PCRE16
2173 /* The ctypes array contains only 256 values. */
2174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2175 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2176 #endif
2177 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2178 #ifdef COMPILE_PCRE16
2179 JUMPHERE(jump);
2180 #endif
2181 }
2182
2183 static void skip_char_back(compiler_common *common)
2184 {
2185 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2186 DEFINE_COMPILER;
2187 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2188 struct sljit_label *label;
2189
2190 if (common->utf)
2191 {
2192 label = LABEL();
2193 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2194 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2195 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2196 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2197 return;
2198 }
2199 #endif
2200 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2201 if (common->utf)
2202 {
2203 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2204 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2205 /* Skip low surrogate if necessary. */
2206 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2207 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2208 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2209 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2210 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2211 return;
2212 }
2213 #endif
2214 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2215 }
2216
2217 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2218 {
2219 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2220 DEFINE_COMPILER;
2221
2222 if (nltype == NLTYPE_ANY)
2223 {
2224 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2225 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2226 }
2227 else if (nltype == NLTYPE_ANYCRLF)
2228 {
2229 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2230 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2231 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2232 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2233 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2234 }
2235 else
2236 {
2237 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2238 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2239 }
2240 }
2241
2242 #ifdef SUPPORT_UTF
2243
2244 #ifdef COMPILE_PCRE8
/* Emits the shared fast-call routine that decodes a multi-byte UTF-8
character (8-bit library). The lead byte is expected in TMP1; the continuation
bytes are loaded from offsets 1..3 relative to STR_PTR, masked with 0x3f and
merged into TMP1. STR_PTR is advanced by the number of continuation bytes and
the same number is returned in TMP2. The continuation bytes are not validated
by this routine. */
2245 static void do_utfreadchar(compiler_common *common)
2246 {
2247 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2248 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2249 DEFINE_COMPILER;
2250 struct sljit_jump *jump;
2251
2252 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2253 /* Searching for the first zero. */
/* Bit 0x20 of the lead byte is clear only for two byte sequences. */
2254 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2255 jump = JUMP(SLJIT_C_NOT_ZERO);
2256 /* Two byte sequence. */
/* Result: ((lead & 0x1f) << 6) | (byte1 & 0x3f). */
2257 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2258 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2259 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2260 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2261 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2262 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2263 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2264 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2265 JUMPHERE(jump);
2266
/* Bit 0x10 of the lead byte is clear for three byte sequences. */
2267 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2268 jump = JUMP(SLJIT_C_NOT_ZERO);
2269 /* Three byte sequence. */
/* Result: ((lead & 0x0f) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f). */
2270 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2271 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2272 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2273 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2274 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2275 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2276 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2277 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2278 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2279 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2280 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2281 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2282 JUMPHERE(jump);
2283
2284 /* Four byte sequence. */
/* Every remaining lead byte is decoded as a four byte sequence:
((lead & 0x07) << 18) | ((byte1 & 0x3f) << 12) | ((byte2 & 0x3f) << 6)
| (byte3 & 0x3f). */
2285 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2286 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2287 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2288 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2289 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2290 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2291 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2292 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2293 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2294 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2295 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2296 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2297 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2298 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2299 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2300 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2301 }
2302
/* Emits the shared fast-call routine that yields the ctypes entry of a
multi-byte UTF-8 character. The lead byte is expected in TMP2 and the next
byte is loaded from offset 0 of STR_PTR. Only two byte sequences can encode a
value below 256; for every other character the routine skips the remaining
bytes and returns 0 in TMP1. */
2303 static void do_utfreadtype8(compiler_common *common)
2304 {
2305 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2306 of the character (>= 0xc0). Return value in TMP1. */
2307 DEFINE_COMPILER;
2308 struct sljit_jump *jump;
2309 struct sljit_jump *compare;
2310
2311 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2312
/* Bit 0x20 set in the lead byte: the sequence is longer than two bytes. */
2313 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2314 jump = JUMP(SLJIT_C_NOT_ZERO);
2315 /* Two byte sequence. */
/* TMP2 = ((lead & 0x1f) << 6) | (next byte & 0x3f); STR_PTR moves past the
byte that was just read. */
2316 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2317 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2318 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2319 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2320 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2321 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* Values above 255 have no ctypes entry; they take the branch that returns 0. */
2322 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2323 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2324 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2325
2326 JUMPHERE(compare);
2327 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2328 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2329 JUMPHERE(jump);
2330
2331 /* We only have types for characters less than 256. */
/* The byte loaded from utf8_table4[lead - 0xc0] is added to STR_PTR to skip
the rest of the sequence; the returned type is 0. */
2332 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
2333 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2334 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2335 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2336 }
2337
2338 #else /* COMPILE_PCRE8 */
2339
2340 #ifdef COMPILE_PCRE16
/* Emits the shared fast-call routine that decodes a UTF-16 surrogate pair
(16-bit library). The first code unit is expected in TMP1; when it is below
0xdc00 the following unit is loaded from offset 1 of STR_PTR, STR_PTR is
advanced by one unit and the combined value is returned in TMP1. */
2341 static void do_utfreadchar(compiler_common *common)
2342 {
2343 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2344 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2345 DEFINE_COMPILER;
2346 struct sljit_jump *jump;
2347
2348 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Units below 0xdc00 are combined with the next unit; anything else is
returned unchanged. */
2349 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2350 /* Do nothing, only return. */
/* NOTE(review): TMP2 is not written on this early-return path. */
2351 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2352
2353 JUMPHERE(jump);
2354 /* Combine two 16 bit characters. */
/* TMP1 = (((first & 0x3ff) << 10) | (second & 0x3ff)) + 0x10000, TMP2 = size
of one code unit. */
2355 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2356 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2357 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2358 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2359 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2360 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2361 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2362 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2363 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2364 }
2365 #endif /* COMPILE_PCRE16 */
2366
2367 #endif /* COMPILE_PCRE8 */
2368
2369 #endif /* SUPPORT_UTF */
2370
2371 #ifdef SUPPORT_UCP
2372
2373 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift used to split a character into a block index (upper bits)
and an offset inside the block (lowest 7 bits). */
2374 #define UCD_BLOCK_MASK 127
2375 #define UCD_BLOCK_SHIFT 7
2376
/* Emits the shared fast-call routine that performs the two-stage UCD table
lookup for the character in TMP1:
  block  = ucd_stage1[ch >> 7]
  record = ucd_stage2[(block << 7) + (ch & 127)]
  type   = ucd_records[record].chartype */
2377 static void do_getucd(compiler_common *common)
2378 {
2379 /* Search the UCD record for the character comes in TMP1.
2380 Returns chartype in TMP1 and UCD offset in TMP2. */
2381 DEFINE_COMPILER;
2382
/* The shift counts used below (7 for the block, 3 for the record index)
depend on these two sizes. */
2383 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2384
2385 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2386 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2387 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
2388 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2389 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2390 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* ucd_stage2 is read as unsigned 16 bit values, so the index in TMP1 is
scaled by 2 (shift argument 1). */
2391 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
2392 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* The record index in TMP2 is scaled by 8 (shift argument 3) to address the
chartype byte of the selected record. */
2393 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2394 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2395 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2396 }
2397 #endif
2398
/* Emits the entry code of the start-position loop and returns the label
(mainloop) placed in front of the code that advances STR_PTR by one character.
The first pass does not advance: it leaves through the `start` jump, which is
resolved right after the advance code.

  hascrorlf  only consulted when deciding whether the two-unit newline
             handling is emitted
  firstline  when TRUE, code that searches for the end of the first line and
             records it in the local slot common->first_line_end is emitted
             in front of the loop

Returns the label to jump to for trying the next start position. */
2399 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2400 {
2401 DEFINE_COMPILER;
2402 struct sljit_label *mainloop;
2403 struct sljit_label *newlinelabel = NULL;
2404 struct sljit_jump *start;
2405 struct sljit_jump *end = NULL;
2406 struct sljit_jump *nl = NULL;
2407 #ifdef SUPPORT_UTF
2408 struct sljit_jump *singlechar;
2409 #endif
2410 jump_list *newline = NULL;
2411 BOOL newlinecheck = FALSE;
2412 BOOL readuchar = FALSE;
2413
/* The advance code treats a two-unit newline as a single step only when
neither hascrorlf nor firstline is set and the newline convention can be two
units long. */
2414 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2415 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2416 newlinecheck = TRUE;
2417
2418 if (firstline)
2419 {
2420 /* Search for the end of the first line. */
/* STR_PTR is parked in LOCALS0 while it is used as the scanning pointer;
first_line_end defaults to STR_END in case no newline is found. */
2421 SLJIT_ASSERT(common->first_line_end != 0);
2422 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
2423 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_END, 0);
2424
2425 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2426 {
/* Fixed two-unit newline: compare the units at offsets -1 and 0 with the high
and low byte of common->newline. Here `mainloop` is only a temporary label for
this scan; it is reassigned further below. */
2427 mainloop = LABEL();
2428 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2429 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2430 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2431 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2432 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2433 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2434 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2435 }
2436 else
2437 {
2438 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2439 mainloop = LABEL();
2440 /* Continual stores does not cause data dependency. */
/* first_line_end is updated before each character is read, so when a newline
is detected it holds the position of that newline. */
2441 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2442 read_char(common);
2443 check_newlinechar(common, common->nltype, &newline, TRUE);
2444 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2445 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2446 set_jumps(newline, LABEL());
2447 }
2448
2449 JUMPHERE(end);
/* Restore the real start position. */
2450 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
2451 }
2452
/* The first pass skips the advance code emitted below. */
2453 start = JUMP(SLJIT_JUMP);
2454
2455 if (newlinecheck)
2456 {
/* Reached from the main loop when the current unit equals the high byte of
common->newline: advance one unit and, if the next unit equals the low byte,
one more. */
2457 newlinelabel = LABEL();
2458 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2459 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2460 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2461 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2462 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2463 #ifdef COMPILE_PCRE16
/* Convert the 0/1 condition value into a byte offset of one 16 bit unit. */
2464 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2465 #endif
2466 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2467 nl = JUMP(SLJIT_JUMP);
2468 }
2469
2470 mainloop = LABEL();
2471
2472 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is only loaded when it is needed: for the UTF length
computation or for the newline check. */
2473 #ifdef SUPPORT_UTF
2474 if (common->utf) readuchar = TRUE;
2475 #endif
2476 if (newlinecheck) readuchar = TRUE;
2477
2478 if (readuchar)
2479 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2480
2481 if (newlinecheck)
2482 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2483
2484 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2485 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2486 if (common->utf)
2487 {
/* Lead bytes from 0xc0 upwards: add the byte loaded from
utf8_table4[lead - 0xc0] to step over the rest of the sequence. */
2488 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2489 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2490 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2491 JUMPHERE(singlechar);
2492 }
2493 #endif
2494 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2495 if (common->utf)
2496 {
/* Units whose top six bits equal 0xd800 are followed by a second unit: skip
that unit as well. */
2497 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2498 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2499 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2500 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2501 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2502 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2503 JUMPHERE(singlechar);
2504 }
2505 #endif
2506 JUMPHERE(start);
2507
2508 if (newlinecheck)
2509 {
2510 JUMPHERE(end);
2511 JUMPHERE(nl);
2512 }
2513
2514 return mainloop;
2515 }
2516
2517 static SLJIT_INLINE BOOL fast_forward_first_two_chars(compiler_common *common, BOOL firstline)
2518 {
2519 DEFINE_COMPILER;
2520 struct sljit_label *start;
2521 struct sljit_jump *quit;
2522 struct sljit_jump *found;
2523 pcre_int32 chars[4];
2524 pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
2525 int location = 0;
2526 pcre_int32 len, c, bit, caseless;
2527 BOOL must_end;
2528
2529 #ifdef COMPILE_PCRE8
2530 union {
2531 sljit_uh ascombined;
2532 sljit_ub asuchars[2];
2533 } pair;
2534 #else
2535 union {
2536 sljit_ui ascombined;
2537 sljit_uh asuchars[2];
2538 } pair;
2539 #endif
2540
2541 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2542 return FALSE;
2543
2544 while (TRUE)
2545 {
2546 caseless = 0;
2547 must_end = TRUE;
2548 switch(*cc)
2549 {
2550 case OP_CHAR:
2551 must_end = FALSE;
2552 cc++;
2553 break;
2554
2555 case OP_CHARI:
2556 caseless = 1;
2557 must_end = FALSE;
2558 cc++;
2559 break;
2560
2561 case OP_SOD:
2562 case OP_SOM:
2563 case OP_SET_SOM:
2564 case OP_NOT_WORD_BOUNDARY:
2565 case OP_WORD_BOUNDARY:
2566 case OP_EODN:
2567 case OP_EOD:
2568 case OP_CIRC:
2569 case OP_CIRCM:
2570 case OP_DOLL:
2571 case OP_DOLLM:
2572 /* Zero width assertions. */
2573 cc++;
2574 continue;
2575
2576 case OP_PLUS:
2577 case OP_MINPLUS:
2578 case OP_POSPLUS:
2579 cc++;
2580 break;
2581
2582 case OP_EXACT:
2583 cc += 1 + IMM2_SIZE;
2584 break;
2585
2586 case OP_PLUSI:
2587 case OP_MINPLUSI:
2588 case OP_POSPLUSI:
2589 caseless = 1;
2590 cc++;
2591 break;
2592
2593 case OP_EXACTI:
2594 caseless = 1;
2595 cc += 1 + IMM2_SIZE;
2596 break;
2597
2598 default:
2599 return FALSE;
2600 }
2601
2602 len = 1;
2603 #ifdef SUPPORT_UTF
2604 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2605 #endif
2606
2607 if (caseless && char_has_othercase(common, cc))
2608 {
2609 caseless = char_get_othercase_bit(common, cc);
2610 if (caseless == 0)
2611 return FALSE;
2612 #ifdef COMPILE_PCRE8
2613 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2614 #else
2615 if ((caseless & 0x100) != 0)
2616 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2617 else
2618 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2619 #endif
2620 }
2621 else
2622 caseless = 0;
2623
2624 while (len > 0 && location < 2 * 2)
2625 {
2626 c = *cc;
2627 bit = 0;
2628 if (len == (caseless & 0xff))
2629 {
2630 bit = caseless >> 8;
2631 c |= bit;
2632 }
2633
2634 chars[location] = c;
2635 chars[location + 1] = bit;
2636
2637 len--;
2638 location += 2;
2639 cc++;
2640 }
2641
2642 if (location == 2 * 2)
2643 break;
2644 else if (must_end)
2645 return FALSE;
2646 }
2647
2648 if (firstline)
2649 {
2650 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2651 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, 1);
2652 }
2653 else
2654 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2655
2656 start = LABEL();
2657 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2658 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2659 #ifdef COMPILE_PCRE8
2660 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2661 #else /* COMPILE_PCRE8 */
2662 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2663 #endif
2664
2665 #else /* SLJIT_UNALIGNED */
2666
2667 #if defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN
2668 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2669 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2670 #else /* SLJIT_BIG_ENDIAN */
2671 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2672 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2673 #endif /* SLJIT_BIG_ENDIAN */
2674
2675 #ifdef COMPILE_PCRE8
2676 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 8);
2677 #else /* COMPILE_PCRE8 */
2678 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
2679 #endif
2680 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2681
2682 #endif
2683
2684 if (chars[1] != 0 || chars[3] != 0)
2685 {
2686 pair.asuchars[0] = chars[1];
2687 pair.asuchars[1] = chars[3];
2688 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, pair.ascombined);
2689 }
2690
2691 pair.asuchars[0] = chars[0];
2692 pair.asuchars[1] = chars[2];
2693 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, pair.ascombined);
2694
2695 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2696 JUMPTO(SLJIT_JUMP, start);
2697 JUMPHERE(found);
2698 JUMPHERE(quit);
2699
2700 if (firstline)
2701 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2702 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2703 return TRUE;
2704 }
2705
/* Emits a search loop that moves STR_PTR forward one code unit at a time until
the unit at STR_PTR equals first_char (or its other case when caseless is
set), or until STR_PTR reaches STR_END.

  first_char  the code unit to search for
  caseless    TRUE: the other case of first_char is accepted as well
  firstline   TRUE: STR_END is temporarily replaced by the value stored in
              common->first_line_end; the real value is kept in POSSESSIVE0 */
2706 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2707 {
2708 DEFINE_COMPILER;
2709 struct sljit_label *start;
2710 struct sljit_jump *quit;
2711 struct sljit_jump *found;
2712 pcre_uchar oc, bit;
2713
2714 if (firstline)
2715 {
2716 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2717 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2718 }
2719
2720 start = LABEL();
2721 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2722 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2723
/* oc is the other case of first_char; it stays equal to first_char when the
search is case sensitive. */
2724 oc = first_char;
2725 if (caseless)
2726 {
2727 oc = TABLE_GET(first_char, common->fcc, first_char);
2728 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2729 if (first_char > 127 && common->utf)
2730 oc = UCD_OTHERCASE(first_char);
2731 #endif
2732 }
2733 if (first_char == oc)
2734 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2735 else
2736 {
2737 bit = first_char ^ oc;
2738 if (ispowerof2(bit))
2739 {
/* The two cases differ in a single bit: force that bit and compare once. */
2740 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2741 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2742 }
2743 else
2744 {
/* TMP2 = (TMP1 == first_char) | (TMP1 == oc). */
2745 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2746 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2747 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2748 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2749 found = JUMP(SLJIT_C_NOT_ZERO);
2750 }
2751 }
2752
2753 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2754 JUMPTO(SLJIT_JUMP, start);
2755 JUMPHERE(found);
2756 JUMPHERE(quit);
2757
/* Restore the real end of the subject. */
2758 if (firstline)
2759 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2760 }
2761
/* Emits a search loop that moves STR_PTR forward until it stands right after
a newline sequence, or until STR_END is reached. Nothing is skipped when
STR_PTR is not greater than the `str` field of the jit_arguments block.

  firstline  TRUE: STR_END is temporarily replaced by the value stored in
             common->first_line_end; the real value is kept in POSSESSIVE0 */
2762 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2763 {
2764 DEFINE_COMPILER;
2765 struct sljit_label *loop;
2766 struct sljit_jump *lastchar;
2767 struct sljit_jump *firstchar;
2768 struct sljit_jump *quit;
2769 struct sljit_jump *foundcr = NULL;
2770 struct sljit_jump *notfoundnl;
2771 jump_list *newline = NULL;
2772
2773 if (firstline)
2774 {
2775 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2776 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2777 }
2778
2779 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2780 {
/* Fixed two-unit newline. */
2781 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2782 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2783 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2784 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2785 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2786
/* Step back one code unit when STR_PTR is at least two units past `begin`;
the loop below starts by stepping forward one unit again. */
2787 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2788 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2789 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2790 #ifdef COMPILE_PCRE16
2791 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2792 #endif
2793 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2794
/* Advance until the two units in front of STR_PTR are the high and low byte
of common->newline. */
2795 loop = LABEL();
2796 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2797 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2798 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2799 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2800 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2801 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2802
2803 JUMPHERE(quit);
2804 JUMPHERE(firstchar);
2805 JUMPHERE(lastchar);
2806
2807 if (firstline)
2808 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2809 return;
2810 }
2811
/* All other newline conventions: step back one character, then read
characters until one of them is a newline. */
2812 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2813 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2814 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2815 skip_char_back(common);
2816
2817 loop = LABEL();
2818 read_char(common);
2819 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2820 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2821 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* jumpiftrue is FALSE, so the collected jumps are taken for characters that
are not newlines; they are bound to `loop` to continue the search. */
2822 check_newlinechar(common, common->nltype, &newline, FALSE);
2823 set_jumps(newline, loop);
2824
2825 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2826 {
2827 quit = JUMP(SLJIT_JUMP);
2828 JUMPHERE(foundcr);
/* A CR was read: if the next unit is NL, step over it as well. */
2829 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2830 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2831 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2832 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2833 #ifdef COMPILE_PCRE16
2834 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2835 #endif
2836 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2837 JUMPHERE(notfoundnl);
2838 JUMPHERE(quit);
2839 }
2840 JUMPHERE(lastchar);
2841 JUMPHERE(firstchar);
2842
2843 if (firstline)
2844 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2845 }
2846
/* Emits a search loop that moves STR_PTR forward character by character until
the bit belonging to the code unit at STR_PTR is set in the bitmap addressed
by start_bits, or until STR_PTR reaches STR_END. The bitmap is read as bytes:
byte (c >> 3), bit (c & 7).

  start_bits  address of the bitmap, used as the displacement of a byte load
  firstline   TRUE: STR_END is temporarily replaced by the value stored in
              common->first_line_end; the real value is kept in POSSESSIVE0 */
2847 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2848 {
2849 DEFINE_COMPILER;
2850 struct sljit_label *start;
2851 struct sljit_jump *quit;
2852 struct sljit_jump *found;
2853 #ifndef COMPILE_PCRE8
2854 struct sljit_jump *jump;
2855 #endif
2856
2857 if (firstline)
2858 {
2859 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, STR_END, 0);
2860 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2861 }
2862
2863 start = LABEL();
2864 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2865 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2866 #ifdef SUPPORT_UTF
/* Keep the unmodified code unit: TMP1 is overwritten by the bitmap test, but
the unit is needed again to compute how far to advance. */
2867 if (common->utf)
2868 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2869 #endif
2870 #ifndef COMPILE_PCRE8
/* Code units that are not below 255 are tested as 255. */
2871 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2872 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2873 JUMPHERE(jump);
2874 #endif
/* TMP2 = 1 << (c & 7), TMP1 = bitmap byte at index (c >> 3). */
2875 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2876 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2877 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2878 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2879 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2880 found = JUMP(SLJIT_C_NOT_ZERO);
2881
2882 #ifdef SUPPORT_UTF
2883 if (common->utf)
2884 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2885 #endif
2886 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2887 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2888 if (common->utf)
2889 {
/* Lead bytes from 0xc0 upwards: add the byte loaded from
utf8_table4[lead - 0xc0] to step over the rest of the sequence. */
2890 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2891 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2892 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2893 }
2894 #endif
2895 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2896 if (common->utf)
2897 {
/* Units whose top six bits equal 0xd800 are followed by a second unit: skip
that unit as well. */
2898 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2899 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2900 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2901 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2902 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2903 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2904 }
2905 #endif
2906 JUMPTO(SLJIT_JUMP, start);
2907 JUMPHERE(found);
2908 JUMPHERE(quit);
2909
2910 if (firstline)
2911 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2912 }
2913
/* Emits a scan that looks for req_char (or its other case when caseless is
set) between the current position and STR_END. The position where it was
found is cached in the local slot common->req_char_ptr.

  req_char       the code unit to look for
  caseless       TRUE: the other case of req_char is accepted as well
  has_firstchar  TRUE: the scan starts one code unit after STR_PTR

Returns the jump that is taken when the scan reaches STR_END without finding
the character; the caller has to bind it. */
2914 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2915 {
2916 DEFINE_COMPILER;
2917 struct sljit_label *loop;
2918 struct sljit_jump *toolong;
2919 struct sljit_jump *alreadyfound;
2920 struct sljit_jump *found;
2921 struct sljit_jump *foundoc = NULL;
2922 struct sljit_jump *notfound;
2923 pcre_uchar oc, bit;
2924
2925 SLJIT_ASSERT(common->req_char_ptr != 0);
/* The scan is skipped when STR_PTR + REQ_BYTE_MAX is still below STR_END
(toolong), or when STR_PTR is below the cached position (alreadyfound). */
2926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2927 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2928 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2929 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2930
/* TMP1 is the scanning pointer. */
2931 if (has_firstchar)
2932 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2933 else
2934 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2935
2936 loop = LABEL();
2937 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2938
2939 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char; it stays equal to req_char when the
search is case sensitive. */
2940 oc = req_char;
2941 if (caseless)
2942 {
2943 oc = TABLE_GET(req_char, common->fcc, req_char);
2944 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2945 if (req_char > 127 && common->utf)
2946 oc = UCD_OTHERCASE(req_char);
2947 #endif
2948 }
2949 if (req_char == oc)
2950 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2951 else
2952 {
2953 bit = req_char ^ oc;
2954 if (ispowerof2(bit))
2955 {
/* The two cases differ in a single bit: force that bit and compare once. */
2956 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2957 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2958 }
2959 else
2960 {
2961 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2962 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2963 }
2964 }
2965 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2966 JUMPTO(SLJIT_JUMP, loop);
2967
2968 JUMPHERE(found);
2969 if (foundoc)
2970 JUMPHERE(foundoc);
/* Remember where the character was found. */
2971 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2972 JUMPHERE(alreadyfound);
2973 JUMPHERE(toolong);
2974 return notfound;
2975 }
2976
/* Emits the shared fast-call routine that walks the entries stored from
STACK_TOP upwards and restores the saved state they describe. The first word
of each entry selects its kind:
  - greater than frame_end (signed compare): an offset relative to the local
    base; the next two words are copied to that local and to the one after it
    (entry size: three words)
  - frame_end: the walk is finished and the routine returns
  - frame_setstrbegin: the next word is stored into OVECTOR(0) (two words)
  - frame_setmark: the next word is stored into common->mark_ptr (two words;
    only emitted when mark_ptr is in use)
  - anything else: two words are skipped */
2977 static void do_revertframes(compiler_common *common)
2978 {
2979 DEFINE_COMPILER;
2980 struct sljit_jump *jump;
2981 struct sljit_label *mainloop;
2982
2983 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 walks the stack, TMP3 holds the base address of the locals. */
2984 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2985 GET_LOCAL_BASE(TMP3, 0, 0);
2986
2987 /* Drop frames until we reach STACK_TOP. */
2988 mainloop = LABEL();
2989 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2990 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Restore two consecutive locals from the stack. */
2991 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
2992 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
2993 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
2994 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
2995 JUMPTO(SLJIT_JUMP, mainloop);
2996
2997 JUMPHERE(jump);
2998 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2999 /* End of dropping frames. */
3000 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3001
3002 JUMPHERE(jump);
3003 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
3004 /* Set string begin. */
3005 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3006 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3007 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3008 JUMPTO(SLJIT_JUMP, mainloop);
3009
3010 JUMPHERE(jump);
3011 if (common->mark_ptr != 0)
3012 {
/* Set mark. */
3013 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3014 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3015 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3016 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3017 JUMPTO(SLJIT_JUMP, mainloop);
3018
3019 JUMPHERE(jump);
3020 }
3021
3022 /* Unknown command. */
3023 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3024 JUMPTO(SLJIT_JUMP, mainloop);
3025 }
3026
/* Emits the shared fast-call routine that tests for a word boundary at
STR_PTR. The "is a word character" property (0 or 1) of the previous character
is stored in LOCALS1, the same property of the character at STR_PTR is
computed in TMP2, and the routine returns with the flags of their XOR: the
result is zero when both sides agree, i.e. when there is no boundary. A
missing neighbour (start or end of the subject) counts as a non-word
character. The return address is kept in LOCALS0 for the duration of the
routine. */
3027 static void check_wordboundary(compiler_common *common)
3028 {
3029 DEFINE_COMPILER;
3030 struct sljit_jump *skipread;
3031 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3032 struct sljit_jump *jump;
3033 #endif
3034
/* The code below extracts the ctype_word flag with a right shift by 4. */
3035 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3036
3037 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3038 /* Get type of the previous char, and put it to LOCALS1. */
3039 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3040 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
/* There is no previous character when STR_PTR is not greater than `begin`;
LOCALS1 then keeps the 0 stored above. */
3042 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again, which leaves STR_PTR where it
was. */
3043 skip_char_back(common);
3044 check_start_used_ptr(common);
3045 read_char(common);
3046
3047 /* Testing char type. */
3048 #ifdef SUPPORT_UCP
3049 if (common->use_ucp)
3050 {
/* Word character: underscore, or a chartype in ucp_Ll..ucp_Lu or in
ucp_Nd..ucp_No; both intervals are tested with a subtract and an unsigned
compare. */
3051 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3052 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3053 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3054 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3055 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3056 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3057 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3058 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3059 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3060 JUMPHERE(jump);
3061 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3062 }
3063 else
3064 #endif
3065 {
/* Table based test: characters above 255 have no ctypes entry and keep the 0
already stored in LOCALS1. */
3066 #ifndef COMPILE_PCRE8
3067 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3068 #elif defined SUPPORT_UTF
3069 /* Here LOCALS1 has already been zeroed. */
3070 jump = NULL;
3071 if (common->utf)
3072 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3073 #endif /* COMPILE_PCRE8 */
3074 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3075 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3076 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3077 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3078 #ifndef COMPILE_PCRE8
3079 JUMPHERE(jump);
3080 #elif defined SUPPORT_UTF
3081 if (jump != NULL)
3082 JUMPHERE(jump);
3083 #endif /* COMPILE_PCRE8 */
3084 }
3085 JUMPHERE(skipread);
3086
/* Same test for the character at STR_PTR; TMP2 stays 0 when the end of the
subject has been reached. */
3087 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3088 skipread = check_str_end(common);
3089 peek_char(common);
3090
3091 /* Testing char type. This is a code duplication. */
3092 #ifdef SUPPORT_UCP
3093 if (common->use_ucp)
3094 {
3095 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3096 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3097 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3098 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3099 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3100 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3101 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3102 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3103 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3104 JUMPHERE(jump);
3105 }
3106 else
3107 #endif
3108 {
3109 #ifndef COMPILE_PCRE8
3110 /* TMP2 may be destroyed by peek_char. */
3111 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3112 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3113 #elif defined SUPPORT_UTF
3114 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3115 jump = NULL;
3116 if (common->utf)
3117 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3118 #endif
3119 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3120 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3121 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3122 #ifndef COMPILE_PCRE8
3123 JUMPHERE(jump);
3124 #elif defined SUPPORT_UTF
3125 if (jump != NULL)
3126 JUMPHERE(jump);
3127 #endif /* COMPILE_PCRE8 */
3128 }
3129 JUMPHERE(skipread);
3130
/* The flags of (next XOR previous) are the result handed to the caller. */
3131 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3132 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3133 }
3134
3135 /*
3136 range format:
3137
3138 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3139 ranges[1] = first bit (0 or 1)
3140 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3141 */
3142
3143 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3144 {
3145 DEFINE_COMPILER;
3146 struct sljit_jump *jump;
3147
3148 if (ranges[0] < 0)
3149 return FALSE;
3150
3151 switch(ranges[0])
3152 {
3153 case 1:
3154 if (readch)
3155 read_char(common);
3156 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3157 return TRUE;
3158
3159 case 2:
3160 if (readch)
3161 read_char(common);
3162 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3163 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3164 return TRUE;
3165
3166 case 4:
3167 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3168 {
3169 if (readch)
3170 read_char(common);
3171 if (ranges[1] != 0)
3172 {
3173 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3174 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3175 }
3176 else
3177 {
3178 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3179 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3180 JUMPHERE(jump);
3181 }
3182 return TRUE;
3183 }
3184 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && ispowerof2(ranges[4] - ranges[2]))
3185 {
3186 if (readch)
3187 read_char(common);
3188 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3189 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3190 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3191 return TRUE;
3192 }
3193 return FALSE;
3194
3195 default:
3196 return FALSE;
3197 }
3198 }
3199
3200 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3201 {
3202 int i, bit, length;
3203 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3204
3205 bit = ctypes[0] & flag;
3206 ranges[0] = -1;
3207 ranges[1] = bit != 0 ? 1 : 0;
3208 length = 0;
3209
3210 for (i = 1; i < 256; i++)
3211 if ((ctypes[i] & flag) != bit)
3212 {
3213 if (length >= MAX_RANGE_SIZE)
3214 return;
3215 ranges[2 + length] = i;
3216 length++;
3217 bit ^= flag;
3218 }
3219
3220 if (bit != 0)
3221 {
3222 if (length >= MAX_RANGE_SIZE)
3223 return;
3224 ranges[2 + length] = 256;
3225 length++;
3226 }
3227 ranges[0] = length;
3228 }
3229
3230 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3231 {
3232 int ranges[2 + MAX_RANGE_SIZE];
3233 pcre_uint8 bit, cbit, all;
3234 int i, byte, length = 0;
3235
3236 bit = bits[0] & 0x1;
3237 ranges[1] = bit;
3238 /* Can be 0 or 255. */
3239 all = -bit;
3240
3241 for (i = 0; i < 256; )
3242 {
3243 byte = i >> 3;
3244 if ((i & 0x7) == 0 && bits[byte] == all)
3245 i += 8;
3246 else
3247 {
3248 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3249 if (cbit != bit)
3250 {
3251 if (length >= MAX_RANGE_SIZE)
3252 return FALSE;
3253 ranges[2 + length] = i;
3254 length++;
3255 bit = cbit;
3256 all = -cbit;
3257 }
3258 i++;
3259 }
3260 }
3261
3262 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3263 {
3264 if (length >= MAX_RANGE_SIZE)
3265 return FALSE;
3266 ranges[2 + length] = 256;
3267 length++;
3268 }
3269 ranges[0] = length;
3270
3271 return check_ranges(common, ranges, backtracks, FALSE);
3272 }
3273
/* Emits the body of the "any newline" helper routine. The routine is entered
with a fast call (return address kept in RETURN_ADDR) and tests the character
in TMP1. Note that the emitted code modifies TMP1 (0x0a is subtracted, and in
UTF / 16 bit mode the lowest bit is set). */
3274 static void check_anynewline(compiler_common *common)
3275 {
3276 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3277 DEFINE_COMPILER;
3278
3279 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3280
/* Rebase to 0x0a so that 0x0a..0x0d becomes the unsigned range 0..3. */
3281 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3282 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3283 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
/* Compare with 0x85; the result is merged into TMP2 by the next COND_VALUE. */
3284 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3285 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3286 #ifdef COMPILE_PCRE8
3287 if (common->utf)
3288 {
3289 #endif
3290 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps both 0x2028 and 0x2029 (rebased) onto the
same value, so one comparison covers the two characters. */
3291 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3292 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3293 #ifdef COMPILE_PCRE8
3294 }
3295 #endif
3296 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the last comparison; SLJIT_SET_E is requested on this final OR,
presumably so the caller can branch on zero / non-zero - confirm against
the COND_VALUE definition. */
3297 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3298 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3299 }
3300
/* Emits the body of the horizontal whitespace helper routine. The routine is
entered with a fast call (return address kept in RETURN_ADDR) and tests the
character in TMP1 against 0x09, 0x20 and 0xa0; in UTF / 16 bit mode also
against 0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000. In that
mode the emitted code modifies TMP1 (0x2000 is subtracted). */
3301 static void check_hspace(compiler_common *common)
3302 {
3303 /* Check whether TMP1 contains a horizontal whitespace character. TMP2 destroyed. */
3304 DEFINE_COMPILER;
3305
3306 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3307
/* Each comparison result is accumulated into TMP2 by the COND_VALUE that
follows it. */
3308 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3309 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3310 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3311 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3312 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3313 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3314 #ifdef COMPILE_PCRE8
3315 if (common->utf)
3316 {
3317 #endif
3318 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3319 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3320 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3321 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3322 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase to 0x2000: the remaining constants are expressed relative to it,
and 0x2000..0x200a becomes one unsigned range check. */
3323 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3324 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3325 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3326 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3327 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3328 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3329 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3330 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3331 #ifdef COMPILE_PCRE8
3332 }
3333 #endif
3334 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the last pending comparison (0xa0 or 0x3000, depending on the
path taken above) and requests SLJIT_SET_E on the final result. */
3335 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3336
3337 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3338 }
3339
/* Emits the body of the vertical whitespace helper routine. The routine is
entered with a fast call (return address kept in RETURN_ADDR) and tests the
character in TMP1 against 0x0a-0x0d and 0x85; in UTF / 16 bit mode also
against 0x2028 and 0x2029. The emitted code modifies TMP1. */
3340 static void check_vspace(compiler_common *common)
3341 {
3342 /* Check whether TMP1 contains a vertical whitespace character. TMP2 destroyed. */
3343 DEFINE_COMPILER;
3344
3345 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3346
/* Rebase to 0x0a so that 0x0a..0x0d becomes the unsigned range 0..3. */
3347 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3348 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3349 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3350 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3351 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3352 #ifdef COMPILE_PCRE8
3353 if (common->utf)
3354 {
3355 #endif
3356 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit lets one comparison cover 0x2028 and 0x2029. */
3357 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3358 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3359 #ifdef COMPILE_PCRE8
3360 }
3361 #endif
3362 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the last pending comparison into TMP2 and requests SLJIT_SET_E
on the final result. */
3363 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3364
3365 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3366 }
3367
/* The comparison loops need two scratch registers for the loaded characters.
STR_END and STACK_TOP are borrowed for this purpose; the routines below save
their values on entry and restore them before returning. */
3368 #define CHAR1 STR_END
3369 #define CHAR2 STACK_TOP
3370
/* Emits the body of the case sensitive string comparison routine (entered
with a fast call). The emitted code uses TMP1 and STR_PTR as the two string
pointers and TMP2 as the remaining length counted in IN_UCHARS units.
NOTE(review): TMP1 presumably points to the reference string and TMP2 holds
its length on entry - confirm against the call sites. */
3371 static void do_casefulcmp(compiler_common *common)
3372 {
3373 DEFINE_COMPILER;
3374 struct sljit_jump *jump;
3375 struct sljit_label *label;
3376
3377 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Step STR_PTR back by the length held in TMP2. */
3378 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the borrowed registers: CHAR1 goes to TMP3, CHAR2 to LOCALS0. */
3379 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3380 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved one character back because the loads in the loop
address offset IN_UCHARS(1) (MOVU_UCHAR presumably also updates the base
register - confirm against its definition). */
3381 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3382 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3383
3384 label = LABEL();
3385 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3386 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Leave the loop at the first difference. */
3387 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
/* Count down the remaining length and repeat until it reaches zero. */
3388 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3389 JUMPTO(SLJIT_C_NOT_ZERO, label);
3390
/* Common exit for both "mismatch" and "all characters compared". */
3391 JUMPHERE(jump);
/* Undo the initial one character pre-decrement of STR_PTR. */
3392 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Restore the borrowed registers. */
3393 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3394 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3395 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3396 }
3397
/* The caseless loop additionally borrows STACK_LIMIT to hold the address of
the lower casing table (common->lcc); it is saved in TMP3 and restored. */
3398 #define LCC_TABLE STACK_LIMIT
3399
/* Emits the body of the caseless string comparison routine (entered with a
fast call). It has the same structure as do_casefulcmp, but both loaded
characters are translated through the common->lcc table before they are
compared. Outside of 8 bit mode, characters above 255 are not translated. */
3400 static void do_caselesscmp(compiler_common *common)
3401 {
3402 DEFINE_COMPILER;
3403 struct sljit_jump *jump;
3404 struct sljit_label *label;
3405
3406 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Step STR_PTR back by the length held in TMP2. */
3407 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3408
/* Save the three borrowed registers, then load the table address. */
3409 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3410 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3411 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3412 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Pre-decrement both pointers; the loop loads from offset IN_UCHARS(1). */
3413 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3414 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3415
3416 label = LABEL();
3417 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3418 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is skipped for characters above 255 when characters can
be wider than one byte. */
3419 #ifndef COMPILE_PCRE8
3420 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3421 #endif
3422 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3423 #ifndef COMPILE_PCRE8
3424 JUMPHERE(jump);
3425 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3426 #endif
3427 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3428 #ifndef COMPILE_PCRE8
3429 JUMPHERE(jump);
3430 #endif
/* Leave the loop at the first difference, otherwise count down TMP2. */
3431 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3432 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3433 JUMPTO(SLJIT_C_NOT_ZERO, label);
3434
/* Common exit: undo the pre-decrement of STR_PTR and restore registers. */
3435 JUMPHERE(jump);
3436 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3437 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3438 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3439 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3440 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3441 }
3442
/* The register aliases are local to the two comparison routines above. */
3443 #undef LCC_TABLE
3444 #undef CHAR1
3445 #undef CHAR2
3446
3447 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3448
3449 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3450 {
3451 /* This function would be ineffective to do in JIT level. */
3452 int c1, c2;
3453 const pcre_uchar *src2 = args->uchar_ptr;
3454 const pcre_uchar *end2 = args->end;
3455
3456 while (src1 < end1)
3457 {
3458 if (src2 >= end2)
3459 return (pcre_uchar*)1;
3460 GETCHARINC(c1, src1);
3461 GETCHARINC(c2, src2);
3462 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
3463 }
3464 return src2;
3465 }
3466
3467 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3468
/* Emits the comparison of one pattern character, starting at cc, with the
subject data located at STR_PTR - context->length. In UTF mode a character
may consist of several code units; all of them are processed by one call.

  caseless   - when TRUE and the character has an other case, the differing
               bit is ORed into the subject data before the comparison
  cc         - pattern position of the character
  context    - running state shared between consecutive calls: remaining
               length, the register that receives the next load (sourcereg),
               and, when unaligned reads are available, the code units
               collected so far (c), their case bits (oc) and their count
               (ucharptr)
  backtracks - receives the jumps taken on mismatch

Returns the pattern position after the processed character. */
3469 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3470 compare_context* context, jump_list **backtracks)
3471 {
3472 DEFINE_COMPILER;
3473 unsigned int othercasebit = 0;
3474 pcre_uchar *othercasechar = NULL;
3475 #ifdef SUPPORT_UTF
3476 int utflength;
3477 #endif
3478
3479 if (caseless && char_has_othercase(common, cc))
3480 {
3481 othercasebit = char_get_othercase_bit(common, cc);
3482 SLJIT_ASSERT(othercasebit);
3483 /* Extracting bit difference info. */
/* The low 8 bits hold the bit mask, the upper bits select the code unit
(relative to cc) that contains the differing bit. In 16 bit mode bit 0x100
tells that the mask applies to the high byte of that code unit. */
3484 #ifdef COMPILE_PCRE8
3485 othercasechar = cc + (othercasebit >> 8);
3486 othercasebit &= 0xff;
3487 #else
3488 #ifdef COMPILE_PCRE16
3489 othercasechar = cc + (othercasebit >> 9);
3490 if ((othercasebit & 0x100) != 0)
3491 othercasebit = (othercasebit & 0xff) << 8;
3492 else
3493 othercasebit &= 0xff;
3494 #endif
3495 #endif
3496 }
3497
/* First call for this sequence: emit the initial load into TMP1, using the
widest access the remaining length allows, and make TMP2 the target of the
next load. */
3498 if (context->sourcereg == -1)
3499 {
3500 #ifdef COMPILE_PCRE8
3501 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3502 if (context->length >= 4)
3503 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3504 else if (context->length >= 2)
3505 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3506 else
3507 #endif
3508 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3509 #else
3510 #ifdef COMPILE_PCRE16
3511 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3512 if (context->length >= 4)
3513 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3514 else
3515 #endif
3516 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3517 #endif
3518 #endif /* COMPILE_PCRE8 */
3519 context->sourcereg = TMP2;
3520 }
3521
/* Number of code units of this character: one plus the extra units
announced by the first code unit in UTF mode. */
3522 #ifdef SUPPORT_UTF
3523 utflength = 1;
3524 if (common->utf && HAS_EXTRALEN(*cc))
3525 utflength += GET_EXTRALEN(*cc);
3526
3527 do
3528 {
3529 #endif
3530
3531 context->length -= IN_UCHARS(1);
3532 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3533
3534 /* Unaligned read is supported. */
/* Collect the expected code unit (with the case bit already set when it
applies) and the case mask; the comparison is emitted later for a whole
group at once. */
3535 if (othercasebit != 0 && othercasechar == cc)
3536 {
3537 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3538 context->oc.asuchars[context->ucharptr] = othercasebit;
3539 }
3540 else
3541 {
3542 context->c.asuchars[context->ucharptr] = *cc;
3543 context->oc.asuchars[context->ucharptr] = 0;
3544 }
3545 context->ucharptr++;
3546
/* Flush the group when it is full, when the sequence ends, or (8 bit mode)
when two units are collected and only one more remains. */
3547 #ifdef COMPILE_PCRE8
3548 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3549 #else
3550 if (context->ucharptr >= 2 || context->length == 0)
3551 #endif
3552 {
/* Emit the load for the next group before comparing the current one. */
3553 if (context->length >= 4)
3554 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3555 #ifdef COMPILE_PCRE8
3556 else if (context->length >= 2)
3557 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3558 else if (context->length >= 1)
3559 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3560 #else
3561 else if (context->length >= 2)
3562 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3563 #endif
/* Switch to the other register: it holds the data loaded previously. */
3564 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3565
/* The case labels are group sizes in bytes divided by the code unit size. */
3566 switch(context->ucharptr)
3567 {
3568 case 4 / sizeof(pcre_uchar):
3569 if (context->oc.asint != 0)
3570 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3571 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3572 break;
3573
3574 case 2 / sizeof(pcre_uchar):
3575 if (context->oc.asushort != 0)
3576 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3577 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3578 break;
3579
3580 #ifdef COMPILE_PCRE8
3581 case 1:
3582 if (context->oc.asbyte != 0)
3583 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3584 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3585 break;
3586 #endif
3587
3588 default:
3589 SLJIT_ASSERT_STOP();
3590 break;
3591 }
3592 context->ucharptr = 0;
3593 }
3594
3595 #else
3596
3597 /* Unaligned read is unsupported. */
/* One code unit at a time: load the next unit (if any), then compare the
unit that was loaded before. */
3598 #ifdef COMPILE_PCRE8
3599 if (context->length > 0)
3600 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3601 #else
3602 if (context->length > 0)
3603 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3604 #endif
3605 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3606
3607 if (othercasebit != 0 && othercasechar == cc)
3608 {
3609 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3610 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3611 }
3612 else
3613 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3614
3615 #endif
3616
3617 cc++;
3618 #ifdef SUPPORT_UTF
3619 utflength--;
3620 }
3621 while (utflength > 0);
3622 #endif
3623
3624 return cc;
3625 }
3626
3627 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3628
/* Helpers for compile_xclass_trypath. The character (TMP1) and the type
register (typereg) are kept in a rebased form: the register holds the real
value minus charoffset / typeoffset. These macros emit the single ADD or SUB
that moves the register to a new base and record that base. They rely on the
local variables charoffset, typeoffset and typereg of the enclosing function.

Each macro is wrapped in do { } while (0) so that it expands to exactly one
statement and stays correct inside an unbraced if / else. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
3648
3649 static void compile_xclass_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3650 {
3651 DEFINE_COMPILER;
3652 jump_list *found = NULL;
3653 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3654 unsigned int c;
3655 int compares;
3656 struct sljit_jump *jump = NULL;
3657 pcre_uchar *ccbegin;
3658 #ifdef SUPPORT_UCP
3659 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3660 BOOL charsaved = FALSE;
3661 int typereg = TMP1, scriptreg = TMP1;
3662 unsigned int typeoffset;
3663 #endif
3664 int invertcmp, numberofcmps;
3665 unsigned int charoffset;
3666
3667 /* Although SUPPORT_UTF must be defined, we are
3668 not necessary in utf mode even in 8 bit mode. */
3669 detect_partial_match(common, backtracks);
3670 read_char(common);
3671
3672 if ((*cc++ & XCL_MAP) != 0)
3673 {
3674 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3675 #ifndef COMPILE_PCRE8
3676 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3677 #elif defined SUPPORT_UTF
3678 if (common->utf)
3679 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3680 #endif
3681
3682 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3683 {
3684 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3685 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3686 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3687 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3688 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3689 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3690 }
3691
3692 #ifndef COMPILE_PCRE8
3693 JUMPHERE(jump);
3694 #elif defined SUPPORT_UTF
3695 if (common->utf)
3696 JUMPHERE(jump);
3697 #endif
3698 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3699 #ifdef SUPPORT_UCP
3700 charsaved = TRUE;
3701 #endif
3702 cc += 32 / sizeof(pcre_uchar);
3703 }
3704
3705 /* Scanning the necessary info. */
3706 ccbegin = cc;
3707 compares = 0;
3708 while (*cc != XCL_END)
3709 {
3710 compares++;
3711 if (*cc == XCL_SINGLE)
3712 {
3713 cc += 2;
3714 #ifdef SUPPORT_UTF
3715 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3716 #endif
3717 #ifdef SUPPORT_UCP
3718 needschar = TRUE;
3719 #endif
3720 }
3721 else if (*cc == XCL_RANGE)
3722 {
3723 cc += 2;
3724 #ifdef SUPPORT_UTF
3725 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3726 #endif
3727 cc++;
3728 #ifdef SUPPORT_UTF
3729 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3730 #endif
3731 #ifdef SUPPORT_UCP
3732 needschar = TRUE;
3733 #endif
3734 }
3735 #ifdef SUPPORT_UCP
3736 else
3737 {
3738 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3739 cc++;
3740 switch(*cc)
3741 {
3742 case PT_ANY:
3743 break;
3744
3745 case PT_LAMP:
3746 case PT_GC:
3747 case PT_PC:
3748 case PT_ALNUM:
3749 needstype = TRUE;
3750 break;
3751
3752 case PT_SC:
3753 needsscript = TRUE;
3754 break;
3755
3756 case PT_SPACE:
3757 case PT_PXSPACE:
3758 case PT_WORD:
3759 needstype = TRUE;
3760 needschar = TRUE;
3761 break;
3762
3763 default:
3764 SLJIT_ASSERT_STOP();
3765 break;
3766 }
3767 cc += 2;
3768 }
3769 #endif
3770 }
3771
3772 #ifdef SUPPORT_UCP
3773 /* Simple register allocation. TMP1 is preferred if possible. */
3774 if (needstype || needsscript)
3775 {
3776 if (needschar && !charsaved)
3777 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3778 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3779 if (needschar)
3780 {
3781 if (needstype)
3782 {
3783 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3784 typereg = RETURN_ADDR;
3785 }
3786
3787 if (needsscript)
3788 scriptreg = TMP3;
3789 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3790 }
3791 else if (needstype && needsscript)
3792 scriptreg = TMP3;
3793 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3794
3795 if (needsscript)
3796 {
3797 if (scriptreg == TMP1)
3798 {
3799 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3800 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3801 }
3802 else
3803 {
3804 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3805 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3806 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3807 }
3808 }
3809 }
3810 #endif
3811
3812 /* Generating code. */
3813 cc = ccbegin;
3814 charoffset = 0;
3815 numberofcmps = 0;
3816 #ifdef SUPPORT_UCP
3817 typeoffset = 0;
3818 #endif
3819
3820 while (*cc != XCL_END)
3821 {
3822 compares--;
3823 invertcmp = (compares == 0 && list != backtracks);
3824 jump = NULL;
3825
3826 if (*cc == XCL_SINGLE)
3827 {
3828 cc ++;
3829 #ifdef SUPPORT_UTF
3830 if (common->utf)
3831 {
3832 GETCHARINC(c, cc);
3833 }
3834 else
3835 #endif
3836 c = *cc++;
3837
3838 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3839 {
3840 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3841 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3842 numberofcmps++;
3843 }
3844 else if (numberofcmps > 0)
3845 {
3846 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3847 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3848 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3849 numberofcmps = 0;
3850 }
3851 else
3852 {
3853 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3854 numberofcmps = 0;
3855 }
3856 }
3857 else if (*cc == XCL_RANGE)
3858 {
3859 cc ++;
3860 #ifdef SUPPORT_UTF
3861 if (common->utf)
3862 {
3863 GETCHARINC(c, cc);
3864 }
3865 else
3866 #endif
3867 c = *cc++;
3868 SET_CHAR_OFFSET(c);
3869 #ifdef SUPPORT_UTF
3870 if (common->utf)
3871 {
3872 GETCHARINC(c, cc);
3873 }
3874 else
3875 #endif
3876 c = *cc++;
3877 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3878 {
3879 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3880 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3881 numberofcmps++;
3882 }
3883 else if (numberofcmps > 0)
3884 {
3885 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3886 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3887 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3888 numberofcmps = 0;
3889 }
3890 else
3891 {
3892 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3893 numberofcmps = 0;
3894 }
3895 }
3896 #ifdef SUPPORT_UCP
3897 else
3898 {
3899 if (*cc == XCL_NOTPROP)
3900 invertcmp ^= 0x1;
3901 cc++;
3902 switch(*cc)
3903 {
3904 case PT_ANY:
3905 if (list != backtracks)
3906 {
3907 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3908 continue;
3909 }
3910 else if (cc[-1] == XCL_NOTPROP)
3911 continue;
3912 jump = JUMP(SLJIT_JUMP);
3913 break;
3914
3915 case PT_LAMP:
3916 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3917 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3918 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3919 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3920 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3921 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3922 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3923 break;
3924
3925 case PT_GC:
3926 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3927 SET_TYPE_OFFSET(c);
3928 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3929 break;
3930
3931 case PT_PC:
3932 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3933 break;
3934
3935 case PT_SC:
3936 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3937 break;
3938
3939 case PT_SPACE:
3940 case PT_PXSPACE:
3941 if (*cc == PT_SPACE)
3942 {
3943 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3944 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3945 }
3946 SET_CHAR_OFFSET(9);
3947 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3948 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3949 if (*cc == PT_SPACE)
3950 JUMPHERE(jump);
3951
3952 SET_TYPE_OFFSET(ucp_Zl);
3953 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3954 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3955 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3956 break;
3957
3958 case PT_WORD:
3959 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3960 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3961 /* ... fall through */
3962
3963 case PT_ALNUM:
3964 SET_TYPE_OFFSET(ucp_Ll);
3965 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3966 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3967 SET_TYPE_OFFSET(ucp_Nd);
3968 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3969 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3970 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3971 break;
3972 }
3973 cc += 2;
3974 }
3975 #endif
3976
3977 if (jump != NULL)
3978 add_jump(compiler, compares > 0 ? list : backtracks, jump);
3979 }
3980
3981 if (found != NULL)
3982 set_jumps(found, LABEL());
3983 }
3984
3985 #undef SET_TYPE_OFFSET
3986 #undef SET_CHAR_OFFSET
3987
3988 #endif
3989
3990 static pcre_uchar *compile_char1_trypath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
3991 {
3992 DEFINE_COMPILER;
3993 int length;
3994 unsigned int c, oc, bit;
3995 compare_context context;
3996 struct sljit_jump *jump[4];
3997 #ifdef SUPPORT_UTF
3998 struct sljit_label *label;
3999 #ifdef SUPPORT_UCP
4000 pcre_uchar propdata[5];
4001 #endif
4002 #endif
4003
4004 switch(type)
4005 {
4006 case OP_SOD:
4007 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4008 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4009 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4010 return cc;
4011
4012 case OP_SOM:
4013 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4014 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4015 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4016 return cc;
4017
4018 case OP_NOT_WORD_BOUNDARY:
4019 case OP_WORD_BOUNDARY:
4020 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4021 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4022 return cc;
4023
4024 case OP_NOT_DIGIT:
4025 case OP_DIGIT:
4026 /* Digits are usually 0-9, so it is worth to optimize them. */
4027 if (common->digits[0] == -2)
4028 get_ctype_ranges(common, ctype_digit, common->digits);
4029 detect_partial_match(common, backtracks);
4030 /* Flip the starting bit in the negative case. */
4031 if (type == OP_NOT_DIGIT)
4032 common->digits[1] ^= 1;
4033 if (!check_ranges(common, common->digits, backtracks, TRUE))
4034 {
4035 read_char8_type(common);
4036 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4037 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4038 }
4039 if (type == OP_NOT_DIGIT)
4040 common->digits[1] ^= 1;
4041 return cc;
4042
4043 case OP_NOT_WHITESPACE:
4044 case OP_WHITESPACE:
4045 detect_partial_match(common, backtracks);
4046 read_char8_type(common);
4047 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4048 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4049 return cc;
4050
4051 case OP_NOT_WORDCHAR:
4052 case OP_WORDCHAR:
4053 detect_partial_match(common, backtracks);
4054 read_char8_type(common);
4055 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4056 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4057 return cc;
4058
4059 case OP_ANY:
4060 detect_partial_match(common, backtracks);
4061 read_char(common);
4062 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4063 {
4064 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4065 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4066 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4067 else
4068 jump[1] = check_str_end(common);
4069
4070 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4071 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4072 if (jump[1] != NULL)
4073 JUMPHERE(jump[1]);
4074 JUMPHERE(jump[0]);
4075 }
4076 else
4077 check_newlinechar(common, common->nltype, backtracks, TRUE);
4078 return cc;
4079
4080 case OP_ALLANY:
4081 detect_partial_match(common, backtracks);
4082 #ifdef SUPPORT_UTF
4083 if (common->utf)
4084 {
4085 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4086 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4087 #ifdef COMPILE_PCRE8
4088 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4089 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4090 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4091 #else /* COMPILE_PCRE8 */
4092 #ifdef COMPILE_PCRE16
4093 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4094 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4095 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4096 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
4097 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4098 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4099 #endif /* COMPILE_PCRE16 */
4100 #endif /* COMPILE_PCRE8 */
4101 JUMPHERE(jump[0]);
4102 return cc;
4103 }
4104 #endif
4105 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4106 return cc;
4107
4108 case OP_ANYBYTE:
4109 detect_partial_match(common, backtracks);
4110 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4111 return cc;
4112
4113 #ifdef SUPPORT_UTF
4114 #ifdef SUPPORT_UCP
4115 case OP_NOTPROP:
4116 case OP_PROP:
4117 propdata[0] = 0;
4118 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4119 propdata[2] = cc[0];
4120 propdata[3] = cc[1];
4121 propdata[4] = XCL_END;
4122 compile_xclass_trypath(common, propdata, backtracks);
4123 return cc + 2;
4124 #endif
4125 #endif
4126
4127 case OP_ANYNL:
4128 detect_partial_match(common, backtracks);
4129 read_char(common);
4130 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4131 /* We don't need to handle soft partial matching case. */
4132 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4133 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4134 else
4135 jump[1] = check_str_end(common);
4136 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4137 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4138 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4139 jump[3] = JUMP(SLJIT_JUMP);
4140 JUMPHERE(jump[0]);
4141 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4142 JUMPHERE(jump[1]);
4143 JUMPHERE(jump[2]);
4144 JUMPHERE(jump[3]);
4145 return cc;
4146
4147 case OP_NOT_HSPACE:
4148 case OP_HSPACE:
4149 detect_partial_match(common, backtracks);
4150 read_char(common);
4151 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4152 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4153 return cc;
4154
4155 case OP_NOT_VSPACE:
4156 case OP_VSPACE:
4157 detect_partial_match(common, backtracks);
4158 read_char(common);
4159 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4160 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4161 return cc;
4162
4163 #ifdef SUPPORT_UCP
4164 case OP_EXTUNI:
4165 detect_partial_match(common, backtracks);
4166 read_char(common);
4167 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4168 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4169 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
4170
4171 label = LABEL();
4172 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4173 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4174 read_char(common);
4175 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4176 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4177 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
4178
4179 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4180 JUMPHERE(jump[0]);
4181 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4182 {
4183 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4184 /* Since we successfully read a char above, partial matching must occur. */
4185 check_partial(common, TRUE);
4186 JUMPHERE(jump[0]);
4187 }
4188 return cc;
4189 #endif
4190
4191 case OP_EODN:
4192 /* Requires rather complex checks. */
4193 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4194 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4195 {
4196 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4197 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4198 if (common->mode == JIT_COMPILE)
4199 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4200 else
4201 {
4202 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4203 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4204 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
4205 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4206 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
4207 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4208 check_partial(common, TRUE);
4209 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4210 JUMPHERE(jump[1]);
4211 }
4212 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4213 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4214 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4215 }
4216 else if (common->nltype == NLTYPE_FIXED)
4217 {
4218 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4219 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4220 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4221 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4222 }
4223 else
4224 {
4225 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4226 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4227 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4228 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4229 jump[2] = JUMP(SLJIT_C_GREATER);
4230 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4231 /* Equal. */
4232 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4233 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4234 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4235
4236 JUMPHERE(jump[1]);
4237 if (common->nltype == NLTYPE_ANYCRLF)
4238 {
4239 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4240 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4241 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4242 }
4243 else
4244 {
4245 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4246 read_char(common);
4247 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4248 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4249 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4250 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4251 }
4252 JUMPHERE(jump[2]);
4253 JUMPHERE(jump[3]);
4254 }
4255 JUMPHERE(jump[0]);
4256 check_partial(common, FALSE);
4257 return cc;
4258
4259 case OP_EOD:
4260 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4261 check_partial(common, FALSE);
4262 return cc;
4263
4264 case OP_CIRC:
4265 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4266 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4267 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4268 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4269 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4270 return cc;
4271
4272 case OP_CIRCM:
4273 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4274 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4275 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4276 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4277 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4278 jump[0] = JUMP(SLJIT_JUMP);
4279 JUMPHERE(jump[1]);
4280
4281 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4282 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4283 {
4284 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4285 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4286 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4287 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4288 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4289 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4290 }
4291 else
4292 {
4293 skip_char_back(common);
4294 read_char(common);
4295 check_newlinechar(common, common->nltype, backtracks, FALSE);
4296 }
4297 JUMPHERE(jump[0]);
4298 return cc;
4299
4300 case OP_DOLL:
4301 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4302 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4303 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4304
4305 if (!common->endonly)
4306 compile_char1_trypath(common, OP_EODN, cc, backtracks);
4307 else
4308 {
4309 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4310 check_partial(common, FALSE);
4311 }
4312 return cc;
4313
4314 case OP_DOLLM:
4315 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4316 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4317 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4318 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4319 check_partial(common, FALSE);
4320 jump[0] = JUMP(SLJIT_JUMP);
4321 JUMPHERE(jump[1]);
4322
4323 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4324 {
4325 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4326 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4327 if (common->mode == JIT_COMPILE)
4328 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4329 else
4330 {
4331 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4332 /* STR_PTR = STR_END - IN_UCHARS(1) */
4333 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4334 check_partial(common, TRUE);
4335 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4336 JUMPHERE(jump[1]);
4337 }
4338
4339 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4340 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4341 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4342 }
4343 else
4344 {
4345 peek_char(common);
4346 check_newlinechar(common, common->nltype, backtracks, FALSE);
4347 }
4348 JUMPHERE(jump[0]);
4349 return cc;
4350
4351 case OP_CHAR:
4352 case OP_CHARI:
4353 length = 1;
4354 #ifdef SUPPORT_UTF
4355 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4356 #endif
4357 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4358 {
4359 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4360 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4361
4362 context.length = IN_UCHARS(length);
4363 context.sourcereg = -1;
4364 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4365 context.ucharptr = 0;
4366 #endif
4367 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4368 }
4369 detect_partial_match(common, backtracks);
4370 read_char(common);
4371 #ifdef SUPPORT_UTF
4372 if (common->utf)
4373 {
4374 GETCHAR(c, cc);
4375 }
4376 else
4377 #endif
4378 c = *cc;
4379 if (type == OP_CHAR || !char_has_othercase(common, cc))
4380 {
4381 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4382 return cc + length;
4383 }
4384 oc = char_othercase(common, c);
4385 bit = c ^ oc;
4386 if (ispowerof2(bit))
4387 {
4388 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4389 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4390 return cc + length;
4391 }
4392 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4393 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4394 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_othercase(common, c));
4395 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4396 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4397 return cc + length;
4398
4399 case OP_NOT:
4400 case OP_NOTI:
4401 detect_partial_match(common, backtracks);
4402 length = 1;
4403 #ifdef SUPPORT_UTF
4404 if (common->utf)
4405 {
4406 #ifdef COMPILE_PCRE8
4407 c = *cc;
4408 if (c < 128)
4409 {
4410 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4411 if (type == OP_NOT || !char_has_othercase(common, cc))
4412 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4413 else
4414 {
4415 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4416 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4417 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4418 }
4419 /* Skip the variable-length character. */
4420 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4421 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4422 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4423 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4424 JUMPHERE(jump[0]);
4425 return cc + 1;
4426 }
4427 else
4428 #endif /* COMPILE_PCRE8 */
4429 {
4430 GETCHARLEN(c, cc, length);
4431 read_char(common);
4432 }
4433 }
4434 else
4435 #endif /* SUPPORT_UTF */
4436 {
4437 read_char(common);
4438 c = *cc;
4439 }
4440
4441 if (type == OP_NOT || !char_has_othercase(common, cc))
4442 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4443 else
4444 {
4445 oc = char_othercase(common, c);
4446 bit = c ^ oc;
4447 if (ispowerof2(bit))
4448 {
4449 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4450 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4451 }
4452 else
4453 {
4454 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4455 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4456 }
4457 }
4458 return cc + length;
4459
4460 case OP_CLASS:
4461 case OP_NCLASS:
4462 detect_partial_match(common, backtracks);
4463 read_char(common);
4464 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4465 return cc + 32 / sizeof(pcre_uchar);
4466
4467 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4468 jump[0] = NULL;
4469 #ifdef COMPILE_PCRE8
4470 /* This check only affects 8 bit mode. In other modes, we
4471 always need to compare the value with 255. */
4472 if (common->utf)
4473 #endif /* COMPILE_PCRE8 */
4474 {
4475 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4476 if (type == OP_CLASS)
4477 {
4478 add_jump(compiler, backtracks, jump[0]);
4479 jump[0] = NULL;
4480 }
4481 }
4482 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4483 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4484 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4485 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
4486 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4487 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4488 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4489 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4490 if (jump[0] != NULL)
4491 JUMPHERE(jump[0]);
4492 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4493 return cc + 32 / sizeof(pcre_uchar);
4494
4495 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
4496 case OP_XCLASS:
4497 compile_xclass_trypath(common, cc + LINK_SIZE, backtracks);
4498 return cc + GET(cc, 0) - 1;
4499 #endif
4500
4501 case OP_REVERSE:
4502 length = GET(cc, 0);
4503 if (length == 0)
4504 return cc + LINK_SIZE;
4505 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4506 #ifdef SUPPORT_UTF
4507 if (common->utf)
4508 {
4509 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4510 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4511 label = LABEL();
4512 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4513 skip_char_back(common);
4514 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4515 JUMPTO(SLJIT_C_NOT_ZERO, label);
4516 }
4517 else
4518 #endif
4519 {
4520 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4521 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4522 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4523 }
4524 check_start_used_ptr(common);
4525 return cc + LINK_SIZE;
4526 }
4527 SLJIT_ASSERT_STOP();
4528 return cc;
4529 }
4530
4531 static SLJIT_INLINE pcre_uchar *compile_charn_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4532 {
4533 /* This function consumes at least one input character. */
4534 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4535 DEFINE_COMPILER;
4536 pcre_uchar *ccbegin = cc;
4537 compare_context context;
4538 int size;
4539
4540 context.length = 0;
4541 do
4542 {
4543 if (cc >= ccend)
4544 break;
4545
4546 if (*cc == OP_CHAR)
4547 {
4548 size = 1;
4549 #ifdef SUPPORT_UTF
4550 if (common->utf && HAS_EXTRALEN(cc[1]))
4551 size += GET_EXTRALEN(cc[1]);
4552 #endif
4553 }
4554 else if (*cc == OP_CHARI)
4555 {
4556 size = 1;
4557 #ifdef SUPPORT_UTF
4558 if (common->utf)
4559 {
4560 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4561 size = 0;
4562 else if (HAS_EXTRALEN(cc[1]))
4563 size += GET_EXTRALEN(cc[1]);
4564 }
4565 else
4566 #endif
4567 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4568 size = 0;
4569 }
4570 else
4571 size = 0;
4572
4573 cc += 1 + size;
4574 context.length += IN_UCHARS(size);
4575 }
4576 while (size > 0 && context.length <= 128);
4577
4578 cc = ccbegin;
4579 if (context.length > 0)
4580 {
4581 /* We have a fixed-length byte sequence. */
4582 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4583 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4584
4585 context.sourcereg = -1;
4586 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4587 context.ucharptr = 0;
4588 #endif
4589 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4590 return cc;
4591 }
4592
4593 /* A non-fixed length character will be checked if length == 0. */
4594 return compile_char1_trypath(common, *cc, cc + 1, backtracks);
4595 }
4596
4597 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4598 {
4599 DEFINE_COMPILER;
4600 int offset = GET2(cc, 1) << 1;
4601
4602 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4603 if (!common->jscript_compat)
4604 {
4605 if (backtracks == NULL)
4606 {
4607 /* OVECTOR(1) contains the "string begin - 1" constant. */
4608 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4609 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4610 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4611 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4612 return JUMP(SLJIT_C_NOT_ZERO);
4613 }
4614 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4615 }
4616 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4617 }
4618
4619 /* Forward declarations: both functions are called (e.g. by compile_assert_trypath) before a definition has been seen. */
4620 static void compile_trypath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4621 static void compile_backtrackpath(compiler_common *, struct backtrack_common *);
4622
/* Allocates a zero-filled backtrack record of `size` bytes with
sljit_alloc_memory, stores `ccstart` in its cc field and pushes it onto
parent->top. The variables `compiler`, `parent` and `backtrack` must exist in
the calling function. If the compiler reports an error after the allocation,
the calling function returns `error`. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
4635
/* Variant of the backtrack-record push for functions returning void:
allocates a zero-filled record of `size` bytes with sljit_alloc_memory,
stores `ccstart` in its cc field and pushes it onto parent->top. The
variables `compiler`, `parent` and `backtrack` must exist in the calling
function. If the compiler reports an error after the allocation, the calling
function returns without a value. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
4648
/* Casts the variable named `backtrack` in the calling function to a pointer to the given type. */
4649 #define BACKTRACK_AS(type) ((type *)backtrack)
4650
/* Emits the code that matches one occurrence of a back reference. cc points
at the reference opcode (tested against OP_REF / OP_REFI below) and the group
number is read with GET2(cc, 1). Failure jumps are added to backtracks.
  withchecks - when TRUE, emit the OVECTOR(1) test (unless jscript_compat is
               set) and create a jump for a zero-length captured range.
  emptyfail  - decides where that zero-length jump goes: TRUE adds it to
               backtracks, FALSE binds it to the end of the emitted code.
Returns cc + 1 + IMM2_SIZE. */
4651 static pcre_uchar *compile_ref_trypath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4652 {
4653 DEFINE_COMPILER;
/* Index of the first of the two OVECTOR slots used for the referenced group. */
4654 int offset = GET2(cc, 1) << 1;
/* The zero-length jump; it stays NULL when withchecks is FALSE. */
4655 struct sljit_jump *jump = NULL;
4656 struct sljit_jump *partial;
4657 struct sljit_jump *nopartial;
4658
/* TMP1 = first slot of the group. */
4659 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4660 /* OVECTOR(1) contains the "string begin - 1" constant. */
4661 if (withchecks && !common->jscript_compat)
4662 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4663
4664 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* Caseless reference in UTF mode: the comparison is done by calling do_utf_caselesscmp. */
4665 if (common->utf && *cc == OP_REFI)
4666 {
4667 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
/* TMP2 = second slot of the group; equal slots mean a zero-length range. */
4668 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4669 if (withchecks)
4670 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4671
4672 /* Needed to save important temporary registers. */
4673 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* The register asserted above to be STACK_TOP now receives ARGUMENTS, and STR_PTR is stored in its uchar_ptr field. */
4674 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
4675 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4676 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4677 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are failures; any other value becomes the new STR_PTR. In the partial modes the value 1 additionally goes through check_partial before backtracking. */
4678 if (common->mode == JIT_COMPILE)
4679 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4680 else
4681 {
4682 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4683 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4684 check_partial(common, FALSE);
4685 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4686 JUMPHERE(nopartial);
4687 }
4688 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4689 }
4690 else
4691 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4692 {
/* TMP2 = second slot - first slot, i.e. the amount STR_PTR is advanced by; a zero result is the zero-length case. */
4693 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4694 if (withchecks)
4695 jump = JUMP(SLJIT_C_ZERO);
4696
4697 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Taken when the advanced STR_PTR is beyond STR_END. In JIT_COMPILE mode this is a plain failure; otherwise the jump is bound further down. */
4698 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4699 if (common->mode == JIT_COMPILE)
4700 add_jump(compiler, backtracks, partial);
4701
/* Call the shared compare routine; a non-zero TMP2 afterwards is treated as a mismatch. */
4702 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4703 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4704
4705 if (common->mode != JIT_COMPILE)
4706 {
/* A full compare succeeded: skip the partial-match handling below. */
4707 nopartial = JUMP(SLJIT_JUMP);
4708 JUMPHERE(partial);
4709 /* TMP2 -= STR_END - STR_PTR */
4710 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4711 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* If nothing is left to compare, go straight to check_partial; otherwise compare with STR_PTR set to STR_END first. */
4712 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4713 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4714 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4715 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4716 JUMPHERE(partial);
4717 check_partial(common, FALSE);
4718 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4719 JUMPHERE(nopartial);
4720 }
4721 }
4722
/* Route the zero-length jump as requested by emptyfail. */
4723 if (jump != NULL)
4724 {
4725 if (emptyfail)
4726 add_jump(compiler, backtracks, jump);
4727 else
4728 JUMPHERE(jump);
4729 }
4730 return cc + 1 + IMM2_SIZE;
4731 }
4732
/* Emits the try path for a back reference that is followed by a repeat
operator. The repeat opcode is read from cc[1 + IMM2_SIZE] and decoded into
min and max; STAR and PLUS leave max at 0, and the max == 0 code below loops
without an upper limit. An iterator_backtrack record is pushed onto parent.
Returns NULL if that push fails, otherwise the address after the repeat item. */
4733 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4734 {
4735 DEFINE_COMPILER;
4736 backtrack_common *backtrack;
4737 pcre_uchar type;
4738 struct sljit_label *label;
4739 struct sljit_jump *zerolength;
4740 struct sljit_jump *jump = NULL;
/* cc is advanced past the repeat item below; ccbegin keeps pointing at the reference opcode. */
4741 pcre_uchar *ccbegin = cc;
4742 int min = 0, max = 0;
4743 BOOL minimize;
4744
4745 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4746
4747 type = cc[1 + IMM2_SIZE];
/* The low bit of the repeat opcode selects the minimizing code below (presumably each OP_CRMINx is the odd neighbour of its OP_CRx -- confirm against the opcode table). */
4748 minimize = (type & 0x1) != 0;
4749 switch(type)
4750 {
4751 case OP_CRSTAR:
4752 case OP_CRMINSTAR:
4753 min = 0;
4754 max = 0;
4755 cc += 1 + IMM2_SIZE + 1;
4756 break;
4757 case OP_CRPLUS:
4758 case OP_CRMINPLUS:
4759 min = 1;
4760 max = 0;
4761 cc += 1 + IMM2_SIZE + 1;
4762 break;
4763 case OP_CRQUERY:
4764 case OP_CRMINQUERY:
4765 min = 0;
4766 max = 1;
4767 cc += 1 + IMM2_SIZE + 1;
4768 break;
4769 case OP_CRRANGE:
4770 case OP_CRMINRANGE:
/* Both limits are stored in the pattern right after the repeat opcode. */
4771 min = GET2(cc, 1 + IMM2_SIZE + 1);
4772 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4773 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4774 break;
4775 default:
4776 SLJIT_ASSERT_STOP();
4777 break;
4778 }
4779
/* Non-minimizing repeat: this branch returns at its end. */
4780 if (!minimize)
4781 {
4782 if (min == 0)
4783 {
/* Two stack slots: the current STR_PTR and a zero. */
4784 allocate_stack(common, 2);
4785 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4786 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4787 /* Temporary release of STR_PTR. */
4788 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
/* With a NULL list, both checks are folded into the returned jump. */
4789 zerolength = compile_ref_checks(common, ccbegin, NULL);
4790 /* Restore if not zero length. */
4791 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4792 }
4793 else
4794 {
/* One stack slot holding zero; the OVECTOR(1) check fails into the backtrack list. */
4795 allocate_stack(common, 1);
4796 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4797 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4798 }
4799
/* A repetition counter is kept in the POSSESSIVE0 local only when a limit above 1 exists. */
4800 if (min > 1 || max > 1)
4801 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4802
/* Start of the repeat loop; the checks were already emitted above, so none are requested here. */
4803 label = LABEL();
4804 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4805
4806 if (min > 1 || max > 1)
4807 {
/* Increment the counter, loop until min is reached, then push STR_PTR and loop until max is reached. */
4808 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4809 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4810 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4811 if (min > 1)
4812 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4813 if (max > 1)
4814 {
4815 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4816 allocate_stack(common, 1);
4817 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4818 JUMPTO(SLJIT_JUMP, label);
4819 JUMPHERE(jump);
4820 }
4821 }
4822
4823 if (max == 0)
4824 {
4825 /* Includes min > 1 case as well. */
4826 allocate_stack(common, 1);
4827 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4828 JUMPTO(SLJIT_JUMP, label);
4829 }
4830
/* The jump returned by compile_ref_checks lands here, after the loop; this position is recorded as the trypath label. */
4831 JUMPHERE(zerolength);
4832 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4833
4834 decrease_call_count(common);
4835 return cc;
4836 }
4837
/* Minimizing repeat: STACK(0) is set to zero, and STACK(1) is zeroed unless the repeat is OP_CRMINSTAR. */
4838 allocate_stack(common, 2);
4839 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4840 if (type != OP_CRMINSTAR)
4841 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4842
4843 if (min == 0)
4844 {
/* Zero repetitions are acceptable: store STR_PTR and jump over the first match attempt. */
4845 zerolength = compile_ref_checks(common, ccbegin, NULL);
4846 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4847 jump = JUMP(SLJIT_JUMP);
4848 }
4849 else
4850 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4851
/* The trypath label marks the code that matches one more repetition; with an upper limit, fail once STACK(1) has reached max. */
4852 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
4853 if (max > 0)
4854 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4855
4856 compile_ref_trypath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4857 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4858
4859 if (min > 1)
4860 {
/* Count this repetition and go back to trypath until min repetitions are done. */
4861 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4862 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4863 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4864 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->trypath);
4865 }
4866 else if (max > 0)
4867 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4868
/* Bind the skip jump (min == 0 only) and the jump returned by compile_ref_checks. */
4869 if (jump != NULL)
4870 JUMPHERE(jump);
4871 JUMPHERE(zerolength);
4872
4873 decrease_call_count(common);
4874 return cc;
4875 }
4876
4877 static SLJIT_INLINE pcre_uchar *compile_recurse_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4878 {
4879 DEFINE_COMPILER;
4880 backtrack_common *backtrack;
4881 recurse_entry *entry = common->entries;
4882 recurse_entry *prev = NULL;
4883 int start = GET(cc, 1);
4884
4885 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
4886 while (entry != NULL)
4887 {
4888 if (entry->start == start)
4889 break;
4890 prev = entry;
4891 entry = entry->next;
4892 }
4893
4894 if (entry == NULL)
4895 {
4896 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4897 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4898 return NULL;
4899 entry->next = NULL;
4900 entry->entry = NULL;
4901 entry->calls = NULL;
4902 entry->start = start;
4903
4904 if (prev != NULL)
4905 prev->next = entry;
4906 else
4907 common->entries = entry;
4908 }
4909
4910 if (common->has_set_som && common->mark_ptr != 0)
4911 {
4912 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4913 allocate_stack(common, 2);
4914 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
4915 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4916 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4917 }
4918 else if (common->has_set_som || common->mark_ptr != 0)
4919 {
4920 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
4921 allocate_stack(common, 1);
4922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4923 }
4924
4925 if (entry->entry == NULL)
4926 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4927 else
4928 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4929 /* Leave if the match is failed. */
4930 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4931 return cc + 1 + LINK_SIZE;
4932 }
4933
4934 static pcre_uchar *compile_assert_trypath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
4935 {
4936 DEFINE_COMPILER;
4937 int framesize;
4938 int localptr;
4939 backtrack_common altbacktrack;
4940 pcre_uchar *ccbegin;
4941 pcre_uchar opcode;
4942 pcre_uchar bra = OP_BRA;
4943 jump_list *tmp = NULL;
4944 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
4945 jump_list **found;
4946 /* Saving previous accept variables. */
4947 struct sljit_label *save_quitlabel = common->quitlabel;
4948 struct sljit_label *save_acceptlabel = common->acceptlabel;
4949 jump_list *save_quit = common->quit;
4950 jump_list *save_accept = common->accept;
4951 struct sljit_jump *jump;
4952 struct sljit_jump *brajump = NULL;
4953
4954 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4955 {
4956 SLJIT_ASSERT(!conditional);
4957 bra = *cc;
4958 cc++;
4959 }
4960 localptr = PRIV_DATA(cc);
4961 SLJIT_ASSERT(localptr != 0);
4962 framesize = get_framesize(common, cc, FALSE);
4963 backtrack->framesize = framesize;
4964 backtrack->localptr = localptr;
4965 opcode = *cc;
4966 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
4967 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
4968 ccbegin = cc;
4969 cc += GET(cc, 1);
4970
4971 if (bra == OP_BRAMINZERO)
4972 {
4973 /* This is a braminzero backtrack path. */
4974 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4975 free_stack(common, 1);
4976 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4977 }
4978
4979 if (framesize < 0)
4980 {
4981 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
4982 allocate_stack(common, 1);
4983 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4984 }
4985 else
4986 {
4987 allocate_stack(common, framesize + 2);
4988 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
4989 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
4990 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
4991 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4992 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4993 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
4994 }
4995
4996 memset(&altbacktrack, 0, sizeof(backtrack_common));
4997 common->quitlabel = NULL;
4998 common->quit = NULL;
4999 while (1)
5000 {
5001 common->acceptlabel = NULL;
5002 common->accept = NULL;
5003 altbacktrack.top = NULL;
5004 altbacktrack.topbacktracks = NULL;
5005
5006 if (*ccbegin == OP_ALT)
5007 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5008
5009 altbacktrack.cc = ccbegin;
5010 compile_trypath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5011 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5012 {
5013 common->quitlabel = save_quitlabel;
5014 common->acceptlabel = save_acceptlabel;
5015 common->quit = save_quit;
5016 common->accept = save_accept;
5017 return NULL;
5018 }
5019 common->acceptlabel = LABEL();
5020 if (common->accept != NULL)
5021 set_jumps(common->accept, common->acceptlabel);
5022
5023 /* Reset stack. */
5024 if (framesize < 0)
5025 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5026 else {
5027 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5028 {
5029 /* We don't need to keep the STR_PTR, only the previous localptr. */
5030 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5031 }
5032 else
5033 {
5034 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5035 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5036 }
5037 }
5038
5039 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5040 {
5041 /* We know that STR_PTR was stored on the top of the stack. */
5042 if (conditional)
5043 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5044 else if (bra == OP_BRAZERO)
5045 {
5046 if (framesize < 0)
5047 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5048 else
5049 {
5050 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5051 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
5052 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5053 }
5054 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5055 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5056 }
5057 else if (framesize >= 0)
5058 {
5059 /* For OP_BRA and OP_BRAMINZERO. */
5060 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5061 }
5062 }
5063 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5064
5065 compile_backtrackpath(common, altbacktrack.top);
5066 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5067 {
5068 common->quitlabel = save_quitlabel;
5069 common->acceptlabel = save_acceptlabel;
5070 common->quit = save_quit;
5071 common->accept = save_accept;
5072 return NULL;
5073 }
5074 set_jumps(altbacktrack.topbacktracks, LABEL());
5075
5076 if (*cc != OP_ALT)
5077 break;
5078
5079 ccbegin = cc;
5080 cc += GET(cc, 1);
5081 }
5082 /* None of them matched. */
5083 if (common->quit != NULL)
5084 set_jumps(common->quit, LABEL());
5085
5086 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5087 {
5088 /* Assert is failed. */
5089 if (conditional || bra == OP_BRAZERO)
5090 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5091
5092 if (framesize < 0)
5093 {
5094 /* The topmost item should be 0. */
5095 if (bra == OP_BRAZERO)
5096 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5097 else
5098 free_stack(common, 1);
5099 }
5100 else
5101 {
5102 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5103 /* The topmost item should be 0. */
5104 if (bra == OP_BRAZERO)
5105 {
5106 free_stack(common, framesize + 1);
5107 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5108 }
5109 else
5110 free_stack(common, framesize + 2);
5111 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5112 }
5113 jump = JUMP(SLJIT_JUMP);
5114 if (bra != OP_BRAZERO)
5115 add_jump(compiler, target, jump);
5116
5117 /* Assert is successful. */
5118 set_jumps(tmp, LABEL());
5119 if (framesize < 0)
5120 {
5121 /* We know that STR_PTR was stored on the top of the stack. */
5122 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5123 /* Keep the STR_PTR on the top of the stack. */
5124 if (bra == OP_BRAZERO)
5125 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5126 else if (bra == OP_BRAMINZERO)
5127 {
5128 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5129 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5130 }
5131 }
5132 else
5133 {
5134 if (bra == OP_BRA)
5135 {
5136 /* We don't need to keep the STR_PTR, only the previous localptr. */
5137 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5138 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5139 }
5140 else
5141 {
5142 /* We don't need to keep the STR_PTR, only the previous localptr. */
5143 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
5144 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5145 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5146 }
5147 }
5148
5149 if (bra == OP_BRAZERO)
5150 {
5151 backtrack->trypath = LABEL();
5152 sljit_set_label(jump, backtrack->trypath);
5153 }
5154 else if (bra == OP_BRAMINZERO)
5155 {
5156 JUMPTO(SLJIT_JUMP, backtrack->trypath);
5157 JUMPHERE(brajump);
5158 if (framesize >= 0)
5159 {
5160 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5161 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5162 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5163 }
5164 set_jumps(backtrack->common.topbacktracks, LABEL());
5165 }
5166 }
5167 else
5168 {
5169 /* AssertNot is successful. */
5170 if (framesize < 0)
5171 {
5172 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5173 if (bra != OP_BRA)
5174 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5175 else
5176 free_stack(common, 1);
5177 }
5178 else
5179 {
5180 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5181 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5182 /* The topmost item should be 0. */
5183 if (bra != OP_BRA)
5184 {
5185 free_stack(common, framesize + 1);
5186 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5187 }
5188 else
5189 free_stack(common, framesize + 2);
5190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP1, 0);
5191 }
5192
5193 if (bra == OP_BRAZERO)
5194 backtrack->trypath = LABEL();
5195 else if (bra == OP_BRAMINZERO)
5196 {
5197 JUMPTO(SLJIT_JUMP, backtrack->trypath);
5198 JUMPHERE(brajump);
5199 }
5200
5201 if (bra != OP_BRA)
5202 {
5203 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5204 set_jumps(backtrack->common.topbacktracks, LABEL());
5205 backtrack->common.topbacktracks = NULL;
5206 }
5207 }
5208
5209 common->quitlabel = save_quitlabel;
5210 common->acceptlabel = save_acceptlabel;
5211 common->quit = save_quit;
5212 common->accept = save_accept;
5213 return cc + 1 + LINK_SIZE;
5214 }
5215
5216 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
5217 {
5218 int condition = FALSE;
5219 pcre_uchar *slotA = name_table;
5220 pcre_uchar *slotB;
5221 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5222 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5223 sljit_w no_capture;
5224 int i;
5225
5226 locals += refno & 0xff;
5227 refno >>= 8;
5228 no_capture = locals[1];
5229
5230 for (i = 0; i < name_count; i++)
5231 {
5232 if (GET2(slotA, 0) == refno) break;
5233 slotA += name_entry_size;
5234 }
5235
5236 if (i < name_count)
5237 {
5238 /* Found a name for the number - there can be only one; duplicate names
5239 for different numbers are allowed, but not vice versa. First scan down
5240 for duplicates. */
5241
5242 slotB = slotA;
5243 while (slotB > name_table)
5244 {
5245 slotB -= name_entry_size;
5246 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5247 {
5248 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5249 if (condition) break;
5250 }
5251 else break;
5252 }
5253
5254 /* Scan up for duplicates */
5255 if (!condition)
5256 {
5257 slotB = slotA;
5258 for (i++; i < name_count; i++)
5259 {
5260 slotB += name_entry_size;
5261 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5262 {
5263 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5264 if (condition) break;
5265 }
5266 else break;
5267 }
5268 }
5269 }
5270 return condition;
5271 }
5272
5273 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
5274 {
5275 int condition = FALSE;
5276 pcre_uchar *slotA = name_table;
5277 pcre_uchar *slotB;
5278 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5279 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5280 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
5281 int i;
5282
5283 for (i = 0; i < name_count; i++)
5284 {
5285 if (GET2(slotA, 0) == recno) break;
5286 slotA += name_entry_size;
5287 }
5288
5289 if (i < name_count)
5290 {
5291 /* Found a name for the number - there can be only one; duplicate
5292 names for different numbers are allowed, but not vice versa. First
5293 scan down for duplicates. */
5294
5295 slotB = slotA;
5296 while (slotB > name_table)
5297 {
5298 slotB -= name_entry_size;
5299 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5300 {
5301 condition = GET2(slotB, 0) == group_num;
5302 if (condition) break;
5303 }
5304 else break;
5305 }
5306
5307 /* Scan up for duplicates */
5308 if (!condition)
5309 {
5310 slotB = slotA;
5311 for (i++; i < name_count; i++)
5312 {
5313 slotB += name_entry_size;
5314 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5315 {
5316 condition = GET2(slotB, 0) == group_num;
5317 if (condition) break;
5318 }
5319 else break;
5320 }
5321 }
5322 }
5323 return condition;
5324 }
5325
5326 /*
5327 Handling bracketed expressions is probably the most complex part.
5328
5329 Stack layout naming characters:
5330 S - Push the current STR_PTR
5331 0 - Push a 0 (NULL)
5332 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5333 before the next alternative. Not pushed if there are no alternatives.
5334 M - Any values pushed by the current alternative. Can be empty, or anything.
5335 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5336 L - Push the previous local (pointed to by localptr) to the stack
5337 () - optional values stored on the stack
5338 ()* - optional, can be stored multiple times
5339
5340 The following list shows the regular expression templates, their PCRE byte codes
5341 and stack layout supported by pcre-sljit.
5342
5343 (?:) OP_BRA | OP_KET A M
5344 () OP_CBRA | OP_KET C M
5345 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5346 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5347 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5348 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5349 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5350 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5351 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5352 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5353 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5354 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5355 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5356 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5357 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5358 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5359 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5360 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5361 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5362 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5363 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5364 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5365
5366
5367 Stack layout naming characters:
5368 A - Push the alternative index (starting from 0) on the stack.
5369 Not pushed if there are no alternatives.
5370 M - Any values pushed by the current alternative. Can be empty, or anything.
5371
5372 The next list shows the possible content of a bracket:
5373 (|) OP_*BRA | OP_ALT ... M A
5374 (?()|) OP_*COND | OP_ALT M A
5375 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5376 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5377 Or nothing, if trace is unnecessary
5378 */
5379
/* Emits the try-path (forward matching) code for one bracketed group and its
first alternative. The group may be a plain, capturing, atomic (OP_ONCE /
OP_ONCE_NC) or conditional (OP_COND / OP_SCOND) bracket, may be prefixed by
OP_BRAZERO or OP_BRAMINZERO, and is closed by OP_KET, OP_KETRMAX or
OP_KETRMIN (see the asserts below).

common  Compiler state shared by all emitters.
cc      Points to the bracket opcode, or to its OP_BRAZERO / OP_BRAMINZERO
        prefix.
parent  Backtrack record of the enclosing construct. For OP_BRAMINZERO it is
        cast to braminzero_backtrack to reach its trypath label.

Returns the position just after the closing ket of the group, bracketend(cc)
when the group is a conditional whose condition is OP_DEF, or NULL when the
sljit compiler reports an error.

NOTE(review): PUSH_BACKTRACK is a macro defined elsewhere; it presumably
allocates the bracket_backtrack record, assigns 'backtrack' and returns its
third argument (NULL) on failure - confirm against the macro. */
5380 static pcre_uchar *compile_bracket_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5381 {
5382 DEFINE_COMPILER;
5383 backtrack_common *backtrack;
5384 pcre_uchar opcode;
5385 int localptr = 0;
5386 int offset = 0;
5387 int stacksize;
5388 pcre_uchar *ccbegin;
5389 pcre_uchar *trypath;
5390 pcre_uchar bra = OP_BRA;
5391 pcre_uchar ket;
5392 assert_backtrack *assert;
5393 BOOL has_alternatives;
5394 struct sljit_jump *jump;
5395 struct sljit_jump *skip;
5396 struct sljit_label *rmaxlabel = NULL;
5397 struct sljit_jump *braminzerojump = NULL;
5398
5399 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5400
/* Remember an optional zero / minzero prefix in 'bra' and step over it. */
5401 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5402 {
5403 bra = *cc;
5404 cc++;
/* NOTE(review): redundant, the same assignment follows unconditionally. */
5405 opcode = *cc;
5406 }
5407
/* ccbegin stays on the bracket opcode; trypath is the first item inside. */
5408 opcode = *cc;
5409 ccbegin = cc;
5410 trypath = ccbegin + 1 + LINK_SIZE;
5411
/* A conditional group whose condition is OP_DEF emits no code here: the
backtrack record is unlinked again and the whole group is skipped. */
5412 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5413 {
5414 /* Drop this bracket_backtrack. */
5415 parent->top = backtrack->prev;
5416 return bracketend(cc);
5417 }
5418
/* The ket opcode sits 1 + LINK_SIZE units before the end of the bracket. */
5419 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5420 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5421 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Advance cc to the end of the first alternative (an OP_ALT or the ket). */
5422 cc += GET(cc, 1);
5423
5424 has_alternatives = *cc == OP_ALT;
/* For conditional groups has_alternatives is recomputed from the kind of
condition: it is FALSE for OP_RREF and for those OP_NRREF cases handled
below without a run-time call, TRUE otherwise. 'stacksize' is only used as a
scratch variable for the referenced number here. */
5425 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5426 {
5427 has_alternatives = (*trypath == OP_RREF) ? FALSE : TRUE;
5428 if (*trypath == OP_NRREF)
5429 {
5430 stacksize = GET2(trypath, 1);
5431 if (common->currententry == NULL || stacksize == RREF_ANY)
5432 has_alternatives = FALSE;
5433 else if (common->currententry->start == 0)
5434 has_alternatives = stacksize != 0;
5435 else
5436 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5437 }
5438 }
5439
/* From here on a repeated OP_COND is treated as OP_SCOND and OP_ONCE_NC as
OP_ONCE, so the code below only has to test one opcode of each pair. */
5440 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5441 opcode = OP_SCOND;
5442 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5443 opcode = OP_ONCE;
5444
5445 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5446 {
5447 /* Capturing brackets has a pre-allocated space. */
/* 'offset' is doubled so that OVECTOR(offset) and OVECTOR(offset + 1)
address the two ovector entries of this group. */
5448 offset = GET2(ccbegin, 1 + LINK_SIZE);
5449 localptr = OVECTOR_PRIV(offset);
5450 offset <<= 1;
5451 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
5452 trypath += IMM2_SIZE;
5453 }
5454 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5455 {
5456 /* Other brackets simply allocate the next entry. */
5457 localptr = PRIV_DATA(ccbegin);
5458 SLJIT_ASSERT(localptr != 0);
5459 BACKTRACK_AS(bracket_backtrack)->localptr = localptr;
5460 if (opcode == OP_ONCE)
5461 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5462 }
5463
5464 /* Instructions before the first alternative. */
/* One slot for the 0 pushed for an iterated group and one for the STR_PTR
pushed for OP_BRAZERO; both are written right after the allocation. */
5465 stacksize = 0;
5466 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5467 stacksize++;
5468 if (bra == OP_BRAZERO)
5469 stacksize++;
5470
5471 if (stacksize > 0)
5472 allocate_stack(common, stacksize);
5473
5474 stacksize = 0;
5475 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5476 {
5477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5478 stacksize++;
5479 }
5480
5481 if (bra == OP_BRAZERO)
5482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5483
5484 if (bra == OP_BRAMINZERO)
5485 {
5486 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5487 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5488 if (ket != OP_KETRMIN)
5489 {
/* Non-iterated case: pop the value; a 0 on the stack takes the
braminzerojump, which is resolved near the end of this function. */
5490 free_stack(common, 1);
5491 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5492 }
5493 else
5494 {
/* NOTE(review): 'opcode >= OP_SBRA' relies on the numeric order of the
bracket opcodes - confirm against the opcode table. */
5495 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5496 {
5497 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5498 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5499 /* Nothing stored during the first run. */
5500 skip = JUMP(SLJIT_JUMP);
5501 JUMPHERE(jump);
5502 /* Checking zero-length iteration. */
5503 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5504 {
5505 /* When we come from outside, localptr contains the previous STR_PTR. */
5506 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5507 }
5508 else
5509 {
5510 /* Except when the whole stack frame must be saved. */
5511 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5512 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
5513 }
5514 JUMPHERE(skip);
5515 }
5516 else
5517 {
/* No zero-length check is emitted for the remaining bracket kinds: a 0 on
the top of the stack only makes the code reload STR_PTR from STACK(1). */
5518 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5519 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5520 JUMPHERE(jump);
5521 }
5522 }
5523 }
5524
5525 if (ket == OP_KETRMIN)
5526 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
5527
/* rmaxlabel is the loop head that the OP_KETRMAX code below jumps back to. */
5528 if (ket == OP_KETRMAX)
5529 {
5530 rmaxlabel = LABEL();
5531 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5532 BACKTRACK_AS(bracket_backtrack)->alttrypath = rmaxlabel;
5533 }
5534
5535 /* Handling capturing brackets and alternatives. */
5536 if (opcode == OP_ONCE)
5537 {
5538 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5539 {
5540 /* Neither capturing brackets nor recursions are found in the block. */
5541 if (ket == OP_KETRMIN)
5542 {
/* Push STR_PTR and the old localptr value; localptr is then set to
STACK_TOP minus one word. */
5543 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5544 allocate_stack(common, 2);
5545 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5546 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5547 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5548 }
5549 else if (ket == OP_KETRMAX || has_alternatives)
5550 {
/* localptr records STACK_TOP as it was before STR_PTR is pushed. */
5551 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5552 allocate_stack(common, 1);
5553 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5554 }
5555 else
5556 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5557 }
5558 else
5559 {
/* A frame is needed: besides the frame itself the old localptr value is
pushed, plus STR_PTR when the group is iterated or has alternatives. */
5560 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5561 {
5562 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5563 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5564 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5565 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5566 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5567 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5568 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5569 }
5570 else
5571 {
5572 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5573 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5574 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5575 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5576 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5577 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5578 }
5579 }
5580 }
5581 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5582 {
5583 /* Saving the previous values. */
/* Three slots: both ovector entries of the group and the old localptr
value. localptr then receives the current STR_PTR. */
5584 allocate_stack(common, 3);
5585 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5586 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5587 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5588 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5589 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5590 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
5591 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5592 }
5593 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5594 {
5595 /* Saving the previous value. */
5596 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5597 allocate_stack(common, 1);
5598 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0);
5599 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5600 }
5601 else if (has_alternatives)
5602 {
5603 /* Pushing the starting string pointer. */
5604 allocate_stack(common, 1);
5605 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5606 }
5607
5608 /* Generating code for the first alternative. */
/* For conditional groups the condition is emitted first; jumps taken when it
is false are collected in u.condfailed of the backtrack record. */
5609 if (opcode == OP_COND || opcode == OP_SCOND)
5610 {
5611 if (*trypath == OP_CREF)
5612 {
/* The condition fails when the group's ovector entry equals OVECTOR(1). */
5613 SLJIT_ASSERT(has_alternatives);
5614 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5615 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(trypath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5616 trypath += 1 + IMM2_SIZE;
5617 }
5618 else if (*trypath == OP_NCREF)
5619 {
/* Fast path: if the referenced group itself differs from OVECTOR(1) the
call is skipped. Otherwise do_searchovector is called with the name count
and entry size in LOCALS0 / LOCALS1, and an argument that packs the group
number above the low 8 bits and ovector_start / sizeof(sljit_w) into them.
STACK_TOP is saved in POSSESSIVE1 before the call and reloaded after it. */
5620 SLJIT_ASSERT(has_alternatives);
5621 stacksize = GET2(trypath, 1);
5622 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5623
5624 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5625 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5626 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5627 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
5628 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5629 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5630 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5631 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
/* A zero in SLJIT_TEMPORARY_REG1 after the call means the condition failed. */
5632 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5633
5634 JUMPHERE(jump);
5635 trypath += 1 + IMM2_SIZE;
5636 }
5637 else if (*trypath == OP_RREF || *trypath == OP_NRREF)
5638 {
5639 /* Never has other case. */
5640 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5641
/* 'stacksize' is turned into a compile-time truth value of the condition. */
5642 stacksize = GET2(trypath, 1);
5643 if (common->currententry == NULL)
5644 stacksize = 0;
5645 else if (stacksize == RREF_ANY)
5646 stacksize = 1;
5647 else if (common->currententry->start == 0)
5648 stacksize = stacksize == 0;
5649 else
5650 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5651
5652 if (*trypath == OP_RREF || stacksize || common->currententry == NULL)
5653 {
/* Decided at compile time: either continue with the first alternative, or
redirect trypath / cc to the second one (or to the ket when there is none). */
5654 SLJIT_ASSERT(!has_alternatives);
5655 if (stacksize != 0)
5656 trypath += 1 + IMM2_SIZE;
5657 else
5658 {
5659 if (*cc == OP_ALT)
5660 {
5661 trypath = cc + 1 + LINK_SIZE;
5662 cc += GET(cc, 1);
5663 }
5664 else
5665 trypath = cc;
5666 }
5667 }
5668 else
5669 {
/* Decided at run time by do_searchgroups: the number read at
currententry->start is passed in POSSESSIVE0 and the referenced number in
SLJIT_TEMPORARY_REG1; STACK_TOP is preserved in POSSESSIVE1 as above. */
5670 SLJIT_ASSERT(has_alternatives);
5671
5672 stacksize = GET2(trypath, 1);
5673 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5674 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5675 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5676 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5677 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
5678 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5679 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5680 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5681 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5682 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5683 trypath += 1 + IMM2_SIZE;
5684 }
5685 }
5686 else
5687 {
/* The condition is an assertion: it gets its own assert_backtrack record
and is compiled by compile_assert_trypath with its last argument TRUE. */
5688 SLJIT_ASSERT(has_alternatives && *trypath >= OP_ASSERT && *trypath <= OP_ASSERTBACK_NOT);
5689 /* Similar code as PUSH_BACKTRACK macro. */
5690 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5691 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5692 return NULL;
5693 memset(assert, 0, sizeof(assert_backtrack));
5694 assert->common.cc = trypath;
5695 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5696 trypath = compile_assert_trypath(common, trypath, assert, TRUE);
5697 }
5698 }
5699
/* Emit the body of the selected alternative: everything from trypath to cc. */
5700 compile_trypath(common, trypath, cc, backtrack);
5701 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5702 return NULL;
5703
/* For OP_ONCE, STACK_TOP is recomputed from localptr after the body. */
5704 if (opcode == OP_ONCE)
5705 {
5706 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5707 {
5708 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5709 /* TMP2 which is set here used by OP_KETRMAX below. */
5710 if (ket == OP_KETRMAX)
5711 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5712 else if (ket == OP_KETRMIN)
5713 {
5714 /* Move the STR_PTR to the localptr. */
5715 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_MEM1(STACK_TOP), 0);
5716 }
5717 }
5718 else
5719 {
/* 2 or 1 mirrors the number of extra slots pushed with the frame above. */
5720 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5721 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
5722 if (ket == OP_KETRMAX)
5723 {
5724 /* TMP2 which is set here used by OP_KETRMAX below. */
5725 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5726 }
5727 }
5728 }
5729
/* Slots pushed after the alternative: STR_PTR for an iterated group or a 0
for a zero / minzero prefix, plus a 0 for the alternative (except OP_ONCE). */
5730 stacksize = 0;
5731 if (ket != OP_KET || bra != OP_BRA)
5732 stacksize++;
5733 if (has_alternatives && opcode != OP_ONCE)
5734 stacksize++;
5735
5736 if (stacksize > 0)
5737 allocate_stack(common, stacksize);
5738
5739 stacksize = 0;
5740 if (ket != OP_KET)
5741 {
5742 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5743 stacksize++;
5744 }
5745 else if (bra != OP_BRA)
5746 {
5747 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5748 stacksize++;
5749 }
5750
5751 if (has_alternatives)
5752 {
5753 if (opcode != OP_ONCE)
5754 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5755 if (ket != OP_KETRMAX)
5756 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5757 }
5758
5759 /* Must be after the trypath label. */
/* Capturing group: store the start kept in localptr and the current STR_PTR
into the two ovector entries of the group. */
5760 if (offset != 0)
5761 {
5762 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5763 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5764 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5765 }
5766
5767 if (ket == OP_KETRMAX)
5768 {
5769 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5770 {
5771 if (has_alternatives)
5772 BACKTRACK_AS(bracket_backtrack)->alttrypath = LABEL();
5773 /* Checking zero-length iteration. */
/* The loop is repeated only when STR_PTR differs from the start position
of this iteration. */
5774 if (opcode != OP_ONCE)
5775 {
5776 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STR_PTR, 0, rmaxlabel);
5777 /* Drop STR_PTR for greedy plus quantifier. */
5778 if (bra != OP_BRAZERO)
5779 free_stack(common, 1);
5780 }
5781 else
5782 /* TMP2 must contain the starting STR_PTR. */
5783 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5784 }
5785 else
5786 JUMPTO(SLJIT_JUMP, rmaxlabel);
5787 BACKTRACK_AS(bracket_backtrack)->recursivetrypath = LABEL();
5788 }
5789
5790 if (bra == OP_BRAZERO)
5791 BACKTRACK_AS(bracket_backtrack)->zerotrypath = LABEL();
5792
5793 if (bra == OP_BRAMINZERO)
5794 {
5795 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5796 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->trypath);
5797 if (braminzerojump != NULL)
5798 {
5799 JUMPHERE(braminzerojump);
5800 /* We need to release the end pointer to perform the
5801 backtrack for the zero-length iteration. When
5802 framesize is < 0, OP_ONCE will do the release itself. */
5803 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5804 {
5805 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5806 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5807 }
5808 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5809 free_stack(common, 1);
5810 }
5811 /* Continue to the normal backtrack. */
5812 }
5813
/* decrease_call_count is emitted for every iterated or OP_BRAZERO group
that is not prefixed by OP_BRAMINZERO. */
5814 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5815 decrease_call_count(common);
5816
5817 /* Skip the other alternatives. */
/* The result points just past the closing ket and its link field. */
5818 while (*cc == OP_ALT)
5819 cc += GET(cc, 1);
5820 cc += 1 + LINK_SIZE;
5821 return cc;
5822 }
5823
5824 static pcre_uchar *compile_bracketpos_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5825 {
5826 DEFINE_COMPILER;
5827 backtrack_common *backtrack;
5828 pcre_uchar opcode;
5829 int localptr;
5830 int cbraprivptr = 0;
5831 int framesize;
5832 int stacksize;
5833 int offset = 0;
5834 BOOL zero = FALSE;
5835 pcre_uchar *ccbegin = NULL;
5836 int stack;
5837 struct sljit_label *loop = NULL;
5838 struct jump_list *emptymatch = NULL;
5839
5840 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
5841 if (*cc == OP_BRAPOSZERO)
5842 {
5843 zero = TRUE;
5844 cc++;
5845 }
5846
5847 opcode = *cc;
5848 localptr = PRIV_DATA(cc);
5849 SLJIT_ASSERT(localptr != 0);
5850 BACKTRACK_AS(bracketpos_backtrack)->localptr = localptr;
5851 switch(opcode)
5852 {
5853 case OP_BRAPOS:
5854 case OP_SBRAPOS:
5855 ccbegin = cc + 1 + LINK_SIZE;
5856 break;
5857
5858 case OP_CBRAPOS:
5859 case OP_SCBRAPOS:
5860 offset = GET2(cc, 1 + LINK_SIZE);
5861 cbraprivptr = OVECTOR_PRIV(offset);
5862 offset <<= 1;
5863 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5864 break;
5865
5866 default:
5867 SLJIT_ASSERT_STOP();
5868 break;
5869 }
5870
5871 framesize = get_framesize(common, cc, FALSE);
5872 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
5873 if (framesize < 0)
5874 {
5875 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
5876 if (!zero)
5877 stacksize++;
5878 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5879 allocate_stack(common, stacksize);
5880 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, STACK_TOP, 0);
5881
5882 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5883 {
5884 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5885 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5886 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5887 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5888 }
5889 else
5890 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5891
5892 if (!zero)
5893 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
5894 }
5895 else
5896 {
5897 stacksize = framesize + 1;
5898 if (!zero)
5899 stacksize++;
5900 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5901 stacksize++;
5902 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5903 allocate_stack(common, stacksize);
5904
5905 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5906 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5907 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, TMP2, 0);
5908 stack = 0;
5909 if (!zero)
5910 {
5911 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5912 stack++;
5913 }
5914 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5915 {
5916 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
5917 stack++;
5918 }
5919 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
5920 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
5921 }
5922
5923 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5924 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5925
5926 loop = LABEL();
5927 while (*cc != OP_KETRPOS)
5928 {
5929 backtrack->top = NULL;
5930 backtrack->topbacktracks = NULL;
5931 cc += GET(cc, 1);
5932
5933 compile_trypath(common, ccbegin, cc, backtrack);
5934 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5935 return NULL;
5936
5937 if (framesize < 0)
5938 {
5939 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5940
5941 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5942 {
5943 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5944 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5945 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5946 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5947 }
5948 else
5949 {
5950 if (opcode == OP_SBRAPOS)
5951 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5952 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5953 }
5954
5955 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5956 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5957
5958 if (!zero)
5959 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5960 }
5961 else
5962 {
5963 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5964 {
5965 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
5966 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5967 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5968 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5969 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5970 }
5971 else
5972 {
5973 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
5974 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
5975 if (opcode == OP_SBRAPOS)
5976 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5977 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
5978 }
5979
5980 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5981 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5982
5983 if (!zero)
5984 {
5985 if (framesize < 0)
5986 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5987 else
5988 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5989 }
5990 }
5991 JUMPTO(SLJIT_JUMP, loop);
5992 flush_stubs(common);
5993
5994 compile_backtrackpath(common, backtrack->top);
5995 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5996 return NULL;
5997 set_jumps(backtrack->topbacktracks, LABEL());
5998
5999 if (framesize < 0)
6000 {
6001 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6002 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6003 else
6004 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6005 }
6006 else
6007 {
6008 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6009 {
6010 /* Last alternative. */
6011 if (*cc == OP_KETRPOS)
6012 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6013 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6014 }
6015 else
6016 {
6017 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), localptr);
6018 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6019 }
6020 }
6021
6022 if (*cc == OP_KETRPOS)
6023 break;
6024 ccbegin = cc + 1 + LINK_SIZE;
6025 }
6026
6027 backtrack->topbacktracks = NULL;
6028 if (!zero)
6029 {
6030 if (framesize < 0)
6031 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6032 else /* TMP2 is set to [localptr] above. */
6033 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
6034 }
6035
6036 /* None of them matched. */
6037 set_jumps(emptymatch, LABEL());
6038 decrease_call_count(common);
6039 return cc + 1 + LINK_SIZE;
6040 }
6041
6042 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6043 {
6044 int class_len;
6045
6046 *opcode = *cc;
6047 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6048 {
6049 cc++;
6050 *type = OP_CHAR;
6051 }
6052 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6053 {
6054 cc++;
6055 *type = OP_CHARI;
6056 *opcode -= OP_STARI - OP_STAR;
6057 }
6058 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6059 {
6060 cc++;
6061 *type = OP_NOT;
6062 *opcode -= OP_NOTSTAR - OP_STAR;
6063 }
6064 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6065 {
6066 cc++;
6067 *type = OP_NOTI;
6068 *opcode -= OP_NOTSTARI - OP_STAR;
6069 }
6070 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6071 {
6072 cc++;
6073 *opcode -= OP_TYPESTAR - OP_STAR;
6074 *type = 0;
6075 }
6076 else
6077 {
6078 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6079 *type = *opcode;
6080 cc++;
6081 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6082 *opcode = cc[class_len - 1];
6083 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6084 {
6085 *opcode -= OP_CRSTAR - OP_STAR;
6086 if (end != NULL)
6087 *end = cc + class_len;
6088 }
6089 else
6090 {
6091 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6092 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6093 *arg2 = GET2(cc, class_len);
6094
6095 if (*arg2 == 0)
6096 {
6097 SLJIT_ASSERT(*arg1 != 0);
6098 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6099 }
6100 if (*arg1 == *arg2)
6101 *opcode = OP_EXACT;
6102
6103 if (end != NULL)
6104 *end = cc + class_len + 2 * IMM2_SIZE;
6105 }
6106 return cc;
6107 }
6108
6109 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6110 {
6111 *arg1 = GET2(cc, 0);
6112 cc += IMM2_SIZE;
6113 }
6114
6115 if (*type == 0)
6116 {
6117 *type = *cc;
6118 if (end != NULL)
6119 *end = next_opcode(common, cc);
6120 cc++;
6121 return cc;
6122 }
6123
6124 if (end != NULL)
6125 {
6126 *end = cc + 1;
6127 #ifdef SUPPORT_UTF
6128 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6129 #endif
6130 }
6131 return cc;
6132 }
6133
/* Generates the try-path (forward matching) code for a repeated single item:
a character, a character type or a character class followed by a repeat.

Arguments:
  common   compiler state
  cc       points to the repeat opcode (or to the class opcode)
  parent   backtrack record that receives the new iterator_backtrack record

Returns:   the "end" address reported by get_iterator_parameters(), i.e. the
           address just past the whole repeated item. PUSH_BACKTRACK is
           given NULL as its third argument, presumably the value returned
           when the record cannot be allocated (macro not shown here).

When PRIV_DATA(cc) is zero the iterator state is kept in two machine stack
slots (STACK(0) and STACK(1)); otherwise it is kept in the local area at
localptr and localptr + sizeof(sljit_w). */
6134 static pcre_uchar *compile_iterator_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6135 {
6136 DEFINE_COMPILER;
6137 backtrack_common *backtrack;
6138 pcre_uchar opcode;
6139 pcre_uchar type;
6140 int arg1 = -1, arg2 = -1;
6141 pcre_uchar* end;
6142 jump_list *nomatch = NULL;
6143 struct sljit_jump *jump = NULL;
6144 struct sljit_label *label;
6145 int localptr = PRIV_DATA(cc);
6146 int base = (localptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6147 int offset0 = (localptr == 0) ? STACK(0) : localptr;
6148 int offset1 = (localptr == 0) ? STACK(1) : localptr + (int)sizeof(sljit_w);
6149 int tmp_base, tmp_offset;
6150
6151 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6152
6153 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6154
/* Select the scratch location used by the OP_EXACT and possessive forms
below: the TMP3 register for the item types listed first, the POSSESSIVE0
local slot for the remaining ones. */
6155 switch (type)
6156 {
6157 case OP_NOT_DIGIT:
6158 case OP_DIGIT:
6159 case OP_NOT_WHITESPACE:
6160 case OP_WHITESPACE:
6161 case OP_NOT_WORDCHAR:
6162 case OP_WORDCHAR:
6163 case OP_ANY:
6164 case OP_ALLANY:
6165 case OP_ANYBYTE:
6166 case OP_ANYNL:
6167 case OP_NOT_HSPACE:
6168 case OP_HSPACE:
6169 case OP_NOT_VSPACE:
6170 case OP_VSPACE:
6171 case OP_CHAR:
6172 case OP_CHARI:
6173 case OP_NOT:
6174 case OP_NOTI:
6175 case OP_CLASS:
6176 case OP_NCLASS:
6177 tmp_base = TMP3;
6178 tmp_offset = 0;
6179 break;
6180
6181 default:
6182 SLJIT_ASSERT_STOP();
6183 /* Fall through. */
6184
6185 case OP_EXTUNI:
6186 case OP_XCLASS:
6187 case OP_NOTPROP:
6188 case OP_PROP:
6189 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6190 tmp_offset = POSSESSIVE0;
6191 break;
6192 }
6193
6194 switch(opcode)
6195 {
/* Greedy repeats. */
6196 case OP_STAR:
6197 case OP_PLUS:
6198 case OP_UPTO:
6199 case OP_CRRANGE:
6200 if (type == OP_ANYNL || type == OP_EXTUNI)
6201 {
/* For these two item types every position reached is pushed onto the
machine stack, one slot per successful match of the item, on top of an
initial slot holding zero. The POSSESSIVE0 local slot is the repeat counter
of the counted forms. */
6202 SLJIT_ASSERT(localptr == 0);
6203 if (opcode == OP_STAR || opcode == OP_UPTO)
6204 {
6205 allocate_stack(common, 2);
6206 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6208 }
6209 else
6210 {
6211 allocate_stack(common, 1);
6212 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6213 }
6214
6215 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6216 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6217
6218 label = LABEL();
6219 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6220 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6221 {
6222 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6223 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6224 if (opcode == OP_CRRANGE && arg2 > 0)
6225 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6226 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6227 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6228 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6229 }
6230
6231 /* We cannot use TMP3 because of this allocate_stack. */
6232 allocate_stack(common, 1);
6233 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6234 JUMPTO(SLJIT_JUMP, label);
6235 if (jump != NULL)
6236 JUMPHERE(jump);
6237 }
6238 else
6239 {
/* All other item types: offset0 holds the position after the last
successful match of the item. offset1 holds the starting position when
opcode <= OP_PLUS (presumably OP_STAR / OP_PLUS only; this relies on the
opcode numbering), otherwise a counter that starts at 1. */
6240 if (opcode == OP_PLUS)
6241 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6242 if (localptr == 0)
6243 allocate_stack(common, 2);
6244 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6245 if (opcode <= OP_PLUS)
6246 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6247 else
6248 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6249 label = LABEL();
6250 compile_char1_trypath(common, type, cc, &nomatch);
6251 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6252 if (opcode <= OP_PLUS)
6253 JUMPTO(SLJIT_JUMP, label);
6254 else if (opcode == OP_CRRANGE && arg1 == 0)
6255 {
/* arg1 == 0: the loop is not bounded by arg1, only the counter is
incremented. */
6256 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6257 JUMPTO(SLJIT_JUMP, label);
6258 }
6259 else
6260 {
6261 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6262 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6263 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6264 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6265 }
/* The item failed to match (or the loop limit was reached): for OP_CRRANGE
backtrack when the counter is below arg2 + 1, then restore the last saved
position. */
6266 set_jumps(nomatch, LABEL());
6267 if (opcode == OP_CRRANGE)
6268 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6269 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6270 }
6271 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6272 break;
6273
/* Lazy repeats: only the mandatory first match (OP_MINPLUS) is generated
here and the current position is saved; the recorded trypath label is the
point where matching continues. */
6274 case OP_MINSTAR:
6275 case OP_MINPLUS:
6276 if (opcode == OP_MINPLUS)
6277 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6278 if (localptr == 0)
6279 allocate_stack(common, 1);
6280 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6281 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6282 break;
6283
/* Lazy counted repeats: save the position and start the counter at 1. For
OP_CRMINRANGE an unconditional jump is queued on the backtrack list. */
6284 case OP_MINUPTO:
6285 case OP_CRMINRANGE:
6286 if (localptr == 0)
6287 allocate_stack(common, 2);
6288 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6289 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6290 if (opcode == OP_CRMINRANGE)
6291 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6292 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6293 break;
6294
/* Optional item: the starting position is saved; the greedy form then tries
the item once, the lazy form tries nothing here. */
6295 case OP_QUERY:
6296 case OP_MINQUERY:
6297 if (localptr == 0)
6298 allocate_stack(common, 1);
6299 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6300 if (opcode == OP_QUERY)
6301 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6302 BACKTRACK_AS(iterator_backtrack)->trypath = LABEL();
6303 break;
6304
/* Exact count: the scratch location counts down from arg1 to zero; any
failure of the item goes to the backtrack list. */
6305 case OP_EXACT:
6306 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6307 label = LABEL();
6308 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6309 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6310 JUMPTO(SLJIT_C_NOT_ZERO, label);
6311 break;
6312
/* Possessive repeats: no stack slot is allocated here. The scratch location
keeps the position after the last successful match and is restored into
STR_PTR when the item stops matching; POSSESSIVE1 counts down from arg1 for
OP_POSUPTO. */
6313 case OP_POSSTAR:
6314 case OP_POSPLUS:
6315 case OP_POSUPTO:
6316 if (opcode == OP_POSPLUS)
6317 compile_char1_trypath(common, type, cc, &backtrack->topbacktracks);
6318 if (opcode == OP_POSUPTO)
6319 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6320 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6321 label = LABEL();
6322 compile_char1_trypath(common, type, cc, &nomatch);
6323 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6324 if (opcode != OP_POSUPTO)
6325 JUMPTO(SLJIT_JUMP, label);
6326 else
6327 {
6328 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6329 JUMPTO(SLJIT_C_NOT_ZERO, label);
6330 }
6331 set_jumps(nomatch, LABEL());
6332 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6333 break;
6334
/* Possessive optional item: try once; the saved position is reloaded whether
or not the item matched. */
6335 case OP_POSQUERY:
6336 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6337 compile_char1_trypath(common, type, cc, &nomatch);
6338 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6339 set_jumps(nomatch, LABEL());
6340 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6341 break;
6342
6343 default:
6344 SLJIT_ASSERT_STOP();
6345 break;
6346 }
6347
6348 decrease_call_count(common);
6349 return end;
6350 }
6351
6352 static SLJIT_INLINE pcre_uchar *compile_fail_accept_trypath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6353 {
6354 DEFINE_COMPILER;
6355 backtrack_common *backtrack;
6356
6357 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6358
6359 if (*cc == OP_FAIL)
6360 {
6361 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6362 return cc + 1;
6363 }
6364
6365 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6366 {
6367 /* No need to check notempty conditions. */
6368 if (common->acceptlabel == NULL)
6369 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6370 else
6371 JUMPTO(SLJIT_JUMP, common->acceptlabel);
6372 return cc + 1;
6373 }
6374
6375 if (common->acceptlabel == NULL)
6376 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6377 else
6378 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
6379 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6380 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6381 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6382 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6383 if (common->acceptlabel == NULL)
6384 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6385 else
6386 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
6387 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6388 if (common->acceptlabel == NULL)
6389 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6390 else
6391 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
6392 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6393 return cc + 1;
6394 }
6395
6396 static SLJIT_INLINE pcre_uchar *compile_close_trypath(compiler_common *common, pcre_uchar *cc)
6397 {
6398 DEFINE_COMPILER;
6399 int offset = GET2(cc, 1);
6400
6401 /* Data will be discarded anyway... */
6402 if (common->currententry != NULL)
6403 return cc + 1 + IMM2_SIZE;
6404
6405 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6406 offset <<= 1;
6407 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6408 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6409 return cc + 1 + IMM2_SIZE;
6410 }
6411
6412 static void compile_trypath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6413 {
6414 DEFINE_COMPILER;
6415 backtrack_common *backtrack;
6416
6417 while (cc < ccend)
6418 {
6419 switch(*cc)
6420 {
6421 case OP_SOD:
6422 case OP_SOM:
6423 case OP_NOT_WORD_BOUNDARY:
6424 case OP_WORD_BOUNDARY:
6425 case OP_NOT_DIGIT:
6426 case OP_DIGIT:
6427 case OP_NOT_WHITESPACE:
6428 case OP_WHITESPACE:
6429 case OP_NOT_WORDCHAR:
6430 case OP_WORDCHAR:
6431 case OP_ANY:
6432 case OP_ALLANY:
6433 case OP_ANYBYTE:
6434 case OP_NOTPROP:
6435 case OP_PROP:
6436 case OP_ANYNL:
6437 case OP_NOT_HSPACE:
6438 case OP_HSPACE:
6439 case OP_NOT_VSPACE:
6440 case OP_VSPACE:
6441 case OP_EXTUNI:
6442 case OP_EODN:
6443 case OP_EOD:
6444 case OP_CIRC:
6445 case OP_CIRCM:
6446 case OP_DOLL:
6447 case OP_DOLLM:
6448 case OP_NOT:
6449 case OP_NOTI:
6450 case OP_REVERSE:
6451 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6452 break;
6453
6454 case OP_SET_SOM:
6455 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6456 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6457 allocate_stack(common, 1);
6458 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6459 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6460 cc++;
6461 break;
6462
6463 case OP_CHAR:
6464 case OP_CHARI:
6465 if (common->mode == JIT_COMPILE)
6466 cc = compile_charn_trypath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6467 else
6468 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6469 break;
6470
6471 case OP_STAR:
6472 case OP_MINSTAR:
6473 case OP_PLUS:
6474 case OP_MINPLUS:
6475 case OP_QUERY:
6476 case OP_MINQUERY:
6477 case OP_UPTO:
6478 case OP_MINUPTO:
6479 case OP_EXACT:
6480 case OP_POSSTAR:
6481 case OP_POSPLUS:
6482 case OP_POSQUERY:
6483 case OP_POSUPTO:
6484 case OP_STARI:
6485 case OP_MINSTARI:
6486 case OP_PLUSI:
6487 case OP_MINPLUSI:
6488 case OP_QUERYI:
6489 case OP_MINQUERYI:
6490 case OP_UPTOI:
6491 case OP_MINUPTOI:
6492 case OP_EXACTI:
6493 case OP_POSSTARI:
6494 case OP_POSPLUSI:
6495 case OP_POSQUERYI:
6496 case OP_POSUPTOI:
6497 case OP_NOTSTAR:
6498 case OP_NOTMINSTAR:
6499 case OP_NOTPLUS:
6500 case OP_NOTMINPLUS:
6501 case OP_NOTQUERY:
6502 case OP_NOTMINQUERY:
6503 case OP_NOTUPTO:
6504 case OP_NOTMINUPTO:
6505 case OP_NOTEXACT:
6506 case OP_NOTPOSSTAR:
6507 case OP_NOTPOSPLUS:
6508 case OP_NOTPOSQUERY:
6509 case OP_NOTPOSUPTO:
6510 case OP_NOTSTARI:
6511 case OP_NOTMINSTARI:
6512 case OP_NOTPLUSI:
6513 case OP_NOTMINPLUSI:
6514 case OP_NOTQUERYI:
6515 case OP_NOTMINQUERYI:
6516 case OP_NOTUPTOI:
6517 case OP_NOTMINUPTOI:
6518 case OP_NOTEXACTI:
6519 case OP_NOTPOSSTARI:
6520 case OP_NOTPOSPLUSI:
6521 case OP_NOTPOSQUERYI:
6522 case OP_NOTPOSUPTOI:
6523 case OP_TYPESTAR:
6524 case OP_TYPEMINSTAR:
6525 case OP_TYPEPLUS:
6526 case OP_TYPEMINPLUS:
6527 case OP_TYPEQUERY:
6528 case OP_TYPEMINQUERY:
6529 case OP_TYPEUPTO:
6530 case OP_TYPEMINUPTO:
6531 case OP_TYPEEXACT:
6532 case OP_TYPEPOSSTAR:
6533 case OP_TYPEPOSPLUS:
6534 case OP_TYPEPOSQUERY:
6535 case OP_TYPEPOSUPTO:
6536 cc = compile_iterator_trypath(common, cc, parent);
6537 break;
6538
6539 case OP_CLASS:
6540 case OP_NCLASS:
6541 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
6542 cc = compile_iterator_trypath(common, cc, parent);
6543 else
6544 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6545 break;
6546
6547 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
6548 case OP_XCLASS:
6549 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
6550 cc = compile_iterator_trypath(common, cc, parent);
6551 else
6552 cc = compile_char1_trypath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6553 break;
6554 #endif
6555
6556 case OP_REF:
6557 case OP_REFI:
6558 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
6559 cc = compile_ref_iterator_trypath(common, cc, parent);
6560 else
6561 cc = compile_ref_trypath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
6562 break;
6563
6564 case OP_RECURSE:
6565 cc = compile_recurse_trypath(common, cc, parent);
6566 break;
6567
6568 case OP_ASSERT:
6569 case OP_ASSERT_NOT:
6570 case OP_ASSERTBACK:
6571 case OP_ASSERTBACK_NOT:
6572 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6573 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6574 break;
6575
6576 case OP_BRAMINZERO:
6577 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
6578 cc = bracketend(cc + 1);
6579 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
6580 {
6581 allocate_stack(common, 1);
6582 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6583 }
6584 else
6585 {
6586 allocate_stack(common, 2);
6587 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6588 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
6589 }
6590 BACKTRACK_AS(braminzero_backtrack)->trypath = LABEL();
6591 if (cc[1] > OP_ASSERTBACK_NOT)
6592 decrease_call_count(common);
6593 break;
6594
6595 case OP_ONCE:
6596 case OP_ONCE_NC:
6597 case OP_BRA:
6598 case OP_CBRA:
6599 case OP_COND:
6600 case OP_SBRA:
6601 case OP_SCBRA:
6602 case OP_SCOND:
6603 cc = compile_bracket_trypath(common, cc, parent);
6604 break;
6605
6606 case OP_BRAZERO:
6607 if (cc[1] > OP_ASSERTBACK_NOT)
6608 cc = compile_bracket_trypath(common, cc, parent);
6609 else
6610 {
6611 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6612 cc = compile_assert_trypath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6613 }
6614 break;
6615
6616 case OP_BRAPOS:
6617 case OP_CBRAPOS:
6618 case OP_SBRAPOS:
6619 case OP_SCBRAPOS:
6620 case OP_BRAPOSZERO:
6621 cc = compile_bracketpos_trypath(common, cc, parent);
6622 break;
6623
6624 case OP_MARK:
6625 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6626 SLJIT_ASSERT(common->mark_ptr != 0);
6627 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6628 allocate_stack(common, 1);
6629 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6630 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6631 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
6632 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
6633 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
6634 cc += 1 + 2 + cc[1];
6635 break;
6636
6637 case OP_COMMIT:
6638 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6639 cc += 1;
6640 break;
6641
6642 case OP_FAIL:
6643 case OP_ACCEPT:
6644 case OP_ASSERT_ACCEPT:
6645 cc = compile_fail_accept_trypath(common, cc, parent);
6646 break;
6647
6648 case OP_CLOSE:
6649 cc = compile_close_trypath(common, cc);
6650 break;
6651
6652 case OP_SKIPZERO:
6653 cc = bracketend(cc + 1);
6654 break;
6655
6656 default:
6657 SLJIT_ASSERT_STOP();
6658 return;
6659 }
6660 if (cc == NULL)
6661 return;
6662 }
6663 SLJIT_ASSERT(cc == ccend);
6664 }
6665
/* The helper macros of the try-path generators are not needed past this
point. */
6666 #undef PUSH_BACKTRACK
6667 #undef PUSH_BACKTRACK_NOVALUE
6668 #undef BACKTRACK_AS
6669
/* Generates the backtrack path of the given record and returns from the
enclosing function (which must return void) when the sljit compiler reports
an error. Expects "common" and "compiler" to be in scope. */
6670 #define COMPILE_BACKTRACKPATH(current) \
6671 do \
6672 { \
6673 compile_backtrackpath(common, (current)); \
6674 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6675 return; \
6676 } \
6677 while (0)
6678
/* Views the variable named "current" of the enclosing function as a pointer
to the given backtrack record type. */
6679 #define CURRENT_AS(type) ((type *)current)
6680
6681 static void compile_iterator_backtrackpath(compiler_common *common, struct backtrack_common *current)
6682 {
6683 DEFINE_COMPILER;
6684 pcre_uchar *cc = current->cc;
6685 pcre_uchar opcode;
6686 pcre_uchar type;
6687 int arg1 = -1, arg2 = -1;
6688 struct sljit_label *label = NULL;
6689 struct sljit_jump *jump = NULL;
6690 jump_list *jumplist = NULL;
6691 int localptr = PRIV_DATA(cc);
6692 int base = (localptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6693 int offset0 = (localptr == 0) ? STACK(0) : localptr;
6694 int offset1 = (localptr == 0) ? STACK(1) : localptr + (int)sizeof(sljit_w);
6695
6696 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
6697
6698 switch(opcode)
6699 {
6700 case OP_STAR:
6701 case OP_PLUS:
6702 case OP_UPTO:
6703 case OP_CRRANGE:
6704 if (type == OP_ANYNL || type == OP_EXTUNI)
6705 {
6706 SLJIT_ASSERT(localptr == 0);
6707 set_jumps(current->topbacktracks, LABEL());
6708 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6709 free_stack(common, 1);
6710 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6711 }
6712 else
6713 {
6714 if (opcode == OP_UPTO)
6715 arg2 = 0;
6716 if (opcode <= OP_PLUS)
6717 {
6718 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6719 jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1);
6720 }
6721 else
6722 {
6723 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6724 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6725 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);
6726 OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
6727 }
6728 skip_char_back(common);
6729 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6730 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6731 if (opcode == OP_CRRANGE)
6732 set_jumps(current->topbacktracks, LABEL());
6733 JUMPHERE(jump);
6734 if (localptr == 0)
6735 free_stack(common, 2);
6736 if (opcode == OP_PLUS)
6737 set_jumps(current->topbacktracks, LABEL());
6738 }
6739 break;
6740
6741 case OP_MINSTAR:
6742 case OP_MINPLUS:
6743 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6744 compile_char1_trypath(common, type, cc, &jumplist);
6745 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6746 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6747 set_jumps(jumplist, LABEL());
6748 if (localptr == 0)
6749 free_stack(common, 1);
6750 if (opcode == OP_MINPLUS)
6751 set_jumps(current->topbacktracks, LABEL());
6752 break;
6753
6754 case OP_MINUPTO:
6755 case OP_CRMINRANGE:
6756 if (opcode == OP_CRMINRANGE)
6757 {
6758 label = LABEL();
6759 set_jumps(current->topbacktracks, label);
6760 }
6761 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6762 compile_char1_trypath(common, type, cc, &jumplist);
6763
6764 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6765 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6766 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6767 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6768
6769 if (opcode == OP_CRMINRANGE)
6770 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
6771
6772 if (opcode == OP_CRMINRANGE && arg1 == 0)
6773 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6774 else
6775 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->trypath);
6776
6777 set_jumps(jumplist, LABEL());
6778 if (localptr == 0)
6779 free_stack(common, 2);
6780 break;
6781
6782 case OP_QUERY:
6783 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6784 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
6785 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->trypath);
6786 jump = JUMP(SLJIT_JUMP);
6787 set_jumps(current->topbacktracks, LABEL());
6788 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6789 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
6790 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->trypath);
6791 JUMPHERE(jump);
6792 if (localptr == 0)
6793