/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1176 - (show annotations)
Sat Oct 27 15:46:35 2012 UTC (7 years ago) by ph10
File MIME type: text/plain
File size: 261542 byte(s)
Error occurred while calculating annotation data.
Get rid of signed/unsigned compiler warnings (Zoltan's patch).
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Allocate memory for the regex stack on the real machine stack.
69 Fast, but limited size. */
70 #define MACHINE_STACK_SIZE 32768
71
72 /* Growth rate for stack allocated by the OS. Should be a multiple
73 of the page size. */
74 #define STACK_GROWTH_RATE 8192
75
76 /* Only defined when SLJIT_DEBUG is on: enable to check that the allocation could destroy temporaries. */
77 #if defined SLJIT_DEBUG && SLJIT_DEBUG
78 #define DESTROY_REGISTERS 1
79 #endif
80
81 /*
82 Short summary of the backtracking mechanism employed by the jit code generator:
83
84 The code generator follows the recursive nature of the PERL compatible regular
85 expressions. The basic blocks of regular expressions are condition checkers
86 which execute different commands depending on the result of the condition check.
87 The relationship between the operators can be horizontal (concatenation) and
88 vertical (sub-expression) (See struct backtrack_common for more details).
89
90 'ab' - 'a' and 'b' regexps are concatenated
91 'a+' - 'a' is the sub-expression of the '+' operator
92
93 The condition checkers are boolean (true/false) checkers. Machine code is generated
94 for the checker itself and for the actions depending on the result of the checker.
95 The 'true' case is called the matching path (expected path), and the other is called
96 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
97 branches on the matching path.
98
99 Greedy star operator (*) :
100 Matching path: match happens.
101 Backtrack path: match failed.
102 Non-greedy star operator (*?) :
103 Matching path: no need to perform a match.
104 Backtrack path: match is required.
105
106 The following example shows the code generated for a capturing bracket
107 with two alternatives. Let A, B, C and D be arbitrary regular expressions, and
108 assume we have the following regular expression:
109
110 A(B|C)D
111
112 The generated code will be the following:
113
114 A matching path
115 '(' matching path (pushing arguments to the stack)
116 B matching path
117 ')' matching path (pushing arguments to the stack)
118 D matching path
119 return with successful match
120
121 D backtrack path
122 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
123 B backtrack path
124 C expected path
125 jump to D matching path
126 C backtrack path
127 A backtrack path
128
129 Notice that the order of the backtrack code paths is the opposite of the fast
130 code paths. In this way the topmost value on the stack always belongs
131 to the current backtrack code path. The backtrack path must check
132 whether there is a next alternative. If so, it needs to jump back to
133 the matching path eventually. Otherwise it needs to clear out its own stack
134 frame and continue the execution on the backtrack code paths.
135 */
136
137 /*
138 Saved stack frames:
139
140 Atomic blocks and asserts require reloading the values of private data
141 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
142 are not necessarily known at compile time, thus we need a dynamic restore
143 mechanism.
144
145 The stack frames are stored in a chain list, and have the following format:
146 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
147
148 Thus we can restore the private data to a particular point in the stack.
149 */
150
/* Argument block of the generated code: the jit_function type declared in
this file takes a pointer to this structure. Pointer members are grouped
before the integer and byte sized members. */
151 typedef struct jit_arguments {
152 /* Pointers first. */
153 struct sljit_stack *stack;
154 const pcre_uchar *str;
155 const pcre_uchar *begin;
156 const pcre_uchar *end;
157 int *offsets;
158 pcre_uchar *uchar_ptr;
159 pcre_uchar *mark_ptr;
160 /* Everything else after. */
161 int offsetcount;
162 int calllimit;
163 pcre_uint8 notbol;
164 pcre_uint8 noteol;
165 pcre_uint8 notempty;
166 pcre_uint8 notempty_atstart;
167 } jit_arguments;
168
/* Both arrays have one slot per JIT compile mode
(JIT_NUMBER_OF_COMPILE_MODES entries); the structure also carries a
callback and the user data pointer that belongs to it. */
169 typedef struct executable_functions {
170 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
171 PUBL(jit_callback) callback;
172 void *userdata;
173 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
174 } executable_functions;
175
/* Node of a singly linked list of sljit jumps. */
176 typedef struct jump_list {
177 struct sljit_jump *jump;
178 struct jump_list *next;
179 } jump_list;
180
/* Kinds of stubs; stack_alloc is the only kind defined here. */
181 enum stub_types { stack_alloc };
182
/* Node of a singly linked list of stubs: the stub kind, an integer payload,
the jump that starts the stub and the label reached when it quits. */
183 typedef struct stub_list {
184 enum stub_types type;
185 int data;
186 struct sljit_jump *start;
187 struct sljit_label *quit;
188 struct stub_list *next;
189 } stub_list;
190
/* Signature of a generated function: it takes the argument block and returns an int. */
191 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
192
193 /* The following structure is the key data type for the recursive
194 code generator. It is allocated by compile_matchingpath, and contains
195 the arguments for compile_backtrackingpath. Must be the first member
196 of its descendants. */
197 typedef struct backtrack_common {
198 /* Concatenation stack. */
199 struct backtrack_common *prev;
200 jump_list *nextbacktracks;
201 /* Internal stack (for component operators). */
202 struct backtrack_common *top;
203 jump_list *topbacktracks;
204 /* Opcode pointer. */
205 pcre_uchar *cc;
206 } backtrack_common;
207
/* Descendant of backtrack_common (which is kept as the first member). */
208 typedef struct assert_backtrack {
209 backtrack_common common;
210 jump_list *condfailed;
211 /* Less than 0 (-1) if a frame is not needed. */
212 int framesize;
213 /* Points to our private memory word on the stack. */
214 int private_data_ptr;
215 /* For iterators. */
216 struct sljit_label *matchingpath;
217 } assert_backtrack;
218
/* Descendant of backtrack_common (which is kept as the first member). */
219 typedef struct bracket_backtrack {
220 backtrack_common common;
221 /* Where to continue if an alternative is successfully matched. */
222 struct sljit_label *alternative_matchingpath;
223 /* For rmin and rmax iterators. */
224 struct sljit_label *recursive_matchingpath;
225 /* For greedy ? operator. */
226 struct sljit_label *zero_matchingpath;
227 /* Contains the branches of a failed condition. The members of the union share storage, so only one of them is meaningful at a time. */
228 union {
229 /* Both for OP_COND, OP_SCOND. */
230 jump_list *condfailed;
231 assert_backtrack *assert;
232 /* For OP_ONCE. -1 if not needed. */
233 int framesize;
234 } u;
235 /* Points to our private memory word on the stack. */
236 int private_data_ptr;
237 } bracket_backtrack;
238
/* Descendant of backtrack_common (which is kept as the first member). */
239 typedef struct bracketpos_backtrack {
240 backtrack_common common;
241 /* Points to our private memory word on the stack. */
242 int private_data_ptr;
243 /* Reverting stack is needed. */
244 int framesize;
245 /* Allocated stack size. */
246 int stacksize;
247 } bracketpos_backtrack;
248
/* Descendant of backtrack_common that additionally stores a matching path label. */
249 typedef struct braminzero_backtrack {
250 backtrack_common common;
251 struct sljit_label *matchingpath;
252 } braminzero_backtrack;
253
/* Descendant of backtrack_common that additionally stores a matching path label. */
254 typedef struct iterator_backtrack {
255 backtrack_common common;
256 /* Next iteration. */
257 struct sljit_label *matchingpath;
258 } iterator_backtrack;
259
/* Node of a singly linked list of recursion entries. */
260 typedef struct recurse_entry {
261 struct recurse_entry *next;
262 /* Contains the function entry. */
263 struct sljit_label *entry;
264 /* Collects the calls made while the function has not been created yet. */
265 jump_list *calls;
266 /* Points to the starting opcode. */
267 int start;
268 } recurse_entry;
269
/* Descendant of backtrack_common without any extra member. */
270 typedef struct recurse_backtrack {
271 backtrack_common common;
272 } recurse_backtrack;
273
/* Sizes the digits[] member of compiler_common (2 + MAX_RANGE_SIZE entries). */
274 #define MAX_RANGE_SIZE 6
275
/* State shared by the code generator functions of this file; they receive it
as their "common" argument. */
276 typedef struct compiler_common {
277 struct sljit_compiler *compiler;
/* Base of the opcode stream: private_data_ptrs is indexed by (cc - start). */
278 pcre_uchar *start;
279
280 /* Maps private data offset to each opcode. */
281 int *private_data_ptrs;
282 /* Tells whether the capturing bracket is optimized. */
283 pcre_uint8 *optimized_cbracket;
284 /* Starting offset of private data for capturing brackets. */
285 int cbraptr;
286 /* OVector starting point. Must be divisible by 2. */
287 int ovector_start;
288 /* Last known position of the requested byte. */
289 int req_char_ptr;
290 /* Head of the last recursion. */
291 int recursive_head;
292 /* First inspected character for partial matching. */
293 int start_used_ptr;
294 /* Starting pointer for partial soft matches. */
295 int hit_start;
296 /* End pointer of the first line. */
297 int first_line_end;
298 /* Points to the marked string. */
299 int mark_ptr;
300
301 /* Flipped and lower case tables. */
302 const pcre_uint8 *fcc;
303 sljit_w lcc;
304 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
305 int mode;
306 /* Newline control. */
307 int nltype;
308 int newline;
309 int bsr_nltype;
310 /* Dollar endonly. */
311 int endonly;
312 BOOL has_set_som;
313 /* Tables. */
314 sljit_w ctypes;
315 int digits[2 + MAX_RANGE_SIZE];
316 /* Named capturing brackets. */
317 sljit_uw name_table;
318 sljit_w name_count;
319 sljit_w name_entry_size;
320
321 /* Labels and jump lists. */
322 struct sljit_label *partialmatchlabel;
323 struct sljit_label *quitlabel;
324 struct sljit_label *acceptlabel;
325 stub_list *stubs;
326 recurse_entry *entries;
327 recurse_entry *currententry;
328 jump_list *partialmatch;
329 jump_list *quit;
330 jump_list *accept;
331 jump_list *calllimit;
332 jump_list *stackalloc;
333 jump_list *revertframes;
334 jump_list *wordboundary;
335 jump_list *anynewline;
336 jump_list *hspace;
337 jump_list *vspace;
338 jump_list *casefulcmp;
339 jump_list *caselesscmp;
340 BOOL jscript_compat;
/* The members below exist only in builds with UTF / UCP support. */
341 #ifdef SUPPORT_UTF
342 BOOL utf;
343 #ifdef SUPPORT_UCP
344 BOOL use_ucp;
345 #endif
346 #ifndef COMPILE_PCRE32
347 jump_list *utfreadchar;
348 #endif
349 #ifdef COMPILE_PCRE8
350 jump_list *utfreadtype8;
351 #endif
352 #endif /* SUPPORT_UTF */
353 #ifdef SUPPORT_UCP
354 jump_list *getucd;
355 #endif
356 } compiler_common;
357
358 /* For byte_sequence_compare. */
359
/* The ucharptr member and the two unions exist only when SLJIT_UNALIGNED is
set. Both unions have the same layout: an int, an unsigned short and an
array of code units whose element type follows the compiled PCRE width. */
360 typedef struct compare_context {
361 int length;
362 int sourcereg;
363 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
364 int ucharptr;
365 union {
366 sljit_i asint;
367 sljit_uh asushort;
368 #if defined COMPILE_PCRE8
369 sljit_ub asbyte;
370 sljit_ub asuchars[4];
371 #elif defined COMPILE_PCRE16
372 sljit_uh asuchars[2];
373 #elif defined COMPILE_PCRE32
374 sljit_ui asuchars[1];
375 #endif
376 } c;
377 union {
378 sljit_i asint;
379 sljit_uh asushort;
380 #if defined COMPILE_PCRE8
381 sljit_ub asbyte;
382 sljit_ub asuchars[4];
383 #elif defined COMPILE_PCRE16
384 sljit_uh asuchars[2];
385 #elif defined COMPILE_PCRE32
386 sljit_ui asuchars[1];
387 #endif
388 } oc;
389 #endif
390 } compare_context;
391
/* Marker values of saved stack frames. init_frame stores frame_setstrbegin
and frame_setmark on the stack, each followed by the saved value.
frame_end is 0 -- presumably the [ 0 ] terminator shown in the "Saved stack
frames" comment; confirm against the frame restoring code. */
392 enum {
393 frame_end = 0,
394 frame_setstrbegin = -1,
395 frame_setmark = -2
396 };
397
398 /* Undefine the sljit CMP macro; CMP is redefined below as a shortcut. */
399 #undef CMP
400
401 /* Used for accessing the elements of the stack. STACK(i) is a byte offset: slot 0 is at -sizeof(sljit_w) and each further slot is one sljit_w lower. */
402 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
403
/* Symbolic names for the sljit registers used by the generated code. */
404 #define TMP1 SLJIT_TEMPORARY_REG1
405 #define TMP2 SLJIT_TEMPORARY_REG3
406 #define TMP3 SLJIT_TEMPORARY_EREG2
407 #define STR_PTR SLJIT_SAVED_REG1
408 #define STR_END SLJIT_SAVED_REG2
409 #define STACK_TOP SLJIT_TEMPORARY_REG2
410 #define STACK_LIMIT SLJIT_SAVED_REG3
411 #define ARGUMENTS SLJIT_SAVED_EREG1
412 #define CALL_COUNT SLJIT_SAVED_EREG2
413 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
414
415 /* Local space layout. All values below are byte offsets counted in sljit_w words. */
416 /* These two locals can be used by the current opcode. */
417 #define LOCALS0 (0 * sizeof(sljit_w))
418 #define LOCALS1 (1 * sizeof(sljit_w))
419 /* Two local variables for possessive quantifiers (char1 cannot use them). */
420 #define POSSESSIVE0 (2 * sizeof(sljit_w))
421 #define POSSESSIVE1 (3 * sizeof(sljit_w))
422 /* Max limit of recursions. */
423 #define CALL_LIMIT (4 * sizeof(sljit_w))
424 /* The output vector is stored on the stack, and contains pointers
425 to characters. The vector data is divided into two groups: the first
426 group contains the start / end character pointers, and the second is
427 the start pointers when the end of the capturing group has not yet been reached. */
428 #define OVECTOR_START (common->ovector_start)
429 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
430 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
431 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
432
/* Move opcodes for one code unit; the operand width follows the compiled PCRE width. */
433 #if defined COMPILE_PCRE8
434 #define MOV_UCHAR SLJIT_MOV_UB
435 #define MOVU_UCHAR SLJIT_MOVU_UB
436 #elif defined COMPILE_PCRE16
437 #define MOV_UCHAR SLJIT_MOV_UH
438 #define MOVU_UCHAR SLJIT_MOVU_UH
439 #elif defined COMPILE_PCRE32
440 #define MOV_UCHAR SLJIT_MOV_UI
441 #define MOVU_UCHAR SLJIT_MOVU_UI
442 #else
443 #error Unsupported compiling mode
444 #endif
445
446 /* Shortcuts. Except for DEFINE_COMPILER, they expect a local variable named "compiler"; DEFINE_COMPILER declares it from common->compiler. */
447 #define DEFINE_COMPILER \
448 struct sljit_compiler *compiler = common->compiler
449 #define OP1(op, dst, dstw, src, srcw) \
450 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
451 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
452 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
453 #define LABEL() \
454 sljit_emit_label(compiler)
455 #define JUMP(type) \
456 sljit_emit_jump(compiler, (type))
457 #define JUMPTO(type, label) \
458 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
459 #define JUMPHERE(jump) \
460 sljit_set_label((jump), sljit_emit_label(compiler))
461 #define CMP(type, src1, src1w, src2, src2w) \
462 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
463 #define CMPTO(type, src1, src1w, src2, src2w, label) \
464 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
465 #define COND_VALUE(op, dst, dstw, type) \
466 sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
467 #define GET_LOCAL_BASE(dst, dstw, offset) \
468 sljit_get_local_base(compiler, (dst), (dstw), (offset))
469
470 static pcre_uchar* bracketend(pcre_uchar* cc)
471 {
472 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
473 do cc += GET(cc, 1); while (*cc == OP_ALT);
474 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
475 cc += 1 + LINK_SIZE;
476 return cc;
477 }
478
479 /* Functions which might need modification for each newly supported opcode:
480 next_opcode
481 get_private_data_length
482 set_private_data_ptrs
483 get_framesize
484 init_frame
485 get_private_data_length_for_copy
486 copy_private_data
487 compile_matchingpath
488 compile_backtrackingpath
489 */
490
491 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
492 {
493 SLJIT_UNUSED_ARG(common);
494 switch(*cc)
495 {
496 case OP_SOD:
497 case OP_SOM:
498 case OP_SET_SOM:
499 case OP_NOT_WORD_BOUNDARY:
500 case OP_WORD_BOUNDARY:
501 case OP_NOT_DIGIT:
502 case OP_DIGIT:
503 case OP_NOT_WHITESPACE:
504 case OP_WHITESPACE:
505 case OP_NOT_WORDCHAR:
506 case OP_WORDCHAR:
507 case OP_ANY:
508 case OP_ALLANY:
509 case OP_ANYNL:
510 case OP_NOT_HSPACE:
511 case OP_HSPACE:
512 case OP_NOT_VSPACE:
513 case OP_VSPACE:
514 case OP_EXTUNI:
515 case OP_EODN:
516 case OP_EOD:
517 case OP_CIRC:
518 case OP_CIRCM:
519 case OP_DOLL:
520 case OP_DOLLM:
521 case OP_TYPESTAR:
522 case OP_TYPEMINSTAR:
523 case OP_TYPEPLUS:
524 case OP_TYPEMINPLUS:
525 case OP_TYPEQUERY:
526 case OP_TYPEMINQUERY:
527 case OP_TYPEPOSSTAR:
528 case OP_TYPEPOSPLUS:
529 case OP_TYPEPOSQUERY:
530 case OP_CRSTAR:
531 case OP_CRMINSTAR:
532 case OP_CRPLUS:
533 case OP_CRMINPLUS:
534 case OP_CRQUERY:
535 case OP_CRMINQUERY:
536 case OP_DEF:
537 case OP_BRAZERO:
538 case OP_BRAMINZERO:
539 case OP_BRAPOSZERO:
540 case OP_COMMIT:
541 case OP_FAIL:
542 case OP_ACCEPT:
543 case OP_ASSERT_ACCEPT:
544 case OP_SKIPZERO:
545 return cc + 1;
546
547 case OP_ANYBYTE:
548 #ifdef SUPPORT_UTF
549 if (common->utf) return NULL;
550 #endif
551 return cc + 1;
552
553 case OP_CHAR:
554 case OP_CHARI:
555 case OP_NOT:
556 case OP_NOTI:
557 case OP_STAR:
558 case OP_MINSTAR:
559 case OP_PLUS:
560 case OP_MINPLUS:
561 case OP_QUERY:
562 case OP_MINQUERY:
563 case OP_POSSTAR:
564 case OP_POSPLUS:
565 case OP_POSQUERY:
566 case OP_STARI:
567 case OP_MINSTARI:
568 case OP_PLUSI:
569 case OP_MINPLUSI:
570 case OP_QUERYI:
571 case OP_MINQUERYI:
572 case OP_POSSTARI:
573 case OP_POSPLUSI:
574 case OP_POSQUERYI:
575 case OP_NOTSTAR:
576 case OP_NOTMINSTAR:
577 case OP_NOTPLUS:
578 case OP_NOTMINPLUS:
579 case OP_NOTQUERY:
580 case OP_NOTMINQUERY:
581 case OP_NOTPOSSTAR:
582 case OP_NOTPOSPLUS:
583 case OP_NOTPOSQUERY:
584 case OP_NOTSTARI:
585 case OP_NOTMINSTARI:
586 case OP_NOTPLUSI:
587 case OP_NOTMINPLUSI:
588 case OP_NOTQUERYI:
589 case OP_NOTMINQUERYI:
590 case OP_NOTPOSSTARI:
591 case OP_NOTPOSPLUSI:
592 case OP_NOTPOSQUERYI:
593 cc += 2;
594 #ifdef SUPPORT_UTF
595 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
596 #endif
597 return cc;
598
599 case OP_UPTO:
600 case OP_MINUPTO:
601 case OP_EXACT:
602 case OP_POSUPTO:
603 case OP_UPTOI:
604 case OP_MINUPTOI:
605 case OP_EXACTI:
606 case OP_POSUPTOI:
607 case OP_NOTUPTO:
608 case OP_NOTMINUPTO:
609 case OP_NOTEXACT:
610 case OP_NOTPOSUPTO:
611 case OP_NOTUPTOI:
612 case OP_NOTMINUPTOI:
613 case OP_NOTEXACTI:
614 case OP_NOTPOSUPTOI:
615 cc += 2 + IMM2_SIZE;
616 #ifdef SUPPORT_UTF
617 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
618 #endif
619 return cc;
620
621 case OP_NOTPROP:
622 case OP_PROP:
623 return cc + 1 + 2;
624
625 case OP_TYPEUPTO:
626 case OP_TYPEMINUPTO:
627 case OP_TYPEEXACT:
628 case OP_TYPEPOSUPTO:
629 case OP_REF:
630 case OP_REFI:
631 case OP_CREF:
632 case OP_NCREF:
633 case OP_RREF:
634 case OP_NRREF:
635 case OP_CLOSE:
636 cc += 1 + IMM2_SIZE;
637 return cc;
638
639 case OP_CRRANGE:
640 case OP_CRMINRANGE:
641 return cc + 1 + 2 * IMM2_SIZE;
642
643 case OP_CLASS:
644 case OP_NCLASS:
645 return cc + 1 + 32 / sizeof(pcre_uchar);
646
647 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
648 case OP_XCLASS:
649 return cc + GET(cc, 1);
650 #endif
651
652 case OP_RECURSE:
653 case OP_ASSERT:
654 case OP_ASSERT_NOT:
655 case OP_ASSERTBACK:
656 case OP_ASSERTBACK_NOT:
657 case OP_REVERSE:
658 case OP_ONCE:
659 case OP_ONCE_NC:
660 case OP_BRA:
661 case OP_BRAPOS:
662 case OP_COND:
663 case OP_SBRA:
664 case OP_SBRAPOS:
665 case OP_SCOND:
666 case OP_ALT:
667 case OP_KET:
668 case OP_KETRMAX:
669 case OP_KETRMIN:
670 case OP_KETRPOS:
671 return cc + 1 + LINK_SIZE;
672
673 case OP_CBRA:
674 case OP_CBRAPOS:
675 case OP_SCBRA:
676 case OP_SCBRAPOS:
677 return cc + 1 + LINK_SIZE + IMM2_SIZE;
678
679 case OP_MARK:
680 return cc + 1 + 2 + cc[1];
681
682 default:
683 return NULL;
684 }
685 }
686
/* Groups of case labels shared by get_private_data_length and
set_private_data_ptrs. For the single character iterators below both
functions use 1 private data word for the _1 group and 2 words for the _2A
and _2B groups; the _2B opcodes carry an additional IMM2 operand. */
687 #define CASE_ITERATOR_PRIVATE_DATA_1 \
688 case OP_MINSTAR: \
689 case OP_MINPLUS: \
690 case OP_QUERY: \
691 case OP_MINQUERY: \
692 case OP_MINSTARI: \
693 case OP_MINPLUSI: \
694 case OP_QUERYI: \
695 case OP_MINQUERYI: \
696 case OP_NOTMINSTAR: \
697 case OP_NOTMINPLUS: \
698 case OP_NOTQUERY: \
699 case OP_NOTMINQUERY: \
700 case OP_NOTMINSTARI: \
701 case OP_NOTMINPLUSI: \
702 case OP_NOTQUERYI: \
703 case OP_NOTMINQUERYI:
704
705 #define CASE_ITERATOR_PRIVATE_DATA_2A \
706 case OP_STAR: \
707 case OP_PLUS: \
708 case OP_STARI: \
709 case OP_PLUSI: \
710 case OP_NOTSTAR: \
711 case OP_NOTPLUS: \
712 case OP_NOTSTARI: \
713 case OP_NOTPLUSI:
714
715 #define CASE_ITERATOR_PRIVATE_DATA_2B \
716 case OP_UPTO: \
717 case OP_MINUPTO: \
718 case OP_UPTOI: \
719 case OP_MINUPTOI: \
720 case OP_NOTUPTO: \
721 case OP_NOTMINUPTO: \
722 case OP_NOTUPTOI: \
723 case OP_NOTMINUPTOI:
724
/* The same grouping for the character type iterators. The users of the _2A
and _2B groups reserve no private data when the repeated type is OP_ANYNL
or OP_EXTUNI. */
725 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
726 case OP_TYPEMINSTAR: \
727 case OP_TYPEMINPLUS: \
728 case OP_TYPEQUERY: \
729 case OP_TYPEMINQUERY:
730
731 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
732 case OP_TYPESTAR: \
733 case OP_TYPEPLUS:
734
735 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
736 case OP_TYPEUPTO: \
737 case OP_TYPEMINUPTO:
738
739 static int get_class_iterator_size(pcre_uchar *cc)
740 {
741 switch(*cc)
742 {
743 case OP_CRSTAR:
744 case OP_CRPLUS:
745 return 2;
746
747 case OP_CRMINSTAR:
748 case OP_CRMINPLUS:
749 case OP_CRQUERY:
750 case OP_CRMINQUERY:
751 return 1;
752
753 case OP_CRRANGE:
754 case OP_CRMINRANGE:
755 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
756 return 0;
757 return 2;
758
759 default:
760 return 0;
761 }
762 }
763
764 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
765 {
766 int private_data_length = 0;
767 pcre_uchar *alternative;
768 pcre_uchar *name;
769 pcre_uchar *end = NULL;
770 int space, size, i;
771 pcre_uint32 bracketlen;
772
773 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
774 while (cc < ccend)
775 {
776 space = 0;
777 size = 0;
778 bracketlen = 0;
779 switch(*cc)
780 {
781 case OP_SET_SOM:
782 common->has_set_som = TRUE;
783 cc += 1;
784 break;
785
786 case OP_REF:
787 case OP_REFI:
788 common->optimized_cbracket[GET2(cc, 1)] = 0;
789 cc += 1 + IMM2_SIZE;
790 break;
791
792 case OP_ASSERT:
793 case OP_ASSERT_NOT:
794 case OP_ASSERTBACK:
795 case OP_ASSERTBACK_NOT:
796 case OP_ONCE:
797 case OP_ONCE_NC:
798 case OP_BRAPOS:
799 case OP_SBRA:
800 case OP_SBRAPOS:
801 private_data_length += sizeof(sljit_w);
802 bracketlen = 1 + LINK_SIZE;
803 break;
804
805 case OP_CBRAPOS:
806 case OP_SCBRAPOS:
807 private_data_length += sizeof(sljit_w);
808 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
809 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
810 break;
811
812 case OP_COND:
813 case OP_SCOND:
814 bracketlen = cc[1 + LINK_SIZE];
815 if (bracketlen == OP_CREF)
816 {
817 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
818 common->optimized_cbracket[bracketlen] = 0;
819 }
820 else if (bracketlen == OP_NCREF)
821 {
822 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
823 name = (pcre_uchar *)common->name_table;
824 alternative = name;
825 for (i = 0; i < common->name_count; i++)
826 {
827 if (GET2(name, 0) == bracketlen) break;
828 name += common->name_entry_size;
829 }
830 SLJIT_ASSERT(i != common->name_count);
831
832 for (i = 0; i < common->name_count; i++)
833 {
834 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
835 common->optimized_cbracket[GET2(alternative, 0)] = 0;
836 alternative += common->name_entry_size;
837 }
838 }
839
840 if (*cc == OP_COND)
841 {
842 /* Might be a hidden SCOND. */
843 alternative = cc + GET(cc, 1);
844 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
845 private_data_length += sizeof(sljit_w);
846 }
847 else
848 private_data_length += sizeof(sljit_w);
849 bracketlen = 1 + LINK_SIZE;
850 break;
851
852 case OP_BRA:
853 bracketlen = 1 + LINK_SIZE;
854 break;
855
856 case OP_CBRA:
857 case OP_SCBRA:
858 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
859 break;
860
861 CASE_ITERATOR_PRIVATE_DATA_1
862 space = 1;
863 size = -2;
864 break;
865
866 CASE_ITERATOR_PRIVATE_DATA_2A
867 space = 2;
868 size = -2;
869 break;
870
871 CASE_ITERATOR_PRIVATE_DATA_2B
872 space = 2;
873 size = -(2 + IMM2_SIZE);
874 break;
875
876 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
877 space = 1;
878 size = 1;
879 break;
880
881 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
882 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
883 space = 2;
884 size = 1;
885 break;
886
887 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
888 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
889 space = 2;
890 size = 1 + IMM2_SIZE;
891 break;
892
893 case OP_CLASS:
894 case OP_NCLASS:
895 size += 1 + 32 / sizeof(pcre_uchar);
896 space = get_class_iterator_size(cc + size);
897 break;
898
899 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
900 case OP_XCLASS:
901 size = GET(cc, 1);
902 space = get_class_iterator_size(cc + size);
903 break;
904 #endif
905
906 case OP_RECURSE:
907 /* Set its value only once. */
908 if (common->recursive_head == 0)
909 {
910 common->recursive_head = common->ovector_start;
911 common->ovector_start += sizeof(sljit_w);
912 }
913 cc += 1 + LINK_SIZE;
914 break;
915
916 case OP_MARK:
917 if (common->mark_ptr == 0)
918 {
919 common->mark_ptr = common->ovector_start;
920 common->ovector_start += sizeof(sljit_w);
921 }
922 cc += 1 + 2 + cc[1];
923 break;
924
925 default:
926 cc = next_opcode(common, cc);
927 if (cc == NULL)
928 return -1;
929 break;
930 }
931
932 if (space > 0 && cc >= end)
933 private_data_length += sizeof(sljit_w) * space;
934
935 if (size != 0)
936 {
937 if (size < 0)
938 {
939 cc += -size;
940 #ifdef SUPPORT_UTF
941 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
942 #endif
943 }
944 else
945 cc += size;
946 }
947
948 if (bracketlen != 0)
949 {
950 if (cc >= end)
951 {
952 end = bracketend(cc);
953 if (end[-1 - LINK_SIZE] == OP_KET)
954 end = NULL;
955 }
956 cc += bracketlen;
957 }
958 }
959 return private_data_length;
960 }
961
962 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
963 {
964 pcre_uchar *cc = common->start;
965 pcre_uchar *alternative;
966 pcre_uchar *end = NULL;
967 int space, size, bracketlen;
968
969 while (cc < ccend)
970 {
971 space = 0;
972 size = 0;
973 bracketlen = 0;
974 switch(*cc)
975 {
976 case OP_ASSERT:
977 case OP_ASSERT_NOT:
978 case OP_ASSERTBACK:
979 case OP_ASSERTBACK_NOT:
980 case OP_ONCE:
981 case OP_ONCE_NC:
982 case OP_BRAPOS:
983 case OP_SBRA:
984 case OP_SBRAPOS:
985 case OP_SCOND:
986 common->private_data_ptrs[cc - common->start] = private_data_ptr;
987 private_data_ptr += sizeof(sljit_w);
988 bracketlen = 1 + LINK_SIZE;
989 break;
990
991 case OP_CBRAPOS:
992 case OP_SCBRAPOS:
993 common->private_data_ptrs[cc - common->start] = private_data_ptr;
994 private_data_ptr += sizeof(sljit_w);
995 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
996 break;
997
998 case OP_COND:
999 /* Might be a hidden SCOND. */
1000 alternative = cc + GET(cc, 1);
1001 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1002 {
1003 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1004 private_data_ptr += sizeof(sljit_w);
1005 }
1006 bracketlen = 1 + LINK_SIZE;
1007 break;
1008
1009 case OP_BRA:
1010 bracketlen = 1 + LINK_SIZE;
1011 break;
1012
1013 case OP_CBRA:
1014 case OP_SCBRA:
1015 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1016 break;
1017
1018 CASE_ITERATOR_PRIVATE_DATA_1
1019 space = 1;
1020 size = -2;
1021 break;
1022
1023 CASE_ITERATOR_PRIVATE_DATA_2A
1024 space = 2;
1025 size = -2;
1026 break;
1027
1028 CASE_ITERATOR_PRIVATE_DATA_2B
1029 space = 2;
1030 size = -(2 + IMM2_SIZE);
1031 break;
1032
1033 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1034 space = 1;
1035 size = 1;
1036 break;
1037
1038 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1039 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1040 space = 2;
1041 size = 1;
1042 break;
1043
1044 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1045 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1046 space = 2;
1047 size = 1 + IMM2_SIZE;
1048 break;
1049
1050 case OP_CLASS:
1051 case OP_NCLASS:
1052 size += 1 + 32 / sizeof(pcre_uchar);
1053 space = get_class_iterator_size(cc + size);
1054 break;
1055
1056 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1057 case OP_XCLASS:
1058 size = GET(cc, 1);
1059 space = get_class_iterator_size(cc + size);
1060 break;
1061 #endif
1062
1063 default:
1064 cc = next_opcode(common, cc);
1065 SLJIT_ASSERT(cc != NULL);
1066 break;
1067 }
1068
1069 if (space > 0 && cc >= end)
1070 {
1071 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1072 private_data_ptr += sizeof(sljit_w) * space;
1073 }
1074
1075 if (size != 0)
1076 {
1077 if (size < 0)
1078 {
1079 cc += -size;
1080 #ifdef SUPPORT_UTF
1081 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1082 #endif
1083 }
1084 else
1085 cc += size;
1086 }
1087
1088 if (bracketlen > 0)
1089 {
1090 if (cc >= end)
1091 {
1092 end = bracketend(cc);
1093 if (end[-1 - LINK_SIZE] == OP_KET)
1094 end = NULL;
1095 }
1096 cc += bracketlen;
1097 }
1098 }
1099 }
1100
1101 /* Returns with -1 if no need for frame. */
1102 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1103 {
1104 pcre_uchar *ccend = bracketend(cc);
1105 int length = 0;
1106 BOOL possessive = FALSE;
1107 BOOL setsom_found = recursive;
1108 BOOL setmark_found = recursive;
1109
1110 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1111 {
1112 length = 3;
1113 possessive = TRUE;
1114 }
1115
1116 cc = next_opcode(common, cc);
1117 SLJIT_ASSERT(cc != NULL);
1118 while (cc < ccend)
1119 switch(*cc)
1120 {
1121 case OP_SET_SOM:
1122 SLJIT_ASSERT(common->has_set_som);
1123 if (!setsom_found)
1124 {
1125 length += 2;
1126 setsom_found = TRUE;
1127 }
1128 cc += 1;
1129 break;
1130
1131 case OP_MARK:
1132 SLJIT_ASSERT(common->mark_ptr != 0);
1133 if (!setmark_found)
1134 {
1135 length += 2;
1136 setmark_found = TRUE;
1137 }
1138 cc += 1 + 2 + cc[1];
1139 break;
1140
1141 case OP_RECURSE:
1142 if (common->has_set_som && !setsom_found)
1143 {
1144 length += 2;
1145 setsom_found = TRUE;
1146 }
1147 if (common->mark_ptr != 0 && !setmark_found)
1148 {
1149 length += 2;
1150 setmark_found = TRUE;
1151 }
1152 cc += 1 + LINK_SIZE;
1153 break;
1154
1155 case OP_CBRA:
1156 case OP_CBRAPOS:
1157 case OP_SCBRA:
1158 case OP_SCBRAPOS:
1159 length += 3;
1160 cc += 1 + LINK_SIZE + IMM2_SIZE;
1161 break;
1162
1163 default:
1164 cc = next_opcode(common, cc);
1165 SLJIT_ASSERT(cc != NULL);
1166 break;
1167 }
1168
1169 /* Possessive quantifiers can use a special case. */
1170 if (SLJIT_UNLIKELY(possessive) && length == 3)
1171 return -1;
1172
1173 if (length > 0)
1174 return length + 1;
1175 return -1;
1176 }
1177
1178 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1179 {
1180 DEFINE_COMPILER;
1181 pcre_uchar *ccend = bracketend(cc);
1182 BOOL setsom_found = recursive;
1183 BOOL setmark_found = recursive;
1184 int offset;
1185
1186 /* >= 1 + shortest item size (2) */
1187 SLJIT_UNUSED_ARG(stacktop);
1188 SLJIT_ASSERT(stackpos >= stacktop + 2);
1189
1190 stackpos = STACK(stackpos);
1191 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1192 cc = next_opcode(common, cc);
1193 SLJIT_ASSERT(cc != NULL);
1194 while (cc < ccend)
1195 switch(*cc)
1196 {
1197 case OP_SET_SOM:
1198 SLJIT_ASSERT(common->has_set_som);
1199 if (!setsom_found)
1200 {
1201 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1202 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1203 stackpos += (int)sizeof(sljit_w);
1204 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1205 stackpos += (int)sizeof(sljit_w);
1206 setsom_found = TRUE;
1207 }
1208 cc += 1;
1209 break;
1210
1211 case OP_MARK:
1212 SLJIT_ASSERT(common->mark_ptr != 0);
1213 if (!setmark_found)
1214 {
1215 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1216 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1217 stackpos += (int)sizeof(sljit_w);
1218 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1219 stackpos += (int)sizeof(sljit_w);
1220 setmark_found = TRUE;
1221 }
1222 cc += 1 + 2 + cc[1];
1223 break;
1224
1225 case OP_RECURSE:
1226 if (common->has_set_som && !setsom_found)
1227 {
1228 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1229 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1230 stackpos += (int)sizeof(sljit_w);
1231 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1232 stackpos += (int)sizeof(sljit_w);
1233 setsom_found = TRUE;
1234 }
1235 if (common->mark_ptr != 0 && !setmark_found)
1236 {
1237 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1238 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1239 stackpos += (int)sizeof(sljit_w);
1240 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1241 stackpos += (int)sizeof(sljit_w);
1242 setmark_found = TRUE;
1243 }
1244 cc += 1 + LINK_SIZE;
1245 break;
1246
1247 case OP_CBRA:
1248 case OP_CBRAPOS:
1249 case OP_SCBRA:
1250 case OP_SCBRAPOS:
1251 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1252 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1253 stackpos += (int)sizeof(sljit_w);
1254 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1255 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1256 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1257 stackpos += (int)sizeof(sljit_w);
1258 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1259 stackpos += (int)sizeof(sljit_w);
1260
1261 cc += 1 + LINK_SIZE + IMM2_SIZE;
1262 break;
1263
1264 default:
1265 cc = next_opcode(common, cc);
1266 SLJIT_ASSERT(cc != NULL);
1267 break;
1268 }
1269
1270 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1271 SLJIT_ASSERT(stackpos == STACK(stacktop));
1272 }
1273
1274 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1275 {
1276 int private_data_length = 2;
1277 int size;
1278 pcre_uchar *alternative;
1279 /* Calculate the sum of the private machine words. */
1280 while (cc < ccend)
1281 {
1282 size = 0;
1283 switch(*cc)
1284 {
1285 case OP_ASSERT:
1286 case OP_ASSERT_NOT:
1287 case OP_ASSERTBACK:
1288 case OP_ASSERTBACK_NOT:
1289 case OP_ONCE:
1290 case OP_ONCE_NC:
1291 case OP_BRAPOS:
1292 case OP_SBRA:
1293 case OP_SBRAPOS:
1294 case OP_SCOND:
1295 private_data_length++;
1296 cc += 1 + LINK_SIZE;
1297 break;
1298
1299 case OP_CBRA:
1300 case OP_SCBRA:
1301 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1302 private_data_length++;
1303 cc += 1 + LINK_SIZE + IMM2_SIZE;
1304 break;
1305
1306 case OP_CBRAPOS:
1307 case OP_SCBRAPOS:
1308 private_data_length += 2;
1309 cc += 1 + LINK_SIZE + IMM2_SIZE;
1310 break;
1311
1312 case OP_COND:
1313 /* Might be a hidden SCOND. */
1314 alternative = cc + GET(cc, 1);
1315 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1316 private_data_length++;
1317 cc += 1 + LINK_SIZE;
1318 break;
1319
1320 CASE_ITERATOR_PRIVATE_DATA_1
1321 if (PRIVATE_DATA(cc))
1322 private_data_length++;
1323 cc += 2;
1324 #ifdef SUPPORT_UTF
1325 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1326 #endif
1327 break;
1328
1329 CASE_ITERATOR_PRIVATE_DATA_2A
1330 if (PRIVATE_DATA(cc))
1331 private_data_length += 2;
1332 cc += 2;
1333 #ifdef SUPPORT_UTF
1334 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1335 #endif
1336 break;
1337
1338 CASE_ITERATOR_PRIVATE_DATA_2B
1339 if (PRIVATE_DATA(cc))
1340 private_data_length += 2;
1341 cc += 2 + IMM2_SIZE;
1342 #ifdef SUPPORT_UTF
1343 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1344 #endif
1345 break;
1346
1347 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1348 if (PRIVATE_DATA(cc))
1349 private_data_length++;
1350 cc += 1;
1351 break;
1352
1353 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1354 if (PRIVATE_DATA(cc))
1355 private_data_length += 2;
1356 cc += 1;
1357 break;
1358
1359 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1360 if (PRIVATE_DATA(cc))
1361 private_data_length += 2;
1362 cc += 1 + IMM2_SIZE;
1363 break;
1364
1365 case OP_CLASS:
1366 case OP_NCLASS:
1367 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1368 case OP_XCLASS:
1369 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1370 #else
1371 size = 1 + 32 / (int)sizeof(pcre_uchar);
1372 #endif
1373 if (PRIVATE_DATA(cc))
1374 private_data_length += get_class_iterator_size(cc + size);
1375 cc += size;
1376 break;
1377
1378 default:
1379 cc = next_opcode(common, cc);
1380 SLJIT_ASSERT(cc != NULL);
1381 break;
1382 }
1383 }
1384 SLJIT_ASSERT(cc == ccend);
1385 return private_data_length;
1386 }
1387
/* Emits code that copies the private-data words used by the opcodes in
   [cc, ccend) between the local variable area (SLJIT_LOCALS_REG) and the
   stack (STACK_TOP): locals -> stack when save is TRUE, stack -> locals
   otherwise. stackptr and stacktop are stack positions which are converted
   below with STACK(). Every value travels through TMP1 or TMP2; the two
   registers are used alternately. */
1388 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1389 BOOL save, int stackptr, int stacktop)
1390 {
1391 DEFINE_COMPILER;
/* srcw[] holds the local-variable offsets collected for the current opcode,
   count tells how many of them are valid (0, 1 or 2). */
1392 int srcw[2];
1393 int count, size;
/* tmp1next: TMP1 is the register to use next. tmpNempty: the register does
   not currently hold a value that still has to be written out. */
1394 BOOL tmp1next = TRUE;
1395 BOOL tmp1empty = TRUE;
1396 BOOL tmp2empty = TRUE;
1397 pcre_uchar *alternative;
1398 enum {
1399 start,
1400 loop,
1401 end
1402 } status;
1403
/* Only the save pass goes through the 'start' state, which handles
   common->recursive_head. */
1404 status = save ? start : loop;
1405 stackptr = STACK(stackptr - 2);
1406 stacktop = STACK(stacktop - 1);
1407
/* Restore pass: step over one stack word first (NOTE(review): presumably the
   slot of recursive_head, which only the save pass handles - confirm against
   the callers), then preload up to two words from the stack into TMP1/TMP2. */
1408 if (!save)
1409 {
1410 stackptr += sizeof(sljit_w);
1411 if (stackptr < stacktop)
1412 {
1413 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1414 stackptr += sizeof(sljit_w);
1415 tmp1empty = FALSE;
1416 }
1417 if (stackptr < stacktop)
1418 {
1419 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1420 stackptr += sizeof(sljit_w);
1421 tmp2empty = FALSE;
1422 }
1423 /* The tmp1next must be TRUE in either way. */
1424 }
1425
/* Each iteration inspects one opcode (or the 'start' state), fills srcw[] and
   count, and the inner loop at the bottom moves the collected words. */
1426 while (status != end)
1427 {
1428 count = 0;
1429 switch(status)
1430 {
1431 case start:
1432 SLJIT_ASSERT(save && common->recursive_head != 0);
1433 count = 1;
1434 srcw[0] = common->recursive_head;
1435 status = loop;
1436 break;
1437
1438 case loop:
1439 if (cc >= ccend)
1440 {
1441 status = end;
1442 break;
1443 }
1444
1445 switch(*cc)
1446 {
/* Brackets that must have a private data word assigned. */
1447 case OP_ASSERT:
1448 case OP_ASSERT_NOT:
1449 case OP_ASSERTBACK:
1450 case OP_ASSERTBACK_NOT:
1451 case OP_ONCE:
1452 case OP_ONCE_NC:
1453 case OP_BRAPOS:
1454 case OP_SBRA:
1455 case OP_SBRAPOS:
1456 case OP_SCOND:
1457 count = 1;
1458 srcw[0] = PRIVATE_DATA(cc);
1459 SLJIT_ASSERT(srcw[0] != 0);
1460 cc += 1 + LINK_SIZE;
1461 break;
1462
/* Capturing brackets: OVECTOR_PRIV of the bracket number is copied unless
   optimized_cbracket[] is non-zero for it. */
1463 case OP_CBRA:
1464 case OP_SCBRA:
1465 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1466 {
1467 count = 1;
1468 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1469 }
1470 cc += 1 + LINK_SIZE + IMM2_SIZE;
1471 break;
1472
/* Possessive capturing brackets: both the private data word and the
   OVECTOR_PRIV word are copied. */
1473 case OP_CBRAPOS:
1474 case OP_SCBRAPOS:
1475 count = 2;
1476 srcw[0] = PRIVATE_DATA(cc);
1477 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1478 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1479 cc += 1 + LINK_SIZE + IMM2_SIZE;
1480 break;
1481
1482 case OP_COND:
1483 /* Might be a hidden SCOND. */
1484 alternative = cc + GET(cc, 1);
1485 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1486 {
1487 count = 1;
1488 srcw[0] = PRIVATE_DATA(cc);
1489 SLJIT_ASSERT(srcw[0] != 0);
1490 }
1491 cc += 1 + LINK_SIZE;
1492 break;
1493
/* Character iterators: copied only when PRIVATE_DATA(cc) is non-zero. The
   two-word variants use PRIVATE_DATA(cc) and the word right after it. */
1494 CASE_ITERATOR_PRIVATE_DATA_1
1495 if (PRIVATE_DATA(cc))
1496 {
1497 count = 1;
1498 srcw[0] = PRIVATE_DATA(cc);
1499 }
1500 cc += 2;
1501 #ifdef SUPPORT_UTF
1502 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1503 #endif
1504 break;
1505
1506 CASE_ITERATOR_PRIVATE_DATA_2A
1507 if (PRIVATE_DATA(cc))
1508 {
1509 count = 2;
1510 srcw[0] = PRIVATE_DATA(cc);
1511 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1512 }
1513 cc += 2;
1514 #ifdef SUPPORT_UTF
1515 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1516 #endif
1517 break;
1518
1519 CASE_ITERATOR_PRIVATE_DATA_2B
1520 if (PRIVATE_DATA(cc))
1521 {
1522 count = 2;
1523 srcw[0] = PRIVATE_DATA(cc);
1524 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1525 }
1526 cc += 2 + IMM2_SIZE;
1527 #ifdef SUPPORT_UTF
1528 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1529 #endif
1530 break;
1531
/* Type iterators: same scheme, but no character follows the opcode. */
1532 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1533 if (PRIVATE_DATA(cc))
1534 {
1535 count = 1;
1536 srcw[0] = PRIVATE_DATA(cc);
1537 }
1538 cc += 1;
1539 break;
1540
1541 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1542 if (PRIVATE_DATA(cc))
1543 {
1544 count = 2;
1545 srcw[0] = PRIVATE_DATA(cc);
1546 srcw[1] = srcw[0] + sizeof(sljit_w);
1547 }
1548 cc += 1;
1549 break;
1550
1551 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1552 if (PRIVATE_DATA(cc))
1553 {
1554 count = 2;
1555 srcw[0] = PRIVATE_DATA(cc);
1556 srcw[1] = srcw[0] + sizeof(sljit_w);
1557 }
1558 cc += 1 + IMM2_SIZE;
1559 break;
1560
/* Character classes: size is the length of the class itself, and the word
   count comes from get_class_iterator_size() applied to what follows it. */
1561 case OP_CLASS:
1562 case OP_NCLASS:
1563 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1564 case OP_XCLASS:
1565 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1566 #else
1567 size = 1 + 32 / (int)sizeof(pcre_uchar);
1568 #endif
1569 if (PRIVATE_DATA(cc))
1570 switch(get_class_iterator_size(cc + size))
1571 {
1572 case 1:
1573 count = 1;
1574 srcw[0] = PRIVATE_DATA(cc);
1575 break;
1576
1577 case 2:
1578 count = 2;
1579 srcw[0] = PRIVATE_DATA(cc);
1580 srcw[1] = srcw[0] + sizeof(sljit_w);
1581 break;
1582
1583 default:
1584 SLJIT_ASSERT_STOP();
1585 break;
1586 }
1587 cc += size;
1588 break;
1589
1590 default:
1591 cc = next_opcode(common, cc);
1592 SLJIT_ASSERT(cc != NULL);
1593 break;
1594 }
1595 break;
1596
/* The enclosing while loop exits before 'end' can be dispatched. */
1597 case end:
1598 SLJIT_ASSERT_STOP();
1599 break;
1600 }
1601
/* Move the collected words. Entries are consumed from the highest index
   down, so srcw[1] is handled before srcw[0]. */
1602 while (count > 0)
1603 {
1604 count--;
1605 if (save)
1606 {
/* Save: write the value still held by the chosen register to the stack (if
   any), then load the next local into that register. */
1607 if (tmp1next)
1608 {
1609 if (!tmp1empty)
1610 {
1611 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1612 stackptr += sizeof(sljit_w);
1613 }
1614 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1615 tmp1empty = FALSE;
1616 tmp1next = FALSE;
1617 }
1618 else
1619 {
1620 if (!tmp2empty)
1621 {
1622 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1623 stackptr += sizeof(sljit_w);
1624 }
1625 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1626 tmp2empty = FALSE;
1627 tmp1next = TRUE;
1628 }
1629 }
1630 else
1631 {
/* Restore: store the preloaded register into the local, then refill the
   register from the stack while words remain below stacktop. */
1632 if (tmp1next)
1633 {
1634 SLJIT_ASSERT(!tmp1empty);
1635 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1636 tmp1empty = stackptr >= stacktop;
1637 if (!tmp1empty)
1638 {
1639 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1640 stackptr += sizeof(sljit_w);
1641 }
1642 tmp1next = FALSE;
1643 }
1644 else
1645 {
1646 SLJIT_ASSERT(!tmp2empty);
1647 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1648 tmp2empty = stackptr >= stacktop;
1649 if (!tmp2empty)
1650 {
1651 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1652 stackptr += sizeof(sljit_w);
1653 }
1654 tmp1next = TRUE;
1655 }
1656 }
1657 }
1658 }
1659
/* Save pass: write out the values still held in the registers. The register
   that would be reused next holds the older value, so it is stored first. */
1660 if (save)
1661 {
1662 if (tmp1next)
1663 {
1664 if (!tmp1empty)
1665 {
1666 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1667 stackptr += sizeof(sljit_w);
1668 }
1669 if (!tmp2empty)
1670 {
1671 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1672 stackptr += sizeof(sljit_w);
1673 }
1674 }
1675 else
1676 {
1677 if (!tmp2empty)
1678 {
1679 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1680 stackptr += sizeof(sljit_w);
1681 }
1682 if (!tmp1empty)
1683 {
1684 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1685 stackptr += sizeof(sljit_w);
1686 }
1687 }
1688 }
/* Every opcode was visited, the whole stack area was consumed, and a restore
   pass left nothing behind in the registers. */
1689 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1690 }
1691
1692 #undef CASE_ITERATOR_PRIVATE_DATA_1
1693 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1694 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1695 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1696 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1697 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1698
1699 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1700 {
1701 return (value & (value - 1)) == 0;
1702 }
1703
1704 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1705 {
1706 while (list)
1707 {
1708 /* sljit_set_label is clever enough to do nothing
1709 if either the jump or the label is NULL. */
1710 sljit_set_label(list->jump, label);
1711 list = list->next;
1712 }
1713 }
1714
1715 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1716 {
1717 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1718 if (list_item)
1719 {
1720 list_item->next = *list;
1721 list_item->jump = jump;
1722 *list = list_item;
1723 }
1724 }
1725
1726 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1727 {
1728 DEFINE_COMPILER;
1729 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1730
1731 if (list_item)
1732 {
1733 list_item->type = type;
1734 list_item->data = data;
1735 list_item->start = start;
1736 list_item->quit = LABEL();
1737 list_item->next = common->stubs;
1738 common->stubs = list_item;
1739 }
1740 }
1741
1742 static void flush_stubs(compiler_common *common)
1743 {
1744 DEFINE_COMPILER;
1745 stub_list* list_item = common->stubs;
1746
1747 while (list_item)
1748 {
1749 JUMPHERE(list_item->start);
1750 switch(list_item->type)
1751 {
1752 case stack_alloc:
1753 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1754 break;
1755 }
1756 JUMPTO(SLJIT_JUMP, list_item->quit);
1757 list_item = list_item->next;
1758 }
1759 common->stubs = NULL;
1760 }
1761
1762 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1763 {
1764 DEFINE_COMPILER;
1765
1766 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1767 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1768 }
1769
1770 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1771 {
1772 /* May destroy all locals and registers except TMP2. */
1773 DEFINE_COMPILER;
1774
1775 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1776 #ifdef DESTROY_REGISTERS
1777 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1778 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1779 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1780 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1781 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1782 #endif
1783 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1784 }
1785
1786 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1787 {
1788 DEFINE_COMPILER;
1789 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1790 }
1791
1792 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1793 {
1794 DEFINE_COMPILER;
1795 struct sljit_label *loop;
1796 int i;
1797 /* At this point we can freely use all temporary registers. */
1798 /* TMP1 returns with begin - 1. */
1799 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1800 if (length < 8)
1801 {
1802 for (i = 0; i < length; i++)
1803 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1804 }
1805 else
1806 {
1807 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1808 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1809 loop = LABEL();
1810 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1811 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1812 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1813 }
1814 }
1815
1816 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1817 {
1818 DEFINE_COMPILER;
1819 struct sljit_label *loop;
1820 struct sljit_jump *earlyexit;
1821
1822 /* At this point we can freely use all registers. */
1823 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1824 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1825
1826 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1827 if (common->mark_ptr != 0)
1828 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1829 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1830 if (common->mark_ptr != 0)
1831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1832 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1833 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1834 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1835 /* Unlikely, but possible */
1836 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1837 loop = LABEL();
1838 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1839 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1840 /* Copy the integer value to the output buffer */
1841 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1842 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1843 #endif
1844 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1845 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1846 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1847 JUMPHERE(earlyexit);
1848
1849 /* Calculate the return value, which is the maximum ovector value. */
1850 if (topbracket > 1)
1851 {
1852 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1853 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1854
1855 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1856 loop = LABEL();
1857 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1858 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1859 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1860 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1861 }
1862 else
1863 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1864 }
1865
1866 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1867 {
1868 DEFINE_COMPILER;
1869
1870 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1871 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1872
1873 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1874 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1875 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1876 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, quit);
1877
1878 /* Store match begin and end. */
1879 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1880 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1881 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1882 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1883 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1884 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1885 #endif
1886 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1887
1888 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1889 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1890 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
1891 #endif
1892 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1893
1894 JUMPTO(SLJIT_JUMP, quit);
1895 }
1896
1897 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1898 {
1899 /* May destroy TMP1. */
1900 DEFINE_COMPILER;
1901 struct sljit_jump *jump;
1902
1903 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1904 {
1905 /* The value of -1 must be kept for start_used_ptr! */
1906 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1907 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1908 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1909 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1910 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1911 JUMPHERE(jump);
1912 }
1913 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1914 {
1915 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1916 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1917 JUMPHERE(jump);
1918 }
1919 }
1920
1921 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1922 {
1923 /* Detects if the character has an othercase. */
1924 unsigned int c;
1925
1926 #ifdef SUPPORT_UTF
1927 if (common->utf)
1928 {
1929 GETCHAR(c, cc);
1930 if (c > 127)
1931 {
1932 #ifdef SUPPORT_UCP
1933 return c != UCD_OTHERCASE(c);
1934 #else
1935 return FALSE;
1936 #endif
1937 }
1938 #ifndef COMPILE_PCRE8
1939 return common->fcc[c] != c;
1940 #endif
1941 }
1942 else
1943 #endif
1944 c = *cc;
1945 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1946 }
1947
1948 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1949 {
1950 /* Returns with the othercase. */
1951 #ifdef SUPPORT_UTF
1952 if (common->utf && c > 127)
1953 {
1954 #ifdef SUPPORT_UCP
1955 return UCD_OTHERCASE(c);
1956 #else
1957 return c;
1958 #endif
1959 }
1960 #endif
1961 return TABLE_GET(c, common->fcc, c);
1962 }
1963
1964 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1965 {
1966 /* Detects if the character and its othercase has only 1 bit difference. */
1967 unsigned int c, oc, bit;
1968 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1969 int n;
1970 #endif
1971
1972 #ifdef SUPPORT_UTF
1973 if (common->utf)
1974 {
1975 GETCHAR(c, cc);
1976 if (c <= 127)
1977 oc = common->fcc[c];
1978 else
1979 {
1980 #ifdef SUPPORT_UCP
1981 oc = UCD_OTHERCASE(c);
1982 #else
1983 oc = c;
1984 #endif
1985 }
1986 }
1987 else
1988 {
1989 c = *cc;
1990 oc = TABLE_GET(c, common->fcc, c);
1991 }
1992 #else
1993 c = *cc;
1994 oc = TABLE_GET(c, common->fcc, c);
1995 #endif
1996
1997 SLJIT_ASSERT(c != oc);
1998
1999 bit = c ^ oc;
2000 /* Optimized for English alphabet. */
2001 if (c <= 127 && bit == 0x20)
2002 return (0 << 8) | 0x20;
2003
2004 /* Since c != oc, they must have at least 1 bit difference. */
2005 if (!is_powerof2(bit))
2006 return 0;
2007
2008 #if defined COMPILE_PCRE8
2009
2010 #ifdef SUPPORT_UTF
2011 if (common->utf && c > 127)
2012 {
2013 n = GET_EXTRALEN(*cc);
2014 while ((bit & 0x3f) == 0)
2015 {
2016 n--;
2017 bit >>= 6;
2018 }
2019 return (n << 8) | bit;
2020 }
2021 #endif /* SUPPORT_UTF */
2022 return (0 << 8) | bit;
2023
2024 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2025
2026 #ifdef SUPPORT_UTF
2027 if (common->utf && c > 65535)
2028 {
2029 if (bit >= (1 << 10))
2030 bit >>= 10;
2031 else
2032 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2033 }
2034 #endif /* SUPPORT_UTF */
2035 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2036
2037 #endif /* COMPILE_PCRE[8|16|32] */
2038 }
2039
2040 static void check_partial(compiler_common *common, BOOL force)
2041 {
2042 /* Checks whether a partial matching is occured. Does not modify registers. */
2043 DEFINE_COMPILER;
2044 struct sljit_jump *jump = NULL;
2045
2046 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2047
2048 if (common->mode == JIT_COMPILE)
2049 return;
2050
2051 if (!force)
2052 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2053 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2054 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2055
2056 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2057 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2058 else
2059 {
2060 if (common->partialmatchlabel != NULL)
2061 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2062 else
2063 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2064 }
2065
2066 if (jump != NULL)
2067 JUMPHERE(jump);
2068 }
2069
2070 static struct sljit_jump *check_str_end(compiler_common *common)
2071 {
2072 /* Does not affect registers. Usually used in a tight spot. */
2073 DEFINE_COMPILER;
2074 struct sljit_jump *jump;
2075 struct sljit_jump *nohit;
2076 struct sljit_jump *return_value;
2077
2078 if (common->mode == JIT_COMPILE)
2079 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2080
2081 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2082 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2083 {
2084 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2085 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2086 JUMPHERE(nohit);
2087 return_value = JUMP(SLJIT_JUMP);
2088 }
2089 else
2090 {
2091 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2092 if (common->partialmatchlabel != NULL)
2093 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2094 else
2095 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2096 }
2097 JUMPHERE(jump);
2098 return return_value;
2099 }
2100
2101 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2102 {
2103 DEFINE_COMPILER;
2104 struct sljit_jump *jump;
2105
2106 if (common->mode == JIT_COMPILE)
2107 {
2108 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2109 return;
2110 }
2111
2112 /* Partial matching mode. */
2113 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2114 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2115 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2116 {
2117 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2118 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2119 }
2120 else
2121 {
2122 if (common->partialmatchlabel != NULL)
2123 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2124 else
2125 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2126 }
2127 JUMPHERE(jump);
2128 }
2129
2130 static void read_char(compiler_common *common)
2131 {
2132 /* Reads the character into TMP1, updates STR_PTR.
2133 Does not check STR_END. TMP2 Destroyed. */
2134 DEFINE_COMPILER;
2135 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2136 struct sljit_jump *jump;
2137 #endif
2138
2139 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2140 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2141 if (common->utf)
2142 {
2143 #if defined COMPILE_PCRE8
2144 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2145 #elif defined COMPILE_PCRE16
2146 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2147 #endif /* COMPILE_PCRE[8|16] */
2148 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2149 JUMPHERE(jump);
2150 }
2151 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2152 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2153 }
2154
2155 static void peek_char(compiler_common *common)
2156 {
2157 /* Reads the character into TMP1, keeps STR_PTR.
2158 Does not check STR_END. TMP2 Destroyed. */
2159 DEFINE_COMPILER;
2160 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2161 struct sljit_jump *jump;
2162 #endif
2163
2164 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2165 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2166 if (common->utf)
2167 {
2168 #if defined COMPILE_PCRE8
2169 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2170 #elif defined COMPILE_PCRE16
2171 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2172 #endif /* COMPILE_PCRE[8|16] */
2173 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2174 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2175 JUMPHERE(jump);
2176 }
2177 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2178 }
2179
2180 static void read_char8_type(compiler_common *common)
2181 {
2182 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
/* TMP2 is used as a scratch register. The type is taken from the
common->ctypes table; characters above 255 have no entry there and get
type 0. In UTF mode STR_PTR ends up after the whole character. */
2183 DEFINE_COMPILER;
2184 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2185 struct sljit_jump *jump;
2186 #endif
2187
2188 #ifdef SUPPORT_UTF
2189 if (common->utf)
2190 {
2191 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2192 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2193 #if defined COMPILE_PCRE8
2194 /* This can be an extra read in some situations, but hopefully
2195 it is needed in most cases. */
2196 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* Bytes below 0xc0 are done; any other byte is passed to the utfreadtype8
helper, which produces the type and moves STR_PTR over the remaining bytes. */
2197 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2198 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2199 JUMPHERE(jump);
2200 #elif defined COMPILE_PCRE16
/* Type 0 is preloaded; the table is consulted only for units <= 255. */
2201 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2202 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2203 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2204 JUMPHERE(jump);
2205 /* Skip low surrogate if necessary. */
/* TMP2 becomes 1 when (unit & 0xfc00) == 0xd800 and 0 otherwise; it is then
shifted left by one (two bytes per unit) and added to STR_PTR. */
2206 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2207 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2208 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2209 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2210 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2211 #elif defined COMPILE_PCRE32
/* One unit is one character: only the 0..255 table range check is needed. */
2212 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2213 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2214 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2215 JUMPHERE(jump);
2216 #endif /* COMPILE_PCRE[8|16|32] */
2217 return;
2218 }
2219 #endif /* SUPPORT_UTF */
/* Non-UTF mode: exactly one code unit is consumed. */
2220 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2221 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2222 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2223 /* The ctypes array contains only 256 values. */
2224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2225 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2226 #endif
2227 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2228 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2229 JUMPHERE(jump);
2230 #endif
2231 }
2232
2233 static void skip_char_back(compiler_common *common)
2234 {
2235 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
/* In UTF-8 and UTF-16 mode the emitted code steps over every code unit of the
previous character; in all other configurations STR_PTR simply moves back by
one code unit (and TMP1 is not touched). */
2236 DEFINE_COMPILER;
2237 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2238 #if defined COMPILE_PCRE8
2239 struct sljit_label *label;
2240
2241 if (common->utf)
2242 {
/* Step back one byte at a time while the byte just passed has the form
10xxxxxx, i.e. (byte & 0xc0) == 0x80. */
2243 label = LABEL();
2244 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2245 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2246 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2247 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2248 return;
2249 }
2250 #elif defined COMPILE_PCRE16
2251 if (common->utf)
2252 {
2253 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2254 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2255 /* Skip low surrogate if necessary. */
/* TMP1 becomes 1 when (unit & 0xfc00) == 0xdc00 and 0 otherwise; scaled to
bytes (shift by one) it moves STR_PTR back over the preceding unit as well. */
2256 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2257 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2258 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2259 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2260 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2261 return;
2262 }
2263 #endif /* COMPILE_PCRE[8|16] */
2264 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2265 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2266 }
2267
2268 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2269 {
2270 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
/* nltype selects the test (NLTYPE_ANY, NLTYPE_ANYCRLF or NLTYPE_FIXED).
One conditional jump is appended to *backtracks: it is taken when the
character is a newline if jumpiftrue is TRUE, and when it is not a newline
if jumpiftrue is FALSE. */
2271 DEFINE_COMPILER;
2272
2273 if (nltype == NLTYPE_ANY)
2274 {
/* The test is delegated to the shared anynewline helper; the jump below
branches on the flags that helper leaves behind (presumably non-zero
means "is a newline" - the helper is not part of this section). */
2275 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2276 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2277 }
2278 else if (nltype == NLTYPE_ANYCRLF)
2279 {
/* TMP2 = (TMP1 == CR) | (TMP1 == NL); the OR also sets the flags. */
2280 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2281 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2282 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2283 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2284 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2285 }
2286 else
2287 {
/* Only a single-unit fixed newline (value below 256) is accepted here. */
2288 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2289 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2290 }
2291 }
2292
2293 #ifdef SUPPORT_UTF
2294
2295 #if defined COMPILE_PCRE8
2296 static void do_utfreadchar(compiler_common *common)
2297 {
2298 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2299 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
/* The following bytes are read at STR_PTR + 1, + 2 and + 3, so STR_PTR must
still address the first byte on entry. STR_PTR is advanced by the same
length - 1 value that is returned in TMP2. No validity checks are made on
the bytes that are read. The routine is entered and left with the
fast-call convention; the return address is kept in RETURN_ADDR. */
2300 DEFINE_COMPILER;
2301 struct sljit_jump *jump;
2302
2303 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2304 /* Searching for the first zero. */
/* Bit 0x20 clear in the first byte selects the two byte form. */
2305 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2306 jump = JUMP(SLJIT_C_NOT_ZERO);
2307 /* Two byte sequence. */
/* Result: ((b0 & 0x1f) << 6) | (b1 & 0x3f). */
2308 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2309 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2310 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2311 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2312 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2313 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2314 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2315 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2316 JUMPHERE(jump);
2317
/* Bit 0x10 clear in the first byte selects the three byte form. */
2318 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2319 jump = JUMP(SLJIT_C_NOT_ZERO);
2320 /* Three byte sequence. */
/* Result: ((b0 & 0x0f) << 12) | ((b1 & 0x3f) << 6) | (b2 & 0x3f). */
2321 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2322 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2323 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2324 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2325 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2326 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2327 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2328 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2329 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2330 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2331 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2332 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2333 JUMPHERE(jump);
2334
2335 /* Four byte sequence. */
/* Every remaining first byte is decoded as a four byte character:
((b0 & 0x07) << 18) | ((b1 & 0x3f) << 12) | ((b2 & 0x3f) << 6) | (b3 & 0x3f). */
2336 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2337 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2338 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2339 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2340 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2341 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2342 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2343 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2344 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2345 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2346 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2347 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2348 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2349 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2350 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2351 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2352 }
2353
2354 static void do_utfreadtype8(compiler_common *common)
2355 {
2356 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2357 of the character (>= 0xc0). Return value in TMP1. */
/* The second byte is read at STR_PTR + 0, so STR_PTR must already point past
the first byte on entry. STR_PTR is advanced over the rest of the character
and TMP2 is destroyed. The return address is kept in RETURN_ADDR. */
2358 DEFINE_COMPILER;
2359 struct sljit_jump *jump;
2360 struct sljit_jump *compare;
2361
2362 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2363
/* Bit 0x20 clear in the first byte selects the two byte form, the only one
that can encode a value which still has a ctypes entry. */
2364 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2365 jump = JUMP(SLJIT_C_NOT_ZERO);
2366 /* Two byte sequence. */
2367 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2368 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2369 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2370 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2371 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2372 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* Decoded value in TMP2: look it up if it is <= 255, otherwise type 0. */
2373 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2374 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2375 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2376
2377 JUMPHERE(compare);
2378 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2379 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2380 JUMPHERE(jump);
2381
2382 /* We only have types for characters less than 256. */
/* The character is not decoded here: the utf8_table4 entry for the first
byte (index first byte - 0xc0) is added to STR_PTR - presumably the number
of additional bytes, the table itself is defined elsewhere - and type 0 is
returned. */
2383 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
2384 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2385 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2386 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2387 }
2388
2389 #elif defined COMPILE_PCRE16
2390
2391 static void do_utfreadchar(compiler_common *common)
2392 {
2393 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2394 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
/* The second unit of a pair is read at STR_PTR + 1, so STR_PTR must still
address the first unit on entry. STR_PTR is advanced by the same length - 1
value that is returned in TMP2. The return address is kept in RETURN_ADDR. */
2395 DEFINE_COMPILER;
2396 struct sljit_jump *jump;
2397
2398 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2399 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2400 /* Do nothing, only return. */
/* The unit is >= 0xdc00 and is returned unchanged as a one unit character.
TMP2 still has to carry length - 1 == 0, because callers may subtract it
from STR_PTR after the call. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2401 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2402
2403 JUMPHERE(jump);
2404 /* Combine two 16 bit characters. */
/* Result: (((u0 & 0x3ff) << 10) | (u1 & 0x3ff)) + 0x10000. */
2405 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2406 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2407 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2408 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2409 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2410 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2411 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2412 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2413 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2414 }
2415
2416 #endif /* COMPILE_PCRE[8|16] */
2417
2418 #endif /* SUPPORT_UTF */
2419
2420 #ifdef SUPPORT_UCP
2421
2422 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2423 #define UCD_BLOCK_MASK 127
2424 #define UCD_BLOCK_SHIFT 7
2425
2426 static void do_getucd(compiler_common *common)
2427 {
2428 /* Searches the UCD record for the character that comes in TMP1.
2429 Returns chartype in TMP1 and UCD offset in TMP2. */
/* Emitted as a fast-call helper; the return address is kept in RETURN_ADDR.
The lookup is
  block   = ucd_stage1[c >> UCD_BLOCK_SHIFT]
  TMP2    = ucd_stage2[(block << UCD_BLOCK_SHIFT) + (c & UCD_BLOCK_MASK)]
  TMP1    = ucd_records[TMP2].chartype
No range check is made on the incoming character. */
2430 DEFINE_COMPILER;
2431
/* The shift counts used below (1 and 3) and the block constants depend on
these sizes. */
2432 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2433
2434 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2435 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2436 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
2437 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2438 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2439 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2440 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
/* Index scaled by 2 (shift 1): stage2 is read as unsigned 16 bit values. */
2441 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2442 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
/* Index scaled by 8 (shift 3), the record size asserted above. */
2443 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2444 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2445 }
2446 #endif
2447
2448 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2449 {
/* Emits the code that moves STR_PTR on by one character, and returns the
label of that code. The first execution skips the advance: an unconditional
jump (start) emitted before the label lands right after it.

If firstline is TRUE, code is emitted first that finds the end of the first
line and stores it in the common->first_line_end local; STR_PTR is saved in
TMP3 during that scan and restored afterwards.

A newline check is added when neither hascrorlf nor firstline is set and the
newline type is NLTYPE_ANY, NLTYPE_ANYCRLF or a two unit fixed newline
(common->newline > 255): when the unit at STR_PTR equals the high byte of
common->newline and the following unit equals its low byte, both units are
stepped over. */
2450 DEFINE_COMPILER;
2451 struct sljit_label *mainloop;
2452 struct sljit_label *newlinelabel = NULL;
2453 struct sljit_jump *start;
2454 struct sljit_jump *end = NULL;
2455 struct sljit_jump *nl = NULL;
2456 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2457 struct sljit_jump *singlechar;
2458 #endif
2459 jump_list *newline = NULL;
2460 BOOL newlinecheck = FALSE;
2461 BOOL readuchar = FALSE;
2462
2463 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2464 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2465 newlinecheck = TRUE;
2466
2467 if (firstline)
2468 {
2469 /* Search for the end of the first line. */
2470 SLJIT_ASSERT(common->first_line_end != 0);
2471 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2472
2473 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2474 {
/* Two unit newline: advance unit by unit, comparing the units at
STR_PTR - 1 and STR_PTR with the high and low byte of the newline. The
stored end is STR_PTR - 1 unit, both on a match and when STR_END is hit. */
2475 mainloop = LABEL();
2476 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2477 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2478 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2479 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2480 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2481 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2482 JUMPHERE(end);
2483 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2484 }
2485 else
2486 {
/* Other newline types: the position before each character is stored
before the character is read, so when a newline is found the stored value
is the start of that newline. If the subject ends first, STR_PTR itself
(the end position) is stored. */
2487 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2488 mainloop = LABEL();
2489 /* Continual stores do not cause a data dependency. */
2490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2491 read_char(common);
2492 check_newlinechar(common, common->nltype, &newline, TRUE);
2493 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2494 JUMPHERE(end);
2495 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2496 set_jumps(newline, LABEL());
2497 }
2498
2499 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2500 }
2501
/* Taken on the first pass only: skips all of the advance code below. */
2502 start = JUMP(SLJIT_JUMP);
2503
2504 if (newlinecheck)
2505 {
/* Reached from the main loop when the current unit equals the high byte of
the newline. Steps over that unit and, unless the end of the subject has
been reached, over the next unit as well if it equals the low byte
(TMP1 is 0 or 1, scaled to the unit size). */
2506 newlinelabel = LABEL();
2507 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2508 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2509 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2510 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2511 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2512 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2513 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2514 #endif
2515 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2516 nl = JUMP(SLJIT_JUMP);
2517 }
2518
/* This is the label handed back to the caller. */
2519 mainloop = LABEL();
2520
2521 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is only loaded when something below inspects it: the UTF
length computation or the newline check. */
2522 #ifdef SUPPORT_UTF
2523 if (common->utf) readuchar = TRUE;
2524 #endif
2525 if (newlinecheck) readuchar = TRUE;
2526
2527 if (readuchar)
2528 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2529
2530 if (newlinecheck)
2531 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2532
2533 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2534 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2535 #if defined COMPILE_PCRE8
2536 if (common->utf)
2537 {
/* First bytes >= 0xc0: add the utf8_table4 entry for the byte (presumably
the number of additional bytes; the table is defined elsewhere). */
2538 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2539 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2540 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2541 JUMPHERE(singlechar);
2542 }
2543 #elif defined COMPILE_PCRE16
2544 if (common->utf)
2545 {
/* A unit with (unit & 0xfc00) == 0xd800 is followed by a second unit of the
same character: add one more unit (two bytes) in that case. */
2546 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2547 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2548 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2549 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2550 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2551 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2552 JUMPHERE(singlechar);
2553 }
2554 #endif /* COMPILE_PCRE[8|16] */
2555 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2556 JUMPHERE(start);
2557
2558 if (newlinecheck)
2559 {
/* Both exits of the newline skip code continue here as well. */
2560 JUMPHERE(end);
2561 JUMPHERE(nl);
2562 }
2563
2564 return mainloop;
2565 }
2566
2567 #define MAX_N_CHARS 3
2568
2569 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2570 {
/* Tries to emit a loop that moves STR_PTR forward to the first position
where the first two or three code units of the pattern match.

Returns TRUE if the loop was emitted. Returns FALSE, without emitting
anything, when the pattern has alternatives, when fewer than two fixed
code units can be collected from its start, or when a caseless character
has an other case for which char_get_othercase_bit returns 0.

The collected units are kept in chars[] as pairs: chars[2 * i] is the value
to compare with and chars[2 * i + 1] is a mask that is OR'ed into the
subject unit first (0 when the unit is matched exactly). For a caseless
unit the mask is already OR'ed into the compare value.

If firstline is TRUE the search is limited by the common->first_line_end
local instead of STR_END. TMP1, TMP2 and TMP3 are used by the emitted
code; STR_END is changed temporarily and restored before the code ends. */
2571 DEFINE_COMPILER;
2572 struct sljit_label *start;
2573 struct sljit_jump *quit;
2574 pcre_uint32 chars[MAX_N_CHARS * 2];
2575 pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
2576 int location = 0;
2577 pcre_int32 len, c, bit, caseless;
2578 int must_stop;
2579
2580 /* We do not support alternatives now. */
2581 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2582 return FALSE;
2583
/* must_stop: 0 = the character is consumed and scanning goes on,
1 = the character is used but scanning stops after it (repeats),
2 = the opcode cannot be used at all. */
2584 while (TRUE)
2585 {
2586 caseless = 0;
2587 must_stop = 1;
2588 switch(*cc)
2589 {
2590 case OP_CHAR:
2591 must_stop = 0;
2592 cc++;
2593 break;
2594
2595 case OP_CHARI:
2596 caseless = 1;
2597 must_stop = 0;
2598 cc++;
2599 break;
2600
2601 case OP_SOD:
2602 case OP_SOM:
2603 case OP_SET_SOM:
2604 case OP_NOT_WORD_BOUNDARY:
2605 case OP_WORD_BOUNDARY:
2606 case OP_EODN:
2607 case OP_EOD:
2608 case OP_CIRC:
2609 case OP_CIRCM:
2610 case OP_DOLL:
2611 case OP_DOLLM:
2612 /* Zero width assertions. */
2613 cc++;
2614 continue;
2615
2616 case OP_PLUS:
2617 case OP_MINPLUS:
2618 case OP_POSPLUS:
2619 cc++;
2620 break;
2621
2622 case OP_EXACT:
2623 cc += 1 + IMM2_SIZE;
2624 break;
2625
2626 case OP_PLUSI:
2627 case OP_MINPLUSI:
2628 case OP_POSPLUSI:
2629 caseless = 1;
2630 cc++;
2631 break;
2632
2633 case OP_EXACTI:
2634 caseless = 1;
2635 cc += 1 + IMM2_SIZE;
2636 break;
2637
2638 default:
2639 must_stop = 2;
2640 break;
2641 }
2642
2643 if (must_stop == 2)
2644 break;
2645
/* cc now points to the character; len is its length in code units. */
2646 len = 1;
2647 #ifdef SUPPORT_UTF
2648 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2649 #endif
2650
/* Repack the othercase information so that the low byte of caseless is the
value len has when the affected code unit is reached in the loop below,
and the bits above it are the mask for that unit. */
2651 if (caseless && char_has_othercase(common, cc))
2652 {
2653 caseless = char_get_othercase_bit(common, cc);
2654 if (caseless == 0)
2655 return FALSE;
2656 #ifdef COMPILE_PCRE8
2657 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2658 #else
2659 if ((caseless & 0x100) != 0)
2660 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2661 else
2662 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2663 #endif
2664 }
2665 else
2666 caseless = 0;
2667
2668 while (len > 0 && location < MAX_N_CHARS * 2)
2669 {
2670 c = *cc;
2671 bit = 0;
2672 if (len == (caseless & 0xff))
2673 {
2674 bit = caseless >> 8;
2675 c |= bit;
2676 }
2677
2678 chars[location] = c;
2679 chars[location + 1] = bit;
2680
2681 len--;
2682 location += 2;
2683 cc++;
2684 }
2685
2686 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2687 break;
2688 }
2689
2690 /* At least two characters are required. */
2691 if (location < 2 * 2)
2692 return FALSE;
2693
/* The loop reads (location >> 1) code units starting at STR_PTR, so the end
limit is lowered by one unit less than that. The amount has to be given in
bytes, hence IN_UCHARS(): a plain unit count is too small (and may even
leave STR_END misaligned) when code units are wider than one byte. */
2694 if (firstline)
2695 {
2696 SLJIT_ASSERT(common->first_line_end != 0);
2697 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2698 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2699 }
2700 else
2701 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2702
2703 start = LABEL();
2704 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2705
/* The first two units are loaded before STR_PTR is advanced, so every
failed comparison can jump straight back to start. */
2706 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2707 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2708 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2709 if (chars[1] != 0)
2710 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2711 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* The third unit, if there is one, is loaded into TMP1, which is free now. */
2712 if (location > 2 * 2)
2713 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2714 if (chars[3] != 0)
2715 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2716 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2717 if (location > 2 * 2)
2718 {
2719 if (chars[5] != 0)
2720 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2721 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2722 }
/* Everything matched: undo the advance so STR_PTR is at the first unit. */
2723 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2724
2725 JUMPHERE(quit);
2726
/* Put the real end of the subject back into STR_END. */
2727 if (firstline)
2728 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2729 else
2730 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2731 return TRUE;
2732 }
2733
2734 #undef MAX_N_CHARS
2735
2736 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2737 {
/* Emits a loop that moves STR_PTR forward, one code unit at a time, until
the unit at STR_PTR is first_char (or, if caseless is TRUE, its other case)
or the end limit is reached. If firstline is TRUE the limit is the
common->first_line_end local instead of STR_END; STR_END is saved in TMP3
for the duration of the loop. TMP1 and TMP2 are used as scratch. */
2738 DEFINE_COMPILER;
2739 struct sljit_label *start;
2740 struct sljit_jump *quit;
2741 struct sljit_jump *found;
2742 pcre_uchar oc, bit;
2743
2744 if (firstline)
2745 {
2746 SLJIT_ASSERT(common->first_line_end != 0);
2747 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2748 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2749 }
2750
2751 start = LABEL();
2752 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2753 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2754
/* oc is the other case of first_char; it stays equal to first_char when
matching is caseful. */
2755 oc = first_char;
2756 if (caseless)
2757 {
2758 oc = TABLE_GET(first_char, common->fcc, first_char);
2759 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2760 if (first_char > 127 && common->utf)
2761 oc = UCD_OTHERCASE(first_char);
2762 #endif
2763 }
2764 if (first_char == oc)
2765 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2766 else
2767 {
2768 bit = first_char ^ oc;
2769 if (is_powerof2(bit))
2770 {
/* The two cases differ in a single bit: set that bit in the subject unit
and do one comparison. */
2771 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2772 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2773 }
2774 else
2775 {
/* Otherwise: TMP2 = (TMP1 == first_char) | (TMP1 == oc). */
2776 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2777 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2778 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2779 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2780 found = JUMP(SLJIT_C_NOT_ZERO);
2781 }
2782 }
2783
2784 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2785 JUMPTO(SLJIT_JUMP, start);
2786 JUMPHERE(found);
2787 JUMPHERE(quit);
2788
2789 if (firstline)
2790 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2791 }
2792
2793 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2794 {
/* Emits code that moves STR_PTR forward to the position just after the next
newline, or to the end limit if no newline is found. Nothing is moved when
STR_PTR is not greater than the str field of the jit_arguments block. The
search starts slightly before STR_PTR, so a newline that ends exactly at
STR_PTR is found and leaves STR_PTR where it is.

If firstline is TRUE the limit is the common->first_line_end local instead
of STR_END. STR_END is saved in TMP3 at the top and restored from TMP3 on
both exits. TMP1 and TMP2 are used as scratch. */
2795 DEFINE_COMPILER;
2796 struct sljit_label *loop;
2797 struct sljit_jump *lastchar;
2798 struct sljit_jump *firstchar;
2799 struct sljit_jump *quit;
2800 struct sljit_jump *foundcr = NULL;
2801 struct sljit_jump *notfoundnl;
2802 jump_list *newline = NULL;
2803
2804 if (firstline)
2805 {
2806 SLJIT_ASSERT(common->first_line_end != 0);
2807 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2808 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2809 }
2810
2811 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2812 {
/* Two unit fixed newline. The loop advances first and then compares the
units at STR_PTR - 2 and STR_PTR - 1, so STR_PTR is first moved back by one
unit, but only if it is at least two units after the begin field of the
arguments (TMP2 is 0 or 1, scaled to the unit size). */
2813 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2814 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2815 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2816 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2817 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2818
2819 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2820 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2821 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2822 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2823 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
2824 #endif
2825 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2826
2827 loop = LABEL();
2828 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2829 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2830 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2831 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2832 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2833 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2834
2835 JUMPHERE(quit);
2836 JUMPHERE(firstchar);
2837 JUMPHERE(lastchar);
2838
/* STR_END was saved in TMP3 above, and this path only uses TMP1 and TMP2,
so TMP3 still holds it. */
2839 if (firstline)
2840 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2841 return;
2842 }
2843
/* All other newline types: go back one character, then read characters
until one of them is a newline or the limit is reached. */
2844 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2845 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2846 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2847 skip_char_back(common);
2848
2849 loop = LABEL();
2850 read_char(common);
2851 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* CR is handled separately below, because a following NL belongs to it. */
2852 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2853 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* jumpiftrue is FALSE: the jumps collected in newline are taken for
characters that are not newlines, and lead back to the loop. */
2854 check_newlinechar(common, common->nltype, &newline, FALSE);
2855 set_jumps(newline, loop);
2856
2857 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2858 {
2859 quit = JUMP(SLJIT_JUMP);
2860 JUMPHERE(foundcr);
/* After a CR: if the subject continues with NL, step over it too
(TMP1 is 0 or 1, scaled to the unit size). */
2861 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2862 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2863 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2864 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2865 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2866 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2867 #endif
2868 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2869 JUMPHERE(notfoundnl);
2870 JUMPHERE(quit);
2871 }
2872 JUMPHERE(lastchar);
2873 JUMPHERE(firstchar);
2874
2875 if (firstline)
2876 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2877 }
2878
2879 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2880 {
/* Emits a loop that moves STR_PTR forward, one character at a time, until the
first code unit of the character at STR_PTR has its bit set in a bitmap, or
the end limit is reached. start_bits is used as the address of the bitmap:
the byte at start_bits + (c >> 3) is tested against the mask 1 << (c & 7).

If firstline is TRUE the limit is the common->first_line_end local instead
of STR_END; STR_END is saved in RETURN_ADDR here, because TMP3 is needed by
the UTF code below. TMP1 and TMP2 are used as scratch. */
2881 DEFINE_COMPILER;
2882 struct sljit_label *start;
2883 struct sljit_jump *quit;
2884 struct sljit_jump *found;
2885 #ifndef COMPILE_PCRE8
2886 struct sljit_jump *jump;
2887 #endif
2888
2889 if (firstline)
2890 {
2891 SLJIT_ASSERT(common->first_line_end != 0);
2892 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
2893 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2894 }
2895
2896 start = LABEL();
2897 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2898 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* Keep the unmodified unit: the character length is derived from it when
the bitmap test fails. */
2899 #ifdef SUPPORT_UTF
2900 if (common->utf)
2901 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2902 #endif
/* With wider code units every value of 255 or more is tested as 255, which
keeps the index inside a 256 bit map. */
2903 #ifndef COMPILE_PCRE8
2904 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2905 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2906 JUMPHERE(jump);
2907 #endif
/* TMP1 = bitmap byte for the unit, TMP2 = 1 << (unit & 7). */
2908 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2909 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2910 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2911 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2912 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2913 found = JUMP(SLJIT_C_NOT_ZERO);
2914
/* Not a possible start: step over the whole character and try again. */
2915 #ifdef SUPPORT_UTF
2916 if (common->utf)
2917 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2918 #endif
2919 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2920 #ifdef SUPPORT_UTF
2921 #if defined COMPILE_PCRE8
2922 if (common->utf)
2923 {
/* First bytes >= 0xc0: add the utf8_table4 entry for the byte (presumably
the number of additional bytes; the table is defined elsewhere). */
2924 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2925 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2926 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2927 }
2928 #elif defined COMPILE_PCRE16
2929 if (common->utf)
2930 {
/* A unit with (unit & 0xfc00) == 0xd800 is followed by a second unit of the
same character: add one more unit (two bytes) in that case. */
2931 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2932 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2933 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2934 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2935 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2936 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2937 }
2938 #endif /* COMPILE_PCRE[8|16] */
2939 #endif /* SUPPORT_UTF */
2940 JUMPTO(SLJIT_JUMP, start);
2941 JUMPHERE(found);
2942 JUMPHERE(quit);
2943
2944 if (firstline)
2945 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
2946 }
2947
2948 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2949 {
/* Emits a forward search for req_char (or, if caseless is TRUE, its other
case) between STR_PTR and STR_END. STR_PTR itself is not moved.

The returned jump is taken when the search reaches STR_END without finding
the character; the caller has to set its target. When the character is
found, its position is stored in the common->req_char_ptr local.

The search is skipped when STR_PTR + REQ_BYTE_MAX is still below STR_END
(REQ_BYTE_MAX is added as a plain byte offset), and when STR_PTR is below
the position already stored in req_char_ptr. If has_firstchar is TRUE the
search starts one code unit after STR_PTR. TMP1 and TMP2 are used as
scratch. */
2950 DEFINE_COMPILER;
2951 struct sljit_label *loop;
2952 struct sljit_jump *toolong;
2953 struct sljit_jump *alreadyfound;
2954 struct sljit_jump *found;
2955 struct sljit_jump *foundoc = NULL;
2956 struct sljit_jump *notfound;
2957 pcre_uint32 oc, bit;
2958
2959 SLJIT_ASSERT(common->req_char_ptr != 0);
2960 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2961 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2962 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2963 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2964
/* TMP1 is the search position from here on. */
2965 if (has_firstchar)
2966 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2967 else
2968 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2969
2970 loop = LABEL();
2971 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2972
2973 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char; it stays equal to req_char when
matching is caseful. */
2974 oc = req_char;
2975 if (caseless)
2976 {
2977 oc = TABLE_GET(req_char, common->fcc, req_char);
2978 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2979 if (req_char > 127 && common->utf)
2980 oc = UCD_OTHERCASE(req_char);
2981 #endif
2982 }
2983 if (req_char == oc)
2984 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2985 else
2986 {
2987 bit = req_char ^ oc;
2988 if (is_powerof2(bit))
2989 {
/* The two cases differ in a single bit: set that bit in the subject unit
and do one comparison. */
2990 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2991 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2992 }
2993 else
2994 {
/* Otherwise each case gets its own comparison and jump. */
2995 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2996 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2997 }
2998 }
2999 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3000 JUMPTO(SLJIT_JUMP, loop);
3001
3002 JUMPHERE(found);
3003 if (foundoc)
3004 JUMPHERE(foundoc);
/* Remember where the character was found; the alreadyfound test above uses
this value on later calls. */
3005 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3006 JUMPHERE(alreadyfound);
3007 JUMPHERE(toolong);
3008 return notfound;
3009 }
3010
3011 static void do_revertframes(compiler_common *common)
3012 {
/* Emits a fast-call helper (return address in RETURN_ADDR) that walks the
entries stored from STACK_TOP upwards and writes the saved values back.
TMP1 is the walking pointer; STACK_TOP itself is not changed. TMP2 and TMP3
are used as scratch.

The first word of every entry selects its kind (signed comparison):
- greater than frame_end: the word is an offset from the local base. The
  next two words are copied to that location and the one after it; the
  entry is three words long.
- frame_end: the walk is finished and the helper returns.
- frame_setstrbegin: the next word is stored in OVECTOR(0); two words.
- frame_setmark (only checked when common->mark_ptr is not 0): the next
  word is stored in the mark_ptr local; two words.
- anything else: skipped as a two word entry. */
3013 DEFINE_COMPILER;
3014 struct sljit_jump *jump;
3015 struct sljit_label *mainloop;
3016
3017 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3018 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
/* TMP3 = address of the local area; offsets read from the entries are
added to it. */
3019 GET_LOCAL_BASE(TMP3, 0, 0);
3020
3021 /* Drop frames until we reach STACK_TOP. */
3022 mainloop = LABEL();
3023 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3024 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3025 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3026 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3027 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
3028 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
3029 JUMPTO(SLJIT_JUMP, mainloop);
3030
3031 JUMPHERE(jump);
3032 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3033 /* End of dropping frames. */
3034 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3035
3036 JUMPHERE(jump);
3037 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
3038 /* Set string begin. */
3039 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3040 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3042 JUMPTO(SLJIT_JUMP, mainloop);
3043
3044 JUMPHERE(jump);
3045 if (common->mark_ptr != 0)
3046 {
/* Set mark. */
3047 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3048 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3049 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3050 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3051 JUMPTO(SLJIT_JUMP, mainloop);
3052
3053 JUMPHERE(jump);
3054 }
3055
3056 /* Unknown command. */
3057 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3058 JUMPTO(SLJIT_JUMP, mainloop);
3059 }
3060
3061 static void check_wordboundary(compiler_common *common)
3062 {
/* Emits a fast-call helper that tests whether STR_PTR is at a word boundary.
The result is returned in the flags, which are set by the final XOR of
"next character is a word character" and "previous character is a word
character" (each 0 or 1): the result is non-zero at a boundary.

A missing character - before the begin field of the arguments, or where
check_str_end() reports the end - counts as a non-word character.

The return address is kept in the LOCALS0 local rather than in RETURN_ADDR,
which is used by the helpers called from here (getucd is entered with
RETURN_ADDR). LOCALS1 holds the result for the previous character. STR_PTR
is moved back and forward again by one character; TMP1 and TMP2 are used
as scratch. */
3063 DEFINE_COMPILER;
3064 struct sljit_jump *skipread;
3065 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3066 struct sljit_jump *jump;
3067 #endif
3068
/* The table lookups below extract the word bit with a shift by 4. */
3069 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3070
3071 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3072 /* Get type of the previous char, and put it to LOCALS1. */
3073 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3074 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3075 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3076 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again, which puts STR_PTR back at
its original position. */
3077 skip_char_back(common);
3078 check_start_used_ptr(common);
3079 read_char(common);
3080
3081 /* Testing char type. */
3082 #ifdef SUPPORT_UCP
3083 if (common->use_ucp)
3084 {
/* Word character: underscore, or a chartype in the range ucp_Ll..ucp_Lu
or ucp_Nd..ucp_No. Each range is tested with one subtraction and one
unsigned comparison. */
3085 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3086 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3087 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3088 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3089 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3090 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3091 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3092 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3093 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3094 JUMPHERE(jump);
3095 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3096 }
3097 else
3098 #endif
3099 {
/* Table based test: bit 4 of the ctypes entry. Characters above 255 have
no entry; for them the store is skipped and LOCALS1 keeps its 0. */
3100 #ifndef COMPILE_PCRE8
3101 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3102 #elif defined SUPPORT_UTF
3103 /* Here LOCALS1 has already been zeroed. */
3104 jump = NULL;
3105 if (common->utf)
3106 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3107 #endif /* COMPILE_PCRE8 */
3108 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3109 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3110 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3111 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3112 #ifndef COMPILE_PCRE8
3113 JUMPHERE(jump);
3114 #elif defined SUPPORT_UTF
3115 if (jump != NULL)
3116 JUMPHERE(jump);
3117 #endif /* COMPILE_PCRE8 */
3118 }
3119 JUMPHERE(skipread);
3120
/* Now the character at STR_PTR; its result is built in TMP2, which stays 0
when there is no character to look at. */
3121 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3122 skipread = check_str_end(common);
3123 peek_char(common);
3124
3125 /* Testing char type. This is a code duplication. */
3126 #ifdef SUPPORT_UCP
3127 if (common->use_ucp)
3128 {
3129 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3130 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3131 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3132 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3133 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3134 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3135 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3136 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3137 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3138 JUMPHERE(jump);
3139 }
3140 else
3141 #endif
3142 {
3143 #ifndef COMPILE_PCRE8
3144 /* TMP2 may be destroyed by peek_char. */
3145 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3146 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3147 #elif defined SUPPORT_UTF
3148 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3149 jump = NULL;
3150 if (common->utf)
3151 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3152 #endif
3153 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3154 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3155 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3156 #ifndef COMPILE_PCRE8
3157 JUMPHERE(jump);
3158 #elif defined SUPPORT_UTF
3159 if (jump != NULL)
3160 JUMPHERE(jump);
3161 #endif /* COMPILE_PCRE8 */
3162 }
3163 JUMPHERE(skipread);
3164
/* The flags of this XOR are the result of the helper. */
3165 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3166 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3167 }
3168
3169 /*
3170 range format:
3171
3172 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3173 ranges[1] = first bit (0 or 1)
3174 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3175 */
3176
3177 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3178 {
3179 DEFINE_COMPILER;
3180 struct sljit_jump *jump;
3181
3182 if (ranges[0] < 0)
3183 return FALSE;
3184
3185 switch(ranges[0])
3186 {
3187 case 1:
3188 if (readch)
3189 read_char(common);
3190 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3191 return TRUE;
3192
3193 case 2:
3194 if (readch)
3195 read_char(common);
3196 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3197 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3198 return TRUE;
3199
3200 case 4:
3201 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3202 {
3203 if (readch)
3204 read_char(common);
3205 if (ranges[1] != 0)
3206 {
3207 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3208 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3209 }
3210 else
3211 {
3212 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3213 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3214 JUMPHERE(jump);
3215 }
3216 return TRUE;
3217 }
3218 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3219 {
3220 if (readch)
3221 read_char(common);
3222 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3223 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3224 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3225 return TRUE;
3226 }
3227 return FALSE;
3228
3229 default:
3230 return FALSE;
3231 }
3232 }
3233
3234 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3235 {
3236 int i, bit, length;
3237 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3238
3239 bit = ctypes[0] & flag;
3240 ranges[0] = -1;
3241 ranges[1] = bit != 0 ? 1 : 0;
3242 length = 0;
3243
3244 for (i = 1; i < 256; i++)
3245 if ((ctypes[i] & flag) != bit)
3246 {
3247 if (length >= MAX_RANGE_SIZE)
3248 return;
3249 ranges[2 + length] = i;
3250 length++;
3251 bit ^= flag;
3252 }
3253
3254 if (bit != 0)
3255 {
3256 if (length >= MAX_RANGE_SIZE)
3257 return;
3258 ranges[2 + length] = 256;
3259 length++;
3260 }
3261 ranges[0] = length;
3262 }
3263
3264 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3265 {
3266 int ranges[2 + MAX_RANGE_SIZE];
3267 pcre_uint8 bit, cbit, all;
3268 int i, byte, length = 0;
3269
3270 bit = bits[0] & 0x1;
3271 ranges[1] = bit;
3272 /* Can be 0 or 255. */
3273 all = -bit;
3274
3275 for (i = 0; i < 256; )
3276 {
3277 byte = i >> 3;
3278 if ((i & 0x7) == 0 && bits[byte] == all)
3279 i += 8;
3280 else
3281 {
3282 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3283 if (cbit != bit)
3284 {
3285 if (length >= MAX_RANGE_SIZE)
3286 return FALSE;
3287 ranges[2 + length] = i;
3288 length++;
3289 bit = cbit;
3290 all = -cbit;
3291 }
3292 i++;
3293 }
3294 }
3295
3296 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3297 {
3298 if (length >= MAX_RANGE_SIZE)
3299 return FALSE;
3300 ranges[2 + length] = 256;
3301 length++;
3302 }
3303 ranges[0] = length;
3304
3305 return check_ranges(common, ranges, backtracks, FALSE);
3306 }
3307
/* Emits a helper routine (fast enter / fast return through RETURN_ADDR) that
tests the character in TMP1 against 0x0a-0x0d and 0x85, plus 0x2028/0x2029
when wide characters are possible (16/32-bit builds, or 8-bit builds with
common->utf set). Each hit is accumulated into TMP2. The last COND_VALUE uses
SLJIT_SET_E, so presumably the caller branches on the zero flag - confirm
against the call sites. TMP1 is modified as well (0x0a is subtracted). */
3308 static void check_anynewline(compiler_common *common)
3309 {
3310 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3311 DEFINE_COMPILER;
3312
3313 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3314
/* Rebase to 0x0a so one unsigned compare covers 0x0a..0x0d. */
3315 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3316 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3317 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3318 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3319 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3320 #ifdef COMPILE_PCRE8
3321 if (common->utf)
3322 {
3323 #endif
3324 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit folds 0x2028 onto 0x2029 (relative to 0x0a), so a
single equality test covers both. */
3325 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3326 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3327 #ifdef COMPILE_PCRE8
3328 }
3329 #endif
3330 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of the last compare emitted above (0x85, or 0x2029 in
the wide path) and updates the flags from the final TMP2 value. */
3331 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3332 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3333 }
3334
/* Emits a helper routine (fast enter / fast return through RETURN_ADDR) that
tests the character in TMP1 against the horizontal white space characters
0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f and
0x3000 when wide characters are possible (16/32-bit builds, or 8-bit builds
with common->utf set). Each hit is accumulated into TMP2. The last COND_VALUE
uses SLJIT_SET_E, so presumably the caller branches on the zero flag -
confirm against the call sites. */
3335 static void check_hspace(compiler_common *common)
3336 {
3337 /* Check whether TMP1 contains a horizontal white space character. TMP2 destroyed.
In the wide character path TMP1 is modified too (0x2000 is subtracted). */
3338 DEFINE_COMPILER;
3339
3340 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3341
3342 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3343 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3344 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3345 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3346 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3347 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3348 #ifdef COMPILE_PCRE8
3349 if (common->utf)
3350 {
3351 #endif
3352 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3353 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3354 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3355 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3356 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase to 0x2000: one unsigned compare covers 0x2000..0x200a, and the
remaining constants are written relative to 0x2000. */
3357 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3358 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3359 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3360 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3361 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3362 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3363 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3364 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3365 #ifdef COMPILE_PCRE8
3366 }
3367 #endif
3368 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of the last compare emitted above (0xa0, or 0x3000 in
the wide path) and updates the flags from the final TMP2 value. */
3369 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3370
3371 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3372 }
3373
/* Emits a helper routine (fast enter / fast return through RETURN_ADDR) that
tests the character in TMP1 against the vertical white space characters
0x0a-0x0d and 0x85, plus 0x2028/0x2029 when wide characters are possible
(16/32-bit builds, or 8-bit builds with common->utf set). Each hit is
accumulated into TMP2. The last COND_VALUE uses SLJIT_SET_E, so presumably
the caller branches on the zero flag - confirm against the call sites. */
3374 static void check_vspace(compiler_common *common)
3375 {
3376 /* Check whether TMP1 contains a vertical white space character. TMP2 destroyed.
TMP1 is modified too (0x0a is subtracted). */
3377 DEFINE_COMPILER;
3378
3379 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3380
/* Rebase to 0x0a so one unsigned compare covers 0x0a..0x0d. */
3381 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3382 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3383 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3384 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3385 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3386 #ifdef COMPILE_PCRE8
3387 if (common->utf)
3388 {
3389 #endif
/* NOTE(review): check_anynewline emits the same step without SLJIT_SET_E;
the flags are overwritten by the next compare anyway - confirm it is
redundant here. */
3390 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit folds 0x2028 onto 0x2029 (relative to 0x0a), so a
single equality test covers both. */
3391 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3392 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3393 #ifdef COMPILE_PCRE8
3394 }
3395 #endif
3396 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3397 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3398
3399 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3400 }
3401
/* The comparison helpers below borrow the STR_END and STACK_TOP registers to
hold the two characters being compared. Each helper saves the previous
contents on entry and restores them before returning. */
3402 #define CHAR1 STR_END
3403 #define CHAR2 STACK_TOP
3404
/* Emits a helper routine (fast enter / fast return through RETURN_ADDR) that
compares two character sequences unit by unit, case sensitively. As emitted:
TMP1 addresses the first sequence, TMP2 holds the remaining length in bytes
(it is decremented by IN_UCHARS(1) per unit) and STR_PTR is first moved back
by TMP2 to address the second sequence. The loop stops at the first
difference or when TMP2 reaches zero.
NOTE(review): how the caller reads the outcome (flags or TMP2) is not visible
in this routine - confirm at the call sites. */
3405 static void do_casefulcmp(compiler_common *common)
3406 {
3407 DEFINE_COMPILER;
3408 struct sljit_jump *jump;
3409 struct sljit_label *label;
3410
3411 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3412 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Preserve the registers aliased by CHAR1 and CHAR2. */
3413 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3414 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one unit because every load below uses
an offset of IN_UCHARS(1); MOVU_UCHAR is presumably the base-updating load
form - confirm. */
3415 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3416 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3417
3418 label = LABEL();
3419 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3420 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3421 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3422 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3423 JUMPTO(SLJIT_C_NOT_ZERO, label);
3424
/* Common exit for both "mismatch" and "all units compared". */
3425 JUMPHERE(jump);
3426 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3427 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3428 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3429 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3430 }
3431
/* The caseless helper borrows the STACK_LIMIT register to hold the address
of the lower casing table; the previous value is kept in TMP3 meanwhile. */
3432 #define LCC_TABLE STACK_LIMIT
3433
/* Emits a helper routine (fast enter / fast return through RETURN_ADDR) that
compares two character sequences unit by unit after mapping each unit through
the common->lcc table. In non 8-bit builds units above 255 skip the table
lookup and are compared unchanged. Register use mirrors do_casefulcmp: TMP1
addresses the first sequence, TMP2 is the remaining length in bytes and
STR_PTR is first moved back by TMP2 to address the second sequence.
NOTE(review): how the caller reads the outcome (flags or TMP2) is not visible
in this routine - confirm at the call sites. */
3434 static void do_caselesscmp(compiler_common *common)
3435 {
3436 DEFINE_COMPILER;
3437 struct sljit_jump *jump;
3438 struct sljit_label *label;
3439
3440 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3441 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3442
/* Preserve the three borrowed registers, then load the table address. */
3443 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3444 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3445 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3446 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one unit because every load below uses
an offset of IN_UCHARS(1); MOVU_UCHAR is presumably the base-updating load
form - confirm. */
3447 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3448 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3449
3450 label = LABEL();
3451 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3452 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3453 #ifndef COMPILE_PCRE8
/* The table lookup is skipped for values above 255. */
3454 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3455 #endif
3456 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3457 #ifndef COMPILE_PCRE8
3458 JUMPHERE(jump);
3459 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3460 #endif
3461 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3462 #ifndef COMPILE_PCRE8
3463 JUMPHERE(jump);
3464 #endif
3465 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3466 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3467 JUMPTO(SLJIT_C_NOT_ZERO, label);
3468
/* Common exit for both "mismatch" and "all units compared". */
3469 JUMPHERE(jump);
3470 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3471 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3472 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3473 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3474 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3475 }
3476
/* The register aliases are private to the two comparison helpers above. */
3477 #undef LCC_TABLE
3478 #undef CHAR1
3479 #undef CHAR2
3480
3481 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3482
3483 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3484 {
3485 /* This function would be ineffective to do in JIT level. */
3486 pcre_uint32 c1, c2;
3487 const pcre_uchar *src2 = args->uchar_ptr;
3488 const pcre_uchar *end2 = args->end;
3489 const ucd_record *ur;
3490 const pcre_uint32 *pp;
3491
3492 while (src1 < end1)
3493 {
3494 if (src2 >= end2)
3495 return (pcre_uchar*)1;
3496 GETCHARINC(c1, src1);
3497 GETCHARINC(c2, src2);
3498 ur = GET_UCD(c2);
3499 if (c1 != c2 && c1 != c2 + ur->other_case)
3500 {
3501 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3502 for (;;)
3503 {
3504 if (c1 < *pp) return NULL;
3505 if (c1 == *pp++) break;
3506 }
3507 }
3508 }
3509 return src2;
3510 }
3511
3512 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3513
/* Emits the compare code for one pattern character at cc (several code units
when common->utf is set and the first unit has extra length) against the
subject at STR_PTR - context->length, adding a jump to *backtracks for every
mismatch. Returns cc advanced past the units that were handled.

context carries state across successive calls: length is the number of bytes
still to be compared, sourcereg is the register that receives the next load
(-1 before the first call) and, when unaligned reads are available, c / oc /
ucharptr collect several units so that they can be checked with one wider
compare.

When caseless is set and the character has an other case that differs in a
single bit, that bit is ORed into the loaded value and into the expected
constant for the affected code unit. */
3514 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3515 compare_context* context, jump_list **backtracks)
3516 {
3517 DEFINE_COMPILER;
3518 unsigned int othercasebit = 0;
3519 pcre_uchar *othercasechar = NULL;
3520 #ifdef SUPPORT_UTF
3521 int utflength;
3522 #endif
3523
3524 if (caseless && char_has_othercase(common, cc))
3525 {
3526 othercasebit = char_get_othercase_bit(common, cc);
3527 SLJIT_ASSERT(othercasebit);
3528 /* Extracting bit difference info. */
3529 #if defined COMPILE_PCRE8
/* The high part of the value selects the code unit, the low byte is the
differing bit. */
3530 othercasechar = cc + (othercasebit >> 8);
3531 othercasebit &= 0xff;
3532 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3533 /* Note that this code only handles characters in the BMP. If there
3534 ever are characters outside the BMP whose othercase differs in only one
3535 bit from itself (there currently are none), this code will need to be
3536 revised for COMPILE_PCRE32. */
3537 othercasechar = cc + (othercasebit >> 9);
3538 if ((othercasebit & 0x100) != 0)
3539 othercasebit = (othercasebit & 0xff) << 8;
3540 else
3541 othercasebit &= 0xff;
3542 #endif /* COMPILE_PCRE[8|16|32] */
3543 }
3544
/* First call for this context: load the first chunk into TMP1 and let the
following loads go to TMP2. */
3545 if (context->sourcereg == -1)
3546 {
3547 #if defined COMPILE_PCRE8
3548 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3549 if (context->length >= 4)
3550 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3551 else if (context->length >= 2)
3552 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3553 else
3554 #endif
3555 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3556 #elif defined COMPILE_PCRE16
3557 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3558 if (context->length >= 4)
3559 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3560 else
3561 #endif
3562 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3563 #elif defined COMPILE_PCRE32
3564 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3565 #endif /* COMPILE_PCRE[8|16|32] */
3566 context->sourcereg = TMP2;
3567 }
3568
3569 #ifdef SUPPORT_UTF
3570 utflength = 1;
3571 if (common->utf && HAS_EXTRALEN(*cc))
3572 utflength += GET_EXTRALEN(*cc);
3573
3574 do
3575 {
3576 #endif
3577
3578 context->length -= IN_UCHARS(1);
3579 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3580
3581 /* Unaligned read is supported. */
/* Collect the expected unit (c) and its case mask (oc) for this position. */
3582 if (othercasebit != 0 && othercasechar == cc)
3583 {
3584 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3585 context->oc.asuchars[context->ucharptr] = othercasebit;
3586 }
3587 else
3588 {
3589 context->c.asuchars[context->ucharptr] = *cc;
3590 context->oc.asuchars[context->ucharptr] = 0;
3591 }
3592 context->ucharptr++;
3593
/* Emit a compare once the collected units fill a load width or no input
remains. */
3594 #if defined COMPILE_PCRE8
3595 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3596 #elif defined COMPILE_PCRE16
3597 if (context->ucharptr >= 2 || context->length == 0)
3598 #elif defined COMPILE_PCRE32
3599 if (1 /* context->ucharptr >= 1 || context->length == 0 */)
3600 #endif
3601 {
/* Load the next chunk into the free register before the current chunk is
compared. */
3602 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3603 if (context->length >= 4)
3604 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3605 #if defined COMPILE_PCRE8
3606 else if (context->length >= 2)
3607 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3608 else if (context->length >= 1)
3609 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3610 #elif defined COMPILE_PCRE16
3611 else if (context->length >= 2)
3612 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3613 #endif /* COMPILE_PCRE[8|16] */
3614 #elif defined COMPILE_PCRE32
3615 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3616 #endif /* COMPILE_PCRE[8|16|32] */
/* Swap registers: sourcereg now names the chunk loaded earlier, which is
the one compared below. */
3617 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3618
/* The case labels are counts of collected code units, so they depend on
sizeof(pcre_uchar). */
3619 switch(context->ucharptr)
3620 {
3621 case 4 / sizeof(pcre_uchar):
3622 if (context->oc.asint != 0)
3623 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3624 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3625 break;
3626
3627 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3628 case 2 / sizeof(pcre_uchar):
3629 if (context->oc.asushort != 0)
3630 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3631 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3632 break;
3633
3634 #ifdef COMPILE_PCRE8
3635 case 1:
3636 if (context->oc.asbyte != 0)
3637 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3638 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3639 break;
3640 #endif
3641
3642 #endif /* COMPILE_PCRE[8|16] */
3643
3644 default:
3645 SLJIT_ASSERT_STOP();
3646 break;
3647 }
3648 context->ucharptr = 0;
3649 }
3650
3651 #else
3652
3653 /* Unaligned read is unsupported. */
/* One code unit per step: load the next unit (if any) into the free
register, then compare the unit loaded earlier. */
3654 if (context->length > 0)
3655 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3656
3657 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3658
3659 if (othercasebit != 0 && othercasechar == cc)
3660 {
3661 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3662 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3663 }
3664 else
3665 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3666
3667 #endif
3668
3669 cc++;
3670 #ifdef SUPPORT_UTF
3671 utflength--;
3672 }
3673 while (utflength > 0);
3674 #endif
3675
3676 return cc;
3677 }
3678
3679 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3680
/* SET_TYPE_OFFSET(value) and SET_CHAR_OFFSET(value) keep a running bias on a
register so that following range checks can be done with a single unsigned
compare: the register always holds (real value - current offset).

SET_TYPE_OFFSET adjusts the register named by the local variable typereg and
records the bias in the local variable typeoffset. SET_CHAR_OFFSET does the
same for TMP1 and charoffset. No instruction is emitted when the requested
offset equals the current one.

Both macros expand to exactly one statement (do { } while (0)), so they are
safe inside unbraced if / else bodies. The argument is evaluated more than
once and therefore must not have side effects. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
3700
3701 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3702 {
3703 DEFINE_COMPILER;
3704 jump_list *found = NULL;
3705 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3706 pcre_int32 c, charoffset;
3707 const pcre_uint32 *other_cases;
3708 struct sljit_jump *jump = NULL;
3709 pcre_uchar *ccbegin;
3710 int compares, invertcmp, numberofcmps;
3711 #ifdef SUPPORT_UCP
3712 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3713 BOOL charsaved = FALSE;
3714 int typereg = TMP1, scriptreg = TMP1;
3715 pcre_int32 typeoffset;
3716 #endif
3717
3718 /* Although SUPPORT_UTF must be defined, we are
3719 not necessary in utf mode even in 8 bit mode. */
3720 detect_partial_match(common, backtracks);
3721 read_char(common);
3722
3723 if ((*cc++ & XCL_MAP) != 0)
3724 {
3725 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3726 #ifndef COMPILE_PCRE8
3727 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3728 #elif defined SUPPORT_UTF
3729 if (common->utf)
3730 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3731 #endif
3732
3733 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3734 {
3735 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3736 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3737 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3738 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3739 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3740 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3741 }
3742
3743 #ifndef COMPILE_PCRE8
3744 JUMPHERE(jump);
3745 #elif defined SUPPORT_UTF
3746 if (common->utf)
3747 JUMPHERE(jump);
3748 #endif
3749 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3750 #ifdef SUPPORT_UCP
3751 charsaved = TRUE;
3752 #endif
3753 cc += 32 / sizeof(pcre_uchar);
3754 }
3755
3756 /* Scanning the necessary info. */
3757 ccbegin = cc;
3758 compares = 0;
3759 while (*cc != XCL_END)
3760 {
3761 compares++;
3762 if (*cc == XCL_SINGLE)
3763 {
3764 cc += 2;
3765 #ifdef SUPPORT_UTF
3766 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3767 #endif
3768 #ifdef SUPPORT_UCP
3769 needschar = TRUE;
3770 #endif
3771 }
3772 else if (*cc == XCL_RANGE)
3773 {
3774 cc += 2;
3775 #ifdef SUPPORT_UTF
3776 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3777 #endif
3778 cc++;
3779 #ifdef SUPPORT_UTF
3780 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3781 #endif
3782 #ifdef SUPPORT_UCP
3783 needschar = TRUE;
3784 #endif
3785 }
3786 #ifdef SUPPORT_UCP
3787 else
3788 {
3789 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3790 cc++;
3791 switch(*cc)
3792 {
3793 case PT_ANY:
3794 break;
3795
3796 case PT_LAMP:
3797 case PT_GC:
3798 case PT_PC:
3799 case PT_ALNUM:
3800 needstype = TRUE;
3801 break;
3802
3803 case PT_SC:
3804 needsscript = TRUE;
3805 break;
3806
3807 case PT_SPACE:
3808 case PT_PXSPACE:
3809 case PT_WORD:
3810 needstype = TRUE;
3811 needschar = TRUE;
3812 break;
3813
3814 case PT_CLIST:
3815 needschar = TRUE;
3816 break;
3817
3818 default:
3819 SLJIT_ASSERT_STOP();
3820 break;
3821 }
3822 cc += 2;
3823 }
3824 #endif
3825 }
3826
3827 #ifdef SUPPORT_UCP
3828 /* Simple register allocation. TMP1 is preferred if possible. */
3829 if (needstype || needsscript)
3830 {
3831 if (needschar && !charsaved)
3832 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3833 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3834 if (needschar)
3835 {
3836 if (needstype)
3837 {
3838 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3839 typereg = RETURN_ADDR;
3840 }
3841
3842 if (needsscript)
3843 scriptreg = TMP3;
3844 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3845 }
3846 else if (needstype && needsscript)
3847 scriptreg = TMP3;
3848 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3849
3850 if (needsscript)
3851 {
3852 if (scriptreg == TMP1)
3853 {
3854 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3855 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3856 }
3857 else
3858 {
3859 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3860 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3861 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3862 }
3863 }
3864 }
3865 #endif
3866
3867 /* Generating code. */
3868 cc = ccbegin;
3869 charoffset = 0;
3870 numberofcmps = 0;
3871 #ifdef SUPPORT_UCP
3872 typeoffset = 0;
3873 #endif
3874
3875 while (*cc != XCL_END)
3876 {
3877 compares--;
3878 invertcmp = (compares == 0 && list != backtracks);
3879 jump = NULL;
3880
3881 if (*cc == XCL_SINGLE)
3882 {
3883 cc ++;
3884 #ifdef SUPPORT_UTF
3885 if (common->utf)
3886 {
3887 GETCHARINC(c, cc);
3888 }
3889 else
3890 #endif
3891 c = *cc++;
3892
3893 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3894 {
3895 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3896 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3897 numberofcmps++;
3898 }
3899 else if (numberofcmps > 0)
3900 {
3901 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3902 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3903 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3904 numberofcmps = 0;
3905 }
3906 else
3907 {
3908 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3909 numberofcmps = 0;
3910 }
3911 }
3912 else if (*cc == XCL_RANGE)
3913 {
3914 cc ++;
3915 #ifdef SUPPORT_UTF
3916 if (common->utf)
3917 {
3918 GETCHARINC(c, cc);
3919 }
3920 else
3921 #endif
3922 c = *cc++;
3923 SET_CHAR_OFFSET(c);
3924 #ifdef SUPPORT_UTF
3925 if (common->utf)
3926 {
3927 GETCHARINC(c, cc);
3928 }
3929 else
3930 #endif
3931 c = *cc++;
3932 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3933 {
3934 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3935 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3936 numberofcmps++;
3937 }
3938 else if (numberofcmps > 0)
3939 {
3940 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3941 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3942 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3943 numberofcmps = 0;
3944 }
3945 else
3946 {
3947 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3948 numberofcmps = 0;
3949 }
3950 }
3951 #ifdef SUPPORT_UCP
3952 else
3953 {
3954 if (*cc == XCL_NOTPROP)
3955 invertcmp ^= 0x1;
3956 cc++;
3957 switch(*cc)
3958 {
3959 case PT_ANY:
3960 if (list != backtracks)
3961 {
3962 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3963 continue;
3964 }
3965 else if (cc[-1] == XCL_NOTPROP)
3966 continue;
3967 jump = JUMP(SLJIT_JUMP);
3968 break;
3969
3970 case PT_LAMP:
3971 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3972 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3973 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3974 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3975 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3976 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3977 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3978 break;
3979
3980 case PT_GC:
3981 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3982 SET_TYPE_OFFSET(c);
3983 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3984 break;
3985
3986 case PT_PC:
3987 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3988 break;
3989
3990 case PT_SC:
3991 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3992 break;
3993
3994 case PT_SPACE:
3995 case PT_PXSPACE:
3996 if (*cc == PT_SPACE)
3997 {
3998 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3999 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4000 }
4001 SET_CHAR_OFFSET(9);
4002 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4003 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
4004 if (*cc == PT_SPACE)
4005 JUMPHERE(jump);
4006
4007 SET_TYPE_OFFSET(ucp_Zl);
4008 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4009 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
4010 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4011 break;
4012
4013 case PT_WORD:
4014 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4015 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4016 /* ... fall through */
4017
4018 case PT_ALNUM:
4019 SET_TYPE_OFFSET(ucp_Ll);
4020 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4021 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
4022 SET_TYPE_OFFSET(ucp_Nd);
4023 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4024 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
4025 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4026 break;
4027
4028 case PT_CLIST:
4029 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4030
4031 /* At least three characters are required.
4032 Otherwise this case would be handled by the normal code path. */
4033 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4034 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4035
4036 /* Optimizing character pairs, if their difference is power of 2. */
4037 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4038 {
4039 if (charoffset == 0)
4040 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4041 else
4042 {
4043 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_w)charoffset);
4044 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4045 }
4046 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4047 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4048 other_cases += 2;
4049 }
4050 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4051 {
4052 if (charoffset == 0)
4053 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4054 else
4055 {
4056 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_w)charoffset);
4057 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4058 }
4059 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4060 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4061
4062 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4063 COND_VALUE(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, SLJIT_C_EQUAL);
4064
4065 other_cases += 3;
4066 }
4067 else
4068 {
4069 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4070 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4071 }
4072
4073 while (*other_cases != NOTACHAR)
4074 {
4075 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4076 COND_VALUE(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, SLJIT_C_EQUAL);
4077 }
4078 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4079 break;
4080 }
4081 cc += 2;
4082 }
4083 #endif
4084
4085 if (jump != NULL)
4086 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4087 }
4088
4089 if (found != NULL)
4090 set_jumps(found, LABEL());
4091 }
4092
4093 #undef SET_TYPE_OFFSET
4094 #undef SET_CHAR_OFFSET
4095
4096 #endif
4097
/* Emits the matching-path code for one opcode that tests a single position
or a single character: anchors, character types, literal characters, bitmap
classes and OP_REVERSE.

  common      shared compiler state
  type        the opcode to compile
  cc          pattern pointer just past the opcode (its operands, if any)
  backtracks  list that receives every jump taken when the match fails

Returns the pattern pointer after the operands consumed by this opcode. */
4098 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4099 {
4100 DEFINE_COMPILER;
4101 int length;
4102 unsigned int c, oc, bit;
4103 compare_context context;
4104 struct sljit_jump *jump[4];
4105 #ifdef SUPPORT_UTF
4106 struct sljit_label *label;
4107 #ifdef SUPPORT_UCP
4108 pcre_uchar propdata[5];
4109 #endif
4110 #endif
4111
4112 switch(type)
4113 {
4114 case OP_SOD:
/* Fail unless STR_PTR equals the "begin" field of the jit_arguments. */
4115 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4116 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4117 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4118 return cc;
4119
4120 case OP_SOM:
/* Same as OP_SOD, but compared against the "str" field. */
4121 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4122 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4123 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4124 return cc;
4125
4126 case OP_NOT_WORD_BOUNDARY:
4127 case OP_WORD_BOUNDARY:
/* Fast-call the routine collected in common->wordboundary; the jump that
follows presumably tests the zero flag left by that routine. */
4128 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4129 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4130 return cc;
4131
4132 case OP_NOT_DIGIT:
4133 case OP_DIGIT:
4134 /* Digits are usually 0-9, so it is worth to optimize them. */
/* NOTE(review): digits[0] == -2 looks like a "ranges not computed yet"
marker -- confirm against get_ctype_ranges(). */
4135 if (common->digits[0] == -2)
4136 get_ctype_ranges(common, ctype_digit, common->digits);
4137 detect_partial_match(common, backtracks);
4138 /* Flip the starting bit in the negative case. */
4139 if (type == OP_NOT_DIGIT)
4140 common->digits[1] ^= 1;
/* Generic fallback when check_ranges() reports that it emitted nothing:
test the ctype_digit bit of the character type. */
4141 if (!check_ranges(common, common->digits, backtracks, TRUE))
4142 {
4143 read_char8_type(common);
4144 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4145 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4146 }
/* Undo the flip applied above. */
4147 if (type == OP_NOT_DIGIT)
4148 common->digits[1] ^= 1;
4149 return cc;
4150
4151 case OP_NOT_WHITESPACE:
4152 case OP_WHITESPACE:
/* Test the ctype_space bit of the character type. */
4153 detect_partial_match(common, backtracks);
4154 read_char8_type(common);
4155 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4156 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4157 return cc;
4158
4159 case OP_NOT_WORDCHAR:
4160 case OP_WORDCHAR:
/* Test the ctype_word bit of the character type. */
4161 detect_partial_match(common, backtracks);
4162 read_char8_type(common);
4163 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4164 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4165 return cc;
4166
4167 case OP_ANY:
4168 detect_partial_match(common, backtracks);
4169 read_char(common);
4170 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4171 {
/* Two-character fixed newline: fail only when the character just read is
the first newline character and the next one is the second. */
4172 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4173 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4174 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4175 else
4176 jump[1] = check_str_end(common);
4177
4178 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4179 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
/* check_str_end() can evidently yield NULL, hence the test. */
4180 if (jump[1] != NULL)
4181 JUMPHERE(jump[1]);
4182 JUMPHERE(jump[0]);
4183 }
4184 else
4185 check_newlinechar(common, common->nltype, backtracks, TRUE);
4186 return cc;
4187
4188 case OP_ALLANY:
4189 detect_partial_match(common, backtracks);
4190 #ifdef SUPPORT_UTF
4191 if (common->utf)
4192 {
/* Step over the first code unit, then over the remaining units of a
multi-unit character. */
4193 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4194 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4195 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4196 #if defined COMPILE_PCRE8
/* Units of 0xc0 and above: add the utf8_table4 entry for the unit. */
4197 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4198 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4199 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4200 #elif defined COMPILE_PCRE16
/* Units of 0xd800 and above: TMP1 becomes 1 when (unit & 0xfc00) equals
0xd800, and that value shifted left by one is added to STR_PTR. */
4201 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4202 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4203 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4204 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
4205 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4206 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4207 #endif
4208 JUMPHERE(jump[0]);
4209 #endif /* COMPILE_PCRE[8|16] */
4210 return cc;
4211 }
4212 #endif
4213 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4214 return cc;
4215
4216 case OP_ANYBYTE:
/* Always advances by exactly one code unit. */
4217 detect_partial_match(common, backtracks);
4218 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4219 return cc;
4220
4221 #ifdef SUPPORT_UTF
4222 #ifdef SUPPORT_UCP
4223 case OP_NOTPROP:
4224 case OP_PROP:
/* Build a temporary five-unit extended class holding a single
XCL_PROP / XCL_NOTPROP item made from the two operand units, and let
compile_xclass_matchingpath() generate the test. */
4225 propdata[0] = 0;
4226 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4227 propdata[2] = cc[0];
4228 propdata[3] = cc[1];
4229 propdata[4] = XCL_END;
4230 compile_xclass_matchingpath(common, propdata, backtracks);
4231 return cc + 2;
4232 #endif
4233 #endif
4234
4235 case OP_ANYNL:
/* A CR that is directly followed by NL is consumed as one unit; a CR
alone (or at the end of the subject) is accepted as is; any other character
is checked by check_newlinechar() using bsr_nltype. */
4236 detect_partial_match(common, backtracks);
4237 read_char(common);
4238 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4239 /* We don't need to handle soft partial matching case. */
4240 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4241 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4242 else
4243 jump[1] = check_str_end(common);
4244 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4245 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4246 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4247 jump[3] = JUMP(SLJIT_JUMP);
4248 JUMPHERE(jump[0]);
4249 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4250 JUMPHERE(jump[1]);
4251 JUMPHERE(jump[2]);
4252 JUMPHERE(jump[3]);
4253 return cc;
4254
4255 case OP_NOT_HSPACE:
4256 case OP_HSPACE:
/* Fast-call the routine collected in common->hspace for the character just
read; the following jump presumably tests the flag it leaves. */
4257 detect_partial_match(common, backtracks);
4258 read_char(common);
4259 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4260 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4261 return cc;
4262
4263 case OP_NOT_VSPACE:
4264 case OP_VSPACE:
/* Same scheme as OP_HSPACE, using the common->vspace routine. */
4265 detect_partial_match(common, backtracks);
4266 read_char(common);
4267 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4268 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4269 return cc;
4270
4271 #ifdef SUPPORT_UCP
4272 case OP_EXTUNI:
/* Read the first character and load the gbprop byte of its ucd record
into STACK_TOP (whose real value is parked in LOCALS0 meanwhile). */
4273 detect_partial_match(common, backtracks);
4274 read_char(common);
4275 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4276 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4277 /* Optimize register allocation: use a real register. */
4278 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4279 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4280
/* Loop: read the next character (TMP3 remembers where it started) and keep
going while the ucp_gbtable entry selected by the previous gbprop has the
bit numbered by the new gbprop set. */
4281 label = LABEL();
4282 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4283 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4284 read_char(common);
4285 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4286 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4287 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4288
4289 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4290 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_w)PRIV(ucp_gbtable));
4291 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4292 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4293 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4294 JUMPTO(SLJIT_C_NOT_ZERO, label);
4295
/* The last character read does not belong to the sequence: step back to
where it started, then restore the real STACK_TOP. */
4296 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4297 JUMPHERE(jump[0]);
4298 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4299
4300 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4301 {
4302 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4303 /* Since we successfully read a char above, partial matching must occur. */
4304 check_partial(common, TRUE);
4305 JUMPHERE(jump[0]);
4306 }
4307 return cc;
4308 #endif
4309
4310 case OP_EODN:
4311 /* Requires rather complex checks. */
/* jump[0]: already at STR_END, nothing more to verify. */
4312 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4313 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4314 {
/* Two-character fixed newline: exactly two code units must remain and they
must be the two newline characters. In the partial modes, a single
remaining unit equal to the first newline character goes through
check_partial() before backtracking. */
4315 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4316 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4317 if (common->mode == JIT_COMPILE)
4318 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4319 else
4320 {
4321 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4322 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4323 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
4324 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4325 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
4326 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4327 check_partial(common, TRUE);
4328 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4329 JUMPHERE(jump[1]);
4330 }
4331 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4332 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4333 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4334 }
4335 else if (common->nltype == NLTYPE_FIXED)
4336 {
/* One-character fixed newline: exactly one code unit must remain and it
must be the newline character. */
4337 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4338 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4339 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4340 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4341 }
4342 else
4343 {
/* Other newline types. First handle a leading CR: with more than two units
left it fails, with exactly two the second must be NL, with only one the
lone CR is accepted (jump[2]). */
4344 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4345 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4346 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4347 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4348 jump[2] = JUMP(SLJIT_C_GREATER);
4349 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4350 /* Equal. */
4351 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4352 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4353 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4354
/* First character is not CR. */
4355 JUMPHERE(jump[1]);
4356 if (common->nltype == NLTYPE_ANYCRLF)
4357 {
4358 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4359 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4360 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4361 }
4362 else
4363 {
/* Read one full character (STR_PTR is saved in LOCALS1 and restored
afterwards); it must end exactly at STR_END and be accepted by the
common->anynewline routine. */
4364 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4365 read_char(common);
4366 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4367 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4368 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4369 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4370 }
4371 JUMPHERE(jump[2]);
4372 JUMPHERE(jump[3]);
4373 }
4374 JUMPHERE(jump[0]);
4375 check_partial(common, FALSE);
4376 return cc;
4377
4378 case OP_EOD:
/* Fail unless STR_PTR has reached STR_END. */
4379 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4380 check_partial(common, FALSE);
4381 return cc;
4382
4383 case OP_CIRC:
/* Fail when STR_PTR is past "begin" or when the notbol argument is set. */
4384 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4385 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4386 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4387 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4388 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4389 return cc;
4390
4391 case OP_CIRCM:
/* At "begin": only the notbol argument decides (jump[0] skips the rest).
Past "begin" (jump[1]): must not be at STR_END and must be directly
preceded by a newline. */
4392 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4393 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4394 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4395 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4396 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4397 jump[0] = JUMP(SLJIT_JUMP);
4398 JUMPHERE(jump[1]);
4399
4400 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4401 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4402 {
/* Two-character newline: both preceding units must lie at or after "begin"
(still held in TMP1) and equal the two newline characters. */
4403 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4404 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4405 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4406 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4407 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4408 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4409 }
4410 else
4411 {
/* Step back one character, re-read it and test it as a newline. */
4412 skip_char_back(common);
4413 read_char(common);
4414 check_newlinechar(common, common->nltype, backtracks, FALSE);
4415 }
4416 JUMPHERE(jump[0]);
4417 return cc;
4418
4419 case OP_DOLL:
/* Fail when the noteol argument is set; otherwise behave like OP_EODN, or
like OP_EOD when common->endonly is set. */
4420 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4421 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4422 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4423
4424 if (!common->endonly)
4425 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4426 else
4427 {
4428 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4429 check_partial(common, FALSE);
4430 }
4431 return cc;
4432
4433 case OP_DOLLM:
/* At STR_END: only the noteol argument decides (jump[0] skips the rest).
Before STR_END (jump[1]): a newline must follow. */
4434 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4435 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4436 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4437 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4438 check_partial(common, FALSE);
4439 jump[0] = JUMP(SLJIT_JUMP);
4440 JUMPHERE(jump[1]);
4441
4442 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4443 {
4444 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4445 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4446 if (common->mode == JIT_COMPILE)
4447 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4448 else
4449 {
4450 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4451 /* STR_PTR = STR_END - IN_UCHARS(1) */
4452 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4453 check_partial(common, TRUE);
4454 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4455 JUMPHERE(jump[1]);
4456 }
4457
4458 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4459 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4460 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4461 }
4462 else
4463 {
4464 peek_char(common);
4465 check_newlinechar(common, common->nltype, backtracks, FALSE);
4466 }
4467 JUMPHERE(jump[0]);
4468 return cc;
4469
4470 case OP_CHAR:
4471 case OP_CHARI:
4472 length = 1;
4473 #ifdef SUPPORT_UTF
4474 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4475 #endif
/* In non-partial mode, a caseful character, a character without another
case, or one for which char_get_othercase_bit() is non-zero is compared
as a fixed-length sequence by byte_sequence_compare(). */
4476 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4477 {
4478 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4479 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4480
4481 context.length = IN_UCHARS(length);
4482 context.sourcereg = -1;
4483 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4484 context.ucharptr = 0;
4485 #endif
4486 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4487 }
/* Otherwise read the subject character and compare it with the pattern
character c. */
4488 detect_partial_match(common, backtracks);
4489 read_char(common);
4490 #ifdef SUPPORT_UTF
4491 if (common->utf)
4492 {
4493 GETCHAR(c, cc);
4494 }
4495 else
4496 #endif
4497 c = *cc;
4498 if (type == OP_CHAR || !char_has_othercase(common, cc))
4499 {
4500 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4501 return cc + length;
4502 }
4503 oc = char_othercase(common, c);
4504 bit = c ^ oc;
/* When the two cases differ in a single bit, set that bit in the subject
character and do one comparison. */
4505 if (is_powerof2(bit))
4506 {
4507 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4508 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4509 return cc + length;
4510 }
/* General case: fail unless the character equals c or oc. */
4511 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4512 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4513 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4514 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4515 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4516 return cc + length;
4517
4518 case OP_NOT:
4519 case OP_NOTI:
4520 detect_partial_match(common, backtracks);
4521 length = 1;
4522 #ifdef SUPPORT_UTF
4523 if (common->utf)
4524 {
4525 #ifdef COMPILE_PCRE8
4526 c = *cc;
4527 if (c < 128)
4528 {
/* Pattern character below 128: compare only the first subject byte, then
step over the whole (possibly multi-byte) subject character. */
4529 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4530 if (type == OP_NOT || !char_has_othercase(common, cc))
4531 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4532 else
4533 {
4534 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4535 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4536 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4537 }
4538 /* Skip the variable-length character. */
4539 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4540 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4541 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4542 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4543 JUMPHERE(jump[0]);
4544 return cc + 1;
4545 }
4546 else
4547 #endif /* COMPILE_PCRE8 */
4548 {
4549 GETCHARLEN(c, cc, length);
4550 read_char(common);
4551 }
4552 }
4553 else
4554 #endif /* SUPPORT_UTF */
4555 {
4556 read_char(common);
4557 c = *cc;
4558 }
4559
/* Fail when the subject character equals c (or, caselessly, its other
case oc). */
4560 if (type == OP_NOT || !char_has_othercase(common, cc))
4561 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4562 else
4563 {
4564 oc = char_othercase(common, c);
4565 bit = c ^ oc;
4566 if (is_powerof2(bit))
4567 {
4568 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4569 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4570 }
4571 else
4572 {
4573 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4574 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4575 }
4576 }
4577 return cc + length;
4578
4579 case OP_CLASS:
4580 case OP_NCLASS:
4581 detect_partial_match(common, backtracks);
4582 read_char(common);
/* Nothing more to emit when check_class_ranges() handled the class. The
operand is a 32-byte bitmap, hence the 32 / sizeof(pcre_uchar) step. */
4583 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4584 return cc + 32 / sizeof(pcre_uchar);
4585
4586 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4587 jump[0] = NULL;
4588 #ifdef COMPILE_PCRE8
4589 /* This check only affects 8 bit mode. In other modes, we
4590 always need to compare the value with 255. */
4591 if (common->utf)
4592 #endif /* COMPILE_PCRE8 */
4593 {
/* Characters above 255 are outside the bitmap: OP_CLASS fails on them,
OP_NCLASS accepts them by jumping over the bitmap test. */
4594 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4595 if (type == OP_CLASS)
4596 {
4597 add_jump(compiler, backtracks, jump[0]);
4598 jump[0] = NULL;
4599 }
4600 }
4601 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Bitmap test: load byte (c >> 3) of the bitmap at cc and test it against
1 << (c & 7); fail when the bit is clear. */
4602 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4603 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4604 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
4605 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4606 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4607 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4608 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4609 if (jump[0] != NULL)
4610 JUMPHERE(jump[0]);
4611 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4612 return cc + 32 / sizeof(pcre_uchar);
4613
4614 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4615 case OP_XCLASS:
/* The class data follows a LINK_SIZE length field; GET(cc, 0) reads that
field and is used to step to the end of the whole item. */
4616 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4617 return cc + GET(cc, 0) - 1;
4618 #endif
4619
4620 case OP_REVERSE:
/* Move STR_PTR back by "length" characters, failing when that would pass
the "begin" argument. A zero length emits nothing. */
4621 length = GET(cc, 0);
4622 if (length == 0)
4623 return cc + LINK_SIZE;
4624 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4625 #ifdef SUPPORT_UTF
4626 if (common->utf)
4627 {
/* UTF: characters vary in size, so step back one character at a time with
TMP2 as the countdown. */
4628 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4629 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4630 label = LABEL();
4631 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4632 skip_char_back(common);
4633 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4634 JUMPTO(SLJIT_C_NOT_ZERO, label);
4635 }
4636 else
4637 #endif
4638 {
4639 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4640 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4641 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4642 }
4643 check_start_used_ptr(common);
4644 return cc + LINK_SIZE;
4645 }
/* Every opcode handled above returns from inside the switch. */
4646 SLJIT_ASSERT_STOP();
4647 return cc;
4648 }
4649
/* Emits the matching-path code for the item starting at cc, merging a run of
consecutive fixed-length OP_CHAR / OP_CHARI items into one length check
followed by byte_sequence_compare() calls. When the first item cannot be
handled that way, the work is handed to compile_char1_matchingpath().

  common      shared compiler state
  cc          pattern pointer at the opcode to compile
  ccend       end of the pattern range that may be scanned
  backtracks  list that receives every jump taken when the match fails

Returns the pattern pointer after the last item compiled. */
4650 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4651 {
4652 /* This function consumes at least one input character. */
4653 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4654 DEFINE_COMPILER;
4655 pcre_uchar *ccbegin = cc;
4656 compare_context context;
4657 int size;
4658
/* Scan phase: nothing is emitted here, only the total length of the run is
accumulated in context.length. size == 0 marks an item that cannot join
the run and ends the scan. */
4659 context.length = 0;
4660 do
4661 {
4662 if (cc >= ccend)
4663 break;
4664
4665 if (*cc == OP_CHAR)
4666 {
4667 size = 1;
4668 #ifdef SUPPORT_UTF
4669 if (common->utf && HAS_EXTRALEN(cc[1]))
4670 size += GET_EXTRALEN(cc[1]);
4671 #endif
4672 }
4673 else if (*cc == OP_CHARI)
4674 {
4675 size = 1;
/* A caseless character that has another case but for which
char_get_othercase_bit() returns 0 is excluded from the run. */
4676 #ifdef SUPPORT_UTF
4677 if (common->utf)
4678 {
4679 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4680 size = 0;
4681 else if (HAS_EXTRALEN(cc[1]))
4682 size += GET_EXTRALEN(cc[1]);
4683 }
4684 else
4685 #endif
4686 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4687 size = 0;
4688 }
4689 else
4690 size = 0;
4691
4692 cc += 1 + size;
4693 context.length += IN_UCHARS(size);
4694 }
/* The scan also stops once the accumulated length exceeds 128. */
4695 while (size > 0 && context.length <= 128);
4696
/* Emit phase: restart from the first item. */
4697 cc = ccbegin;
4698 if (context.length > 0)
4699 {
4700 /* We have a fixed-length byte sequence. */
4701 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4702 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4703
4704 context.sourcereg = -1;
4705 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4706 context.ucharptr = 0;
4707 #endif
/* NOTE(review): the loop relies on byte_sequence_compare() reducing
context.length as it consumes each character -- confirm there. */
4708 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4709 return cc;
4710 }
4711
4712 /* A non-fixed length character will be checked if length == 0. */
4713 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4714 }
4715
/* Emits the preliminary checks for the back reference at cc and returns a
jump that the caller must resolve for the "nothing to match" case.

The start of the referenced group is loaded into TMP1 from its OVECTOR slot.
Unless jscript_compat is set, an unset group (start equal to OVECTOR(1)) is
handled as follows:
  backtracks == NULL  the returned jump is taken when the group is unset or
                      empty (start equal to end)
  backtracks != NULL  an unset group jumps to backtracks, and the returned
                      jump is taken only for an empty group
With jscript_compat set, only the empty-group jump is emitted. */
4716 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4717 {
4718 DEFINE_COMPILER;
/* Each group owns two consecutive OVECTOR slots: start and end. */
4719 int offset = GET2(cc, 1) << 1;
4720
4721 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4722 if (!common->jscript_compat)
4723 {
4724 if (backtracks == NULL)
4725 {
4726 /* OVECTOR(1) contains the "string begin - 1" constant. */
/* TMP2 = (start == OVECTOR(1)) | (start == end); the final OR also sets
the flag tested by the returned jump. */
4727 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4728 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4729 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4730 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4731 return JUMP(SLJIT_C_NOT_ZERO);
4732 }
4733 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4734 }
4735 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4736 }
4737
4738 /* Forward declarations. */
4739 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4740 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4741
/* Allocates a zero-filled backtrack record of "size" bytes from the
compiler's memory and pushes it onto the parent's backtrack list.

Expects three names in the expanding scope: "compiler", "parent" (whose
"top" field heads the list) and "backtrack" (receives the new record).

  size     number of bytes to allocate and clear
  ccstart  pattern position stored in the record's "cc" field
  error    value returned from the enclosing function when the compiler
           reports an error after the allocation

Every macro parameter is parenthesized at its point of use. */
4742 #define PUSH_BACKTRACK(size, ccstart, error) \
4743 do \
4744 { \
4745 backtrack = sljit_alloc_memory(compiler, (size)); \
4746 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4747 return error; \
4748 memset(backtrack, 0, (size)); \
4749 backtrack->prev = parent->top; \
4750 backtrack->cc = (ccstart); \
4751 parent->top = backtrack; \
4752 } \
4753 while (0)
4754
/* Variant of the backtrack-record push for functions returning void:
allocates a zero-filled record of "size" bytes from the compiler's memory
and pushes it onto the parent's backtrack list, returning from the enclosing
function (without a value) when the compiler reports an error.

Expects three names in the expanding scope: "compiler", "parent" (whose
"top" field heads the list) and "backtrack" (receives the new record).

  size     number of bytes to allocate and clear
  ccstart  pattern position stored in the record's "cc" field

Every macro parameter is parenthesized at its point of use. */
4755 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
4756 do \
4757 { \
4758 backtrack = sljit_alloc_memory(compiler, (size)); \
4759 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4760 return; \
4761 memset(backtrack, 0, (size)); \
4762 backtrack->prev = parent->top; \
4763 backtrack->cc = (ccstart); \
4764 parent->top = backtrack; \
4765 } \
4766 while (0)
4767
/* Views the local "backtrack" pointer as a pointer to the given record
type; "backtrack" must be in scope where the macro is expanded. */
4768 #define BACKTRACK_AS(type) ((type *)backtrack)
4769
/* Emits the matching-path code that compares the subject at STR_PTR with the
text captured by the group referenced at cc (OP_REF, or the caseless
OP_REFI).

  common      shared compiler state
  cc          pattern pointer at the reference opcode
  backtracks  list that receives every jump taken when the match fails
  withchecks  when TRUE, also emit the unset-group test (skipped if
              jscript_compat is set) and the empty-reference test
  emptyfail   only meaningful with withchecks: TRUE makes an empty reference
              fail, FALSE lets it continue after the emitted code

Returns cc + 1 + IMM2_SIZE, the position after the opcode and its group
number. */
4770 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4771 {
4772 DEFINE_COMPILER;
/* Each group owns two consecutive OVECTOR slots: start and end. */
4773 int offset = GET2(cc, 1) << 1;
4774 struct sljit_jump *jump = NULL;
4775 struct sljit_jump *partial;
4776 struct sljit_jump *nopartial;
4777
4778 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4779 /* OVECTOR(1) contains the "string begin - 1" constant. */
4780 if (withchecks && !common->jscript_compat)
4781 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4782
4783 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4784 if (common->utf && *cc == OP_REFI)
4785 {
/* UTF caseless comparison is done by calling do_utf_caselesscmp(). The
assertion pins the register assignment the call sequence depends on. */
4786 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
4787 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4788 if (withchecks)
4789 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4790
4791 /* Needed to save important temporary registers. */
4792 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4793 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
4794 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4795 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4796 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return value as used below: 0 fails, 1 fails too but first goes through
check_partial() in the partial modes, and any other value becomes the
new STR_PTR. */
4797 if (common->mode == JIT_COMPILE)
4798 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4799 else
4800 {
4801 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4802 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4803 check_partial(common, FALSE);
4804 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4805 JUMPHERE(nopartial);
4806 }
4807 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4808 }
4809 else
4810 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4811 {
/* TMP2 = end - start, the length of the captured text; the zero flag
doubles as the empty-reference test. */
4812 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4813 if (withchecks)
4814 jump = JUMP(SLJIT_C_ZERO);
4815
/* Advance STR_PTR by that length; running past STR_END fails in normal
mode and takes the partial path below otherwise. */
4816 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4817 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4818 if (common->mode == JIT_COMPILE)
4819 add_jump(compiler, backtracks, partial);
4820
/* Fast-call the shared compare routine; a non-zero TMP2 afterwards is
treated as a mismatch. */
4821 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4822 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4823
4824 if (common->mode != JIT_COMPILE)
4825 {
4826 nopartial = JUMP(SLJIT_JUMP);
4827 JUMPHERE(partial);
4828 /* TMP2 -= STR_PTR - STR_END (drop the part that overshoots STR_END). */
4829 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4830 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* With nothing left to compare, go straight to the partial check;
otherwise compare the part that is available up to STR_END. */
4831 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4832 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4833 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4834 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4835 JUMPHERE(partial);
4836 check_partial(common, FALSE);
4837 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4838 JUMPHERE(nopartial);
4839 }
4840 }
4841
/* Resolve the empty-reference jump: fail, or continue from here. */
4842 if (jump != NULL)
4843 {
4844 if (emptyfail)
4845 add_jump(compiler, backtracks, jump);
4846 else
4847 JUMPHERE(jump);
4848 }
4849 return cc + 1 + IMM2_SIZE;
4850 }
4851
/* Emits the matching-path code for a back reference that is followed by a
repeat opcode (OP_CRSTAR, OP_CRPLUS, OP_CRQUERY, OP_CRRANGE or one of their
OP_CRMIN* counterparts).

  common  shared compiler state
  cc      pattern pointer at the reference opcode
  parent  backtrack record whose list receives the new iterator_backtrack

Returns the pattern pointer after the repeat opcode and its operands, or NULL
when the compiler reports an error while the backtrack record is allocated. */
4852 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4853 {
4854 DEFINE_COMPILER;
4855 backtrack_common *backtrack;
4856 pcre_uchar type;
4857 struct sljit_label *label;
4858 struct sljit_jump *zerolength;
4859 struct sljit_jump *jump = NULL;
4860 pcre_uchar *ccbegin = cc;
4861 int min = 0, max = 0;
4862 BOOL minimize;
4863
4864 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4865
/* The repeat opcode follows the reference opcode and its group number; its
lowest bit selects the minimizing form. In the code below max == 0 stands
for "no upper limit". */
4866 type = cc[1 + IMM2_SIZE];
4867 minimize = (type & 0x1) != 0;
4868 switch(type)
4869 {
4870 case OP_CRSTAR:
4871 case OP_CRMINSTAR:
4872 min = 0;
4873 max = 0;
4874 cc += 1 + IMM2_SIZE + 1;
4875 break;
4876 case OP_CRPLUS:
4877 case OP_CRMINPLUS:
4878 min = 1;
4879 max = 0;
4880 cc += 1 + IMM2_SIZE + 1;
4881 break;
4882 case OP_CRQUERY:
4883 case OP_CRMINQUERY:
4884 min = 0;
4885 max = 1;
4886 cc += 1 + IMM2_SIZE + 1;
4887 break;
4888 case OP_CRRANGE:
4889 case OP_CRMINRANGE:
4890 min = GET2(cc, 1 + IMM2_SIZE + 1);
4891 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4892 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4893 break;
4894 default:
4895 SLJIT_ASSERT_STOP();
4896 break;
4897 }
4898
/* Maximizing form: match as many copies as allowed, saving STR_PTR on the
stack after each optional copy. */
4899 if (!minimize)
4900 {
4901 if (min == 0)
4902 {
/* Zero copies are acceptable, so the current position is saved first. An
unset or empty group skips the loop through the zerolength jump. */
4903 allocate_stack(common, 2);
4904 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4905 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4906 /* Temporary release of STR_PTR. */
4907 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4908 zerolength = compile_ref_checks(common, ccbegin, NULL);
4909 /* Restore if not zero length. */
4910 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4911 }
4912 else
4913 {
/* At least one copy is required: an unset group backtracks. */
4914 allocate_stack(common, 1);
4915 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4916 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4917 }
4918
/* A repeat counter is kept in POSSESSIVE0 only when a limit above one has
to be enforced. */
4919 if (min > 1 || max > 1)
4920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4921
/* Loop head: match one copy without repeating the reference checks. */
4922 label = LABEL();
4923 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4924
4925 if (min > 1 || max > 1)
4926 {
4927 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4928 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4929 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below min: loop again without saving a position. */
4930 if (min > 1)
4931 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4932 if (max > 1)
4933 {
/* Below max: save the position and try another copy. */
4934 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4935 allocate_stack(common, 1);
4936 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4937 JUMPTO(SLJIT_JUMP, label);
4938 JUMPHERE(jump);
4939 }
4940 }
4941
4942 if (max == 0)
4943 {
4944 /* Includes min > 1 case as well. */
4945 allocate_stack(common, 1);
4946 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4947 JUMPTO(SLJIT_JUMP, label);
4948 }
4949
4950 JUMPHERE(zerolength);
4951 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4952
4953 decrease_call_count(common);
4954 return cc;
4955 }
4956
/* Minimizing form: two stack slots are allocated, STACK(0) for the saved
STR_PTR and STACK(1) for the repeat counter (not initialized for
OP_CRMINSTAR, which never reads it here). */
4957 allocate_stack(common, 2);
4958 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4959 if (type != OP_CRMINSTAR)
4960 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4961
4962 if (min == 0)
4963 {
/* Zero copies are acceptable: record the position and skip the first
match attempt (jump), or leave through zerolength for an unset or empty
group. */
4964 zerolength = compile_ref_checks(common, ccbegin, NULL);
4965 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4966 jump = JUMP(SLJIT_JUMP);
4967 }
4968 else
4969 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4970
/* matchingpath is the label stored for re-entry when one more copy has to
be matched; the min > 1 loop below also jumps back to it. */
4971 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4972 if (max > 0)
4973 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4974
4975 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4976 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4977
4978 if (min > 1)
4979 {
/* Count the copy and keep matching until min is reached. */
4980 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4981 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4982 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4983 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
4984 }
4985 else if (max > 0)
4986 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4987
4988 if (jump != NULL)
4989 JUMPHERE(jump);
4990 JUMPHERE(zerolength);
4991
4992 decrease_call_count(common);
4993 return cc;
4994 }
4995
/* Emits the matching path for the recursion item at cc.

The value read with GET(cc, 1) is used as a key ("start") to find the
matching recurse_entry in the common->entries list; when no entry has that
start value a new one is allocated and appended to the list. Depending on
common->has_set_som and common->mark_ptr, up to two words of state are pushed
on the stack before an SLJIT_FAST_CALL to the entry is emitted. After the
call, a jump to backtrack->topbacktracks is taken when TMP1 is 0.

Arguments:
  common      the compiler state shared by the code generators
  cc          points at the recursion item being compiled
  parent      only referenced here through the PUSH_BACKTRACK macro
              (presumably the backtrack list the new item is linked into -
              confirm against the macro definition)

Returns:      cc + 1 + LINK_SIZE, or NULL when the compiler reports an error
              after allocating a new recurse_entry
*/
4996 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4997 {
4998 DEFINE_COMPILER;
4999 backtrack_common *backtrack;
5000 recurse_entry *entry = common->entries;
5001 recurse_entry *prev = NULL;
5002 int start = GET(cc, 1);
5003
5004 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
/* Look for an existing entry with the same start value; prev ends up as the
last list element when nothing is found. */
5005 while (entry != NULL)
5006 {
5007 if (entry->start == start)
5008 break;
5009 prev = entry;
5010 entry = entry->next;
5011 }
5012
5013 if (entry == NULL)
5014 {
/* First reference to this start value: create the entry and append it to the
tail of the list (or make it the head when the list is empty). */
5015 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5016 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5017 return NULL;
5018 entry->next = NULL;
5019 entry->entry = NULL;
5020 entry->calls = NULL;
5021 entry->start = start;
5022
5023 if (prev != NULL)
5024 prev->next = entry;
5025 else
5026 common->entries = entry;
5027 }
5028
/* Push the current OVECTOR(0) and/or the local at common->mark_ptr: two words
when both are in use, one word when only one of them is, nothing otherwise. */
5029 if (common->has_set_som && common->mark_ptr != 0)
5030 {
5031 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5032 allocate_stack(common, 2);
5033 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5034 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5035 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5036 }
5037 else if (common->has_set_som || common->mark_ptr != 0)
5038 {
5039 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5040 allocate_stack(common, 1);
5041 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5042 }
5043
/* When the entry has no label yet, the call is queued on entry->calls
(presumably patched once the label exists - confirm where entry->entry is
set); otherwise the call targets the label directly. */
5044 if (entry->entry == NULL)
5045 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5046 else
5047 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5048 /* Leave if the match has failed (TMP1 is 0 after the call). */
5049 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5050 return cc + 1 + LINK_SIZE;
5051 }
5052
/* Emits the code of an assertion group. The group opcode must lie between
OP_ASSERT and OP_ASSERTBACK_NOT (asserted below) and may be preceded by
OP_BRAZERO or OP_BRAMINZERO, but only when conditional is FALSE.

Every alternative of the group is compiled in turn with its own temporary
backtrack_common (altbacktrack). The quit/accept labels and jump lists stored
in common are replaced while the group is compiled and restored on every
return path.

Arguments:
  common       the compiler state shared by the code generators
  cc           points at the assertion, or at its OP_BRA(MIN)ZERO prefix
  backtrack    receives framesize, private_data_ptr, matchingpath and the
               jump lists produced here
  conditional  when TRUE, failure jumps are collected in backtrack->condfailed
               instead of backtrack->common.topbacktracks

Returns:       the address after the end of the group (cc + 1 + LINK_SIZE once
               cc has been advanced past the last alternative), or NULL when
               the compiler reports an error
*/
5053 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5054 {
5055 DEFINE_COMPILER;
5056 int framesize;
5057 int private_data_ptr;
5058 backtrack_common altbacktrack;
5059 pcre_uchar *ccbegin;
5060 pcre_uchar opcode;
5061 pcre_uchar bra = OP_BRA;
5062 jump_list *tmp = NULL;
5063 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5064 jump_list **found;
5065 /* Saving previous accept variables. */
5066 struct sljit_label *save_quitlabel = common->quitlabel;
5067 struct sljit_label *save_acceptlabel = common->acceptlabel;
5068 jump_list *save_quit = common->quit;
5069 jump_list *save_accept = common->accept;
5070 struct sljit_jump *jump;
5071 struct sljit_jump *brajump = NULL;
5072
/* Remember and skip an optional OP_BRAZERO / OP_BRAMINZERO prefix. */
5073 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5074 {
5075 SLJIT_ASSERT(!conditional);
5076 bra = *cc;
5077 cc++;
5078 }
5079 private_data_ptr = PRIVATE_DATA(cc);
5080 SLJIT_ASSERT(private_data_ptr != 0);
5081 framesize = get_framesize(common, cc, FALSE);
5082 backtrack->framesize = framesize;
5083 backtrack->private_data_ptr = private_data_ptr;
5084 opcode = *cc;
5085 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Jumps taken after an alternative has matched go to the local list tmp for
OP_ASSERT / OP_ASSERTBACK, and directly to target for the negative forms. */
5086 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5087 ccbegin = cc;
/* ccbegin stays at the start of the current alternative while cc is moved
forward by GET(cc, 1) to the item that follows it. */
5088 cc += GET(cc, 1);
5089
5090 if (bra == OP_BRAMINZERO)
5091 {
5092 /* This is a braminzero backtrack path. */
5093 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5094 free_stack(common, 1);
5095 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5096 }
5097
/* Negative framesize: only STACK_TOP is recorded in the private data slot and
STR_PTR is pushed. Otherwise framesize + 2 words are allocated, holding
STR_PTR, the previous private data value and the frame written by
init_frame(). */
5098 if (framesize < 0)
5099 {
5100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5101 allocate_stack(common, 1);
5102 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5103 }
5104 else
5105 {
5106 allocate_stack(common, framesize + 2);
5107 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5108 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5110 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5111 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5112 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5113 }
5114
5115 memset(&altbacktrack, 0, sizeof(backtrack_common));
5116 common->quitlabel = NULL;
5117 common->quit = NULL;
/* One iteration per alternative; the loop ends when the item after the
current alternative is not OP_ALT. */
5118 while (1)
5119 {
5120 common->acceptlabel = NULL;
5121 common->accept = NULL;
5122 altbacktrack.top = NULL;
5123 altbacktrack.topbacktracks = NULL;
5124
/* From the second alternative on, reload STR_PTR from the top of the stack. */
5125 if (*ccbegin == OP_ALT)
5126 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5127
5128 altbacktrack.cc = ccbegin;
5129 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5130 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5131 {
5132 common->quitlabel = save_quitlabel;
5133 common->acceptlabel = save_acceptlabel;
5134 common->quit = save_quit;
5135 common->accept = save_accept;
5136 return NULL;
5137 }
5138 common->acceptlabel = LABEL();
5139 if (common->accept != NULL)
5140 set_jumps(common->accept, common->acceptlabel);
5141
5142 /* Reset stack. */
5143 if (framesize < 0)
5144 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5145 else {
5146 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5147 {
5148 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5149 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5150 }
5151 else
5152 {
5153 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5154 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5155 }
5156 }
5157
5158 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5159 {
5160 /* We know that STR_PTR was stored on the top of the stack. */
5161 if (conditional)
5162 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5163 else if (bra == OP_BRAZERO)
5164 {
5165 if (framesize < 0)
5166 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5167 else
5168 {
5169 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5170 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
5171 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5172 }
5173 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5174 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5175 }
5176 else if (framesize >= 0)
5177 {
5178 /* For OP_BRA and OP_BRAMINZERO. */
5179 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5180 }
5181 }
/* The alternative matched: leave through the list selected above. */
5182 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5183
5184 compile_backtrackingpath(common, altbacktrack.top);
5185 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5186 {
5187 common->quitlabel = save_quitlabel;
5188 common->acceptlabel = save_acceptlabel;
5189 common->quit = save_quit;
5190 common->accept = save_accept;
5191 return NULL;
5192 }
5193 set_jumps(altbacktrack.topbacktracks, LABEL());
5194
5195 if (*cc != OP_ALT)
5196 break;
5197
5198 ccbegin = cc;
5199 cc += GET(cc, 1);
5200 }
5201 /* None of them matched. */
5202 if (common->quit != NULL)
5203 set_jumps(common->quit, LABEL());
5204
5205 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5206 {
5207 /* The assertion has failed. */
5208 if (conditional || bra == OP_BRAZERO)
5209 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5210
5211 if (framesize < 0)
5212 {
5213 /* The topmost item should be 0. */
5214 if (bra == OP_BRAZERO)
5215 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5216 else
5217 free_stack(common, 1);
5218 }
5219 else
5220 {
5221 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5222 /* The topmost item should be 0. */
5223 if (bra == OP_BRAZERO)
5224 {
5225 free_stack(common, framesize + 1);
5226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5227 }
5228 else
5229 free_stack(common, framesize + 2);
5230 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5231 }
/* For OP_BRAZERO this jump is bound to backtrack->matchingpath further down;
in every other case it is added to the failure list. */
5232 jump = JUMP(SLJIT_JUMP);
5233 if (bra != OP_BRAZERO)
5234 add_jump(compiler, target, jump);
5235
5236 /* The assertion is successful. */
5237 set_jumps(tmp, LABEL());
5238 if (framesize < 0)
5239 {
5240 /* We know that STR_PTR was stored on the top of the stack. */
5241 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5242 /* Keep the STR_PTR on the top of the stack. */
5243 if (bra == OP_BRAZERO)
5244 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5245 else if (bra == OP_BRAMINZERO)
5246 {
5247 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5248 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5249 }
5250 }
5251 else
5252 {
5253 if (bra == OP_BRA)
5254 {
5255 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5256 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5257 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5258 }
5259 else
5260 {
5261 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5262 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
5263 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5264 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5265 }
5266 }
5267
5268 if (bra == OP_BRAZERO)
5269 {
5270 backtrack->matchingpath = LABEL();
5271 sljit_set_label(jump, backtrack->matchingpath);
5272 }
5273 else if (bra == OP_BRAMINZERO)
5274 {
5275 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5276 JUMPHERE(brajump);
5277 if (framesize >= 0)
5278 {
5279 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5280 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5281 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5282 }
5283 set_jumps(backtrack->common.topbacktracks, LABEL());
5284 }
5285 }
5286 else
5287 {
5288 /* AssertNot is successful. */
5289 if (framesize < 0)
5290 {
5291 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5292 if (bra != OP_BRA)
5293 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5294 else
5295 free_stack(common, 1);
5296 }
5297 else
5298 {
5299 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5300 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5301 /* The topmost item should be 0. */
5302 if (bra != OP_BRA)
5303 {
5304 free_stack(common, framesize + 1);
5305 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5306 }
5307 else
5308 free_stack(common, framesize + 2);
5309 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5310 }
5311
5312 if (bra == OP_BRAZERO)
5313 backtrack->matchingpath = LABEL();
5314 else if (bra == OP_BRAMINZERO)
5315 {
5316 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5317 JUMPHERE(brajump);
5318 }
5319
5320 if (bra != OP_BRA)
5321 {
/* With a BRAZERO / BRAMINZERO prefix the collected jumps are bound right here
and the list is emptied. */
5322 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5323 set_jumps(backtrack->common.topbacktracks, LABEL());
5324 backtrack->common.topbacktracks = NULL;
5325 }
5326 }
5327
/* Restore the accept / quit state saved on entry. */
5328 common->quitlabel = save_quitlabel;
5329 common->acceptlabel = save_acceptlabel;
5330 common->quit = save_quit;
5331 common->accept = save_accept;
5332 return cc + 1 + LINK_SIZE;
5333 }
5334
5335 static sljit_w SLJIT_CALL do_searchovector(sljit_uw refno, sljit_w* locals, pcre_uchar *name_table)
5336 {
5337 int condition = FALSE;
5338 pcre_uchar *slotA = name_table;
5339 pcre_uchar *slotB;
5340 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5341 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5342 sljit_w no_capture;
5343 int i;
5344
5345 locals += refno & 0xff;
5346 refno >>= 8;
5347 no_capture = locals[1];
5348
5349 for (i = 0; i < name_count; i++)
5350 {
5351 if (GET2(slotA, 0) == refno) break;
5352 slotA += name_entry_size;
5353 }
5354
5355 if (i < name_count)
5356 {
5357 /* Found a name for the number - there can be only one; duplicate names
5358 for different numbers are allowed, but not vice versa. First scan down
5359 for duplicates. */
5360
5361 slotB = slotA;
5362 while (slotB > name_table)
5363 {
5364 slotB -= name_entry_size;
5365 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5366 {
5367 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5368 if (condition) break;
5369 }
5370 else break;
5371 }
5372
5373 /* Scan up for duplicates */
5374 if (!condition)
5375 {
5376 slotB = slotA;
5377 for (i++; i < name_count; i++)
5378 {
5379 slotB += name_entry_size;
5380 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5381 {
5382 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5383 if (condition) break;
5384 }
5385 else break;
5386 }
5387 }
5388 }
5389 return condition;
5390 }
5391
5392 static sljit_w SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5393 {
5394 int condition = FALSE;
5395 pcre_uchar *slotA = name_table;
5396 pcre_uchar *slotB;
5397 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_w)];
5398 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5399 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
5400 sljit_uw i;
5401
5402 for (i = 0; i < name_count; i++)
5403 {
5404 if (GET2(slotA, 0) == recno) break;
5405 slotA += name_entry_size;
5406 }
5407
5408 if (i < name_count)
5409 {
5410 /* Found a name for the number - there can be only one; duplicate
5411 names for different numbers are allowed, but not vice versa. First
5412 scan down for duplicates. */
5413
5414 slotB = slotA;
5415 while (slotB > name_table)
5416 {
5417 slotB -= name_entry_size;
5418 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5419 {
5420 condition = GET2(slotB, 0) == group_num;
5421 if (condition) break;
5422 }
5423 else break;
5424 }
5425
5426 /* Scan up for duplicates */
5427 if (!condition)
5428 {
5429 slotB = slotA;
5430 for (i++; i < name_count; i++)
5431 {
5432 slotB += name_entry_size;
5433 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5434 {
5435 condition = GET2(slotB, 0) == group_num;
5436 if (condition) break;
5437 }
5438 else break;
5439 }
5440 }
5441 }
5442 return condition;
5443 }
5444
5445 /*
5446 Handling bracketed expressions is probably the most complex part.
5447
5448 Stack layout naming characters:
5449 S - Push the current STR_PTR
5450 0 - Push a 0 (NULL)
5451 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5452 before the next alternative. Not pushed if there are no alternatives.
5453 M - Any values pushed by the current alternative. Can be empty, or anything.
5454 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5455 L - Push the previous local (pointed by localptr) to the stack
5456 () - optional values stored on the stack
5457 ()* - optional, can be stored multiple times
5458
5459 The following list shows the regular expression templates, their PCRE byte codes
5460 and stack layout supported by pcre-sljit.
5461
5462 (?:) OP_BRA | OP_KET A M
5463 () OP_CBRA | OP_KET C M
5464 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5465 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5466 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5467 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5468 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5469 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5470 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5471 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5472 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5473 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5474 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5475 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5476 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5477 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5478 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5479 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5480 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5481 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5482 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5483 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5484
5485
5486 Stack layout naming characters:
5487 A - Push the alternative index (starting from 0) on the stack.
5488 Not pushed if there are no alternatives.
5489 M - Any values pushed by the current alternative. Can be empty, or anything.
5490
5491 The next list shows the possible content of a bracket:
5492 (|) OP_*BRA | OP_ALT ... M A
5493 (?()|) OP_*COND | OP_ALT M A
5494 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5495 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5496 Or nothing, if trace is unnecessary
5497 */
5498
5499 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5500 {
5501 DEFINE_COMPILER;
5502 backtrack_common *backtrack;
5503 pcre_uchar opcode;
5504 int private_data_ptr = 0;
5505 int offset = 0;
5506 int stacksize;
5507 pcre_uchar *ccbegin;
5508 pcre_uchar *matchingpath;
5509 pcre_uchar bra = OP_BRA;
5510 pcre_uchar ket;
5511 assert_backtrack *assert;
5512 BOOL has_alternatives;
5513 struct sljit_jump *jump;
5514 struct sljit_jump *skip;
5515 struct sljit_label *rmaxlabel = NULL;
5516 struct sljit_jump *braminzerojump = NULL;
5517
5518 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5519
5520 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5521 {
5522 bra = *cc;
5523 cc++;
5524 opcode = *cc;
5525 }
5526
5527 opcode = *cc;
5528 ccbegin = cc;
5529 matchingpath = ccbegin + 1 + LINK_SIZE;
5530
5531 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5532 {
5533 /* Drop this bracket_backtrack. */
5534 parent->top = backtrack->prev;
5535 return bracketend(cc);
5536 }
5537
5538 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5539 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5540 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5541 cc += GET(cc, 1);
5542
5543 has_alternatives = *cc == OP_ALT;
5544 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5545 {
5546 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5547 if (*matchingpath == OP_NRREF)
5548 {
5549 stacksize = GET2(matchingpath, 1);
5550 if (common->currententry == NULL || stacksize == RREF_ANY)
5551 has_alternatives = FALSE;
5552 else if (common->currententry->start == 0)
5553 has_alternatives = stacksize != 0;
5554 else
5555 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5556 }
5557 }
5558
5559 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5560 opcode = OP_SCOND;
5561 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5562 opcode = OP_ONCE;
5563
5564 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5565 {
5566 /* Capturing brackets has a pre-allocated space. */
5567 offset = GET2(ccbegin, 1 + LINK_SIZE);
5568 if (common->optimized_cbracket[offset] == 0)
5569 {
5570 private_data_ptr = OVECTOR_PRIV(offset);
5571 offset <<= 1;
5572 }
5573 else
5574 {
5575 offset <<= 1;
5576 private_data_ptr = OVECTOR(offset);
5577 }
5578 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5579 matchingpath += IMM2_SIZE;
5580 }
5581 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5582 {
5583 /* Other brackets simply allocate the next entry. */
5584 private_data_ptr = PRIVATE_DATA(ccbegin);
5585 SLJIT_ASSERT(private_data_ptr != 0);
5586 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5587 if (opcode == OP_ONCE)
5588 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5589 }
5590
5591 /* Instructions before the first alternative. */
5592 stacksize = 0;
5593 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5594 stacksize++;
5595 if (bra == OP_BRAZERO)
5596 stacksize++;
5597
5598 if (stacksize > 0)
5599 allocate_stack(common, stacksize);
5600
5601 stacksize = 0;
5602 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5603 {
5604 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5605 stacksize++;
5606 }
5607
5608 if (bra == OP_BRAZERO)
5609 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5610
5611 if (bra == OP_BRAMINZERO)
5612 {
5613 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5614 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5615 if (ket != OP_KETRMIN)
5616 {
5617 free_stack(common, 1);
5618 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5619 }
5620 else
5621 {
5622 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5623 {
5624 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5625 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5626 /* Nothing stored during the first run. */
5627 skip = JUMP(SLJIT_JUMP);
5628 JUMPHERE(jump);
5629 /* Checking zero-length iteration. */
5630 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5631 {
5632 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5633 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5634 }
5635 else
5636 {
5637 /* Except when the whole stack frame must be saved. */
5638 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5639 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
5640 }
5641 JUMPHERE(skip);
5642 }
5643 else
5644 {
5645 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5646 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5647 JUMPHERE(jump);
5648 }
5649 }
5650 }
5651
5652 if (ket == OP_KETRMIN)
5653 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5654
5655 if (ket == OP_KETRMAX)
5656 {
5657 rmaxlabel = LABEL();
5658 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5659 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5660 }
5661
5662 /* Handling capturing brackets and alternatives. */
5663 if (opcode == OP_ONCE)
5664 {
5665 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5666 {
5667 /* Neither capturing brackets nor recursions are found in the block. */
5668 if (ket == OP_KETRMIN)
5669 {
5670 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5671 allocate_stack(common, 2);
5672 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5673 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5674 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5675 }
5676 else if (ket == OP_KETRMAX || has_alternatives)
5677 {
5678 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5679 allocate_stack(common, 1);
5680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5681 }
5682 else
5683 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5684 }
5685 else
5686 {
5687 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5688 {
5689 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5690 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5691 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5692 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5693 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5694 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5695 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5696 }
5697 else
5698 {
5699 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5700 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5701 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5702 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5703 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5704 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5705 }
5706 }
5707 }
5708 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5709 {
5710 /* Saving the previous values. */
5711 if (common->optimized_cbracket[offset >> 1] == 0)
5712 {
5713 allocate_stack(common, 3);
5714 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5715 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5716 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5717 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5718 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5719 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5720 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5721 }
5722 else
5723 {
5724 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5725 allocate_stack(common, 2);
5726 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5727 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_w));
5728 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5730 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5731 }
5732 }
5733 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5734 {
5735 /* Saving the previous value. */
5736 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5737 allocate_stack(common, 1);
5738 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5739 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5740 }
5741 else if (has_alternatives)
5742 {
5743 /* Pushing the starting string pointer. */
5744 allocate_stack(common, 1);
5745 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5746 }
5747
5748 /* Generating code for the first alternative. */
5749 if (opcode == OP_COND || opcode == OP_SCOND)
5750 {
5751 if (*matchingpath == OP_CREF)
5752 {
5753 SLJIT_ASSERT(has_alternatives);
5754 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5755 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5756 matchingpath += 1 + IMM2_SIZE;
5757 }
5758 else if (*matchingpath == OP_NCREF)
5759 {
5760 SLJIT_ASSERT(has_alternatives);
5761 stacksize = GET2(matchingpath, 1);
5762 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5763
5764 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5765 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5766 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5767 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
5768 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5769 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5770 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5771 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5772 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5773
5774 JUMPHERE(jump);
5775 matchingpath += 1 + IMM2_SIZE;
5776 }
5777 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
5778 {
5779 /* Never has other case. */
5780 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5781
5782 stacksize = GET2(matchingpath, 1);
5783 if (common->currententry == NULL)
5784 stacksize = 0;
5785 else if (stacksize == RREF_ANY)
5786 stacksize = 1;
5787 else if (common->currententry->start == 0)
5788 stacksize = stacksize == 0;
5789 else
5790 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5791
5792 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
5793 {
5794 SLJIT_ASSERT(!has_alternatives);
5795 if (stacksize != 0)
5796 matchingpath += 1 + IMM2_SIZE;
5797 else
5798 {
5799 if (*cc == OP_ALT)
5800 {
5801 matchingpath = cc + 1 + LINK_SIZE;
5802 cc += GET(cc, 1);
5803 }
5804 else
5805 matchingpath = cc;
5806 }
5807 }
5808 else
5809 {
5810 SLJIT_ASSERT(has_alternatives);
5811
5812 stacksize = GET2(matchingpath, 1);
5813 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5814 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5815 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5816 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5817 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
5818 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5819 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5820 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5821 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5822 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5823 matchingpath += 1 + IMM2_SIZE;
5824 }
5825 }
5826 else
5827 {
5828 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
5829 /* Similar code as PUSH_BACKTRACK macro. */
5830 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5831 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5832 return NULL;
5833 memset(assert, 0, sizeof(assert_backtrack));
5834 assert->common.cc = matchingpath;
5835 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5836 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
5837 }
5838 }
5839
5840 compile_matchingpath(common, matchingpath, cc, backtrack);
5841 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5842 return NULL;
5843
5844 if (opcode == OP_ONCE)
5845 {
5846 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5847 {
5848 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5849 /* TMP2 which is set here used by OP_KETRMAX below. */
5850 if (ket == OP_KETRMAX)
5851 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5852 else if (ket == OP_KETRMIN)
5853 {
5854 /* Move the STR_PTR to the private_data_ptr. */
5855 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5856 }
5857 }
5858 else
5859 {
5860 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5861 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
5862 if (ket == OP_KETRMAX)
5863 {
5864 /* TMP2 which is set here used by OP_KETRMAX below. */
5865 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5866 }
5867 }
5868 }
5869
5870 stacksize = 0;
5871 if (ket != OP_KET || bra != OP_BRA)
5872 stacksize++;
5873 if (has_alternatives && opcode != OP_ONCE)
5874 stacksize++;
5875
5876 if (stacksize > 0)
5877 allocate_stack(common, stacksize);
5878
5879 stacksize = 0;
5880 if (ket != OP_KET)
5881 {
5882 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5883 stacksize++;
5884 }
5885 else if (bra != OP_BRA)
5886 {
5887 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5888 stacksize++;
5889 }
5890
5891 if (has_alternatives)
5892 {
5893 if (opcode != OP_ONCE)
5894 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5895 if (ket != OP_KETRMAX)
5896 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5897 }
5898
5899 /* Must be after the matchingpath label. */
5900 if (offset != 0)
5901 {
5902 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5903 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5904 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5905 }
5906
5907 if (ket == OP_KETRMAX)
5908 {
5909 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5910 {
5911 if (has_alternatives)
5912 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5913 /* Checking zero-length iteration. */
5914 if (opcode != OP_ONCE)
5915 {
5916 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
5917 /* Drop STR_PTR for greedy plus quantifier. */
5918 if (bra != OP_BRAZERO)
5919 free_stack(common, 1);
5920 }
5921 else
5922 /* TMP2 must contain the starting STR_PTR. */
5923 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5924 }
5925 else
5926 JUMPTO(SLJIT_JUMP, rmaxlabel);
5927 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5928 }
5929
5930 if (bra == OP_BRAZERO)
5931 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
5932
5933 if (bra == OP_BRAMINZERO)
5934 {
5935 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5936 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
5937 if (braminzerojump != NULL)
5938 {
5939 JUMPHERE(braminzerojump);
5940 /* We need to release the end pointer to perform the
5941 backtrack for the zero-length iteration. When
5942 framesize is < 0, OP_ONCE will do the release itself. */
5943 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5944 {
5945 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5946 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5947 }
5948 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5949 free_stack(common, 1);
5950 }
5951 /* Continue to the normal backtrack. */
5952 }
5953
5954 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5955 decrease_call_count(common);
5956
5957 /* Skip the other alternatives. */
5958 while (*cc == OP_ALT)
5959 cc += GET(cc, 1);
5960 cc += 1 + LINK_SIZE;
5961 return cc;
5962 }
5963
5964 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5965 {
5966 DEFINE_COMPILER;
5967 backtrack_common *backtrack;
5968 pcre_uchar opcode;
5969 int private_data_ptr;
5970 int cbraprivptr = 0;
5971 int framesize;
5972 int stacksize;
5973 int offset = 0;
5974 BOOL zero = FALSE;
5975 pcre_uchar *ccbegin = NULL;
5976 int stack;
5977 struct sljit_label *loop = NULL;
5978 struct jump_list *emptymatch = NULL;
5979
5980 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
5981 if (*cc == OP_BRAPOSZERO)
5982 {
5983 zero = TRUE;
5984 cc++;
5985 }
5986
5987 opcode = *cc;
5988 private_data_ptr = PRIVATE_DATA(cc);
5989 SLJIT_ASSERT(private_data_ptr != 0);
5990 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
5991 switch(opcode)
5992 {
5993 case OP_BRAPOS:
5994 case OP_SBRAPOS:
5995 ccbegin = cc + 1 + LINK_SIZE;
5996 break;
5997
5998 case OP_CBRAPOS:
5999 case OP_SCBRAPOS:
6000 offset = GET2(cc, 1 + LINK_SIZE);
6001 /* This case cannot be optimized in the same was as
6002 normal capturing brackets. */
6003 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6004 cbraprivptr = OVECTOR_PRIV(offset);
6005 offset <<= 1;
6006 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6007 break;
6008
6009 default:
6010 SLJIT_ASSERT_STOP();
6011 break;
6012 }
6013
6014 framesize = get_framesize(common, cc, FALSE);
6015 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6016 if (framesize < 0)
6017 {
6018 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
6019 if (!zero)
6020 stacksize++;
6021 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6022 allocate_stack(common, stacksize);
6023 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6024
6025 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6026 {
6027 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6028 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6029 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6030 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6031 }
6032 else
6033 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6034
6035 if (!zero)
6036 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6037 }
6038 else
6039 {
6040 stacksize = framesize + 1;
6041 if (!zero)
6042 stacksize++;
6043 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6044 stacksize++;
6045 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6046 allocate_stack(common, stacksize);
6047
6048 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6049 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6050 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6051 stack = 0;
6052 if (!zero)
6053 {
6054 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6055 stack++;
6056 }
6057 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6058 {
6059 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6060 stack++;
6061 }
6062 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6063 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6064 }
6065
6066 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6067 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6068
6069 loop = LABEL();
6070 while (*cc != OP_KETRPOS)
6071 {
6072 backtrack->top = NULL;
6073 backtrack->topbacktracks = NULL;
6074 cc += GET(cc, 1);
6075
6076 compile_matchingpath(common, ccbegin, cc, backtrack);
6077 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6078 return NULL;
6079
6080 if (framesize < 0)
6081 {
6082 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6083
6084 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6085 {
6086 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6087 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6088 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6089 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6090 }
6091 else
6092 {
6093 if (opcode == OP_SBRAPOS)
6094 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6095 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6096 }
6097
6098 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6099 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6100
6101 if (!zero)
6102 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6103 }
6104 else
6105 {
6106 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6107 {
6108 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
6109 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6110 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6111 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6113 }
6114 else
6115 {
6116 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6117 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
6118 if (opcode == OP_SBRAPOS)
6119 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6120 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
6121 }
6122
6123 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6124 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6125
6126 if (!zero)
6127 {
6128 if (framesize < 0)
6129 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6130 else
6131 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6132 }
6133 }
6134 JUMPTO(SLJIT_JUMP, loop);
6135 flush_stubs(common);
6136
6137 compile_backtrackingpath(common, backtrack->top);
6138 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6139 return NULL;
6140 set_jumps(backtrack->topbacktracks, LABEL());
6141
6142 if (framesize < 0)
6143 {
6144 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6145 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6146 else
6147 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6148 }
6149 else
6150 {
6151 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6152 {
6153 /* Last alternative. */
6154 if (*cc == OP_KETRPOS)
6155 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6156 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6157 }
6158 else
6159 {
6160 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6161 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6162 }
6163 }
6164
6165 if (*cc == OP_KETRPOS)
6166 break;
6167 ccbegin = cc + 1 + LINK_SIZE;
6168 }
6169
6170 backtrack->topbacktracks = NULL;
6171 if (!zero)
6172 {
6173 if (framesize < 0)
6174 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6175 else /* TMP2 is set to [private_data_ptr] above. */
6176 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
6177 }
6178
6179 /* None of them matched. */
6180 set_jumps(emptymatch, LABEL());
6181 decrease_call_count(common);
6182 return cc + 1 + LINK_SIZE;
6183 }
6184
6185 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6186 {
6187 int class_len;
6188
6189 *opcode = *cc;
6190 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6191 {
6192 cc++;
6193 *type = OP_CHAR;
6194 }
6195 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6196 {
6197 cc++;
6198 *type = OP_CHARI;
6199 *opcode -= OP_STARI - OP_STAR;
6200 }
6201 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6202 {
6203 cc++;
6204 *type = OP_NOT;
6205 *opcode -= OP_NOTSTAR - OP_STAR;
6206 }
6207 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6208 {
6209 cc++;
6210 *type = OP_NOTI;
6211 *opcode -= OP_NOTSTARI - OP_STAR;
6212 }
6213 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6214 {
6215 cc++;
6216 *opcode -= OP_TYPESTAR - OP_STAR;
6217 *type = 0;
6218 }
6219 else
6220 {
6221 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6222 *type = *opcode;
6223 cc++;
6224 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6225 *opcode = cc[class_len - 1];
6226 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6227 {
6228 *opcode -= OP_CRSTAR - OP_STAR;
6229 if (end != NULL)
6230 *end = cc + class_len;
6231 }
6232 else
6233 {
6234 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6235 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6236 *arg2 = GET2(cc, class_len);
6237
6238 if (*arg2 == 0)
6239 {
6240 SLJIT_ASSERT(*arg1 != 0);
6241 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6242 }
6243 if (*arg1 == *arg2)
6244 *opcode = OP_EXACT;
6245
6246 if (end != NULL)
6247 *end = cc + class_len + 2 * IMM2_SIZE;
6248 }
6249 return cc;
6250 }
6251
6252 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6253 {
6254 *arg1 = GET2(cc, 0);
6255 cc += IMM2_SIZE;
6256 }
6257
6258 if (*type == 0)
6259 {
6260 *type = *cc;
6261 if (end != NULL)
6262 *end = next_opcode(common, cc);
6263 cc++;
6264 return cc;
6265 }
6266
6267 if (end != NULL)
6268 {
6269 *end = cc + 1;
6270 #ifdef SUPPORT_UTF
6271 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6272 #endif
6273 }
6274 return cc;
6275 }
6276
/* Emits the matching path for a repeated single item (character, negated
character, character type or character class).

Arguments:
  common   shared JIT compiler state
  cc       points to the repeat opcode (or to the class opcode)
  parent   not referenced directly here; presumably consumed by the
           PUSH_BACKTRACK macro together with "backtrack"

Returns:   the "end" address reported by get_iterator_parameters(), i.e.
           the address following the whole repeated item

The repeat kind ("opcode"), the kind of the repeated item ("type") and the
counts (arg1, arg2) are obtained from get_iterator_parameters(). The code that
matches one instance of the item is always produced by
compile_char1_matchingpath(). */
6277 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6278 {
6279 DEFINE_COMPILER;
6280 backtrack_common *backtrack;
6281 pcre_uchar opcode;
6282 pcre_uchar type;
6283 int arg1 = -1, arg2 = -1;
6284 pcre_uchar* end;
6285 jump_list *nomatch = NULL;
6286 struct sljit_jump *jump = NULL;
6287 struct sljit_label *label;
6288 int private_data_ptr = PRIVATE_DATA(cc);
/* Without a private data slot the two state words of the iterator live on
the backtrack stack; otherwise they are two consecutive words of the locals
area starting at private_data_ptr. */
6289 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6290 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6291 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_w);
6292 int tmp_base, tmp_offset;
6293
6294 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6295
6296 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6297
/* Select the temporary used by the OP_EXACT and OP_POS* cases below: the
TMP3 register for the first group of types, the POSSESSIVE0 local for the
second. NOTE(review): presumably the second group may clobber TMP3 while
matching one item - confirm against compile_char1_matchingpath(). */
6298 switch (type)
6299 {
6300 case OP_NOT_DIGIT:
6301 case OP_DIGIT:
6302 case OP_NOT_WHITESPACE:
6303 case OP_WHITESPACE:
6304 case OP_NOT_WORDCHAR:
6305 case OP_WORDCHAR:
6306 case OP_ANY:
6307 case OP_ALLANY:
6308 case OP_ANYBYTE:
6309 case OP_ANYNL:
6310 case OP_NOT_HSPACE:
6311 case OP_HSPACE:
6312 case OP_NOT_VSPACE:
6313 case OP_VSPACE:
6314 case OP_CHAR:
6315 case OP_CHARI:
6316 case OP_NOT:
6317 case OP_NOTI:
6318 case OP_CLASS:
6319 case OP_NCLASS:
6320 tmp_base = TMP3;
6321 tmp_offset = 0;
6322 break;
6323
6324 default:
6325 SLJIT_ASSERT_STOP();
6326 /* Fall through. */
6327
6328 case OP_EXTUNI:
6329 case OP_XCLASS:
6330 case OP_NOTPROP:
6331 case OP_PROP:
6332 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6333 tmp_offset = POSSESSIVE0;
6334 break;
6335 }
6336
6337 switch(opcode)
6338 {
/* Repeats that consume as many items as they can on the matching path. */
6339 case OP_STAR:
6340 case OP_PLUS:
6341 case OP_UPTO:
6342 case OP_CRRANGE:
6343 if (type == OP_ANYNL || type == OP_EXTUNI)
6344 {
/* For these two types every position reached is pushed on the backtrack
stack (one allocate_stack(1) per iteration, see the end of the loop). A
zero word is pushed first; presumably it marks the bottom of the list for
the backtracking path - confirm there. */
6345 SLJIT_ASSERT(private_data_ptr == 0);
6346 if (opcode == OP_STAR || opcode == OP_UPTO)
6347 {
6348 allocate_stack(common, 2);
6349 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6350 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6351 }
6352 else
6353 {
6354 allocate_stack(common, 1);
6355 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6356 }
6357
/* POSSESSIVE0 serves as the iteration counter of the bounded forms. */
6358 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6359 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6360
6361 label = LABEL();
/* A failure to match one item goes to the backtrack list of this iterator. */
6362 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6363 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6364 {
6365 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6366 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6367 if (opcode == OP_CRRANGE && arg2 > 0)
6368 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
/* Leave the loop once the incremented count reaches arg1. */
6369 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6370 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6371 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6372 }
6373
6374 /* We cannot use TMP3 because of this allocate_stack. */
6375 allocate_stack(common, 1);
6376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6377 JUMPTO(SLJIT_JUMP, label);
6378 if (jump != NULL)
6379 JUMPHERE(jump);
6380 }
6381 else
6382 {
/* All other types: only two state words are kept. offset0 holds the
position after the last successful item; offset1 holds either the starting
position (when opcode <= OP_PLUS, presumably OP_STAR / OP_PLUS only) or an
iteration counter that starts at 1. */
6383 if (opcode == OP_PLUS)
6384 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6385 if (private_data_ptr == 0)
6386 allocate_stack(common, 2);
6387 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6388 if (opcode <= OP_PLUS)
6389 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6390 else
6391 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6392 label = LABEL();
/* Inside the loop a failed item is not a backtrack: it just ends the loop. */
6393 compile_char1_matchingpath(common, type, cc, &nomatch);
6394 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6395 if (opcode <= OP_PLUS)
6396 JUMPTO(SLJIT_JUMP, label);
6397 else if (opcode == OP_CRRANGE && arg1 == 0)
6398 {
/* arg1 == 0: count the iterations but never stop on the counter. */
6399 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6400 JUMPTO(SLJIT_JUMP, label);
6401 }
6402 else
6403 {
/* Increment the counter and continue while it is at most arg1. */
6404 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6405 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6406 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6407 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6408 }
6409 set_jumps(nomatch, LABEL());
/* A range that ended with its counter below arg2 + 1 is a failure. */
6410 if (opcode == OP_CRRANGE)
6411 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
/* Return to the position recorded after the last successful item. */
6412 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6413 }
6414 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6415 break;
6416
/* The matching path only matches the mandatory first item (OP_MINPLUS) and
records STR_PTR; no loop is emitted here. */
6417 case OP_MINSTAR:
6418 case OP_MINPLUS:
6419 if (opcode == OP_MINPLUS)
6420 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6421 if (private_data_ptr == 0)
6422 allocate_stack(common, 1);
6423 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6424 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6425 break;
6426
/* Records STR_PTR and a counter initialised to 1. For OP_CRMINRANGE an
unconditional jump to the backtrack list is emitted before the matchingpath
label is placed. */
6427 case OP_MINUPTO:
6428 case OP_CRMINRANGE:
6429 if (private_data_ptr == 0)
6430 allocate_stack(common, 2);
6431 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6432 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6433 if (opcode == OP_CRMINRANGE)
6434 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6435 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6436 break;
6437
/* Saves STR_PTR; OP_QUERY then tries the item once on the matching path,
OP_MINQUERY does not. */
6438 case OP_QUERY:
6439 case OP_MINQUERY:
6440 if (private_data_ptr == 0)
6441 allocate_stack(common, 1);
6442 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6443 if (opcode == OP_QUERY)
6444 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6445 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6446 break;
6447
/* Count-down loop: the temporary starts at arg1 and the item is matched
until it reaches zero; any failure goes to the backtrack list. */
6448 case OP_EXACT:
6449 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6450 label = LABEL();
6451 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6452 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6453 JUMPTO(SLJIT_C_NOT_ZERO, label);
6454 break;
6455
/* No stack or private data is written in this case: the temporary tracks
the position after the last successful item, and STR_PTR is reset to it when
an item fails. OP_POSUPTO additionally counts down from arg1 in POSSESSIVE1. */
6456 case OP_POSSTAR:
6457 case OP_POSPLUS:
6458 case OP_POSUPTO:
6459 if (opcode == OP_POSPLUS)
6460 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6461 if (opcode == OP_POSUPTO)
6462 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6463 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6464 label = LABEL();
6465 compile_char1_matchingpath(common, type, cc, &nomatch);
6466 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6467 if (opcode != OP_POSUPTO)
6468 JUMPTO(SLJIT_JUMP, label);
6469 else
6470 {
6471 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6472 JUMPTO(SLJIT_C_NOT_ZERO, label);
6473 }
6474 set_jumps(nomatch, LABEL());
6475 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6476 break;
6477
/* Single optional item: on failure STR_PTR is restored to the saved start,
on success the temporary already equals STR_PTR. */
6478 case OP_POSQUERY:
6479 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6480 compile_char1_matchingpath(common, type, cc, &nomatch);
6481 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6482 set_jumps(nomatch, LABEL());
6483 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6484 break;
6485
6486 default:
6487 SLJIT_ASSERT_STOP();
6488 break;
6489 }
6490
6491 decrease_call_count(common);
6492 return end;
6493 }
6494
6495 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6496 {
6497 DEFINE_COMPILER;
6498 backtrack_common *backtrack;
6499
6500 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6501
6502 if (*cc == OP_FAIL)
6503 {
6504 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6505 return cc + 1;
6506 }
6507
6508 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6509 {
6510 /* No need to check notempty conditions. */
6511 if (common->acceptlabel == NULL)
6512 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6513 else
6514 JUMPTO(SLJIT_JUMP, common->acceptlabel);
6515 return cc + 1;
6516 }
6517
6518 if (common->acceptlabel == NULL)
6519 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6520 else
6521 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
6522 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6523 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6524 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6525 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6526 if (common->acceptlabel == NULL)
6527 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6528 else
6529 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
6530 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6531 if (common->acceptlabel == NULL)
6532 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6533 else
6534 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
6535 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6536 return cc + 1;
6537 }
6538
6539 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
6540 {
6541 DEFINE_COMPILER;
6542 int offset = GET2(cc, 1);
6543 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
6544
6545 /* Data will be discarded anyway... */
6546 if (common->currententry != NULL)
6547 return cc + 1 + IMM2_SIZE;
6548
6549 if (!optimized_cbracket)
6550 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6551 offset <<= 1;
6552 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6553 if (!optimized_cbracket)
6554 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6555 return cc + 1 + IMM2_SIZE;
6556 }
6557
6558 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6559 {
6560 DEFINE_COMPILER;
6561 backtrack_common *backtrack;
6562
6563 while (cc < ccend)
6564 {
6565 switch(*cc)
6566 {
6567 case OP_SOD:
6568 case OP_SOM:
6569 case OP_NOT_WORD_BOUNDARY:
6570 case OP_WORD_BOUNDARY:
6571 case OP_NOT_DIGIT:
6572 case OP_DIGIT:
6573 case OP_NOT_WHITESPACE:
6574 case OP_WHITESPACE:
6575 case OP_NOT_WORDCHAR:
6576 case OP_WORDCHAR:
6577 case OP_ANY:
6578 case OP_ALLANY:
6579 case OP_ANYBYTE:
6580 case OP_NOTPROP:
6581 case OP_PROP:
6582 case OP_ANYNL:
6583 case OP_NOT_HSPACE:
6584 case OP_HSPACE:
6585 case OP_NOT_VSPACE:
6586 case OP_VSPACE:
6587 case OP_EXTUNI:
6588 case OP_EODN:
6589 case OP_EOD:
6590 case OP_CIRC:
6591 case OP_CIRCM:
6592 case OP_DOLL:
6593 case OP_DOLLM:
6594 case OP_NOT:
6595 case OP_NOTI:
6596 case OP_REVERSE:
6597 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6598 break;
6599
6600 case OP_SET_SOM:
6601 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6602 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6603 allocate_stack(common, 1);
6604 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6605 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6606 cc++;
6607 break;
6608
6609 case OP_CHAR:
6610 case OP_CHARI:
6611 if (common->mode == JIT_COMPILE)
6612 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6613 else
6614 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6615 break;
6616
6617 case OP_STAR:
6618 case OP_MINSTAR:
6619 case OP_PLUS:
6620 case OP_MINPLUS:
6621 case OP_QUERY:
6622 case OP_MINQUERY:
6623 case OP_UPTO:
6624 case OP_MINUPTO:
6625 case OP_EXACT:
6626 case OP_POSSTAR:
6627 case OP_POSPLUS:
6628 case OP_POSQUERY:
6629 case OP_POSUPTO:
6630 case OP_STARI:
6631 case OP_MINSTARI:
6632 case OP_PLUSI:
6633 case OP_MINPLUSI:
6634 case OP_QUERYI:
6635 case OP_MINQUERYI:
6636 case OP_UPTOI:
6637 case OP_MINUPTOI:
6638 case OP_EXACTI:
6639 case OP_POSSTARI:
6640 case OP_POSPLUSI:
6641 case OP_POSQUERYI:
6642 case OP_POSUPTOI:
6643 case OP_NOTSTAR:
6644 case OP_NOTMINSTAR:
6645 case OP_NOTPLUS:
6646 case OP_NOTMINPLUS:
6647 case OP_NOTQUERY:
6648 case OP_NOTMINQUERY:
6649 case OP_NOTUPTO:
6650 case OP_NOTMINUPTO:
6651 case OP_NOTEXACT:
6652 case OP_NOTPOSSTAR:
6653 case OP_NOTPOSPLUS:
6654 case OP_NOTPOSQUERY:
6655 case OP_NOTPOSUPTO:
6656 case OP_NOTSTARI:
6657 case OP_NOTMINSTARI:
6658 case OP_NOTPLUSI:
6659 case OP_NOTMINPLUSI:
6660 case OP_NOTQUERYI:
6661 case OP_NOTMINQUERYI:
6662 case OP_NOTUPTOI:
6663 case OP_NOTMINUPTOI:
6664 case OP_NOTEXACTI:
6665 case OP_NOTPOSSTARI:
6666 case OP_NOTPOSPLUSI:
6667 case OP_NOTPOSQUERYI:
6668 case OP_NOTPOSUPTOI:
6669 case OP_TYPESTAR:
6670 case OP_TYPEMINSTAR:
6671 case OP_TYPEPLUS:
6672 case OP_TYPEMINPLUS:
6673 case OP_TYPEQUERY:
6674 case OP_TYPEMINQUERY:
6675 case OP_TYPEUPTO:
6676 case OP_TYPEMINUPTO:
6677 case OP_TYPEEXACT:
6678 case OP_TYPEPOSSTAR:
6679 case OP_TYPEPOSPLUS:
6680 case OP_TYPEPOSQUERY:
6681 case OP_TYPEPOSUPTO:
6682 cc = compile_iterator_matchingpath(common, cc, parent);
6683 break;
6684
6685 case OP_CLASS:
6686 case OP_NCLASS:
6687 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
6688 cc = compile_iterator_matchingpath(common, cc, parent);
6689 else
6690 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6691 break;
6692
6693 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6694 case OP_XCLASS:
6695 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
6696 cc = compile_iterator_matchingpath(common, cc, parent);
6697 else
6698 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6699 break;
6700 #endif
6701
6702 case OP_REF:
6703 case OP_REFI:
6704 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
6705 cc = compile_ref_iterator_matchingpath(common, cc, parent);
6706 else
6707 cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
6708 break;
6709
6710 case OP_RECURSE:
6711 cc = compile_recurse_matchingpath(common, cc, parent);
6712 break;
6713
6714 case OP_ASSERT:
6715 case OP_ASSERT_NOT:
6716 case OP_ASSERTBACK:
6717 case OP_ASSERTBACK_NOT:
6718 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6719 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6720 break;
6721
6722 case OP_BRAMINZERO:
6723 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
6724 cc = bracketend(cc + 1);
6725 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
6726 {
6727 allocate_stack(common, 1);
6728 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6729 }
6730 else
6731 {
6732 allocate_stack(common, 2);
6733 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6734 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
6735 }
6736 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
6737 if (cc[1] > OP_ASSERTBACK_NOT)
6738 decrease_call_count(common);
6739 break;
6740
6741 case OP_ONCE:
6742 case OP_ONCE_NC:
6743 case OP_BRA:
6744 case OP_CBRA:
6745 case OP_COND:
6746 case OP_SBRA:
6747 case OP_SCBRA:
6748 case OP_SCOND:
6749 cc = compile_bracket_matchingpath(common, cc, parent);
6750 break;
6751
6752 case OP_BRAZERO:
6753 if (cc[1] > OP_ASSERTBACK_NOT)
6754 cc = compile_bracket_matchingpath(common, cc, parent);
6755 else
6756 {
6757 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6758 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6759 }
6760 break;
6761