/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1120 - (show annotations)
Tue Oct 16 15:57:38 2012 UTC (7 years ago) by chpe
File MIME type: text/plain
File size: 261258 byte(s)
pcre32: Fix unused variable warnings

Add ifdefs since these variables are unused on pcre32.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* The regex stack is allocated on the real machine stack with this size.
   Fast, but the size is limited. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate of the stack allocated by the OS. Should be a multiple
   of the page size. */
#define STACK_GROWTH_RATE 8192

/* With SLJIT debugging switched on, enable the check that the allocation
   could destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
80
81 /*
82 Short summary about the backtracking mechanism employed by the jit code generator:
83
84 The code generator follows the recursive nature of the PERL compatible regular
85 expressions. The basic blocks of regular expressions are condition checkers
86 which execute different commands depending on the result of the condition check.
87 The relationship between the operators can be horizontal (concatenation) and
88 vertical (sub-expression) (See struct backtrack_common for more details).
89
90 'ab' - 'a' and 'b' regexps are concatenated
91 'a+' - 'a' is the sub-expression of the '+' operator
92
93 The condition checkers are boolean (true/false) checkers. Machine code is generated
94 for the checker itself and for the actions depending on the result of the checker.
95 The 'true' case is called the matching path (expected path), and the other is called
96 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
97 branches on the matching path.
98
99 Greedy star operator (*) :
100 Matching path: match happens.
101 Backtrack path: match failed.
102 Non-greedy star operator (*?) :
103 Matching path: no need to perform a match.
104 Backtrack path: match is required.
105
106 The following example shows the code generated for a capturing bracket
107 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
108 we have the following regular expression:
109
110 A(B|C)D
111
112 The generated code will be the following:
113
114 A matching path
115 '(' matching path (pushing arguments to the stack)
116 B matching path
117 ')' matching path (pushing arguments to the stack)
118 D matching path
119 return with successful match
120
121 D backtrack path
122 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
123 B backtrack path
124 C expected path
125 jump to D matching path
126 C backtrack path
127 A backtrack path
128
129 Notice that the order of the backtrack code paths is the opposite of the fast
130 code paths. In this way the topmost value on the stack always belongs
131 to the current backtrack code path. The backtrack path must check
132 whether there is a next alternative. If so, it needs to jump back to
133 the matching path eventually. Otherwise it needs to clear out its own stack
134 frame and continue the execution on the backtrack code paths.
135 */
136
137 /*
138 Saved stack frames:
139
140 Atomic blocks and asserts require reloading the values of private data
141 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
142 are not necessarily known at compile time, thus we need a dynamic restore
143 mechanism.
144
145 The stack frames are stored in a chain list, and have the following format:
146 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
147
148 Thus we can restore the private data to a particular point in the stack.
149 */
150
151 typedef struct jit_arguments {
152 /* Pointers first. */
153 struct sljit_stack *stack;
154 const pcre_uchar *str;
155 const pcre_uchar *begin;
156 const pcre_uchar *end;
157 int *offsets;
158 pcre_uchar *uchar_ptr;
159 pcre_uchar *mark_ptr;
160 /* Everything else after. */
161 int offsetcount;
162 int calllimit;
163 pcre_uint8 notbol;
164 pcre_uint8 noteol;
165 pcre_uint8 notempty;
166 pcre_uint8 notempty_atstart;
167 } jit_arguments;
168
169 typedef struct executable_functions {
170 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
171 PUBL(jit_callback) callback;
172 void *userdata;
173 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
174 } executable_functions;
175
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
180
/* Kinds of stubs; stack_alloc is the only one defined. */
enum stub_types { stack_alloc };

/* Node of a singly linked list of stubs. Each node carries the stub kind,
   an integer datum, an sljit jump (start) and an sljit label (quit). */
typedef struct stub_list {
  enum stub_types type;
  int data;
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
190
191 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
192
193 /* The following structure is the key data type for the recursive
194 code generator. It is allocated by compile_matchingpath, and contains
195 the aguments for compile_backtrackingpath. Must be the first member
196 of its descendants. */
197 typedef struct backtrack_common {
198 /* Concatenation stack. */
199 struct backtrack_common *prev;
200 jump_list *nextbacktracks;
201 /* Internal stack (for component operators). */
202 struct backtrack_common *top;
203 jump_list *topbacktracks;
204 /* Opcode pointer. */
205 pcre_uchar *cc;
206 } backtrack_common;
207
208 typedef struct assert_backtrack {
209 backtrack_common common;
210 jump_list *condfailed;
211 /* Less than 0 (-1) if a frame is not needed. */
212 int framesize;
213 /* Points to our private memory word on the stack. */
214 int private_data_ptr;
215 /* For iterators. */
216 struct sljit_label *matchingpath;
217 } assert_backtrack;
218
219 typedef struct bracket_backtrack {
220 backtrack_common common;
221 /* Where to coninue if an alternative is successfully matched. */
222 struct sljit_label *alternative_matchingpath;
223 /* For rmin and rmax iterators. */
224 struct sljit_label *recursive_matchingpath;
225 /* For greedy ? operator. */
226 struct sljit_label *zero_matchingpath;
227 /* Contains the branches of a failed condition. */
228 union {
229 /* Both for OP_COND, OP_SCOND. */
230 jump_list *condfailed;
231 assert_backtrack *assert;
232 /* For OP_ONCE. -1 if not needed. */
233 int framesize;
234 } u;
235 /* Points to our private memory word on the stack. */
236 int private_data_ptr;
237 } bracket_backtrack;
238
239 typedef struct bracketpos_backtrack {
240 backtrack_common common;
241 /* Points to our private memory word on the stack. */
242 int private_data_ptr;
243 /* Reverting stack is needed. */
244 int framesize;
245 /* Allocated stack size. */
246 int stacksize;
247 } bracketpos_backtrack;
248
249 typedef struct braminzero_backtrack {
250 backtrack_common common;
251 struct sljit_label *matchingpath;
252 } braminzero_backtrack;
253
254 typedef struct iterator_backtrack {
255 backtrack_common common;
256 /* Next iteration. */
257 struct sljit_label *matchingpath;
258 } iterator_backtrack;
259
260 typedef struct recurse_entry {
261 struct recurse_entry *next;
262 /* Contains the function entry. */
263 struct sljit_label *entry;
264 /* Collects the calls until the function is not created. */
265 jump_list *calls;
266 /* Points to the starting opcode. */
267 int start;
268 } recurse_entry;
269
270 typedef struct recurse_backtrack {
271 backtrack_common common;
272 } recurse_backtrack;
273
274 #define MAX_RANGE_SIZE 6
275
276 typedef struct compiler_common {
277 struct sljit_compiler *compiler;
278 pcre_uchar *start;
279
280 /* Maps private data offset to each opcode. */
281 int *private_data_ptrs;
282 /* Tells whether the capturing bracket is optimized. */
283 pcre_uint8 *optimized_cbracket;
284 /* Starting offset of private data for capturing brackets. */
285 int cbraptr;
286 /* OVector starting point. Must be divisible by 2. */
287 int ovector_start;
288 /* Last known position of the requested byte. */
289 int req_char_ptr;
290 /* Head of the last recursion. */
291 int recursive_head;
292 /* First inspected character for partial matching. */
293 int start_used_ptr;
294 /* Starting pointer for partial soft matches. */
295 int hit_start;
296 /* End pointer of the first line. */
297 int first_line_end;
298 /* Points to the marked string. */
299 int mark_ptr;
300
301 /* Flipped and lower case tables. */
302 const pcre_uint8 *fcc;
303 sljit_w lcc;
304 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
305 int mode;
306 /* Newline control. */
307 int nltype;
308 int newline;
309 int bsr_nltype;
310 /* Dollar endonly. */
311 int endonly;
312 BOOL has_set_som;
313 /* Tables. */
314 sljit_w ctypes;
315 int digits[2 + MAX_RANGE_SIZE];
316 /* Named capturing brackets. */
317 sljit_uw name_table;
318 sljit_w name_count;
319 sljit_w name_entry_size;
320
321 /* Labels and jump lists. */
322 struct sljit_label *partialmatchlabel;
323 struct sljit_label *quitlabel;
324 struct sljit_label *acceptlabel;
325 stub_list *stubs;
326 recurse_entry *entries;
327 recurse_entry *currententry;
328 jump_list *partialmatch;
329 jump_list *quit;
330 jump_list *accept;
331 jump_list *calllimit;
332 jump_list *stackalloc;
333 jump_list *revertframes;
334 jump_list *wordboundary;
335 jump_list *anynewline;
336 jump_list *hspace;
337 jump_list *vspace;
338 jump_list *casefulcmp;
339 jump_list *caselesscmp;
340 BOOL jscript_compat;
341 #ifdef SUPPORT_UTF
342 BOOL utf;
343 #ifdef SUPPORT_UCP
344 BOOL use_ucp;
345 #endif
346 #ifndef COMPILE_PCRE32
347 jump_list *utfreadchar;
348 #endif
349 #ifdef COMPILE_PCRE8
350 jump_list *utfreadtype8;
351 #endif
352 #endif /* SUPPORT_UTF */
353 #ifdef SUPPORT_UCP
354 jump_list *getucd;
355 #endif
356 } compiler_common;
357
/* Context used by byte_sequence_compare. When SLJIT_UNALIGNED is set it
   also holds two unions (c and oc) that overlay character units with an
   int and an unsigned short; the unit array type depends on the
   COMPILE_PCRE8 / 16 / 32 mode. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
391
/* Marker values for saved stack frames. init_frame stores
   frame_setstrbegin and frame_setmark in front of the value they tag;
   frame_end is zero. */
enum {
  frame_end = 0,
  frame_setstrbegin = -1,
  frame_setmark = -2
};
397
/* Drop the CMP macro that comes from sljit. */
#undef CMP

/* Byte offset of stack element i: element 0 lies one sljit_w below the
   base, and each further element lies one sljit_w lower. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))

/* Names given to the sljit registers. */
#define TMP1 SLJIT_TEMPORARY_REG1
#define TMP2 SLJIT_TEMPORARY_REG3
#define TMP3 SLJIT_TEMPORARY_EREG2
#define STR_PTR SLJIT_SAVED_REG1
#define STR_END SLJIT_SAVED_REG2
#define STACK_TOP SLJIT_TEMPORARY_REG2
#define STACK_LIMIT SLJIT_SAVED_REG3
#define ARGUMENTS SLJIT_SAVED_EREG1
#define CALL_COUNT SLJIT_SAVED_EREG2
#define RETURN_ADDR SLJIT_TEMPORARY_EREG1

/* Layout of the local space. */
/* Two locals that the current opcode is free to use. */
#define LOCALS0 (0 * sizeof(sljit_w))
#define LOCALS1 (1 * sizeof(sljit_w))
/* Two locals reserved for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_w))
#define POSSESSIVE1 (3 * sizeof(sljit_w))
/* Maximum number of recursions. */
#define CALL_LIMIT (4 * sizeof(sljit_w))
/* The output vector lives on the stack and holds character pointers.
   Its data forms two groups: the first holds the start / end character
   pointers, the second holds the start pointers of capturing groups whose
   end has not been reached yet. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
432
433 #if defined COMPILE_PCRE8
434 #define MOV_UCHAR SLJIT_MOV_UB
435 #define MOVU_UCHAR SLJIT_MOVU_UB
436 #elif defined COMPILE_PCRE16
437 #define MOV_UCHAR SLJIT_MOV_UH
438 #define MOVU_UCHAR SLJIT_MOVU_UH
439 #elif defined COMPILE_PCRE32
440 #define MOV_UCHAR SLJIT_MOV_UI
441 #define MOVU_UCHAR SLJIT_MOVU_UI
442 #else
443 #error Unsupported compiling mode
444 #endif
445
/* Shorthand wrappers around the sljit emitters. All of them except
   DEFINE_COMPILER expect a local variable named "compiler";
   DEFINE_COMPILER declares it from common->compiler. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler

/* One- and two-operand instructions. */
#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))

/* Labels and jumps. JUMPTO binds a new jump to an existing label,
   JUMPHERE binds an existing jump to a new label. */
#define LABEL() sljit_emit_label(compiler)
#define JUMP(type) sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))

/* Compare-and-jump; CMPTO also binds the resulting jump to a label. */
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

/* Miscellaneous emitters. */
#define COND_VALUE(op, dst, dstw, type) sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))
469
470 static pcre_uchar* bracketend(pcre_uchar* cc)
471 {
472 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
473 do cc += GET(cc, 1); while (*cc == OP_ALT);
474 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
475 cc += 1 + LINK_SIZE;
476 return cc;
477 }
478
479 /* Functions which might need modification for every newly supported opcode:
480 next_opcode
481 get_private_data_length
482 set_private_data_ptrs
483 get_framesize
484 init_frame
485 get_private_data_length_for_copy
486 copy_private_data
487 compile_matchingpath
488 compile_backtrackingpath
489 */
490
491 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
492 {
493 SLJIT_UNUSED_ARG(common);
494 switch(*cc)
495 {
496 case OP_SOD:
497 case OP_SOM:
498 case OP_SET_SOM:
499 case OP_NOT_WORD_BOUNDARY:
500 case OP_WORD_BOUNDARY:
501 case OP_NOT_DIGIT:
502 case OP_DIGIT:
503 case OP_NOT_WHITESPACE:
504 case OP_WHITESPACE:
505 case OP_NOT_WORDCHAR:
506 case OP_WORDCHAR:
507 case OP_ANY:
508 case OP_ALLANY:
509 case OP_ANYNL:
510 case OP_NOT_HSPACE:
511 case OP_HSPACE:
512 case OP_NOT_VSPACE:
513 case OP_VSPACE:
514 case OP_EXTUNI:
515 case OP_EODN:
516 case OP_EOD:
517 case OP_CIRC:
518 case OP_CIRCM:
519 case OP_DOLL:
520 case OP_DOLLM:
521 case OP_TYPESTAR:
522 case OP_TYPEMINSTAR:
523 case OP_TYPEPLUS:
524 case OP_TYPEMINPLUS:
525 case OP_TYPEQUERY:
526 case OP_TYPEMINQUERY:
527 case OP_TYPEPOSSTAR:
528 case OP_TYPEPOSPLUS:
529 case OP_TYPEPOSQUERY:
530 case OP_CRSTAR:
531 case OP_CRMINSTAR:
532 case OP_CRPLUS:
533 case OP_CRMINPLUS:
534 case OP_CRQUERY:
535 case OP_CRMINQUERY:
536 case OP_DEF:
537 case OP_BRAZERO:
538 case OP_BRAMINZERO:
539 case OP_BRAPOSZERO:
540 case OP_COMMIT:
541 case OP_FAIL:
542 case OP_ACCEPT:
543 case OP_ASSERT_ACCEPT:
544 case OP_SKIPZERO:
545 return cc + 1;
546
547 case OP_ANYBYTE:
548 #ifdef SUPPORT_UTF
549 if (common->utf) return NULL;
550 #endif
551 return cc + 1;
552
553 case OP_CHAR:
554 case OP_CHARI:
555 case OP_NOT:
556 case OP_NOTI:
557 case OP_STAR:
558 case OP_MINSTAR:
559 case OP_PLUS:
560 case OP_MINPLUS:
561 case OP_QUERY:
562 case OP_MINQUERY:
563 case OP_POSSTAR:
564 case OP_POSPLUS:
565 case OP_POSQUERY:
566 case OP_STARI:
567 case OP_MINSTARI:
568 case OP_PLUSI:
569 case OP_MINPLUSI:
570 case OP_QUERYI:
571 case OP_MINQUERYI:
572 case OP_POSSTARI:
573 case OP_POSPLUSI:
574 case OP_POSQUERYI:
575 case OP_NOTSTAR:
576 case OP_NOTMINSTAR:
577 case OP_NOTPLUS:
578 case OP_NOTMINPLUS:
579 case OP_NOTQUERY:
580 case OP_NOTMINQUERY:
581 case OP_NOTPOSSTAR:
582 case OP_NOTPOSPLUS:
583 case OP_NOTPOSQUERY:
584 case OP_NOTSTARI:
585 case OP_NOTMINSTARI:
586 case OP_NOTPLUSI:
587 case OP_NOTMINPLUSI:
588 case OP_NOTQUERYI:
589 case OP_NOTMINQUERYI:
590 case OP_NOTPOSSTARI:
591 case OP_NOTPOSPLUSI:
592 case OP_NOTPOSQUERYI:
593 cc += 2;
594 #ifdef SUPPORT_UTF
595 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
596 #endif
597 return cc;
598
599 case OP_UPTO:
600 case OP_MINUPTO:
601 case OP_EXACT:
602 case OP_POSUPTO:
603 case OP_UPTOI:
604 case OP_MINUPTOI:
605 case OP_EXACTI:
606 case OP_POSUPTOI:
607 case OP_NOTUPTO:
608 case OP_NOTMINUPTO:
609 case OP_NOTEXACT:
610 case OP_NOTPOSUPTO:
611 case OP_NOTUPTOI:
612 case OP_NOTMINUPTOI:
613 case OP_NOTEXACTI:
614 case OP_NOTPOSUPTOI:
615 cc += 2 + IMM2_SIZE;
616 #ifdef SUPPORT_UTF
617 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
618 #endif
619 return cc;
620
621 case OP_NOTPROP:
622 case OP_PROP:
623 return cc + 1 + 2;
624
625 case OP_TYPEUPTO:
626 case OP_TYPEMINUPTO:
627 case OP_TYPEEXACT:
628 case OP_TYPEPOSUPTO:
629 case OP_REF:
630 case OP_REFI:
631 case OP_CREF:
632 case OP_NCREF:
633 case OP_RREF:
634 case OP_NRREF:
635 case OP_CLOSE:
636 cc += 1 + IMM2_SIZE;
637 return cc;
638
639 case OP_CRRANGE:
640 case OP_CRMINRANGE:
641 return cc + 1 + 2 * IMM2_SIZE;
642
643 case OP_CLASS:
644 case OP_NCLASS:
645 return cc + 1 + 32 / sizeof(pcre_uchar);
646
647 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
648 case OP_XCLASS:
649 return cc + GET(cc, 1);
650 #endif
651
652 case OP_RECURSE:
653 case OP_ASSERT:
654 case OP_ASSERT_NOT:
655 case OP_ASSERTBACK:
656 case OP_ASSERTBACK_NOT:
657 case OP_REVERSE:
658 case OP_ONCE:
659 case OP_ONCE_NC:
660 case OP_BRA:
661 case OP_BRAPOS:
662 case OP_COND:
663 case OP_SBRA:
664 case OP_SBRAPOS:
665 case OP_SCOND:
666 case OP_ALT:
667 case OP_KET:
668 case OP_KETRMAX:
669 case OP_KETRMIN:
670 case OP_KETRPOS:
671 return cc + 1 + LINK_SIZE;
672
673 case OP_CBRA:
674 case OP_CBRAPOS:
675 case OP_SCBRA:
676 case OP_SCBRAPOS:
677 return cc + 1 + LINK_SIZE + IMM2_SIZE;
678
679 case OP_MARK:
680 return cc + 1 + 2 + cc[1];
681
682 default:
683 return NULL;
684 }
685 }
686
/* Case label groups shared by get_private_data_length and
   set_private_data_ptrs. In both functions the _1 groups request one
   private data word and the _2A / _2B groups request two. The TYPE
   groups cover the OP_TYPE* repeats. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
  case OP_MINSTAR: case OP_MINPLUS: \
  case OP_QUERY: case OP_MINQUERY: \
  case OP_MINSTARI: case OP_MINPLUSI: \
  case OP_QUERYI: case OP_MINQUERYI: \
  case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
  case OP_NOTQUERY: case OP_NOTMINQUERY: \
  case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
  case OP_STAR: case OP_PLUS: \
  case OP_STARI: case OP_PLUSI: \
  case OP_NOTSTAR: case OP_NOTPLUS: \
  case OP_NOTSTARI: case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
  case OP_UPTO: case OP_MINUPTO: \
  case OP_UPTOI: case OP_MINUPTOI: \
  case OP_NOTUPTO: case OP_NOTMINUPTO: \
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
  case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
  case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
738
739 static int get_class_iterator_size(pcre_uchar *cc)
740 {
741 switch(*cc)
742 {
743 case OP_CRSTAR:
744 case OP_CRPLUS:
745 return 2;
746
747 case OP_CRMINSTAR:
748 case OP_CRMINPLUS:
749 case OP_CRQUERY:
750 case OP_CRMINQUERY:
751 return 1;
752
753 case OP_CRRANGE:
754 case OP_CRMINRANGE:
755 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
756 return 0;
757 return 2;
758
759 default:
760 return 0;
761 }
762 }
763
764 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
765 {
766 int private_data_length = 0;
767 pcre_uchar *alternative;
768 pcre_uchar *name;
769 pcre_uchar *end = NULL;
770 int space, size, bracketlen, i;
771
772 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
773 while (cc < ccend)
774 {
775 space = 0;
776 size = 0;
777 bracketlen = 0;
778 switch(*cc)
779 {
780 case OP_SET_SOM:
781 common->has_set_som = TRUE;
782 cc += 1;
783 break;
784
785 case OP_REF:
786 case OP_REFI:
787 common->optimized_cbracket[GET2(cc, 1)] = 0;
788 cc += 1 + IMM2_SIZE;
789 break;
790
791 case OP_ASSERT:
792 case OP_ASSERT_NOT:
793 case OP_ASSERTBACK:
794 case OP_ASSERTBACK_NOT:
795 case OP_ONCE:
796 case OP_ONCE_NC:
797 case OP_BRAPOS:
798 case OP_SBRA:
799 case OP_SBRAPOS:
800 private_data_length += sizeof(sljit_w);
801 bracketlen = 1 + LINK_SIZE;
802 break;
803
804 case OP_CBRAPOS:
805 case OP_SCBRAPOS:
806 private_data_length += sizeof(sljit_w);
807 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
808 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
809 break;
810
811 case OP_COND:
812 case OP_SCOND:
813 bracketlen = cc[1 + LINK_SIZE];
814 if (bracketlen == OP_CREF)
815 {
816 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
817 common->optimized_cbracket[bracketlen] = 0;
818 }
819 else if (bracketlen == OP_NCREF)
820 {
821 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
822 name = (pcre_uchar *)common->name_table;
823 alternative = name;
824 for (i = 0; i < common->name_count; i++)
825 {
826 if (GET2(name, 0) == bracketlen) break;
827 name += common->name_entry_size;
828 }
829 SLJIT_ASSERT(i != common->name_count);
830
831 for (i = 0; i < common->name_count; i++)
832 {
833 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
834 common->optimized_cbracket[GET2(alternative, 0)] = 0;
835 alternative += common->name_entry_size;
836 }
837 }
838
839 if (*cc == OP_COND)
840 {
841 /* Might be a hidden SCOND. */
842 alternative = cc + GET(cc, 1);
843 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
844 private_data_length += sizeof(sljit_w);
845 }
846 else
847 private_data_length += sizeof(sljit_w);
848 bracketlen = 1 + LINK_SIZE;
849 break;
850
851 case OP_BRA:
852 bracketlen = 1 + LINK_SIZE;
853 break;
854
855 case OP_CBRA:
856 case OP_SCBRA:
857 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
858 break;
859
860 CASE_ITERATOR_PRIVATE_DATA_1
861 space = 1;
862 size = -2;
863 break;
864
865 CASE_ITERATOR_PRIVATE_DATA_2A
866 space = 2;
867 size = -2;
868 break;
869
870 CASE_ITERATOR_PRIVATE_DATA_2B
871 space = 2;
872 size = -(2 + IMM2_SIZE);
873 break;
874
875 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
876 space = 1;
877 size = 1;
878 break;
879
880 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
881 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
882 space = 2;
883 size = 1;
884 break;
885
886 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
887 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
888 space = 2;
889 size = 1 + IMM2_SIZE;
890 break;
891
892 case OP_CLASS:
893 case OP_NCLASS:
894 size += 1 + 32 / sizeof(pcre_uchar);
895 space = get_class_iterator_size(cc + size);
896 break;
897
898 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
899 case OP_XCLASS:
900 size = GET(cc, 1);
901 space = get_class_iterator_size(cc + size);
902 break;
903 #endif
904
905 case OP_RECURSE:
906 /* Set its value only once. */
907 if (common->recursive_head == 0)
908 {
909 common->recursive_head = common->ovector_start;
910 common->ovector_start += sizeof(sljit_w);
911 }
912 cc += 1 + LINK_SIZE;
913 break;
914
915 case OP_MARK:
916 if (common->mark_ptr == 0)
917 {
918 common->mark_ptr = common->ovector_start;
919 common->ovector_start += sizeof(sljit_w);
920 }
921 cc += 1 + 2 + cc[1];
922 break;
923
924 default:
925 cc = next_opcode(common, cc);
926 if (cc == NULL)
927 return -1;
928 break;
929 }
930
931 if (space > 0 && cc >= end)
932 private_data_length += sizeof(sljit_w) * space;
933
934 if (size != 0)
935 {
936 if (size < 0)
937 {
938 cc += -size;
939 #ifdef SUPPORT_UTF
940 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
941 #endif
942 }
943 else
944 cc += size;
945 }
946
947 if (bracketlen > 0)
948 {
949 if (cc >= end)
950 {
951 end = bracketend(cc);
952 if (end[-1 - LINK_SIZE] == OP_KET)
953 end = NULL;
954 }
955 cc += bracketlen;
956 }
957 }
958 return private_data_length;
959 }
960
961 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
962 {
963 pcre_uchar *cc = common->start;
964 pcre_uchar *alternative;
965 pcre_uchar *end = NULL;
966 int space, size, bracketlen;
967
968 while (cc < ccend)
969 {
970 space = 0;
971 size = 0;
972 bracketlen = 0;
973 switch(*cc)
974 {
975 case OP_ASSERT:
976 case OP_ASSERT_NOT:
977 case OP_ASSERTBACK:
978 case OP_ASSERTBACK_NOT:
979 case OP_ONCE:
980 case OP_ONCE_NC:
981 case OP_BRAPOS:
982 case OP_SBRA:
983 case OP_SBRAPOS:
984 case OP_SCOND:
985 common->private_data_ptrs[cc - common->start] = private_data_ptr;
986 private_data_ptr += sizeof(sljit_w);
987 bracketlen = 1 + LINK_SIZE;
988 break;
989
990 case OP_CBRAPOS:
991 case OP_SCBRAPOS:
992 common->private_data_ptrs[cc - common->start] = private_data_ptr;
993 private_data_ptr += sizeof(sljit_w);
994 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
995 break;
996
997 case OP_COND:
998 /* Might be a hidden SCOND. */
999 alternative = cc + GET(cc, 1);
1000 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1001 {
1002 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1003 private_data_ptr += sizeof(sljit_w);
1004 }
1005 bracketlen = 1 + LINK_SIZE;
1006 break;
1007
1008 case OP_BRA:
1009 bracketlen = 1 + LINK_SIZE;
1010 break;
1011
1012 case OP_CBRA:
1013 case OP_SCBRA:
1014 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1015 break;
1016
1017 CASE_ITERATOR_PRIVATE_DATA_1
1018 space = 1;
1019 size = -2;
1020 break;
1021
1022 CASE_ITERATOR_PRIVATE_DATA_2A
1023 space = 2;
1024 size = -2;
1025 break;
1026
1027 CASE_ITERATOR_PRIVATE_DATA_2B
1028 space = 2;
1029 size = -(2 + IMM2_SIZE);
1030 break;
1031
1032 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1033 space = 1;
1034 size = 1;
1035 break;
1036
1037 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1038 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1039 space = 2;
1040 size = 1;
1041 break;
1042
1043 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1044 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1045 space = 2;
1046 size = 1 + IMM2_SIZE;
1047 break;
1048
1049 case OP_CLASS:
1050 case OP_NCLASS:
1051 size += 1 + 32 / sizeof(pcre_uchar);
1052 space = get_class_iterator_size(cc + size);
1053 break;
1054
1055 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1056 case OP_XCLASS:
1057 size = GET(cc, 1);
1058 space = get_class_iterator_size(cc + size);
1059 break;
1060 #endif
1061
1062 default:
1063 cc = next_opcode(common, cc);
1064 SLJIT_ASSERT(cc != NULL);
1065 break;
1066 }
1067
1068 if (space > 0 && cc >= end)
1069 {
1070 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1071 private_data_ptr += sizeof(sljit_w) * space;
1072 }
1073
1074 if (size != 0)
1075 {
1076 if (size < 0)
1077 {
1078 cc += -size;
1079 #ifdef SUPPORT_UTF
1080 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1081 #endif
1082 }
1083 else
1084 cc += size;
1085 }
1086
1087 if (bracketlen > 0)
1088 {
1089 if (cc >= end)
1090 {
1091 end = bracketend(cc);
1092 if (end[-1 - LINK_SIZE] == OP_KET)
1093 end = NULL;
1094 }
1095 cc += bracketlen;
1096 }
1097 }
1098 }
1099
1100 /* Returns with -1 if no need for frame. */
1101 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1102 {
1103 pcre_uchar *ccend = bracketend(cc);
1104 int length = 0;
1105 BOOL possessive = FALSE;
1106 BOOL setsom_found = recursive;
1107 BOOL setmark_found = recursive;
1108
1109 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1110 {
1111 length = 3;
1112 possessive = TRUE;
1113 }
1114
1115 cc = next_opcode(common, cc);
1116 SLJIT_ASSERT(cc != NULL);
1117 while (cc < ccend)
1118 switch(*cc)
1119 {
1120 case OP_SET_SOM:
1121 SLJIT_ASSERT(common->has_set_som);
1122 if (!setsom_found)
1123 {
1124 length += 2;
1125 setsom_found = TRUE;
1126 }
1127 cc += 1;
1128 break;
1129
1130 case OP_MARK:
1131 SLJIT_ASSERT(common->mark_ptr != 0);
1132 if (!setmark_found)
1133 {
1134 length += 2;
1135 setmark_found = TRUE;
1136 }
1137 cc += 1 + 2 + cc[1];
1138 break;
1139
1140 case OP_RECURSE:
1141 if (common->has_set_som && !setsom_found)
1142 {
1143 length += 2;
1144 setsom_found = TRUE;
1145 }
1146 if (common->mark_ptr != 0 && !setmark_found)
1147 {
1148 length += 2;
1149 setmark_found = TRUE;
1150 }
1151 cc += 1 + LINK_SIZE;
1152 break;
1153
1154 case OP_CBRA:
1155 case OP_CBRAPOS:
1156 case OP_SCBRA:
1157 case OP_SCBRAPOS:
1158 length += 3;
1159 cc += 1 + LINK_SIZE + IMM2_SIZE;
1160 break;
1161
1162 default:
1163 cc = next_opcode(common, cc);
1164 SLJIT_ASSERT(cc != NULL);
1165 break;
1166 }
1167
1168 /* Possessive quantifiers can use a special case. */
1169 if (SLJIT_UNLIKELY(possessive) && length == 3)
1170 return -1;
1171
1172 if (length > 0)
1173 return length + 1;
1174 return -1;
1175 }
1176
1177 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1178 {
1179 DEFINE_COMPILER;
1180 pcre_uchar *ccend = bracketend(cc);
1181 BOOL setsom_found = recursive;
1182 BOOL setmark_found = recursive;
1183 int offset;
1184
1185 /* >= 1 + shortest item size (2) */
1186 SLJIT_UNUSED_ARG(stacktop);
1187 SLJIT_ASSERT(stackpos >= stacktop + 2);
1188
1189 stackpos = STACK(stackpos);
1190 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1191 cc = next_opcode(common, cc);
1192 SLJIT_ASSERT(cc != NULL);
1193 while (cc < ccend)
1194 switch(*cc)
1195 {
1196 case OP_SET_SOM:
1197 SLJIT_ASSERT(common->has_set_som);
1198 if (!setsom_found)
1199 {
1200 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1201 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1202 stackpos += (int)sizeof(sljit_w);
1203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1204 stackpos += (int)sizeof(sljit_w);
1205 setsom_found = TRUE;
1206 }
1207 cc += 1;
1208 break;
1209
1210 case OP_MARK:
1211 SLJIT_ASSERT(common->mark_ptr != 0);
1212 if (!setmark_found)
1213 {
1214 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1215 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1216 stackpos += (int)sizeof(sljit_w);
1217 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1218 stackpos += (int)sizeof(sljit_w);
1219 setmark_found = TRUE;
1220 }
1221 cc += 1 + 2 + cc[1];
1222 break;
1223
1224 case OP_RECURSE:
1225 if (common->has_set_som && !setsom_found)
1226 {
1227 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1229 stackpos += (int)sizeof(sljit_w);
1230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1231 stackpos += (int)sizeof(sljit_w);
1232 setsom_found = TRUE;
1233 }
1234 if (common->mark_ptr != 0 && !setmark_found)
1235 {
1236 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1237 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1238 stackpos += (int)sizeof(sljit_w);
1239 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1240 stackpos += (int)sizeof(sljit_w);
1241 setmark_found = TRUE;
1242 }
1243 cc += 1 + LINK_SIZE;
1244 break;
1245
1246 case OP_CBRA:
1247 case OP_CBRAPOS:
1248 case OP_SCBRA:
1249 case OP_SCBRAPOS:
1250 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1251 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1252 stackpos += (int)sizeof(sljit_w);
1253 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1254 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1255 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1256 stackpos += (int)sizeof(sljit_w);
1257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1258 stackpos += (int)sizeof(sljit_w);
1259
1260 cc += 1 + LINK_SIZE + IMM2_SIZE;
1261 break;
1262
1263 default:
1264 cc = next_opcode(common, cc);
1265 SLJIT_ASSERT(cc != NULL);
1266 break;
1267 }
1268
1269 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1270 SLJIT_ASSERT(stackpos == STACK(stacktop));
1271 }
1272
1273 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1274 {
1275 int private_data_length = 2;
1276 int size;
1277 pcre_uchar *alternative;
1278 /* Calculate the sum of the private machine words. */
1279 while (cc < ccend)
1280 {
1281 size = 0;
1282 switch(*cc)
1283 {
1284 case OP_ASSERT:
1285 case OP_ASSERT_NOT:
1286 case OP_ASSERTBACK:
1287 case OP_ASSERTBACK_NOT:
1288 case OP_ONCE:
1289 case OP_ONCE_NC:
1290 case OP_BRAPOS:
1291 case OP_SBRA:
1292 case OP_SBRAPOS:
1293 case OP_SCOND:
1294 private_data_length++;
1295 cc += 1 + LINK_SIZE;
1296 break;
1297
1298 case OP_CBRA:
1299 case OP_SCBRA:
1300 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1301 private_data_length++;
1302 cc += 1 + LINK_SIZE + IMM2_SIZE;
1303 break;
1304
1305 case OP_CBRAPOS:
1306 case OP_SCBRAPOS:
1307 private_data_length += 2;
1308 cc += 1 + LINK_SIZE + IMM2_SIZE;
1309 break;
1310
1311 case OP_COND:
1312 /* Might be a hidden SCOND. */
1313 alternative = cc + GET(cc, 1);
1314 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1315 private_data_length++;
1316 cc += 1 + LINK_SIZE;
1317 break;
1318
1319 CASE_ITERATOR_PRIVATE_DATA_1
1320 if (PRIVATE_DATA(cc))
1321 private_data_length++;
1322 cc += 2;
1323 #ifdef SUPPORT_UTF
1324 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1325 #endif
1326 break;
1327
1328 CASE_ITERATOR_PRIVATE_DATA_2A
1329 if (PRIVATE_DATA(cc))
1330 private_data_length += 2;
1331 cc += 2;
1332 #ifdef SUPPORT_UTF
1333 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1334 #endif
1335 break;
1336
1337 CASE_ITERATOR_PRIVATE_DATA_2B
1338 if (PRIVATE_DATA(cc))
1339 private_data_length += 2;
1340 cc += 2 + IMM2_SIZE;
1341 #ifdef SUPPORT_UTF
1342 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1343 #endif
1344 break;
1345
1346 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1347 if (PRIVATE_DATA(cc))
1348 private_data_length++;
1349 cc += 1;
1350 break;
1351
1352 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1353 if (PRIVATE_DATA(cc))
1354 private_data_length += 2;
1355 cc += 1;
1356 break;
1357
1358 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1359 if (PRIVATE_DATA(cc))
1360 private_data_length += 2;
1361 cc += 1 + IMM2_SIZE;
1362 break;
1363
1364 case OP_CLASS:
1365 case OP_NCLASS:
1366 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1367 case OP_XCLASS:
1368 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1369 #else
1370 size = 1 + 32 / (int)sizeof(pcre_uchar);
1371 #endif
1372 if (PRIVATE_DATA(cc))
1373 private_data_length += get_class_iterator_size(cc + size);
1374 cc += size;
1375 break;
1376
1377 default:
1378 cc = next_opcode(common, cc);
1379 SLJIT_ASSERT(cc != NULL);
1380 break;
1381 }
1382 }
1383 SLJIT_ASSERT(cc == ccend);
1384 return private_data_length;
1385 }
1386
1387 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1388 BOOL save, int stackptr, int stacktop)
1389 {
1390 DEFINE_COMPILER;
1391 int srcw[2];
1392 int count, size;
1393 BOOL tmp1next = TRUE;
1394 BOOL tmp1empty = TRUE;
1395 BOOL tmp2empty = TRUE;
1396 pcre_uchar *alternative;
1397 enum {
1398 start,
1399 loop,
1400 end
1401 } status;
1402
1403 status = save ? start : loop;
1404 stackptr = STACK(stackptr - 2);
1405 stacktop = STACK(stacktop - 1);
1406
1407 if (!save)
1408 {
1409 stackptr += sizeof(sljit_w);
1410 if (stackptr < stacktop)
1411 {
1412 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1413 stackptr += sizeof(sljit_w);
1414 tmp1empty = FALSE;
1415 }
1416 if (stackptr < stacktop)
1417 {
1418 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1419 stackptr += sizeof(sljit_w);
1420 tmp2empty = FALSE;
1421 }
1422 /* The tmp1next must be TRUE in either way. */
1423 }
1424
1425 while (status != end)
1426 {
1427 count = 0;
1428 switch(status)
1429 {
1430 case start:
1431 SLJIT_ASSERT(save && common->recursive_head != 0);
1432 count = 1;
1433 srcw[0] = common->recursive_head;
1434 status = loop;
1435 break;
1436
1437 case loop:
1438 if (cc >= ccend)
1439 {
1440 status = end;
1441 break;
1442 }
1443
1444 switch(*cc)
1445 {
1446 case OP_ASSERT:
1447 case OP_ASSERT_NOT:
1448 case OP_ASSERTBACK:
1449 case OP_ASSERTBACK_NOT:
1450 case OP_ONCE:
1451 case OP_ONCE_NC:
1452 case OP_BRAPOS:
1453 case OP_SBRA:
1454 case OP_SBRAPOS:
1455 case OP_SCOND:
1456 count = 1;
1457 srcw[0] = PRIVATE_DATA(cc);
1458 SLJIT_ASSERT(srcw[0] != 0);
1459 cc += 1 + LINK_SIZE;
1460 break;
1461
1462 case OP_CBRA:
1463 case OP_SCBRA:
1464 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1465 {
1466 count = 1;
1467 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1468 }
1469 cc += 1 + LINK_SIZE + IMM2_SIZE;
1470 break;
1471
1472 case OP_CBRAPOS:
1473 case OP_SCBRAPOS:
1474 count = 2;
1475 srcw[0] = PRIVATE_DATA(cc);
1476 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1477 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1478 cc += 1 + LINK_SIZE + IMM2_SIZE;
1479 break;
1480
1481 case OP_COND:
1482 /* Might be a hidden SCOND. */
1483 alternative = cc + GET(cc, 1);
1484 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1485 {
1486 count = 1;
1487 srcw[0] = PRIVATE_DATA(cc);
1488 SLJIT_ASSERT(srcw[0] != 0);
1489 }
1490 cc += 1 + LINK_SIZE;
1491 break;
1492
1493 CASE_ITERATOR_PRIVATE_DATA_1
1494 if (PRIVATE_DATA(cc))
1495 {
1496 count = 1;
1497 srcw[0] = PRIVATE_DATA(cc);
1498 }
1499 cc += 2;
1500 #ifdef SUPPORT_UTF
1501 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1502 #endif
1503 break;
1504
1505 CASE_ITERATOR_PRIVATE_DATA_2A
1506 if (PRIVATE_DATA(cc))
1507 {
1508 count = 2;
1509 srcw[0] = PRIVATE_DATA(cc);
1510 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1511 }
1512 cc += 2;
1513 #ifdef SUPPORT_UTF
1514 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1515 #endif
1516 break;
1517
1518 CASE_ITERATOR_PRIVATE_DATA_2B
1519 if (PRIVATE_DATA(cc))
1520 {
1521 count = 2;
1522 srcw[0] = PRIVATE_DATA(cc);
1523 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1524 }
1525 cc += 2 + IMM2_SIZE;
1526 #ifdef SUPPORT_UTF
1527 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1528 #endif
1529 break;
1530
1531 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1532 if (PRIVATE_DATA(cc))
1533 {
1534 count = 1;
1535 srcw[0] = PRIVATE_DATA(cc);
1536 }
1537 cc += 1;
1538 break;
1539
1540 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1541 if (PRIVATE_DATA(cc))
1542 {
1543 count = 2;
1544 srcw[0] = PRIVATE_DATA(cc);
1545 srcw[1] = srcw[0] + sizeof(sljit_w);
1546 }
1547 cc += 1;
1548 break;
1549
1550 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1551 if (PRIVATE_DATA(cc))
1552 {
1553 count = 2;
1554 srcw[0] = PRIVATE_DATA(cc);
1555 srcw[1] = srcw[0] + sizeof(sljit_w);
1556 }
1557 cc += 1 + IMM2_SIZE;
1558 break;
1559
1560 case OP_CLASS:
1561 case OP_NCLASS:
1562 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1563 case OP_XCLASS:
1564 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1565 #else
1566 size = 1 + 32 / (int)sizeof(pcre_uchar);
1567 #endif
1568 if (PRIVATE_DATA(cc))
1569 switch(get_class_iterator_size(cc + size))
1570 {
1571 case 1:
1572 count = 1;
1573 srcw[0] = PRIVATE_DATA(cc);
1574 break;
1575
1576 case 2:
1577 count = 2;
1578 srcw[0] = PRIVATE_DATA(cc);
1579 srcw[1] = srcw[0] + sizeof(sljit_w);
1580 break;
1581
1582 default:
1583 SLJIT_ASSERT_STOP();
1584 break;
1585 }
1586 cc += size;
1587 break;
1588
1589 default:
1590 cc = next_opcode(common, cc);
1591 SLJIT_ASSERT(cc != NULL);
1592 break;
1593 }
1594 break;
1595
1596 case end:
1597 SLJIT_ASSERT_STOP();
1598 break;
1599 }
1600
1601 while (count > 0)
1602 {
1603 count--;
1604 if (save)
1605 {
1606 if (tmp1next)
1607 {
1608 if (!tmp1empty)
1609 {
1610 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1611 stackptr += sizeof(sljit_w);
1612 }
1613 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1614 tmp1empty = FALSE;
1615 tmp1next = FALSE;
1616 }
1617 else
1618 {
1619 if (!tmp2empty)
1620 {
1621 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1622 stackptr += sizeof(sljit_w);
1623 }
1624 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1625 tmp2empty = FALSE;
1626 tmp1next = TRUE;
1627 }
1628 }
1629 else
1630 {
1631 if (tmp1next)
1632 {
1633 SLJIT_ASSERT(!tmp1empty);
1634 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1635 tmp1empty = stackptr >= stacktop;
1636 if (!tmp1empty)
1637 {
1638 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1639 stackptr += sizeof(sljit_w);
1640 }
1641 tmp1next = FALSE;
1642 }
1643 else
1644 {
1645 SLJIT_ASSERT(!tmp2empty);
1646 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1647 tmp2empty = stackptr >= stacktop;
1648 if (!tmp2empty)
1649 {
1650 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1651 stackptr += sizeof(sljit_w);
1652 }
1653 tmp1next = TRUE;
1654 }
1655 }
1656 }
1657 }
1658
1659 if (save)
1660 {
1661 if (tmp1next)
1662 {
1663 if (!tmp1empty)
1664 {
1665 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1666 stackptr += sizeof(sljit_w);
1667 }
1668 if (!tmp2empty)
1669 {
1670 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1671 stackptr += sizeof(sljit_w);
1672 }
1673 }
1674 else
1675 {
1676 if (!tmp2empty)
1677 {
1678 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1679 stackptr += sizeof(sljit_w);
1680 }
1681 if (!tmp1empty)
1682 {
1683 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1684 stackptr += sizeof(sljit_w);
1685 }
1686 }
1687 }
1688 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1689 }
1690
1691 #undef CASE_ITERATOR_PRIVATE_DATA_1
1692 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1693 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1694 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1695 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1696 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1697
1698 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1699 {
1700 return (value & (value - 1)) == 0;
1701 }
1702
1703 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1704 {
1705 while (list)
1706 {
1707 /* sljit_set_label is clever enough to do nothing
1708 if either the jump or the label is NULL. */
1709 sljit_set_label(list->jump, label);
1710 list = list->next;
1711 }
1712 }
1713
1714 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1715 {
1716 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1717 if (list_item)
1718 {
1719 list_item->next = *list;
1720 list_item->jump = jump;
1721 *list = list_item;
1722 }
1723 }
1724
1725 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1726 {
1727 DEFINE_COMPILER;
1728 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1729
1730 if (list_item)
1731 {
1732 list_item->type = type;
1733 list_item->data = data;
1734 list_item->start = start;
1735 list_item->quit = LABEL();
1736 list_item->next = common->stubs;
1737 common->stubs = list_item;
1738 }
1739 }
1740
1741 static void flush_stubs(compiler_common *common)
1742 {
1743 DEFINE_COMPILER;
1744 stub_list* list_item = common->stubs;
1745
1746 while (list_item)
1747 {
1748 JUMPHERE(list_item->start);
1749 switch(list_item->type)
1750 {
1751 case stack_alloc:
1752 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1753 break;
1754 }
1755 JUMPTO(SLJIT_JUMP, list_item->quit);
1756 list_item = list_item->next;
1757 }
1758 common->stubs = NULL;
1759 }
1760
1761 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1762 {
1763 DEFINE_COMPILER;
1764
1765 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1766 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1767 }
1768
1769 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1770 {
1771 /* May destroy all locals and registers except TMP2. */
1772 DEFINE_COMPILER;
1773
1774 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1775 #ifdef DESTROY_REGISTERS
1776 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1777 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1778 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1779 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1780 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1781 #endif
1782 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1783 }
1784
1785 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1786 {
1787 DEFINE_COMPILER;
1788 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1789 }
1790
1791 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1792 {
1793 DEFINE_COMPILER;
1794 struct sljit_label *loop;
1795 int i;
1796 /* At this point we can freely use all temporary registers. */
1797 /* TMP1 returns with begin - 1. */
1798 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1799 if (length < 8)
1800 {
1801 for (i = 0; i < length; i++)
1802 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1803 }
1804 else
1805 {
1806 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1807 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1808 loop = LABEL();
1809 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1810 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1811 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1812 }
1813 }
1814
1815 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1816 {
1817 DEFINE_COMPILER;
1818 struct sljit_label *loop;
1819 struct sljit_jump *earlyexit;
1820
1821 /* At this point we can freely use all registers. */
1822 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1823 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1824
1825 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1826 if (common->mark_ptr != 0)
1827 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1828 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1829 if (common->mark_ptr != 0)
1830 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1831 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1832 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1833 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1834 /* Unlikely, but possible */
1835 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1836 loop = LABEL();
1837 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1838 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1839 /* Copy the integer value to the output buffer */
1840 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1841 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1842 #endif
1843 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1844 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1845 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1846 JUMPHERE(earlyexit);
1847
1848 /* Calculate the return value, which is the maximum ovector value. */
1849 if (topbracket > 1)
1850 {
1851 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1852 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1853
1854 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1855 loop = LABEL();
1856 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1857 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1858 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1859 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1860 }
1861 else
1862 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1863 }
1864
1865 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1866 {
1867 DEFINE_COMPILER;
1868
1869 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1870 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1871
1872 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1873 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1874 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1875 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, quit);
1876
1877 /* Store match begin and end. */
1878 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1879 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1880 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1881 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1882 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1883 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1884 #endif
1885 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1886
1887 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1888 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1889 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
1890 #endif
1891 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1892
1893 JUMPTO(SLJIT_JUMP, quit);
1894 }
1895
1896 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1897 {
1898 /* May destroy TMP1. */
1899 DEFINE_COMPILER;
1900 struct sljit_jump *jump;
1901
1902 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1903 {
1904 /* The value of -1 must be kept for start_used_ptr! */
1905 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1906 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1907 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1908 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1910 JUMPHERE(jump);
1911 }
1912 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1913 {
1914 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1915 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1916 JUMPHERE(jump);
1917 }
1918 }
1919
1920 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1921 {
1922 /* Detects if the character has an othercase. */
1923 unsigned int c;
1924
1925 #ifdef SUPPORT_UTF
1926 if (common->utf)
1927 {
1928 GETCHAR(c, cc);
1929 if (c > 127)
1930 {
1931 #ifdef SUPPORT_UCP
1932 return c != UCD_OTHERCASE(c);
1933 #else
1934 return FALSE;
1935 #endif
1936 }
1937 #ifndef COMPILE_PCRE8
1938 return common->fcc[c] != c;
1939 #endif
1940 }
1941 else
1942 #endif
1943 c = *cc;
1944 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1945 }
1946
1947 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1948 {
1949 /* Returns with the othercase. */
1950 #ifdef SUPPORT_UTF
1951 if (common->utf && c > 127)
1952 {
1953 #ifdef SUPPORT_UCP
1954 return UCD_OTHERCASE(c);
1955 #else
1956 return c;
1957 #endif
1958 }
1959 #endif
1960 return TABLE_GET(c, common->fcc, c);
1961 }
1962
1963 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1964 {
1965 /* Detects if the character and its othercase has only 1 bit difference. */
1966 unsigned int c, oc, bit;
1967 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1968 int n;
1969 #endif
1970
1971 #ifdef SUPPORT_UTF
1972 if (common->utf)
1973 {
1974 GETCHAR(c, cc);
1975 if (c <= 127)
1976 oc = common->fcc[c];
1977 else
1978 {
1979 #ifdef SUPPORT_UCP
1980 oc = UCD_OTHERCASE(c);
1981 #else
1982 oc = c;
1983 #endif
1984 }
1985 }
1986 else
1987 {
1988 c = *cc;
1989 oc = TABLE_GET(c, common->fcc, c);
1990 }
1991 #else
1992 c = *cc;
1993 oc = TABLE_GET(c, common->fcc, c);
1994 #endif
1995
1996 SLJIT_ASSERT(c != oc);
1997
1998 bit = c ^ oc;
1999 /* Optimized for English alphabet. */
2000 if (c <= 127 && bit == 0x20)
2001 return (0 << 8) | 0x20;
2002
2003 /* Since c != oc, they must have at least 1 bit difference. */
2004 if (!is_powerof2(bit))
2005 return 0;
2006
2007 #if defined COMPILE_PCRE8
2008
2009 #ifdef SUPPORT_UTF
2010 if (common->utf && c > 127)
2011 {
2012 n = GET_EXTRALEN(*cc);
2013 while ((bit & 0x3f) == 0)
2014 {
2015 n--;
2016 bit >>= 6;
2017 }
2018 return (n << 8) | bit;
2019 }
2020 #endif /* SUPPORT_UTF */
2021 return (0 << 8) | bit;
2022
2023 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2024
2025 #ifdef SUPPORT_UTF
2026 if (common->utf && c > 65535)
2027 {
2028 if (bit >= (1 << 10))
2029 bit >>= 10;
2030 else
2031 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2032 }
2033 #endif /* SUPPORT_UTF */
2034 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2035
2036 #endif /* COMPILE_PCRE[8|16|32] */
2037 }
2038
2039 static void check_partial(compiler_common *common, BOOL force)
2040 {
2041 /* Checks whether a partial matching is occured. Does not modify registers. */
2042 DEFINE_COMPILER;
2043 struct sljit_jump *jump = NULL;
2044
2045 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2046
2047 if (common->mode == JIT_COMPILE)
2048 return;
2049
2050 if (!force)
2051 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2052 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2053 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2054
2055 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2056 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2057 else
2058 {
2059 if (common->partialmatchlabel != NULL)
2060 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2061 else
2062 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2063 }
2064
2065 if (jump != NULL)
2066 JUMPHERE(jump);
2067 }
2068
2069 static struct sljit_jump *check_str_end(compiler_common *common)
2070 {
2071 /* Does not affect registers. Usually used in a tight spot. */
2072 DEFINE_COMPILER;
2073 struct sljit_jump *jump;
2074 struct sljit_jump *nohit;
2075 struct sljit_jump *return_value;
2076
2077 if (common->mode == JIT_COMPILE)
2078 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2079
2080 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2081 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2082 {
2083 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2084 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2085 JUMPHERE(nohit);
2086 return_value = JUMP(SLJIT_JUMP);
2087 }
2088 else
2089 {
2090 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2091 if (common->partialmatchlabel != NULL)
2092 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2093 else
2094 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2095 }
2096 JUMPHERE(jump);
2097 return return_value;
2098 }
2099
2100 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2101 {
2102 DEFINE_COMPILER;
2103 struct sljit_jump *jump;
2104
2105 if (common->mode == JIT_COMPILE)
2106 {
2107 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2108 return;
2109 }
2110
2111 /* Partial matching mode. */
2112 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2113 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2114 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2115 {
2116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2117 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2118 }
2119 else
2120 {
2121 if (common->partialmatchlabel != NULL)
2122 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2123 else
2124 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2125 }
2126 JUMPHERE(jump);
2127 }
2128
2129 static void read_char(compiler_common *common)
2130 {
2131 /* Reads the character into TMP1, updates STR_PTR.
2132 Does not check STR_END. TMP2 Destroyed. */
2133 DEFINE_COMPILER;
2134 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2135 struct sljit_jump *jump;
2136 #endif
2137
2138 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2139 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2140 if (common->utf)
2141 {
2142 #if defined COMPILE_PCRE8
2143 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2144 #elif defined COMPILE_PCRE16
2145 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2146 #endif /* COMPILE_PCRE[8|16] */
2147 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2148 JUMPHERE(jump);
2149 }
2150 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2151 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2152 }
2153
2154 static void peek_char(compiler_common *common)
2155 {
2156 /* Reads the character into TMP1, keeps STR_PTR.
2157 Does not check STR_END. TMP2 Destroyed. */
2158 DEFINE_COMPILER;
2159 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2160 struct sljit_jump *jump;
2161 #endif
2162
2163 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2164 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2165 if (common->utf)
2166 {
2167 #if defined COMPILE_PCRE8
2168 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2169 #elif defined COMPILE_PCRE16
2170 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2171 #endif /* COMPILE_PCRE[8|16] */
2172 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2173 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2174 JUMPHERE(jump);
2175 }
2176 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2177 }
2178
2179 static void read_char8_type(compiler_common *common)
2180 {
2181 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
/* Emits code that loads the code unit at STR_PTR into TMP2, steps STR_PTR past
it and fetches the type byte from the table at common->ctypes. The table has
entries for the values 0..255 only; for larger values TMP1 is set to 0.
TMP2 is used as scratch. */
2182 DEFINE_COMPILER;
2183 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2184 struct sljit_jump *jump;
2185 #endif
2186
2187 #ifdef SUPPORT_UTF
2188 if (common->utf)
2189 {
2190 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2191 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2192 #if defined COMPILE_PCRE8
2193 /* This can be an extra read in some situations, but hopefully
2194 it is needed in most cases. */
2195 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* Bytes below 0xc0 are complete characters and keep the type just loaded.
Lead bytes (>= 0xc0) are passed to the utfreadtype8 helper, which returns
the type in TMP1. */
2196 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2197 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2198 JUMPHERE(jump);
2199 #elif defined COMPILE_PCRE16
/* Type defaults to 0; the table load is skipped for values above 255. */
2200 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2201 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2202 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2203 JUMPHERE(jump);
2204 /* Skip low surrogate if necessary. */
/* (TMP2 & 0xfc00) == 0xd800 means the unit just read is a high surrogate.
COND_VALUE stores the result of that test in TMP2 (presumably 0 or 1);
shifted left by one it becomes the extra byte count added to STR_PTR. */
2205 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2206 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2207 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2208 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2209 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2210 #elif defined COMPILE_PCRE32
/* One code unit per character: only the 255 limit has to be checked. */
2211 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2212 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2213 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2214 JUMPHERE(jump);
2215 #endif /* COMPILE_PCRE[8|16|32] */
2216 return;
2217 }
2218 #endif /* SUPPORT_UTF */
/* Non-UTF path: a single code unit is a whole character. */
2219 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2220 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2221 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2222 /* The ctypes array contains only 256 values. */
2223 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2224 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2225 #endif
2226 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2227 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2228 JUMPHERE(jump);
2229 #endif
2230 }
2231
2232 static void skip_char_back(compiler_common *common)
2233 {
2234 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
/* Emits code that moves STR_PTR to the start of the previous character.
Without UTF (and always in 32 bit mode) this is a plain subtraction of one
code unit. In UTF-8 mode it keeps stepping back over bytes of the form
10xxxxxx; in UTF-16 mode it steps back one extra unit when the unit just
passed is a low surrogate. */
2235 DEFINE_COMPILER;
2236 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2237 #if defined COMPILE_PCRE8
2238 struct sljit_label *label;
2239
2240 if (common->utf)
2241 {
/* Loop: read the byte before STR_PTR, step back, and repeat while that
byte has its top two bits equal to 10 (value & 0xc0 == 0x80). */
2242 label = LABEL();
2243 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2244 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2245 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2246 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2247 return;
2248 }
2249 #elif defined COMPILE_PCRE16
2250 if (common->utf)
2251 {
2252 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2253 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2254 /* Skip low surrogate if necessary. */
/* (TMP1 & 0xfc00) == 0xdc00 identifies a low surrogate. The test result
stored by COND_VALUE (presumably 0 or 1) is shifted left by one to give
the additional byte count subtracted from STR_PTR. */
2255 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2256 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2257 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2258 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2259 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2260 return;
2261 }
2262 #endif /* COMPILE_PCRE[8|16] */
2263 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Fixed width case: one code unit back. */
2264 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2265 }
2266
2267 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2268 {
2269 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
/* Emits a newline test for the character in TMP1 and appends the resulting
conditional jump to *backtracks. When jumpiftrue is set the jump is taken if
the character is a newline, otherwise it is taken if it is not.
nltype selects the test: NLTYPE_ANY, NLTYPE_ANYCRLF, or NLTYPE_FIXED (a
single newline character below 256, stored in common->newline). */
2270 DEFINE_COMPILER;
2271
2272 if (nltype == NLTYPE_ANY)
2273 {
/* The shared anynewline helper performs the test; the jump that follows
branches on the zero / not-zero flag it leaves behind. */
2274 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2275 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2276 }
2277 else if (nltype == NLTYPE_ANYCRLF)
2278 {
/* TMP2 = (TMP1 == CR) | (TMP1 == NL); the OR also sets the flags used by
the jump. */
2279 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2280 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2281 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2282 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2283 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2284 }
2285 else
2286 {
/* Single fixed newline character: one direct comparison is enough. */
2287 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2288 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2289 }
2290 }
2291
2292 #ifdef SUPPORT_UTF
2293
2294 #if defined COMPILE_PCRE8
2295 static void do_utfreadchar(compiler_common *common)
2296 {
2297 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2298 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
/* Emits the body of a fast-call helper (return address kept in RETURN_ADDR).
On entry STR_PTR still points at the lead byte; on exit it has been advanced
by the number of continuation bytes, which is also the value left in TMP2.
Bits 0x20 and 0x10 of the lead byte select the two, three or four byte
decoder. Continuation bytes are only masked with 0x3f, they are not
validated here. */
2299 DEFINE_COMPILER;
2300 struct sljit_jump *jump;
2301
2302 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2303 /* Searching for the first zero. */
2304 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2305 jump = JUMP(SLJIT_C_NOT_ZERO);
2306 /* Two byte sequence. */
/* TMP1 = ((lead & 0x1f) << 6) | (byte1 & 0x3f) */
2307 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2308 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2309 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2310 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2311 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2312 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2313 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2314 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2315 JUMPHERE(jump);
2316
2317 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2318 jump = JUMP(SLJIT_C_NOT_ZERO);
2319 /* Three byte sequence. */
/* TMP1 = ((lead & 0x0f) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f) */
2320 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2321 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2322 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2323 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2324 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2325 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2326 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2327 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2328 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2329 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2330 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2331 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2332 JUMPHERE(jump);
2333
2334 /* Four byte sequence. */
/* Every remaining lead byte is decoded here:
TMP1 = ((lead & 0x07) << 18) | ((byte1 & 0x3f) << 12)
     | ((byte2 & 0x3f) << 6) | (byte3 & 0x3f) */
2335 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2336 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2337 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2338 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2339 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2340 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2341 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2342 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2343 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2344 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2345 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2346 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2347 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2348 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2349 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2350 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2351 }
2352
2353 static void do_utfreadtype8(compiler_common *common)
2354 {
2355 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2356 of the character (>= 0xc0). Return value in TMP1. */
/* Emits the body of a fast-call helper (return address kept in RETURN_ADDR).
The code reads the first continuation byte at offset 0, i.e. it expects
STR_PTR to have been moved past the lead byte already (read_char8_type does
this before calling). On exit STR_PTR is past the whole character.
Only a two byte sequence can encode a value that has a ctypes entry, so only
that case is decoded; everything else yields type 0. */
2357 DEFINE_COMPILER;
2358 struct sljit_jump *jump;
2359 struct sljit_jump *compare;
2360
2361 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2362
2363 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2364 jump = JUMP(SLJIT_C_NOT_ZERO);
2365 /* Two byte sequence. */
/* TMP2 = ((lead & 0x1f) << 6) | (byte1 & 0x3f), then look the type up if the
value is at most 255. */
2366 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2367 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2368 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2369 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2370 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2371 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2372 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2373 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2374 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2375
/* Two byte value above 255: no table entry, type is 0. */
2376 JUMPHERE(compare);
2377 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2378 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2379 JUMPHERE(jump);
2380
2381 /* We only have types for characters less than 256. */
/* Longer sequences: advance STR_PTR by the utf8_table4 entry for this lead
byte (indexed by lead - 0xc0; presumably the number of continuation bytes -
confirm against the table definition) and return type 0. */
2382 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
2383 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2384 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2385 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2386 }
2387
2388 #elif defined COMPILE_PCRE16
2389
2390 static void do_utfreadchar(compiler_common *common)
2391 {
2392 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2393 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
/* Emits the body of a fast-call helper (return address kept in RETURN_ADDR).
On entry STR_PTR still points at the first unit. A high surrogate
(0xd800..0xdbff) is combined with the following unit and STR_PTR is advanced
by one unit; any other value is returned unchanged with STR_PTR untouched.
TMP2 is always set to the number of bytes STR_PTR was advanced by, because a
caller may subtract it from STR_PTR to undo the advance. */
2394 DEFINE_COMPILER;
2395 struct sljit_jump *jump;
2396
2397 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2398 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
/* Not a high surrogate: the character is complete. No extra unit was
consumed, so report a zero length in TMP2 instead of leaving a stale value. */
2399 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2400 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2401
2402 JUMPHERE(jump);
2403 /* Combine two 16 bit characters. */
/* TMP1 = (((high & 0x3ff) << 10) | (low & 0x3ff)) + 0x10000 */
2404 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2405 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2406 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2407 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2408 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2409 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2410 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2411 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2412 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2413 }
2414
2415 #endif /* COMPILE_PCRE[8|16] */
2416
2417 #endif /* SUPPORT_UTF */
2418
2419 #ifdef SUPPORT_UCP
2420
2421 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2422 #define UCD_BLOCK_MASK 127
2423 #define UCD_BLOCK_SHIFT 7
2424
2425 static void do_getucd(compiler_common *common)
2426 {
2427 /* Searches the UCD record of the character that comes in TMP1.
2428 Returns chartype in TMP1 and UCD offset in TMP2. */
/* Emits the body of a fast-call helper (return address kept in RETURN_ADDR)
that performs the two stage table lookup:
  block  = ucd_stage1[c >> UCD_BLOCK_SHIFT]           (byte entries)
  record = ucd_stage2[block * 128 + (c & 127)]        (16 bit entries)
  type   = ucd_records[record].chartype               (8 byte records)
The scaled index forms (shift 1 and shift 3) rely on the sizes checked by
the assert below. */
2429 DEFINE_COMPILER;
2430
2431 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2432
2433 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2434 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2435 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
2436 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2437 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2438 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2439 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
2440 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2441 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2442 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2443 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2444 }
2445 #endif
2446
2447 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2448 {
/* Emits the entry sequence of the main loop and returns the label of the code
that advances STR_PTR by one character (presumably the caller jumps there to
retry at the next subject position - confirm against the caller). The first
pass skips the advance through the "start" jump, which lands behind it.

hascrorlf, firstline: when neither is set and the newline convention is ANY,
ANYCRLF or a two character sequence, an extra check makes the advance step
over a complete two character newline at once.
firstline: additionally emits a search for the end of the first line and
stores it in the local slot common->first_line_end. TMP3 is used to preserve
STR_PTR during that search. */
2449 DEFINE_COMPILER;
2450 struct sljit_label *mainloop;
2451 struct sljit_label *newlinelabel = NULL;
2452 struct sljit_jump *start;
2453 struct sljit_jump *end = NULL;
2454 struct sljit_jump *nl = NULL;
2455 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2456 struct sljit_jump *singlechar;
2457 #endif
2458 jump_list *newline = NULL;
2459 BOOL newlinecheck = FALSE;
2460 BOOL readuchar = FALSE;
2461
2462 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2463 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2464 newlinecheck = TRUE;
2465
2466 if (firstline)
2467 {
2468 /* Search for the end of the first line. */
2469 SLJIT_ASSERT(common->first_line_end != 0);
2470 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2471
2472 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2473 {
/* Two character newline: scan adjacent unit pairs and compare them with
the high and low byte of common->newline. The stored end is the position
of the first unit of the pair (or one unit before STR_PTR when the
subject end is hit). */
2474 mainloop = LABEL();
2475 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2476 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2477 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2478 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2479 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2480 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2481 JUMPHERE(end);
2482 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2483 }
2484 else
2485 {
/* Other conventions: read character by character; the position before
each read is stored so that, when a newline is found, first_line_end
holds the start of that newline. */
2486 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2487 mainloop = LABEL();
2488 /* Continual stores does not cause data dependency. */
2489 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2490 read_char(common);
2491 check_newlinechar(common, common->nltype, &newline, TRUE);
2492 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2493 JUMPHERE(end);
2494 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2495 set_jumps(newline, LABEL());
2496 }
2497
/* Restore the start position saved in TMP3. */
2498 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2499 }
2500
2501 start = JUMP(SLJIT_JUMP);
2502
2503 if (newlinecheck)
2504 {
/* Reached from the loop head when the current unit equals the first
newline character: step over it and, if the next unit equals the second
newline character, step over that one as well. */
2505 newlinelabel = LABEL();
2506 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2507 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2508 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2509 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2510 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2511 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2512 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2513 #endif
2514 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2515 nl = JUMP(SLJIT_JUMP);
2516 }
2517
2518 mainloop = LABEL();
2519
2520 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is only loaded when something below needs it: the UTF
length computation or the newline check. */
2521 #ifdef SUPPORT_UTF
2522 if (common->utf) readuchar = TRUE;
2523 #endif
2524 if (newlinecheck) readuchar = TRUE;
2525
2526 if (readuchar)
2527 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2528
2529 if (newlinecheck)
2530 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2531
2532 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2533 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2534 #if defined COMPILE_PCRE8
2535 if (common->utf)
2536 {
/* Lead byte: add the utf8_table4 entry (indexed by lead - 0xc0) to skip
the rest of the character. */
2537 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2538 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2539 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2540 JUMPHERE(singlechar);
2541 }
2542 #elif defined COMPILE_PCRE16
2543 if (common->utf)
2544 {
/* High surrogate ((unit & 0xfc00) == 0xd800): skip one more unit. */
2545 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2546 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2547 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2548 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2549 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2550 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2551 JUMPHERE(singlechar);
2552 }
2553 #endif /* COMPILE_PCRE[8|16] */
2554 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* The initial jump and both exits of the newline skip code join here. */
2555 JUMPHERE(start);
2556
2557 if (newlinecheck)
2558 {
2559 JUMPHERE(end);
2560 JUMPHERE(nl);
2561 }
2562
2563 return mainloop;
2564 }
2565
2566 #define MAX_N_CHARS 3
2567
2568 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2569 {
/* Tries to emit a scan loop that advances STR_PTR to the next position where
the first two or three literal code units of the pattern match.

The pattern is inspected from common->start + 1 + IMM2_SIZE. Literal
characters (OP_CHAR / OP_CHARI) and the character of a PLUS / EXACT repeat are
collected into chars[] as (value, case bit) pairs: a unit matches when
(unit | case bit) == value. Zero width assertions are skipped.

Returns FALSE without emitting anything when the pattern has alternatives,
when a caseless character cannot be handled with a single bit
(char_get_othercase_bit returns 0), or when fewer than two units were
collected; returns TRUE after emitting the loop.

firstline: the scan is limited to common->first_line_end; TMP3 holds the
real STR_END meanwhile. TMP1 and TMP2 are destroyed by the emitted code. */
2570 DEFINE_COMPILER;
2571 struct sljit_label *start;
2572 struct sljit_jump *quit;
2573 pcre_uint32 chars[MAX_N_CHARS * 2];
2574 pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
2575 int location = 0;
2576 pcre_int32 len, c, bit, caseless;
2577 int must_stop;
2578
2579 /* We do not support alternatives now. */
2580 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2581 return FALSE;
2582
2583 while (TRUE)
2584 {
/* must_stop: 0 = collect this character and go on, 1 = collect this
character and stop, 2 = stop without collecting. */
2585 caseless = 0;
2586 must_stop = 1;
2587 switch(*cc)
2588 {
2589 case OP_CHAR:
2590 must_stop = 0;
2591 cc++;
2592 break;
2593
2594 case OP_CHARI:
2595 caseless = 1;
2596 must_stop = 0;
2597 cc++;
2598 break;
2599
2600 case OP_SOD:
2601 case OP_SOM:
2602 case OP_SET_SOM:
2603 case OP_NOT_WORD_BOUNDARY:
2604 case OP_WORD_BOUNDARY:
2605 case OP_EODN:
2606 case OP_EOD:
2607 case OP_CIRC:
2608 case OP_CIRCM:
2609 case OP_DOLL:
2610 case OP_DOLLM:
2611 /* Zero width assertions. */
2612 cc++;
2613 continue;
2614
2615 case OP_PLUS:
2616 case OP_MINPLUS:
2617 case OP_POSPLUS:
2618 cc++;
2619 break;
2620
2621 case OP_EXACT:
2622 cc += 1 + IMM2_SIZE;
2623 break;
2624
2625 case OP_PLUSI:
2626 case OP_MINPLUSI:
2627 case OP_POSPLUSI:
2628 caseless = 1;
2629 cc++;
2630 break;
2631
2632 case OP_EXACTI:
2633 caseless = 1;
2634 cc += 1 + IMM2_SIZE;
2635 break;
2636
2637 default:
2638 must_stop = 2;
2639 break;
2640 }
2641
2642 if (must_stop == 2)
2643 break;
2644
/* len = number of code units of the character at cc. */
2645 len = 1;
2646 #ifdef SUPPORT_UTF
2647 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2648 #endif
2649
2650 if (caseless && char_has_othercase(common, cc))
2651 {
/* Repack the othercase information so that the low byte is the value
"len" will have when the differing unit is reached and the remaining
bits are the bit mask to OR into that unit. */
2652 caseless = char_get_othercase_bit(common, cc);
2653 if (caseless == 0)
2654 return FALSE;
2655 #ifdef COMPILE_PCRE8
2656 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2657 #else
2658 if ((caseless & 0x100) != 0)
2659 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2660 else
2661 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2662 #endif
2663 }
2664 else
2665 caseless = 0;
2666
2667 while (len > 0 && location < MAX_N_CHARS * 2)
2668 {
2669 c = *cc;
2670 bit = 0;
2671 if (len == (caseless & 0xff))
2672 {
2673 bit = caseless >> 8;
2674 c |= bit;
2675 }
2676
2677 chars[location] = c;
2678 chars[location + 1] = bit;
2679
2680 len--;
2681 location += 2;
2682 cc++;
2683 }
2684
2685 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2686 break;
2687 }
2688
2689 /* At least two characters are required. */
2690 if (location < 2 * 2)
2691 return FALSE;
2692
/* Pull STR_END in by (units - 1) so that the loop below can read the
following units without a separate end check. STR_END is an address, so the
unit count has to be converted to a byte offset with IN_UCHARS(). */
2693 if (firstline)
2694 {
2695 SLJIT_ASSERT(common->first_line_end != 0);
2696 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2697 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2698 }
2699 else
2700 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2701
2702 start = LABEL();
2703 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2704
/* The first two units are loaded up front and STR_PTR is advanced right
away; the third unit (if any) is therefore at offset 1 from the new
STR_PTR. Any mismatch restarts the loop at the advanced position. */
2705 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2706 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2707 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2708 if (chars[1] != 0)
2709 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2710 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2711 if (location > 2 * 2)
2712 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2713 if (chars[3] != 0)
2714 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2715 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2716 if (location > 2 * 2)
2717 {
2718 if (chars[5] != 0)
2719 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2720 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2721 }
/* All units matched: undo the advance so STR_PTR is at the match start. */
2722 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2723
2724 JUMPHERE(quit);
2725
/* Restore the real STR_END (same byte offset as subtracted above). */
2726 if (firstline)
2727 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2728 else
2729 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2730 return TRUE;
2731 }
2732
2733 #undef MAX_N_CHARS
2734
2735 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2736 {
/* Emits a scan loop that advances STR_PTR one code unit at a time until the
unit at STR_PTR equals first_char (or, when caseless is set, its other case)
or STR_PTR reaches STR_END.

firstline: the scan is limited to common->first_line_end; TMP3 holds the real
STR_END meanwhile. TMP1 and TMP2 are destroyed by the emitted code. */
2737 DEFINE_COMPILER;
2738 struct sljit_label *start;
2739 struct sljit_jump *quit;
2740 struct sljit_jump *found;
2741 pcre_uchar oc, bit;
2742
2743 if (firstline)
2744 {
2745 SLJIT_ASSERT(common->first_line_end != 0);
2746 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2747 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2748 }
2749
2750 start = LABEL();
2751 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2752 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2753
/* oc = other case of first_char (equal to first_char when there is none). */
2754 oc = first_char;
2755 if (caseless)
2756 {
2757 oc = TABLE_GET(first_char, common->fcc, first_char);
2758 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2759 if (first_char > 127 && common->utf)
2760 oc = UCD_OTHERCASE(first_char);
2761 #endif
2762 }
2763 if (first_char == oc)
2764 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2765 else
2766 {
2767 bit = first_char ^ oc;
2768 if (is_powerof2(bit))
2769 {
/* The two cases differ in a single bit: force that bit on and do one
comparison. */
2770 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2771 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2772 }
2773 else
2774 {
/* General case: TMP2 = (TMP1 == first_char) | (TMP1 == oc). */
2775 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2776 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2777 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2778 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2779 found = JUMP(SLJIT_C_NOT_ZERO);
2780 }
2781 }
2782
/* No match at this position: step one code unit forward and retry. */
2783 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2784 JUMPTO(SLJIT_JUMP, start);
2785 JUMPHERE(found);
2786 JUMPHERE(quit);
2787
2788 if (firstline)
2789 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2790 }
2791
2792 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2793 {
/* Emits a scan loop that advances STR_PTR until it is positioned right after
a newline (or STR_END is reached). Nothing is scanned when STR_PTR is at or
before the "str" field of the jit_arguments block; otherwise the scan first
steps back so that a newline ending just before STR_PTR is recognised.

firstline: the scan is limited to common->first_line_end; TMP3 holds the real
STR_END meanwhile and is used to restore it on every exit path. TMP1 and TMP2
are destroyed by the emitted code. */
2794 DEFINE_COMPILER;
2795 struct sljit_label *loop;
2796 struct sljit_jump *lastchar;
2797 struct sljit_jump *firstchar;
2798 struct sljit_jump *quit;
2799 struct sljit_jump *foundcr = NULL;
2800 struct sljit_jump *notfoundnl;
2801 jump_list *newline = NULL;
2802
2803 if (firstline)
2804 {
2805 SLJIT_ASSERT(common->first_line_end != 0);
2806 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2807 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2808 }
2809
2810 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2811 {
/* Two character newline. TMP2 = str, TMP1 = begin (from jit_arguments). */
2812 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2813 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2814 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2815 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2816 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2817
/* Step back one unit, but only if STR_PTR is at least two units after
begin, so that the loop's read at offset -2 stays inside the subject. */
2818 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2819 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2820 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2821 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2822 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
2823 #endif
2824 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2825
/* Advance until the two units before STR_PTR are the newline pair. */
2826 loop = LABEL();
2827 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2828 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2829 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2830 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2831 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2832 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2833
2834 JUMPHERE(quit);
2835 JUMPHERE(firstchar);
2836 JUMPHERE(lastchar);
2837
/* The real STR_END was saved in TMP3 above (this branch only uses TMP1 and
TMP2), so it must be restored from there, as on the other exit path. */
2838 if (firstline)
2839 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2840 return;
2841 }
2842
/* Single character and ANY / ANYCRLF conventions. */
2843 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2844 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2845 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2846 skip_char_back(common);
2847
/* Read characters until one is a newline; every non-newline jumps back to
"loop". CR is intercepted before the generic check so that a following NL
can be consumed as part of the same newline. */
2848 loop = LABEL();
2849 read_char(common);
2850 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2851 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2852 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2853 check_newlinechar(common, common->nltype, &newline, FALSE);
2854 set_jumps(newline, loop);
2855
2856 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2857 {
2858 quit = JUMP(SLJIT_JUMP);
2859 JUMPHERE(foundcr);
/* After a CR: if the next unit is NL, advance over it too. */
2860 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2861 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2862 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2863 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2864 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2865 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2866 #endif
2867 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2868 JUMPHERE(notfoundnl);
2869 JUMPHERE(quit);
2870 }
2871 JUMPHERE(lastchar);
2872 JUMPHERE(firstchar);
2873
2874 if (firstline)
2875 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2876 }
2877
2878 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2879 {
/* Emits a scan loop that advances STR_PTR character by character until the
first code unit of the character has its bit set in the bitmap located at
address start_bits (bit (c & 7) of byte (c >> 3)), or STR_END is reached.

firstline: the scan is limited to common->first_line_end. The real STR_END is
kept in RETURN_ADDR here rather than in TMP3, which this function uses in UTF
mode to preserve the code unit. TMP1 and TMP2 are destroyed. */
2880 DEFINE_COMPILER;
2881 struct sljit_label *start;
2882 struct sljit_jump *quit;
2883 struct sljit_jump *found;
2884 #ifndef COMPILE_PCRE8
2885 struct sljit_jump *jump;
2886 #endif
2887
2888 if (firstline)
2889 {
2890 SLJIT_ASSERT(common->first_line_end != 0);
2891 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
2892 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2893 }
2894
2895 start = LABEL();
2896 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2897 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2898 #ifdef SUPPORT_UTF
/* Keep the original unit: TMP1 is overwritten by the bitmap test but the
unit is needed again below to compute the character length. */
2899 if (common->utf)
2900 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2901 #endif
2902 #ifndef COMPILE_PCRE8
/* Values of 255 and above are all tested against bit 255 of the bitmap. */
2903 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2904 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2905 JUMPHERE(jump);
2906 #endif
/* TMP1 = bitmap byte, TMP2 = 1 << (c & 7); non-zero AND means "found". */
2907 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2908 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2909 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2910 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2911 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2912 found = JUMP(SLJIT_C_NOT_ZERO);
2913
2914 #ifdef SUPPORT_UTF
2915 if (common->utf)
2916 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2917 #endif
2918 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2919 #ifdef SUPPORT_UTF
2920 #if defined COMPILE_PCRE8
2921 if (common->utf)
2922 {
/* Single byte characters loop immediately; for lead bytes the
utf8_table4 entry (indexed by lead - 0xc0) is added to STR_PTR first. */
2923 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2924 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2925 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2926 }
2927 #elif defined COMPILE_PCRE16
2928 if (common->utf)
2929 {
/* Units below 0xd800 loop immediately; a high surrogate
((unit & 0xfc00) == 0xd800) skips one more unit. */
2930 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2931 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2932 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2933 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2934 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2935 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2936 }
2937 #endif /* COMPILE_PCRE[8|16] */
2938 #endif /* SUPPORT_UTF */
2939 JUMPTO(SLJIT_JUMP, start);
2940 JUMPHERE(found);
2941 JUMPHERE(quit);
2942
2943 if (firstline)
2944 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
2945 }
2946
2947 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2948 {
/* Emits a forward search for req_char (or its other case when caseless is
set) between STR_PTR and STR_END. The position where it was last found is
cached in the local slot common->req_char_ptr.

The search is skipped when STR_PTR + REQ_BYTE_MAX is still below STR_END
(subject too long) or when STR_PTR is below the cached position (already
found further on). With has_firstchar the search starts one code unit after
STR_PTR.

Returns the jump that is taken when STR_END is reached without finding the
character; it is not bound here, so the caller has to set its target.
TMP1 and TMP2 are destroyed; STR_PTR is not changed. */
2949 DEFINE_COMPILER;
2950 struct sljit_label *loop;
2951 struct sljit_jump *toolong;
2952 struct sljit_jump *alreadyfound;
2953 struct sljit_jump *found;
2954 struct sljit_jump *foundoc = NULL;
2955 struct sljit_jump *notfound;
2956 pcre_uint32 oc, bit;
2957
2958 SLJIT_ASSERT(common->req_char_ptr != 0);
2959 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2960 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2961 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2962 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2963
/* TMP1 is the scan pointer from here on. */
2964 if (has_firstchar)
2965 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2966 else
2967 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2968
2969 loop = LABEL();
2970 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2971
2972 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc = other case of req_char (equal to req_char when there is none). */
2973 oc = req_char;
2974 if (caseless)
2975 {
2976 oc = TABLE_GET(req_char, common->fcc, req_char);
2977 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2978 if (req_char > 127 && common->utf)
2979 oc = UCD_OTHERCASE(req_char);
2980 #endif
2981 }
2982 if (req_char == oc)
2983 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2984 else
2985 {
2986 bit = req_char ^ oc;
2987 if (is_powerof2(bit))
2988 {
/* The two cases differ in a single bit: force it on and compare once. */
2989 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2990 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2991 }
2992 else
2993 {
2994 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2995 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2996 }
2997 }
2998 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2999 JUMPTO(SLJIT_JUMP, loop);
3000
/* Found: remember the position for later calls. */
3001 JUMPHERE(found);
3002 if (foundoc)
3003 JUMPHERE(foundoc);
3004 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3005 JUMPHERE(alreadyfound);
3006 JUMPHERE(toolong);
3007 return notfound;
3008 }
3009
3010 static void do_revertframes(compiler_common *common)
3011 {
/* Emits the body of a fast-call helper (return address kept in RETURN_ADDR)
that walks the frame data starting at STACK_TOP and restores the saved
values. TMP1 is the read pointer, TMP2 the current entry and TMP3 the base
address of the local area.

Each entry starts with one word:
- a value greater than frame_end (signed compare) is an offset into the
  local area; the next two words are copied to that location and the one
  after it (three words consumed);
- frame_end terminates the walk;
- frame_setstrbegin: the next word is written to OVECTOR(0) (two words);
- frame_setmark (only handled when common->mark_ptr != 0): the next word is
  written to the mark slot (two words);
- anything else is skipped as a two word entry. */
3012 DEFINE_COMPILER;
3013 struct sljit_jump *jump;
3014 struct sljit_label *mainloop;
3015
3016 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3017 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3018 GET_LOCAL_BASE(TMP3, 0, 0);
3019
3020 /* Drop frames until we reach STACK_TOP. */
3021 mainloop = LABEL();
3022 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3023 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Saved pair: restore two consecutive words of the local area. */
3024 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3025 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3026 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
3027 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
3028 JUMPTO(SLJIT_JUMP, mainloop);
3029
3030 JUMPHERE(jump);
3031 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3032 /* End of dropping frames. */
3033 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3034
3035 JUMPHERE(jump);
3036 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
3037 /* Set string begin. */
3038 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3039 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3040 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3041 JUMPTO(SLJIT_JUMP, mainloop);
3042
3043 JUMPHERE(jump);
3044 if (common->mark_ptr != 0)
3045 {
/* Restore the saved mark value. */
3046 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3047 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3048 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3049 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3050 JUMPTO(SLJIT_JUMP, mainloop);
3051
3052 JUMPHERE(jump);
3053 }
3054
3055 /* Unknown command. */
3056 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3057 JUMPTO(SLJIT_JUMP, mainloop);
3058 }
3059
3060 static void check_wordboundary(compiler_common *common)
3061 {
/* Emits the body of a fast-call helper that tests whether STR_PTR is at a
word boundary. The return address is kept in the local slot LOCALS0 (the
helper itself calls other helpers); LOCALS1 holds the "previous character is
a word character" flag and TMP2 ends up holding the same flag for the
current character. The final XOR sets the flags for the caller: not zero
means the two flags differ, i.e. a boundary. STR_PTR is unchanged on return.

A missing character (STR_PTR at "begin", or at the subject end as reported
by check_str_end) counts as a non-word character. With use_ucp a word
character is an underscore or one whose UCD type is in the ucp_Ll..ucp_Lu or
ucp_Nd..ucp_No range; otherwise the ctype_word bit of the ctypes table is
used and characters above 255 are non-word. */
3062 DEFINE_COMPILER;
3063 struct sljit_jump *skipread;
3064 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3065 struct sljit_jump *jump;
3066 #endif
3067
3068 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3069
3070 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3071 /* Get type of the previous char, and put it to LOCALS1. */
3072 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3073 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3074 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3075 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back and re-read: read_char leaves STR_PTR where it was on entry. */
3076 skip_char_back(common);
3077 check_start_used_ptr(common);
3078 read_char(common);
3079
3080 /* Testing char type. */
3081 #ifdef SUPPORT_UCP
3082 if (common->use_ucp)
3083 {
/* TMP2 starts as 1 for the underscore shortcut. Otherwise the UCD type is
range checked twice with unsigned compares after subtracting the lower
bound of each range. */
3084 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3085 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3086 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3087 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3088 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3089 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3090 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3091 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3092 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3093 JUMPHERE(jump);
3094 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3095 }
3096 else
3097 #endif
3098 {
3099 #ifndef COMPILE_PCRE8
3100 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3101 #elif defined SUPPORT_UTF
3102 /* Here LOCALS1 has already been zeroed. */
3103 jump = NULL;
3104 if (common->utf)
3105 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3106 #endif /* COMPILE_PCRE8 */
/* Extract the ctype_word bit (0x10, see the assert above) as 0 or 1. */
3107 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3108 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3109 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3110 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3111 #ifndef COMPILE_PCRE8
3112 JUMPHERE(jump);
3113 #elif defined SUPPORT_UTF
3114 if (jump != NULL)
3115 JUMPHERE(jump);
3116 #endif /* COMPILE_PCRE8 */
3117 }
3118 JUMPHERE(skipread);
3119
/* Now the current character. TMP2 = 0 is the result used when the subject
end is reached and the read is skipped. */
3120 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3121 skipread = check_str_end(common);
3122 peek_char(common);
3123
3124 /* Testing char type. This is a code duplication. */
3125 #ifdef SUPPORT_UCP
3126 if (common->use_ucp)
3127 {
3128 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3129 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3130 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3131 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3132 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3133 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3134 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3135 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3136 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3137 JUMPHERE(jump);
3138 }
3139 else
3140 #endif
3141 {
3142 #ifndef COMPILE_PCRE8
3143 /* TMP2 may be destroyed by peek_char. */
3144 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3145 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3146 #elif defined SUPPORT_UTF
3147 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3148 jump = NULL;
3149 if (common->utf)
3150 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3151 #endif
3152 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3153 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3154 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3155 #ifndef COMPILE_PCRE8
3156 JUMPHERE(jump);
3157 #elif defined SUPPORT_UTF
3158 if (jump != NULL)
3159 JUMPHERE(jump);
3160 #endif /* COMPILE_PCRE8 */
3161 }
3162 JUMPHERE(skipread);
3163
/* Flags = current flag XOR previous flag; the caller branches on them. */
3164 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3165 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3166 }
3167
3168 /*
3169 range format:
3170
3171 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3172 ranges[1] = first bit (0 or 1)
3173 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3174 */
3175
/* Tries to emit a short compare sequence for the character in TMP1, based on
a ranges[] array in the "range format" described above this function.

common      compiler state (also supplies the sljit compiler via DEFINE_COMPILER)
ranges      ranges[0] = number of bit-change positions, ranges[1] = first bit,
            ranges[2..] = the change positions
backtracks  jump list that receives the emitted conditional jump(s)
readch      when TRUE, read_char() is emitted before the comparison

Only three shapes are handled: one change position, two change positions, and
four change positions that form either two single characters or two equally
wide intervals whose start positions differ by a power of two. In every
handled shape the emitted jump to backtracks is taken when the character lies
in an interval whose bit is 0.

Returns TRUE when code was emitted, FALSE when ranges is invalid (negative
length) or has a shape that is not handled; nothing is emitted in that case. */
3176 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3177 {
3178 DEFINE_COMPILER;
3179 struct sljit_jump *jump;
3180
3181 if (ranges[0] < 0)
3182 return FALSE;
3183
3184 switch(ranges[0])
3185 {
/* One change position: a single threshold compare against ranges[2]. */
3186 case 1:
3187 if (readch)
3188 read_char(common);
3189 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3190 return TRUE;
3191
/* Two change positions: one interval [ranges[2], ranges[3]). TMP1 is rebased
to the interval start and compared with the interval width.
NOTE(review): this relies on SLJIT_C_LESS / SLJIT_C_GREATER_EQUAL being
unsigned comparisons - confirm in sljitLir.h. */
3192 case 2:
3193 if (readch)
3194 read_char(common);
3195 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3196 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3197 return TRUE;
3198
3199 case 4:
/* Two intervals of width one: two single characters, ranges[2] and ranges[4]. */
3200 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3201 {
3202 if (readch)
3203 read_char(common);
3204 if (ranges[1] != 0)
3205 {
3206 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3207 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3208 }
3209 else
3210 {
/* Skip the backtrack test when the first character already matched. */
3211 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3212 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3213 JUMPHERE(jump);
3214 }
3215 return TRUE;
3216 }
/* Two intervals of equal width whose starts differ by a power of two: OR-ing
that difference into TMP1 folds the lower interval onto the upper one, so a
single rebased compare covers both. */
3217 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3218 {
3219 if (readch)
3220 read_char(common);
3221 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3222 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3223 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3224 return TRUE;
3225 }
3226 return FALSE;
3227
3228 default:
3229 return FALSE;
3230 }
3231 }
3232
/* Builds a ranges[] array (range format, see check_ranges) from the 256-entry
character type table referenced by common->ctypes.

common  compiler state; only common->ctypes is read
flag    ctype bit mask to test in every table entry
ranges  output array; must have room for 2 + MAX_RANGE_SIZE ints

ranges[1] is set to 1 when entry 0 has the flag, 0 otherwise. Each index at
which (ctypes[i] & flag) changes is appended from ranges[2] onwards, and a
closing position of 256 is appended when the last entry has the flag set.
ranges[0] receives the number of positions, or stays -1 (invalid) when more
than MAX_RANGE_SIZE positions would be needed. */
3233 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3234 {
3235 int i, bit, length;
3236 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3237
3238 bit = ctypes[0] & flag;
/* Marked invalid until the scan completes without overflowing the array. */
3239 ranges[0] = -1;
3240 ranges[1] = bit != 0 ? 1 : 0;
3241 length = 0;
3242
3243 for (i = 1; i < 256; i++)
3244 if ((ctypes[i] & flag) != bit)
3245 {
3246 if (length >= MAX_RANGE_SIZE)
3247 return;
3248 ranges[2 + length] = i;
3249 length++;
/* bit is either 0 or flag, so XOR toggles between the two states. */
3250 bit ^= flag;
3251 }
3252
/* The table ended inside a "flag set" run: close it at 256. */
3253 if (bit != 0)
3254 {
3255 if (length >= MAX_RANGE_SIZE)
3256 return;
3257 ranges[2 + length] = 256;
3258 length++;
3259 }
3260 ranges[0] = length;
3261 }
3262
/* Converts a 256-bit character class bitmap into a ranges[] array and hands
it to check_ranges() (with readch == FALSE, so the character must already be
in TMP1).

common      compiler state, passed through to check_ranges()
bits        bitmap; character i is tested as bit (i & 7) of bits[i >> 3]
nclass      selects when a closing position of 256 is appended: with nclass
            set it is added if the last bit is 0, otherwise if the last bit is 1
backtracks  jump list passed through to check_ranges()

Returns FALSE when the bitmap needs more than MAX_RANGE_SIZE change positions,
otherwise the result of check_ranges(). */
3263 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3264 {
3265 int ranges[2 + MAX_RANGE_SIZE];
3266 pcre_uint8 bit, cbit, all;
3267 int i, byte, length = 0;
3268
3269 bit = bits[0] & 0x1;
3270 ranges[1] = bit;
3271 /* Can be 0 or 255: the value of a bitmap byte whose eight bits all equal the current bit. */
3272 all = -bit;
3273
3274 for (i = 0; i < 256; )
3275 {
3276 byte = i >> 3;
/* On a byte boundary, a byte with no bit change can be skipped as a whole. */
3277 if ((i & 0x7) == 0 && bits[byte] == all)
3278 i += 8;
3279 else
3280 {
3281 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3282 if (cbit != bit)
3283 {
3284 if (length >= MAX_RANGE_SIZE)
3285 return FALSE;
3286 ranges[2 + length] = i;
3287 length++;
3288 bit = cbit;
3289 all = -cbit;
3290 }
3291 i++;
3292 }
3293 }
3294
3295 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3296 {
3297 if (length >= MAX_RANGE_SIZE)
3298 return FALSE;
3299 ranges[2 + length] = 256;
3300 length++;
3301 }
3302 ranges[0] = length;
3303
3304 return check_ranges(common, ranges, backtracks, FALSE);
3305 }
3306
/* Emits the body of a fast-call helper that tests the character in TMP1
against the "any newline" set: 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in
16/32-bit builds and in 8-bit builds when common->utf is set. */
3307 static void check_anynewline(compiler_common *common)
3308 {
3309 /* Check whether TMP1 contains a newline character. The result is accumulated in TMP2 and the equal/zero flag is set by the last operation. TMP1 is modified as well (0x0a is subtracted, and bit 0 may be set). */
3310 DEFINE_COMPILER;
3311
3312 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3313
/* Rebase to 0x0a so that 0x0a-0x0d becomes a single "<= 3" test. */
3314 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3315 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3316 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3317 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3318 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3319 #ifdef COMPILE_PCRE8
3320 if (common->utf)
3321 {
3322 #endif
3323 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 maps 0x2028 onto 0x2029 (both rebased by 0x0a), so one compare tests both. */
3324 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3325 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3326 #ifdef COMPILE_PCRE8
3327 }
3328 #endif
3329 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending compare into TMP2 and leave the flags set from the combined result. */
3330 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3331 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3332 }
3333
/* Emits the body of a fast-call helper that tests the character in TMP1
against the horizontal space set: 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
0x2000-0x200a, 0x202f, 0x205f and 0x3000 in 16/32-bit builds and in 8-bit
builds when common->utf is set. */
3334 static void check_hspace(compiler_common *common)
3335 {
3336 /* Check whether TMP1 contains a horizontal space character. The result is accumulated in TMP2 and the equal/zero flag is set by the last operation. In the wide-character path TMP1 is modified too (0x2000 is subtracted). */
3337 DEFINE_COMPILER;
3338
3339 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3340
3341 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3342 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3343 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3344 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3345 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3346 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3347 #ifdef COMPILE_PCRE8
3348 if (common->utf)
3349 {
3350 #endif
3351 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3352 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3353 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3354 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3355 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is rebased to 0x2000, so 0x2000-0x200a is a single "<= 0x0a" test. */
3356 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3357 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3358 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3359 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3360 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3361 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3362 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3363 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3364 #ifdef COMPILE_PCRE8
3365 }
3366 #endif
3367 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending compare into TMP2 and leave the flags set from the combined result. */
3368 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3369
3370 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3371 }
3372
/* Emits the body of a fast-call helper that tests the character in TMP1
against the vertical space set: 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in
16/32-bit builds and in 8-bit builds when common->utf is set. */
3373 static void check_vspace(compiler_common *common)
3374 {
3375 /* Check whether TMP1 contains a vertical space character. The result is accumulated in TMP2 and the equal/zero flag is set by the last operation. TMP1 is modified as well (0x0a is subtracted, and bit 0 may be set). */
3376 DEFINE_COMPILER;
3377
3378 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3379
/* Rebase to 0x0a so that 0x0a-0x0d becomes a single "<= 3" test. */
3380 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3381 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3382 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3383 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3384 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3385 #ifdef COMPILE_PCRE8
3386 if (common->utf)
3387 {
3388 #endif
/* NOTE(review): unlike check_anynewline(), this intermediate merge also requests SLJIT_SET_E; the flags are overwritten by the compare two lines below, so it looks redundant - confirm. */
3389 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 maps 0x2028 onto 0x2029 (both rebased by 0x0a), so one compare tests both. */
3390 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3391 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3392 #ifdef COMPILE_PCRE8
3393 }
3394 #endif
3395 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending compare into TMP2 and leave the flags set from the combined result. */
3396 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3397
3398 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3399 }
3400
/* The compare helpers below borrow STR_END and STACK_TOP as scratch registers
for the two characters being compared; each helper saves and restores them. */
3401 #define CHAR1 STR_END
3402 #define CHAR2 STACK_TOP
3403
/* Emits the body of a fast-call helper that compares two character sequences
exactly (case sensitive).

Register use, as visible in the emitted code:
  TMP1     points to the first sequence
  STR_PTR  is first moved back by TMP2 and is then used as the pointer to the
           second sequence
  TMP2     remaining length, decreased by IN_UCHARS(1) per compared character

The loop ends when TMP2 reaches zero or at the first mismatching pair. CHAR1
is saved in TMP3 and CHAR2 in LOCALS0, and both are restored before returning.
NOTE(review): how callers detect a mismatch (TMP2 / flags) is not visible
here - confirm at the call sites. */
3404 static void do_casefulcmp(compiler_common *common)
3405 {
3406 DEFINE_COMPILER;
3407 struct sljit_jump *jump;
3408 struct sljit_label *label;
3409
3410 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3411 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3412 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3413 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved one unit back before the loop. Presumably MOVU_UCHAR
is a load that also advances its base register by the given offset - confirm
against its definition. */
3414 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3415 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3416
3417 label = LABEL();
3418 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3419 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3420 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3421 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3422 JUMPTO(SLJIT_C_NOT_ZERO, label);
3423
/* Common exit for both "all matched" and "mismatch": undo the initial
one-unit step back of STR_PTR and restore the borrowed registers. */
3424 JUMPHERE(jump);
3425 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3426 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3427 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3428 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3429 }
3430
/* STACK_LIMIT is borrowed to hold the address of the lower-casing table. */
3431 #define LCC_TABLE STACK_LIMIT
3432
/* Emits the body of a fast-call helper that compares two character sequences
after mapping each character through the common->lcc table.

Register use matches do_casefulcmp(): TMP1 points to the first sequence,
STR_PTR (after being moved back by TMP2) to the second one, and TMP2 is the
remaining length. In builds other than 8-bit, characters above 255 are
compared unmapped, because the table lookup is skipped for them.

LCC_TABLE is saved in TMP3, CHAR1 in LOCALS0 and CHAR2 in LOCALS1; all three
are restored before returning. */
3433 static void do_caselesscmp(compiler_common *common)
3434 {
3435 DEFINE_COMPILER;
3436 struct sljit_jump *jump;
3437 struct sljit_label *label;
3438
3439 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3440 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3441
3442 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3443 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3444 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3445 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* One unit back, as in do_casefulcmp(): the loads below use MOVU_UCHAR with an
offset of one unit (presumably a load with base register update - confirm). */
3446 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3447 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3448
3449 label = LABEL();
3450 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3451 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is applied only to values up to 255; larger values jump over it. */
3452 #ifndef COMPILE_PCRE8
3453 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3454 #endif
3455 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3456 #ifndef COMPILE_PCRE8
3457 JUMPHERE(jump);
3458 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3459 #endif
3460 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3461 #ifndef COMPILE_PCRE8
3462 JUMPHERE(jump);
3463 #endif
3464 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3465 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3466 JUMPTO(SLJIT_C_NOT_ZERO, label);
3467
/* Common exit: undo the one-unit step back of STR_PTR and restore the borrowed registers. */
3468 JUMPHERE(jump);
3469 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3470 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3471 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3472 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3473 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3474 }
3475
3476 #undef LCC_TABLE
3477 #undef CHAR1
3478 #undef CHAR2
3479
3480 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3481
/* Plain C helper: caseless comparison of two UTF character sequences.

src1  start of the first sequence
args  supplies the second sequence: it starts at args->uchar_ptr and must not
      run past args->end
end1  end of the first sequence

Returns the position reached in the second sequence when every character of
the first sequence matched, NULL on the first mismatch, and (pcre_uchar*)1
when the second sequence reaches args->end before the first one is exhausted. */
3482 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3483 {
3484 /* Generating this at JIT level would be inefficient, so it is an ordinary C function. */
3485 pcre_uint32 c1, c2;
3486 const pcre_uchar *src2 = args->uchar_ptr;
3487 const pcre_uchar *end2 = args->end;
3488 const ucd_record *ur;
3489 const pcre_uint32 *pp;
3490
3491 while (src1 < end1)
3492 {
3493 if (src2 >= end2)
3494 return (pcre_uchar*)1;
3495 GETCHARINC(c1, src1);
3496 GETCHARINC(c2, src2);
3497 ur = GET_UCD(c2);
/* Neither identical nor the simple other case of c2: fall back to the caseless set of c2. */
3498 if (c1 != c2 && c1 != c2 + ur->other_case)
3499 {
3500 pp = PRIV(ucd_caseless_sets) + ur->caseset;
/* The scan stops as soon as an entry exceeds c1, so it presumably relies on
each set being stored in ascending order and closed by a large terminator
value - confirm against the table definition. */
3501 for (;;)
3502 {
3503 if (c1 < *pp) return NULL;
3504 if (c1 == *pp++) break;
3505 }
3506 }
3507 }
3508 return src2;
3509 }
3510
3511 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3512
/* Emits the compare code for one pattern character (which may span several
code units in UTF mode) as part of a longer fixed character sequence.

common      compiler state
caseless    when TRUE and the character has an other case, the compare is made
            case insensitive by OR-ing the differing bit into the subject data
cc          points to the pattern character
context     running state shared across consecutive calls: remaining length
            (context->length), the register that receives the next load
            (context->sourcereg, -1 before the first load), and, when
            unaligned reads are enabled, the code units collected so far
            (context->c / context->oc, context->ucharptr)
backtracks  jump list that receives the "not equal" jumps

Subject data is read at negative offsets (-context->length) from STR_PTR.
Returns cc advanced past the character. */
3513 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3514 compare_context* context, jump_list **backtracks)
3515 {
3516 DEFINE_COMPILER;
3517 unsigned int othercasebit = 0;
3518 pcre_uchar *othercasechar = NULL;
3519 #ifdef SUPPORT_UTF
3520 int utflength;
3521 #endif
3522
3523 if (caseless && char_has_othercase(common, cc))
3524 {
3525 othercasebit = char_get_othercase_bit(common, cc);
3526 SLJIT_ASSERT(othercasebit);
3527 /* Extracting bit difference info: the upper part of the returned value selects the code unit that holds the differing bit, the low 8 bits are the bit itself. */
3528 #if defined COMPILE_PCRE8
3529 othercasechar = cc + (othercasebit >> 8);
3530 othercasebit &= 0xff;
3531 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3532 othercasechar = cc + (othercasebit >> 9);
/* Bit 0x100 means the differing bit is in the second byte of the code unit. */
3533 if ((othercasebit & 0x100) != 0)
3534 othercasebit = (othercasebit & 0xff) << 8;
3535 else
3536 othercasebit &= 0xff;
3537 #endif /* COMPILE_PCRE[8|16|32] */
3538 }
3539
/* First call for this sequence: preload the first chunk into TMP1. The next
load is directed to TMP2, so loads and compares alternate between the two
registers. */
3540 if (context->sourcereg == -1)
3541 {
3542 #if defined COMPILE_PCRE8
3543 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3544 if (context->length >= 4)
3545 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3546 else if (context->length >= 2)
3547 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3548 else
3549 #endif
3550 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3551 #elif defined COMPILE_PCRE16
3552 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3553 if (context->length >= 4)
3554 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3555 else
3556 #endif
3557 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3558 #elif defined COMPILE_PCRE32
3559 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3560 #endif /* COMPILE_PCRE[8|16|32] */
3561 context->sourcereg = TMP2;
3562 }
3563
/* In UTF mode one character may consist of several code units; the loop below handles each unit. */
3564 #ifdef SUPPORT_UTF
3565 utflength = 1;
3566 if (common->utf && HAS_EXTRALEN(*cc))
3567 utflength += GET_EXTRALEN(*cc);
3568
3569 do
3570 {
3571 #endif
3572
3573 context->length -= IN_UCHARS(1);
3574 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3575
3576 /* Unaligned read is supported: code units are collected in context->c (expected value) and context->oc (case bit mask) and compared in chunks of up to 4 bytes. */
3577 if (othercasebit != 0 && othercasechar == cc)
3578 {
3579 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3580 context->oc.asuchars[context->ucharptr] = othercasebit;
3581 }
3582 else
3583 {
3584 context->c.asuchars[context->ucharptr] = *cc;
3585 context->oc.asuchars[context->ucharptr] = 0;
3586 }
3587 context->ucharptr++;
3588
/* Flush the collected units when a chunk is full or the sequence ends. */
3589 #if defined COMPILE_PCRE8
3590 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3591 #elif defined COMPILE_PCRE16
3592 if (context->ucharptr >= 2 || context->length == 0)
3593 #elif defined COMPILE_PCRE32
3594 if (1 /* context->ucharptr >= 1 || context->length == 0 */)
3595 #endif
3596 {
/* Issue the load for the next chunk before comparing the current one. */
3597 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3598 if (context->length >= 4)
3599 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3600 #if defined COMPILE_PCRE8
3601 else if (context->length >= 2)
3602 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3603 else if (context->length >= 1)
3604 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3605 #elif defined COMPILE_PCRE16
3606 else if (context->length >= 2)
3607 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3608 #endif /* COMPILE_PCRE[8|16] */
3609 #elif defined COMPILE_PCRE32
3610 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3611 #endif /* COMPILE_PCRE[8|16|32] */
/* Switch to the other register, which holds the previously loaded chunk. */
3612 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3613
/* The case labels are expressed in code units: 4, 2 or 1 bytes worth of them. */
3614 switch(context->ucharptr)
3615 {
3616 case 4 / sizeof(pcre_uchar):
3617 if (context->oc.asint != 0)
3618 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3619 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3620 break;
3621
3622 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3623 case 2 / sizeof(pcre_uchar):
3624 if (context->oc.asushort != 0)
3625 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3626 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3627 break;
3628
3629 #ifdef COMPILE_PCRE8
3630 case 1:
3631 if (context->oc.asbyte != 0)
3632 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3633 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3634 break;
3635 #endif
3636
3637 #endif /* COMPILE_PCRE[8|16] */
3638
3639 default:
3640 SLJIT_ASSERT_STOP();
3641 break;
3642 }
3643 context->ucharptr = 0;
3644 }
3645
3646 #else
3647
3648 /* Unaligned read is unsupported: one code unit is loaded and compared at a time. */
3649 if (context->length > 0)
3650 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3651
3652 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3653
3654 if (othercasebit != 0 && othercasechar == cc)
3655 {
3656 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3657 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3658 }
3659 else
3660 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3661
3662 #endif
3663
3664 cc++;
3665 #ifdef SUPPORT_UTF
3666 utflength--;
3667 }
3668 while (utflength > 0);
3669 #endif
3670
3671 return cc;
3672 }
3673
3674 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3675
/* Both macros keep a compile-time record (typeoffset / charoffset) of the
constant that has already been subtracted from the register at run time, and
emit the ADD/SUB needed to move to a new offset. They expand to several
statements and must be used as a statement of their own. */
3676 #define SET_TYPE_OFFSET(value) \
3677 if ((value) != typeoffset) \
3678 { \
3679 if ((value) > typeoffset) \
3680 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3681 else \
3682 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3683 } \
3684 typeoffset = (value);
3685
3686 #define SET_CHAR_OFFSET(value) \
3687 if ((value) != charoffset) \
3688 { \
3689 if ((value) > charoffset) \
3690 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
3691 else \
3692 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
3693 } \
3694 charoffset = (value);
3695
/* Emits the matching code for an extended character class (XCLASS).

common      compiler state
cc          points to the XCLASS flag unit (XCL_NOT / XCL_MAP bits), followed
            by an optional 32-byte bitmap and a list of XCL_SINGLE, XCL_RANGE,
            XCL_PROP and XCL_NOTPROP items closed by XCL_END
backtracks  jump list taken when the character does not match the class

The function works in three steps: the optional bitmap is tested for
characters up to 255; the item list is scanned once to find out whether the
character value, its Unicode type and/or its script are needed, and registers
are assigned accordingly; then the compare code is emitted for each item.
For a non-negated class successful compares jump to a local "found" list that
is bound to the end of the emitted code; for a negated class they jump
straight to backtracks. */
3696 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3697 {
3698 DEFINE_COMPILER;
3699 jump_list *found = NULL;
3700 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3701 pcre_int32 c, charoffset;
3702 const pcre_uint32 *other_cases;
3703 struct sljit_jump *jump = NULL;
3704 pcre_uchar *ccbegin;
3705 int compares, invertcmp, numberofcmps;
3706 #ifdef SUPPORT_UCP
3707 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3708 BOOL charsaved = FALSE;
3709 int typereg = TMP1, scriptreg = TMP1;
3710 pcre_int32 typeoffset;
3711 #endif
3712
3713 /* Although SUPPORT_UTF must be defined, we are
3714 not necessarily in UTF mode, even in 8 bit mode. */
3715 detect_partial_match(common, backtracks);
3716 read_char(common);
3717
3718 if ((*cc++ & XCL_MAP) != 0)
3719 {
/* The character is kept in TMP3 because the bitmap test below destroys TMP1. */
3720 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3721 #ifndef COMPILE_PCRE8
3722 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3723 #elif defined SUPPORT_UTF
3724 if (common->utf)
3725 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3726 #endif
3727
/* Generic bitmap lookup, used when the bitmap cannot be expressed as a few ranges. */
3728 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3729 {
3730 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3731 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3732 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3733 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3734 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3735 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3736 }
3737
3738 #ifndef COMPILE_PCRE8
3739 JUMPHERE(jump);
3740 #elif defined SUPPORT_UTF
3741 if (common->utf)
3742 JUMPHERE(jump);
3743 #endif
3744 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3745 #ifdef SUPPORT_UCP
3746 charsaved = TRUE;
3747 #endif
3748 cc += 32 / sizeof(pcre_uchar);
3749 }
3750
3751 /* Scanning the necessary info. */
3752 ccbegin = cc;
3753 compares = 0;
3754 while (*cc != XCL_END)
3755 {
3756 compares++;
3757 if (*cc == XCL_SINGLE)
3758 {
3759 cc += 2;
3760 #ifdef SUPPORT_UTF
3761 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3762 #endif
3763 #ifdef SUPPORT_UCP
3764 needschar = TRUE;
3765 #endif
3766 }
3767 else if (*cc == XCL_RANGE)
3768 {
3769 cc += 2;
3770 #ifdef SUPPORT_UTF
3771 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3772 #endif
3773 cc++;
3774 #ifdef SUPPORT_UTF
3775 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3776 #endif
3777 #ifdef SUPPORT_UCP
3778 needschar = TRUE;
3779 #endif
3780 }
3781 #ifdef SUPPORT_UCP
3782 else
3783 {
3784 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3785 cc++;
3786 switch(*cc)
3787 {
3788 case PT_ANY:
3789 break;
3790
3791 case PT_LAMP:
3792 case PT_GC:
3793 case PT_PC:
3794 case PT_ALNUM:
3795 needstype = TRUE;
3796 break;
3797
3798 case PT_SC:
3799 needsscript = TRUE;
3800 break;
3801
3802 case PT_SPACE:
3803 case PT_PXSPACE:
3804 case PT_WORD:
3805 needstype = TRUE;
3806 needschar = TRUE;
3807 break;
3808
3809 case PT_CLIST:
3810 needschar = TRUE;
3811 break;
3812
3813 default:
3814 SLJIT_ASSERT_STOP();
3815 break;
3816 }
3817 cc += 2;
3818 }
3819 #endif
3820 }
3821
3822 #ifdef SUPPORT_UCP
3823 /* Simple register allocation. TMP1 is preferred if possible. */
3824 if (needstype || needsscript)
3825 {
3826 if (needschar && !charsaved)
3827 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3828 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3829 if (needschar)
3830 {
3831 if (needstype)
3832 {
3833 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3834 typereg = RETURN_ADDR;
3835 }
3836
3837 if (needsscript)
3838 scriptreg = TMP3;
3839 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3840 }
3841 else if (needstype && needsscript)
3842 scriptreg = TMP3;
3843 /* In all other cases only one of them was specified, and that one can go to TMP1. */
3844
3845 if (needsscript)
3846 {
3847 if (scriptreg == TMP1)
3848 {
3849 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3850 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3851 }
3852 else
3853 {
3854 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3855 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3856 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3857 }
3858 }
3859 }
3860 #endif
3861
3862 /* Generating code. */
3863 cc = ccbegin;
3864 charoffset = 0;
3865 numberofcmps = 0;
3866 #ifdef SUPPORT_UCP
3867 typeoffset = 0;
3868 #endif
3869
3870 while (*cc != XCL_END)
3871 {
3872 compares--;
/* For a non-negated class the last item is inverted: instead of jumping to
"found" on success it jumps to backtracks on failure and falls through on
success (see the add_jump at the bottom of the loop). */
3873 invertcmp = (compares == 0 && list != backtracks);
3874 jump = NULL;
3875
3876 if (*cc == XCL_SINGLE)
3877 {
3878 cc ++;
3879 #ifdef SUPPORT_UTF
3880 if (common->utf)
3881 {
3882 GETCHARINC(c, cc);
3883 }
3884 else
3885 #endif
3886 c = *cc++;
3887
/* Up to four consecutive single/range tests are accumulated in TMP2 and share one conditional jump. */
3888 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3889 {
3890 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3891 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3892 numberofcmps++;
3893 }
3894 else if (numberofcmps > 0)
3895 {
3896 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3897 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3898 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3899 numberofcmps = 0;
3900 }
3901 else
3902 {
3903 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3904 numberofcmps = 0;
3905 }
3906 }
3907 else if (*cc == XCL_RANGE)
3908 {
3909 cc ++;
3910 #ifdef SUPPORT_UTF
3911 if (common->utf)
3912 {
3913 GETCHARINC(c, cc);
3914 }
3915 else
3916 #endif
3917 c = *cc++;
/* Rebase TMP1 to the start of the range, so the range test is one compare against its width. */
3918 SET_CHAR_OFFSET(c);
3919 #ifdef SUPPORT_UTF
3920 if (common->utf)
3921 {
3922 GETCHARINC(c, cc);
3923 }
3924 else
3925 #endif
3926 c = *cc++;
3927 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3928 {
3929 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3930 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3931 numberofcmps++;
3932 }
3933 else if (numberofcmps > 0)
3934 {
3935 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3936 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3937 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3938 numberofcmps = 0;
3939 }
3940 else
3941 {
3942 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3943 numberofcmps = 0;
3944 }
3945 }
3946 #ifdef SUPPORT_UCP
3947 else
3948 {
3949 if (*cc == XCL_NOTPROP)
3950 invertcmp ^= 0x1;
3951 cc++;
3952 switch(*cc)
3953 {
3954 case PT_ANY:
3955 if (list != backtracks)
3956 {
3957 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3958 continue;
3959 }
3960 else if (cc[-1] == XCL_NOTPROP)
3961 continue;
3962 jump = JUMP(SLJIT_JUMP);
3963 break;
3964
3965 case PT_LAMP:
3966 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3967 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3968 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3969 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3970 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3971 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3972 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3973 break;
3974
3975 case PT_GC:
3976 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3977 SET_TYPE_OFFSET(c);
3978 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3979 break;
3980
3981 case PT_PC:
3982 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3983 break;
3984
3985 case PT_SC:
3986 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3987 break;
3988
3989 case PT_SPACE:
3990 case PT_PXSPACE:
/* NOTE(review): for PT_SPACE the code emitted by SET_CHAR_OFFSET(9) below lies
between this jump and its JUMPHERE, so it is skipped at run time when the
character is 11 while charoffset is still updated at compile time. Later
items in the same class would then see an unadjusted TMP1 for that
character - confirm whether this needs a fix. */
3991 if (*cc == PT_SPACE)
3992 {
3993 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3994 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3995 }
3996 SET_CHAR_OFFSET(9);
3997 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3998 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3999 if (*cc == PT_SPACE)
4000 JUMPHERE(jump);
4001
4002 SET_TYPE_OFFSET(ucp_Zl);
4003 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4004 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
4005 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4006 break;
4007
4008 case PT_WORD:
4009 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4010 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4011 /* ... fall through */
4012
4013 case PT_ALNUM:
4014 SET_TYPE_OFFSET(ucp_Ll);
4015 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4016 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
4017 SET_TYPE_OFFSET(ucp_Nd);
4018 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4019 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
4020 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4021 break;
4022
4023 case PT_CLIST:
4024 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4025
4026 /* At least three characters are required.
4027 Otherwise this case would be handled by the normal code path. */
4028 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4029 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4030
4031 /* Optimizing character pairs, if their difference is power of 2. */
4032 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4033 {
4034 if (charoffset == 0)
4035 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4036 else
4037 {
/* TMP1 is rebased by charoffset; the OR trick needs the real character value. */
4038 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_w)charoffset);
4039 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4040 }
4041 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4042 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4043 other_cases += 2;
4044 }
4045 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4046 {
4047 if (charoffset == 0)
4048 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4049 else
4050 {
4051 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_w)charoffset);
/* The mask must be the single bit that separates other_cases[1] from
other_cases[2] (the pair tested by this branch), exactly as in the
charoffset == 0 path above. */
4052 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4053 }
4054 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4055 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4056
4057 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4058 COND_VALUE(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, SLJIT_C_EQUAL);
4059
4060 other_cases += 3;
4061 }
4062 else
4063 {
4064 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4065 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4066 }
4067
/* Remaining members of the set: plain equality tests OR-ed into TMP2; only the last one sets the flags. */
4068 while (*other_cases != NOTACHAR)
4069 {
4070 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4071 COND_VALUE(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, SLJIT_C_EQUAL);
4072 }
4073 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4074 break;
4075 }
4076 cc += 2;
4077 }
4078 #endif
4079
/* Every item but the last jumps to list; the last one always targets backtracks. */
4080 if (jump != NULL)
4081 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4082 }
4083
4084 if (found != NULL)
4085 set_jumps(found, LABEL());
4086 }
4087
4088 #undef SET_TYPE_OFFSET
4089 #undef SET_CHAR_OFFSET
4090
4091 #endif
4092
/* Emits the matching path code of one single item: an assertion, a character
type, a literal character, a character class or OP_REVERSE.

  common      the compiler state
  type        the opcode to compile
  cc          points right after the opcode (to its operands, if it has any)
  backtracks  list that collects the jumps taken when the item does not match

Returns the pattern position following the operands consumed by this item. */
4093 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4094 {
4095 DEFINE_COMPILER;
4096 int length;
4097 unsigned int c, oc, bit;
4098 compare_context context;
4099 struct sljit_jump *jump[4];
4100 #ifdef SUPPORT_UTF
4101 struct sljit_label *label;
4102 #ifdef SUPPORT_UCP
4103 pcre_uchar propdata[5];
4104 #endif
4105 #endif
4106
4107 switch(type)
4108 {
/* Fail unless STR_PTR equals the "begin" field of jit_arguments. */
4109 case OP_SOD:
4110 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4111 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4112 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4113 return cc;
4114
/* Fail unless STR_PTR equals the "str" field of jit_arguments. */
4115 case OP_SOM:
4116 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4117 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4118 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4119 return cc;
4120
/* Call the shared helper collected in common->wordboundary, then branch on
the zero flag; the two opcodes use opposite conditions.
NOTE(review): presumably the helper leaves its result in the flags - confirm. */
4121 case OP_NOT_WORD_BOUNDARY:
4122 case OP_WORD_BOUNDARY:
4123 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4124 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4125 return cc;
4126
4127 case OP_NOT_DIGIT:
4128 case OP_DIGIT:
4129 /* Digits are usually 0-9, so it is worth optimizing them. */
/* digits[0] == -2 makes the ranges get computed here; presumably it marks a
not yet initialized range list - confirm. */
4130 if (common->digits[0] == -2)
4131 get_ctype_ranges(common, ctype_digit, common->digits);
4132 detect_partial_match(common, backtracks);
4133 /* Flip the starting bit in the negative case. */
4134 if (type == OP_NOT_DIGIT)
4135 common->digits[1] ^= 1;
/* Generic fallback when check_ranges() reports FALSE: test the ctype_digit
bit after read_char8_type(). */
4136 if (!check_ranges(common, common->digits, backtracks, TRUE))
4137 {
4138 read_char8_type(common);
4139 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4140 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4141 }
/* Flip the bit back, so common->digits is left as it was. */
4142 if (type == OP_NOT_DIGIT)
4143 common->digits[1] ^= 1;
4144 return cc;
4145
/* Test the ctype_space bit; the negated opcode inverts the condition. */
4146 case OP_NOT_WHITESPACE:
4147 case OP_WHITESPACE:
4148 detect_partial_match(common, backtracks);
4149 read_char8_type(common);
4150 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4151 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4152 return cc;
4153
/* Test the ctype_word bit; the negated opcode inverts the condition. */
4154 case OP_NOT_WORDCHAR:
4155 case OP_WORDCHAR:
4156 detect_partial_match(common, backtracks);
4157 read_char8_type(common);
4158 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4159 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4160 return cc;
4161
/* Any character except a newline. */
4162 case OP_ANY:
4163 detect_partial_match(common, backtracks);
4164 read_char(common);
4165 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4166 {
/* Two unit newline: fail only if the character just read equals the high
byte of common->newline and the next unit (if there is one) equals its low
byte. */
4167 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4168 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4169 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4170 else
4171 jump[1] = check_str_end(common);
4172
4173 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4174 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4175 if (jump[1] != NULL)
4176 JUMPHERE(jump[1]);
4177 JUMPHERE(jump[0]);
4178 }
4179 else
4180 check_newlinechar(common, common->nltype, backtracks, TRUE);
4181 return cc;
4182
/* Any character, newlines included: only STR_PTR needs to be advanced. */
4183 case OP_ALLANY:
4184 detect_partial_match(common, backtracks);
4185 #ifdef SUPPORT_UTF
4186 if (common->utf)
4187 {
/* Step over the first unit, then over the remaining units of the character. */
4188 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4189 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4190 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4191 #if defined COMPILE_PCRE8
/* First bytes of 0xc0 and above: add the value stored in utf8_table4 for
this byte to STR_PTR. */
4192 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4193 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4194 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4195 #elif defined COMPILE_PCRE16
/* Units whose top six bits match 0xd800 (high surrogates) are followed by a
second unit: the EQUAL condition value is shifted left by one and added to
STR_PTR. */
4196 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4197 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4198 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4199 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
4200 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4201 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4202 #endif
4203 JUMPHERE(jump[0]);
4204 #endif /* COMPILE_PCRE[8|16] */
4205 return cc;
4206 }
4207 #endif
4208 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4209 return cc;
4210
/* Exactly one code unit, regardless of the UTF setting. */
4211 case OP_ANYBYTE:
4212 detect_partial_match(common, backtracks);
4213 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4214 return cc;
4215
4216 #ifdef SUPPORT_UTF
4217 #ifdef SUPPORT_UCP
/* A single property test is compiled by building a temporary item list
(XCL_PROP or XCL_NOTPROP followed by the two operands and XCL_END) and passing
it to compile_xclass_matchingpath(). propdata[0] is set to 0; presumably this
is the flag unit of the list - confirm. */
4218 case OP_NOTPROP:
4219 case OP_PROP:
4220 propdata[0] = 0;
4221 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4222 propdata[2] = cc[0];
4223 propdata[3] = cc[1];
4224 propdata[4] = XCL_END;
4225 compile_xclass_matchingpath(common, propdata, backtracks);
4226 return cc + 2;
4227 #endif
4228 #endif
4229
/* A CR followed by NL is consumed as one item. A CR which is the last
character, or which is not followed by NL, is accepted alone. Any other
character is checked by check_newlinechar() using common->bsr_nltype. */
4230 case OP_ANYNL:
4231 detect_partial_match(common, backtracks);
4232 read_char(common);
4233 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4234 /* We don't need to handle soft partial matching case. */
4235 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4236 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4237 else
4238 jump[1] = check_str_end(common);
4239 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4240 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4241 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4242 jump[3] = JUMP(SLJIT_JUMP);
4243 JUMPHERE(jump[0]);
4244 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4245 JUMPHERE(jump[1]);
4246 JUMPHERE(jump[2]);
4247 JUMPHERE(jump[3]);
4248 return cc;
4249
/* Read a character, call the helper collected in common->hspace and branch
on the zero flag. */
4250 case OP_NOT_HSPACE:
4251 case OP_HSPACE:
4252 detect_partial_match(common, backtracks);
4253 read_char(common);
4254 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4255 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4256 return cc;
4257
/* Same as above with the helper collected in common->vspace. */
4258 case OP_NOT_VSPACE:
4259 case OP_VSPACE:
4260 detect_partial_match(common, backtracks);
4261 read_char(common);
4262 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4263 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4264 return cc;
4265
4266 #ifdef SUPPORT_UCP
/* Extended grapheme cluster. The gbprop value of the previous character is
kept in STACK_TOP (whose original value is parked in LOCALS0). The loop reads
the next character, and continues while the ucp_gbtable entry selected by the
previous gbprop has the bit of the new gbprop set. */
4267 case OP_EXTUNI:
4268 detect_partial_match(common, backtracks);
4269 read_char(common);
4270 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4271 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4272 /* Optimize register allocation: use a real register. */
4273 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4274 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4275
4276 label = LABEL();
/* Stop at the end of the subject. TMP3 remembers the position before the
next character, so the read can be undone. */
4277 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4278 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4279 read_char(common);
4280 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4281 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4282 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4283
/* TMP1 = ucp_gbtable entry of the previous gbprop (index scaled by 4),
TMP2 = 1 << new gbprop; the new gbprop becomes the previous one. */
4284 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4285 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_w)PRIV(ucp_gbtable));
4286 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4287 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4288 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4289 JUMPTO(SLJIT_C_NOT_ZERO, label);
4290
/* The last character read does not belong to the cluster: step back. */
4291 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4292 JUMPHERE(jump[0]);
4293 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4294
4295 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4296 {
4297 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4298 /* Since we successfully read a char above, partial matching must occur. */
4299 check_partial(common, TRUE);
4300 JUMPHERE(jump[0]);
4301 }
4302 return cc;
4303 #endif
4304
/* End of subject, or right before a newline which is the last thing in the
subject. jump[0] is taken when STR_PTR is already at (or after) STR_END. */
4305 case OP_EODN:
4306 /* Requires rather complex checks. */
4307 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4308 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4309 {
/* Two unit newline: exactly two units must remain and both must match. */
4310 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4311 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4312 if (common->mode == JIT_COMPILE)
4313 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4314 else
4315 {
/* Partial modes: backtrack if more than two units remain or the first unit
is not the first newline unit. Otherwise only one unit is left and it equals
the first newline unit, so check_partial() is called before backtracking. */
4316 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4317 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4318 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
4319 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4320 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
4321 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4322 check_partial(common, TRUE);
4323 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4324 JUMPHERE(jump[1]);
4325 }
4326 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4327 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4328 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4329 }
4330 else if (common->nltype == NLTYPE_FIXED)
4331 {
/* One unit newline: it must be the last unit of the subject. */
4332 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4333 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4334 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4335 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4336 }
4337 else
4338 {
/* Other newline types. A CR is handled first: it matches when it is the last
unit (jump[2]) or when it is followed by NL and nothing else (jump[3]). */
4339 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4340 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4341 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4342 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4343 jump[2] = JUMP(SLJIT_C_GREATER);
4344 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4345 /* Equal. */
4346 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4347 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4348 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4349
/* The current unit is not CR. */
4350 JUMPHERE(jump[1]);
4351 if (common->nltype == NLTYPE_ANYCRLF)
4352 {
/* It must be an NL which is the last unit of the subject. */
4353 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4354 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4355 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4356 }
4357 else
4358 {
/* Read one character with STR_PTR saved in LOCALS1: the read must reach the
end of the subject and the anynewline helper must accept the character.
STR_PTR is restored afterwards. */
4359 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4360 read_char(common);
4361 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4362 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4363 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4364 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4365 }
4366 JUMPHERE(jump[2]);
4367 JUMPHERE(jump[3]);
4368 }
4369 JUMPHERE(jump[0]);
4370 check_partial(common, FALSE);
4371 return cc;
4372
/* Fail while STR_PTR is before STR_END. */
4373 case OP_EOD:
4374 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4375 check_partial(common, FALSE);
4376 return cc;
4377
/* Fail if STR_PTR is after the "begin" field of jit_arguments, or if its
"notbol" field is not zero. */
4378 case OP_CIRC:
4379 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4380 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4381 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4382 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4383 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4384 return cc;
4385
/* At the "begin" position only the notbol field is checked (jump[0] skips
the rest). After "begin" (jump[1]) the position must be before STR_END and
must be preceded by a newline. */
4386 case OP_CIRCM:
4387 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4388 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4389 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4390 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4391 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4392 jump[0] = JUMP(SLJIT_JUMP);
4393 JUMPHERE(jump[1]);
4394
4395 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4396 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4397 {
/* Two unit newline: both preceding units must exist (not before "begin",
which is still in TMP1) and must match. */
4398 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4399 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4400 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4401 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4402 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4403 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4404 }
4405 else
4406 {
/* Step back one character and read it again, then test it. */
4407 skip_char_back(common);
4408 read_char(common);
4409 check_newlinechar(common, common->nltype, backtracks, FALSE);
4410 }
4411 JUMPHERE(jump[0]);
4412 return cc;
4413
/* Fail if the "noteol" field of jit_arguments is not zero. Otherwise this is
the same as OP_EODN, unless common->endonly is set, in which case it is the
same test as OP_EOD. */
4414 case OP_DOLL:
4415 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4416 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4417 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4418
4419 if (!common->endonly)
4420 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4421 else
4422 {
4423 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4424 check_partial(common, FALSE);
4425 }
4426 return cc;
4427
/* At the end of the subject only the noteol field is checked (jump[0] skips
the rest). Before the end (jump[1]) the following character(s) must be a
newline; STR_PTR is not advanced. */
4428 case OP_DOLLM:
4429 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4430 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4431 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4432 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4433 check_partial(common, FALSE);
4434 jump[0] = JUMP(SLJIT_JUMP);
4435 JUMPHERE(jump[1]);
4436
4437 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4438 {
4439 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4440 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4441 if (common->mode == JIT_COMPILE)
4442 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4443 else
4444 {
/* Partial modes: when only one unit is left and it equals the first newline
unit, check_partial() is called before backtracking. */
4445 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4446 /* STR_PTR = STR_END - IN_UCHARS(1) */
4447 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4448 check_partial(common, TRUE);
4449 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4450 JUMPHERE(jump[1]);
4451 }
4452
4453 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4454 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4455 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4456 }
4457 else
4458 {
4459 peek_char(common);
4460 check_newlinechar(common, common->nltype, backtracks, FALSE);
4461 }
4462 JUMPHERE(jump[0]);
4463 return cc;
4464
/* A literal character. length is the number of code units it occupies in the
pattern. */
4465 case OP_CHAR:
4466 case OP_CHARI:
4467 length = 1;
4468 #ifdef SUPPORT_UTF
4469 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4470 #endif
/* Fast path, only outside the partial matching modes: the character is
caseful, has no other case, or char_get_othercase_bit() returns non-zero for
it. The units are then compared by byte_sequence_compare() after one length
check. */
4471 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4472 {
4473 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4474 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4475
4476 context.length = IN_UCHARS(length);
4477 context.sourcereg = -1;
4478 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4479 context.ucharptr = 0;
4480 #endif
4481 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4482 }
/* Generic path: read the subject character and compare it with c. */
4483 detect_partial_match(common, backtracks);
4484 read_char(common);
4485 #ifdef SUPPORT_UTF
4486 if (common->utf)
4487 {
4488 GETCHAR(c, cc);
4489 }
4490 else
4491 #endif
4492 c = *cc;
4493 if (type == OP_CHAR || !char_has_othercase(common, cc))
4494 {
4495 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4496 return cc + length;
4497 }
4498 oc = char_othercase(common, c);
4499 bit = c ^ oc;
/* When the two cases differ in a single bit, setting that bit in the subject
character allows one comparison to cover both. */
4500 if (is_powerof2(bit))
4501 {
4502 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4503 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4504 return cc + length;
4505 }
/* Otherwise combine the two equality tests in TMP2 and fail if both are
false. */
4506 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4507 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4508 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4509 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4510 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4511 return cc + length;
4512
/* Any character except the given one (OP_NOTI: except either case of it). */
4513 case OP_NOT:
4514 case OP_NOTI:
4515 detect_partial_match(common, backtracks);
4516 length = 1;
4517 #ifdef SUPPORT_UTF
4518 if (common->utf)
4519 {
4520 #ifdef COMPILE_PCRE8
4521 c = *cc;
4522 if (c < 128)
4523 {
/* The excluded character is a single byte below 128: comparing the first
subject byte is enough, the subject character is skipped afterwards without
being decoded. */
4524 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4525 if (type == OP_NOT || !char_has_othercase(common, cc))
4526 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4527 else
4528 {
4529 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4530 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4531 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4532 }
4533 /* Skip the variable-length character. */
4534 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4535 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4536 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4537 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4538 JUMPHERE(jump[0]);
4539 return cc + 1;
4540 }
4541 else
4542 #endif /* COMPILE_PCRE8 */
4543 {
4544 GETCHARLEN(c, cc, length);
4545 read_char(common);
4546 }
4547 }
4548 else
4549 #endif /* SUPPORT_UTF */
4550 {
4551 read_char(common);
4552 c = *cc;
4553 }
4554
/* Generic comparison: the conditions are the inverse of the OP_CHAR case. */
4555 if (type == OP_NOT || !char_has_othercase(common, cc))
4556 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4557 else
4558 {
4559 oc = char_othercase(common, c);
4560 bit = c ^ oc;
4561 if (is_powerof2(bit))
4562 {
4563 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4564 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4565 }
4566 else
4567 {
4568 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4569 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4570 }
4571 }
4572 return cc + length;
4573
/* Character class stored as a 32 byte bit map at cc. check_class_ranges() is
tried first; when it returns FALSE the bit map is tested directly. */
4574 case OP_CLASS:
4575 case OP_NCLASS:
4576 detect_partial_match(common, backtracks);
4577 read_char(common);
4578 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4579 return cc + 32 / sizeof(pcre_uchar);
4580
4581 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4582 jump[0] = NULL;
4583 #ifdef COMPILE_PCRE8
4584 /* This check only affects 8 bit mode. In other modes, we
4585 always need to compare the value with 255. */
4586 if (common->utf)
4587 #endif /* COMPILE_PCRE8 */
4588 {
/* Characters above 255 are outside the bit map: OP_CLASS fails for them,
while for OP_NCLASS the jump skips the bit map test (accepted). */
4589 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4590 if (type == OP_CLASS)
4591 {
4592 add_jump(compiler, backtracks, jump[0]);
4593 jump[0] = NULL;
4594 }
4595 }
4596 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Test bit (c & 7) of byte (c >> 3) of the bit map. */
4597 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4598 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4599 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
4600 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4601 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4602 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4603 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4604 if (jump[0] != NULL)
4605 JUMPHERE(jump[0]);
4606 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4607 return cc + 32 / sizeof(pcre_uchar);
4608
4609 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended class: handled by compile_xclass_matchingpath(). The value read by
GET(cc, 0) is used to find the end of the item. */
4610 case OP_XCLASS:
4611 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4612 return cc + GET(cc, 0) - 1;
4613 #endif
4614
/* Move STR_PTR back by the number of characters stored in the pattern; fail
if this would pass the "begin" field of jit_arguments. */
4615 case OP_REVERSE:
4616 length = GET(cc, 0);
4617 if (length == 0)
4618 return cc + LINK_SIZE;
4619 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4620 #ifdef SUPPORT_UTF
4621 if (common->utf)
4622 {
/* Characters have variable length: step back one character at a time, TMP2
counts the remaining steps. */
4623 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4624 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4625 label = LABEL();
4626 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4627 skip_char_back(common);
4628 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4629 JUMPTO(SLJIT_C_NOT_ZERO, label);
4630 }
4631 else
4632 #endif
4633 {
4634 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4635 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4636 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4637 }
4638 check_start_used_ptr(common);
4639 return cc + LINK_SIZE;
4640 }
/* Every case above returns, so this point is only reached for an opcode
which is not handled by this function. */
4641 SLJIT_ASSERT_STOP();
4642 return cc;
4643 }
4644
4645 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4646 {
4647 /* This function consumes at least one input character. */
4648 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4649 DEFINE_COMPILER;
4650 pcre_uchar *ccbegin = cc;
4651 compare_context context;
4652 int size;
4653
4654 context.length = 0;
4655 do
4656 {
4657 if (cc >= ccend)
4658 break;
4659
4660 if (*cc == OP_CHAR)
4661 {
4662 size = 1;
4663 #ifdef SUPPORT_UTF
4664 if (common->utf && HAS_EXTRALEN(cc[1]))
4665 size += GET_EXTRALEN(cc[1]);
4666 #endif
4667 }
4668 else if (*cc == OP_CHARI)
4669 {
4670 size = 1;
4671 #ifdef SUPPORT_UTF
4672 if (common->utf)
4673 {
4674 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4675 size = 0;
4676 else if (HAS_EXTRALEN(cc[1]))
4677 size += GET_EXTRALEN(cc[1]);
4678 }
4679 else
4680 #endif
4681 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4682 size = 0;
4683 }
4684 else
4685 size = 0;
4686
4687 cc += 1 + size;
4688 context.length += IN_UCHARS(size);
4689 }
4690 while (size > 0 && context.length <= 128);
4691
4692 cc = ccbegin;
4693 if (context.length > 0)
4694 {
4695 /* We have a fixed-length byte sequence. */
4696 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4697 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4698
4699 context.sourcereg = -1;
4700 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4701 context.ucharptr = 0;
4702 #endif
4703 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4704 return cc;
4705 }
4706
4707 /* A non-fixed length character will be checked if length == 0. */
4708 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4709 }
4710
4711 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4712 {
4713 DEFINE_COMPILER;
4714 int offset = GET2(cc, 1) << 1;
4715
4716 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4717 if (!common->jscript_compat)
4718 {
4719 if (backtracks == NULL)
4720 {
4721 /* OVECTOR(1) contains the "string begin - 1" constant. */
4722 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4723 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4724 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4725 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4726 return JUMP(SLJIT_C_NOT_ZERO);
4727 }
4728 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4729 }
4730 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4731 }
4732
4733 /* Forward declarations. */
4734 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4735 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4736
/* Allocates a zero filled backtrack record of "size" bytes, stores "ccstart"
in its cc field and pushes it onto the list whose head is parent->top (the
previous head is kept in the prev field of the new record).

The macro expects the variables "compiler", "backtrack" and "parent" to be in
scope. If the compiler reports an error after the allocation, the enclosing
function returns "error". Note that "size" is evaluated twice. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
4749
/* Variant of PUSH_BACKTRACK for functions returning void: allocates a zero
filled backtrack record of "size" bytes, stores "ccstart" in its cc field and
pushes it onto the list whose head is parent->top.

The macro expects the variables "compiler", "backtrack" and "parent" to be in
scope. If the compiler reports an error after the allocation, the enclosing
function returns. Note that "size" is evaluated twice. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
4762
/* Casts the "backtrack" variable of the current scope to a pointer to the
given backtrack structure type. */
4763 #define BACKTRACK_AS(type) ((type *)backtrack)
4764
/* Emits the code which matches the text of a back reference once.

  common      the compiler state
  cc          points to the OP_REF / OP_REFI opcode
  backtracks  list that collects the jumps taken when the match fails
  withchecks  when TRUE, the unset test (unless jscript_compat is set) and the
              zero length test are emitted as well
  emptyfail   when TRUE, a zero length reference is treated as a failure;
              otherwise it simply skips the comparison

Returns the pattern position after the opcode and its group number. */
4765 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4766 {
4767 DEFINE_COMPILER;
4768 int offset = GET2(cc, 1) << 1;
4769 struct sljit_jump *jump = NULL;
4770 struct sljit_jump *partial;
4771 struct sljit_jump *nopartial;
4772
/* TMP1 = start of the referenced group. */
4773 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4774 /* OVECTOR(1) contains the "string begin - 1" constant. */
4775 if (withchecks && !common->jscript_compat)
4776 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4777
4778 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4779 if (common->utf && *cc == OP_REFI)
4780 {
/* Caseless comparison in UTF mode is done by calling do_utf_caselesscmp().
The assert documents the register assignment this call sequence relies on. */
4781 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
/* TMP2 = end of the referenced group; jump is taken for a zero length group. */
4782 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4783 if (withchecks)
4784 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4785
4786 /* Needed to save important temporary registers. */
4787 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4788 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
4789 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4790 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4791 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are failures, any other value becomes the new
STR_PTR. In the partial matching modes the value 1 also triggers
check_partial(). NOTE(review): presumably 1 means that the subject ended
during the comparison - confirm against do_utf_caselesscmp(). */
4792 if (common->mode == JIT_COMPILE)
4793 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4794 else
4795 {
4796 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4797 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4798 check_partial(common, FALSE);
4799 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4800 JUMPHERE(nopartial);
4801 }
4802 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4803 }
4804 else
4805 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4806 {
/* TMP2 = end - start of the group; jump is taken for a zero length group. */
4807 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4808 if (withchecks)
4809 jump = JUMP(SLJIT_C_ZERO);
4810
/* Advance STR_PTR by the length of the group first. If this passes STR_END,
the match fails in normal mode; the partial modes continue at the "partial"
label below. */
4811 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4812 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4813 if (common->mode == JIT_COMPILE)
4814 add_jump(compiler, backtracks, partial);
4815
/* Call the comparison helper; a non-zero TMP2 afterwards is a mismatch. */
4816 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4817 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4818
4819 if (common->mode != JIT_COMPILE)
4820 {
4821 nopartial = JUMP(SLJIT_JUMP);
4822 JUMPHERE(partial);
/* The subject is shorter than the group: compare only the part which is
available, then report a partial match and backtrack. */
4823 /* TMP2 -= STR_PTR - STR_END */
4824 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4825 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
4826 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4827 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4828 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4829 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4830 JUMPHERE(partial);
4831 check_partial(common, FALSE);
4832 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4833 JUMPHERE(nopartial);
4834 }
4835 }
4836
/* Resolve the zero length jump: either a failure, or continue from here. */
4837 if (jump != NULL)
4838 {
4839 if (emptyfail)
4840 add_jump(compiler, backtracks, jump);
4841 else
4842 JUMPHERE(jump);
4843 }
4844 return cc + 1 + IMM2_SIZE;
4845 }
4846
/* Emits the matching path of a back reference which is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE).

  common  the compiler state
  cc      points to the OP_REF / OP_REFI opcode
  parent  the backtrack record which receives the new iterator_backtrack

Returns the pattern position after the repeat opcode and its operands, or
NULL if the backtrack record cannot be allocated. */
4847 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4848 {
4849 DEFINE_COMPILER;
4850 backtrack_common *backtrack;
4851 pcre_uchar type;
4852 struct sljit_label *label;
4853 struct sljit_jump *zerolength;
4854 struct sljit_jump *jump = NULL;
4855 pcre_uchar *ccbegin = cc;
4856 int min = 0, max = 0;
4857 BOOL minimize;
4858
4859 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4860
/* Decode the repeat opcode which follows the group number. The lowest bit of
the opcode selects the minimizing (lazy) variant. max == 0 is used for "no
upper limit" (see the max == 0 tests below). */
4861 type = cc[1 + IMM2_SIZE];
4862 minimize = (type & 0x1) != 0;
4863 switch(type)
4864 {
4865 case OP_CRSTAR:
4866 case OP_CRMINSTAR:
4867 min = 0;
4868 max = 0;
4869 cc += 1 + IMM2_SIZE + 1;
4870 break;
4871 case OP_CRPLUS:
4872 case OP_CRMINPLUS:
4873 min = 1;
4874 max = 0;
4875 cc += 1 + IMM2_SIZE + 1;
4876 break;
4877 case OP_CRQUERY:
4878 case OP_CRMINQUERY:
4879 min = 0;
4880 max = 1;
4881 cc += 1 + IMM2_SIZE + 1;
4882 break;
4883 case OP_CRRANGE:
4884 case OP_CRMINRANGE:
4885 min = GET2(cc, 1 + IMM2_SIZE + 1);
4886 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4887 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4888 break;
4889 default:
4890 SLJIT_ASSERT_STOP();
4891 break;
4892 }
4893
/* Maximizing (greedy) repeat. */
4894 if (!minimize)
4895 {
4896 if (min == 0)
4897 {
/* Zero repeats are allowed: the current STR_PTR and a 0 are pushed. The
zerolength jump (taken for an unset or empty group, since no backtrack list
is passed) leaves with only the STACK(1) slot kept. */
4898 allocate_stack(common, 2);
4899 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4900 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4901 /* Temporary release of STR_PTR. */
4902 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4903 zerolength = compile_ref_checks(common, ccbegin, NULL);
4904 /* Restore if not zero length. */
4905 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4906 }
4907 else
4908 {
/* At least one repeat is needed: an unset group is a failure. */
4909 allocate_stack(common, 1);
4910 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4911 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4912 }
4913
/* A repeat counter is only needed for explicit limits above one; it is kept
in the POSSESSIVE0 local. */
4914 if (min > 1 || max > 1)
4915 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4916
/* Loop head: match the reference once. The checks were already emitted
above, so they are not repeated here. */
4917 label = LABEL();
4918 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4919
4920 if (min > 1 || max > 1)
4921 {
4922 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4923 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4924 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: repeat without pushing a backtrack position. */
4925 if (min > 1)
4926 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4927 if (max > 1)
4928 {
/* Below the maximum: push the current position and try one more repeat. */
4929 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4930 allocate_stack(common, 1);
4931 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4932 JUMPTO(SLJIT_JUMP, label);
4933 JUMPHERE(jump);
4934 }
4935 }
4936
4937 if (max == 0)
4938 {
4939 /* Includes min > 1 case as well. */
4940 allocate_stack(common, 1);
4941 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4942 JUMPTO(SLJIT_JUMP, label);
4943 }
4944
4945 JUMPHERE(zerolength);
4946 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4947
4948 decrease_call_count(common);
4949 return cc;
4950 }
4951
/* Minimizing (lazy) repeat: two stack slots are used. STACK(0) is first set
to 0 and later receives STR_PTR; STACK(1) is the repeat counter, which is not
initialized for OP_CRMINSTAR. */
4952 allocate_stack(common, 2);
4953 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4954 if (type != OP_CRMINSTAR)
4955 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4956
4957 if (min == 0)
4958 {
/* Zero repeats are tried first: record the position and skip the match. */
4959 zerolength = compile_ref_checks(common, ccbegin, NULL);
4960 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4961 jump = JUMP(SLJIT_JUMP);
4962 }
4963 else
4964 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4965
/* The recorded matchingpath label is the place where one more repeat is
matched; the upper limit is checked before each attempt. */
4966 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4967 if (max > 0)
4968 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4969
4970 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4971 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4972
4973 if (min > 1)
4974 {
/* Keep repeating until the minimum is reached. */
4975 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4976 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4977 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4978 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
4979 }
4980 else if (max > 0)
4981 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4982
4983 if (jump != NULL)
4984 JUMPHERE(jump);
4985 JUMPHERE(zerolength);
4986
4987 decrease_call_count(common);
4988 return cc;
4989 }
4990
4991 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4992 {
4993 DEFINE_COMPILER;
4994 backtrack_common *backtrack;
4995 recurse_entry *entry = common->entries;
4996 recurse_entry *prev = NULL;
4997 int start = GET(cc, 1);
4998
4999 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5000 while (entry != NULL)
5001 {
5002 if (entry->start == start)
5003 break;
5004 prev = entry;
5005 entry = entry->next;
5006 }
5007
5008 if (entry == NULL)
5009 {
5010 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5011 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5012 return NULL;
5013 entry->next = NULL;
5014 entry->entry = NULL;
5015 entry->calls = NULL;
5016 entry->start = start;
5017
5018 if (prev != NULL)
5019 prev->next = entry;
5020 else
5021 common->entries = entry;
5022 }
5023
5024 if (common->has_set_som && common->mark_ptr != 0)
5025 {
5026 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5027 allocate_stack(common, 2);
5028 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5029 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5030 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5031 }
5032 else if (common->has_set_som || common->mark_ptr != 0)
5033 {
5034 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5035 allocate_stack(common, 1);
5036 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5037 }
5038
5039 if (entry->entry == NULL)
5040 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5041 else
5042 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5043 /* Leave if the match is failed. */
5044 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5045 return cc + 1 + LINK_SIZE;
5046 }
5047
/* Generates the matching path of the assertion whose bytecode starts at cc.
The assertion may be preceded by OP_BRAZERO or OP_BRAMINZERO, which is only
permitted when conditional is FALSE.

common       shared compiler state; its quit/accept labels and jump lists are
             saved on entry and restored before every return
cc           start of the assertion (or of the preceding BRAZERO/BRAMINZERO)
backtrack    receives framesize and private_data_ptr; matchingpath is set here
             for OP_BRAZERO, while for OP_BRAMINZERO a jump to it is emitted,
             so it is presumably set by the caller beforehand - TODO confirm
conditional  when TRUE, failure jumps are collected in backtrack->condfailed
             instead of backtrack->common.topbacktracks

Returns cc + 1 + LINK_SIZE, where cc is the item that ends the chain of
alternatives, or NULL if the compiler reported an error while an alternative
was being compiled. */
5048 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5049 {
5050 DEFINE_COMPILER;
5051 int framesize;
5052 int private_data_ptr;
5053 backtrack_common altbacktrack;
5054 pcre_uchar *ccbegin;
5055 pcre_uchar opcode;
5056 pcre_uchar bra = OP_BRA;
5057 jump_list *tmp = NULL;
5058 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5059 jump_list **found;
5060 /* Saving previous accept variables. */
5061 struct sljit_label *save_quitlabel = common->quitlabel;
5062 struct sljit_label *save_acceptlabel = common->acceptlabel;
5063 jump_list *save_quit = common->quit;
5064 jump_list *save_accept = common->accept;
5065 struct sljit_jump *jump;
5066 struct sljit_jump *brajump = NULL;
5067
/* A leading OP_BRAZERO / OP_BRAMINZERO is remembered in bra and skipped. */
5068 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5069 {
5070 SLJIT_ASSERT(!conditional);
5071 bra = *cc;
5072 cc++;
5073 }
5074 private_data_ptr = PRIVATE_DATA(cc);
5075 SLJIT_ASSERT(private_data_ptr != 0);
5076 framesize = get_framesize(common, cc, FALSE);
5077 backtrack->framesize = framesize;
5078 backtrack->private_data_ptr = private_data_ptr;
5079 opcode = *cc;
5080 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Positive assertions collect their "alternative matched" jumps in the local
list tmp; for negative assertions a matching alternative goes straight to
target. */
5081 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin is the start of the current alternative, cc is its end. */
5082 ccbegin = cc;
5083 cc += GET(cc, 1);
5084
5085 if (bra == OP_BRAMINZERO)
5086 {
5087 /* This is a braminzero backtrack path. */
5088 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5089 free_stack(common, 1);
5090 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5091 }
5092
5093 if (framesize < 0)
5094 {
/* Negative framesize: save STACK_TOP in the private data slot and push only
STR_PTR. */
5095 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5096 allocate_stack(common, 1);
5097 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5098 }
5099 else
5100 {
/* Push STR_PTR at STACK(0) and the previous private data value at STACK(1);
init_frame presumably fills the remaining slots - TODO confirm. */
5101 allocate_stack(common, framesize + 2);
5102 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5103 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5104 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5105 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5106 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5107 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5108 }
5109
/* Alternatives are compiled against a local backtrack list, and the quit and
accept targets are reset so that they are local to this assertion. */
5110 memset(&altbacktrack, 0, sizeof(backtrack_common));
5111 common->quitlabel = NULL;
5112 common->quit = NULL;
/* One iteration per alternative. */
5113 while (1)
5114 {
5115 common->acceptlabel = NULL;
5116 common->accept = NULL;
5117 altbacktrack.top = NULL;
5118 altbacktrack.topbacktracks = NULL;
5119
/* Second and later alternatives restart from the STR_PTR saved at STACK(0). */
5120 if (*ccbegin == OP_ALT)
5121 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5122
5123 altbacktrack.cc = ccbegin;
5124 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5125 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5126 {
5127 common->quitlabel = save_quitlabel;
5128 common->acceptlabel = save_acceptlabel;
5129 common->quit = save_quit;
5130 common->accept = save_accept;
5131 return NULL;
5132 }
/* The alternative matched: accept jumps collected while compiling it are
bound to this point. */
5133 common->acceptlabel = LABEL();
5134 if (common->accept != NULL)
5135 set_jumps(common->accept, common->acceptlabel);
5136
5137 /* Reset stack. */
5138 if (framesize < 0)
5139 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5140 else {
5141 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5142 {
5143 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5144 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5145 }
5146 else
5147 {
5148 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5149 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5150 }
5151 }
5152
5153 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5154 {
5155 /* We know that STR_PTR was stored on the top of the stack. */
5156 if (conditional)
5157 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5158 else if (bra == OP_BRAZERO)
5159 {
5160 if (framesize < 0)
5161 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5162 else
5163 {
5164 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5165 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
5166 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5167 }
5168 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5169 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5170 }
5171 else if (framesize >= 0)
5172 {
5173 /* For OP_BRA and OP_BRAMINZERO. */
5174 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5175 }
5176 }
/* Leave through the "alternative matched" list selected above. */
5177 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5178
/* Backtracking code of this alternative; its remaining failure jumps are
bound right after it, i.e. in front of the next alternative (if any). */
5179 compile_backtrackingpath(common, altbacktrack.top);
5180 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5181 {
5182 common->quitlabel = save_quitlabel;
5183 common->acceptlabel = save_acceptlabel;
5184 common->quit = save_quit;
5185 common->accept = save_accept;
5186 return NULL;
5187 }
5188 set_jumps(altbacktrack.topbacktracks, LABEL());
5189
5190 if (*cc != OP_ALT)
5191 break;
5192
/* Advance to the next alternative. */
5193 ccbegin = cc;
5194 cc += GET(cc, 1);
5195 }
5196 /* None of them matched. */
5197 if (common->quit != NULL)
5198 set_jumps(common->quit, LABEL());
5199
5200 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5201 {
5202 /* Assert is failed. */
5203 if (conditional || bra == OP_BRAZERO)
5204 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5205
5206 if (framesize < 0)
5207 {
5208 /* The topmost item should be 0. */
5209 if (bra == OP_BRAZERO)
5210 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5211 else
5212 free_stack(common, 1);
5213 }
5214 else
5215 {
5216 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5217 /* The topmost item should be 0. */
5218 if (bra == OP_BRAZERO)
5219 {
5220 free_stack(common, framesize + 1);
5221 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5222 }
5223 else
5224 free_stack(common, framesize + 2);
5225 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5226 }
/* For OP_BRAZERO this jump is bound to matchingpath further below instead of
being added to the failure list. */
5227 jump = JUMP(SLJIT_JUMP);
5228 if (bra != OP_BRAZERO)
5229 add_jump(compiler, target, jump);
5230
5231 /* Assert is successful. */
5232 set_jumps(tmp, LABEL());
5233 if (framesize < 0)
5234 {
5235 /* We know that STR_PTR was stored on the top of the stack. */
5236 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5237 /* Keep the STR_PTR on the top of the stack. */
5238 if (bra == OP_BRAZERO)
5239 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5240 else if (bra == OP_BRAMINZERO)
5241 {
5242 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5243 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5244 }
5245 }
5246 else
5247 {
5248 if (bra == OP_BRA)
5249 {
5250 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5251 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5252 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5253 }
5254 else
5255 {
5256 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5257 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
5258 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5259 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5260 }
5261 }
5262
5263 if (bra == OP_BRAZERO)
5264 {
/* Both the failed and the successful assertion continue at matchingpath. */
5265 backtrack->matchingpath = LABEL();
5266 sljit_set_label(jump, backtrack->matchingpath);
5267 }
5268 else if (bra == OP_BRAMINZERO)
5269 {
5270 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5271 JUMPHERE(brajump);
5272 if (framesize >= 0)
5273 {
5274 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5275 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5276 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5277 }
5278 set_jumps(backtrack->common.topbacktracks, LABEL());
5279 }
5280 }
5281 else
5282 {
5283 /* AssertNot is successful. */
5284 if (framesize < 0)
5285 {
5286 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5287 if (bra != OP_BRA)
5288 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5289 else
5290 free_stack(common, 1);
5291 }
5292 else
5293 {
5294 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5295 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5296 /* The topmost item should be 0. */
5297 if (bra != OP_BRA)
5298 {
5299 free_stack(common, framesize + 1);
5300 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5301 }
5302 else
5303 free_stack(common, framesize + 2);
5304 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5305 }
5306
5307 if (bra == OP_BRAZERO)
5308 backtrack->matchingpath = LABEL();
5309 else if (bra == OP_BRAMINZERO)
5310 {
5311 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5312 JUMPHERE(brajump);
5313 }
5314
5315 if (bra != OP_BRA)
5316 {
/* With BRAZERO / BRAMINZERO the collected jumps are bound here and the list
is cleared. */
5317 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5318 set_jumps(backtrack->common.topbacktracks, LABEL());
5319 backtrack->common.topbacktracks = NULL;
5320 }
5321 }
5322
/* Restore the quit/accept state saved on entry. */
5323 common->quitlabel = save_quitlabel;
5324 common->acceptlabel = save_acceptlabel;
5325 common->quit = save_quit;
5326 common->accept = save_accept;
5327 return cc + 1 + LINK_SIZE;
5328 }
5329
5330 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
5331 {
5332 int condition = FALSE;
5333 pcre_uchar *slotA = name_table;
5334 pcre_uchar *slotB;
5335 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5336 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5337 sljit_w no_capture;
5338 int i;
5339
5340 locals += refno & 0xff;
5341 refno >>= 8;
5342 no_capture = locals[1];
5343
5344 for (i = 0; i < name_count; i++)
5345 {
5346 if (GET2(slotA, 0) == refno) break;
5347 slotA += name_entry_size;
5348 }
5349
5350 if (i < name_count)
5351 {
5352 /* Found a name for the number - there can be only one; duplicate names
5353 for different numbers are allowed, but not vice versa. First scan down
5354 for duplicates. */
5355
5356 slotB = slotA;
5357 while (slotB > name_table)
5358 {
5359 slotB -= name_entry_size;
5360 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5361 {
5362 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5363 if (condition) break;
5364 }
5365 else break;
5366 }
5367
5368 /* Scan up for duplicates */
5369 if (!condition)
5370 {
5371 slotB = slotA;
5372 for (i++; i < name_count; i++)
5373 {
5374 slotB += name_entry_size;
5375 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5376 {
5377 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5378 if (condition) break;
5379 }
5380 else break;
5381 }
5382 }
5383 }
5384 return condition;
5385 }
5386
5387 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
5388 {
5389 int condition = FALSE;
5390 pcre_uchar *slotA = name_table;
5391 pcre_uchar *slotB;
5392 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5393 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5394 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
5395 int i;
5396
5397 for (i = 0; i < name_count; i++)
5398 {
5399 if (GET2(slotA, 0) == recno) break;
5400 slotA += name_entry_size;
5401 }
5402
5403 if (i < name_count)
5404 {
5405 /* Found a name for the number - there can be only one; duplicate
5406 names for different numbers are allowed, but not vice versa. First
5407 scan down for duplicates. */
5408
5409 slotB = slotA;
5410 while (slotB > name_table)
5411 {
5412 slotB -= name_entry_size;
5413 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5414 {
5415 condition = GET2(slotB, 0) == group_num;
5416 if (condition) break;
5417 }
5418 else break;
5419 }
5420
5421 /* Scan up for duplicates */
5422 if (!condition)
5423 {
5424 slotB = slotA;
5425 for (i++; i < name_count; i++)
5426 {
5427 slotB += name_entry_size;
5428 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5429 {
5430 condition = GET2(slotB, 0) == group_num;
5431 if (condition) break;
5432 }
5433 else break;
5434 }
5435 }
5436 }
5437 return condition;
5438 }
5439
5440 /*
5441 Handling bracketed expressions is probably the most complex part.
5442
5443 Stack layout naming characters:
5444 S - Push the current STR_PTR
5445 0 - Push a 0 (NULL)
5446 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5447 before the next alternative. Not pushed if there are no alternatives.
5448 M - Any values pushed by the current alternative. Can be empty, or anything.
5449 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5450 L - Push the previous local (pointed by localptr) to the stack
5451 () - optional values stored on the stack
5452 ()* - optional, can be stored multiple times
5453
5454 The following list shows the regular expression templates, their PCRE byte codes
5455 and stack layout supported by pcre-sljit.
5456
5457 (?:) OP_BRA | OP_KET A M
5458 () OP_CBRA | OP_KET C M
5459 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5460 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5461 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5462 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5463 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5464 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5465 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5466 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5467 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5468 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5469 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5470 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5471 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5472 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5473 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5474 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5475 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5476 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5477 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5478 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5479
5480
5481 Stack layout naming characters:
5482 A - Push the alternative index (starting from 0) on the stack.
5483 Not pushed if there are no alternatives.
5484 M - Any values pushed by the current alternative. Can be empty, or anything.
5485
5486 The next list shows the possible content of a bracket:
5487 (|) OP_*BRA | OP_ALT ... M A
5488 (?()|) OP_*COND | OP_ALT M A
5489 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5490 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5491 Or nothing, if trace is unnecessary
5492 */
5493
5494 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5495 {
5496 DEFINE_COMPILER;
5497 backtrack_common *backtrack;
5498 pcre_uchar opcode;
5499 int private_data_ptr = 0;
5500 int offset = 0;
5501 int stacksize;
5502 pcre_uchar *ccbegin;
5503 pcre_uchar *matchingpath;
5504 pcre_uchar bra = OP_BRA;
5505 pcre_uchar ket;
5506 assert_backtrack *assert;
5507 BOOL has_alternatives;
5508 struct sljit_jump *jump;
5509 struct sljit_jump *skip;
5510 struct sljit_label *rmaxlabel = NULL;
5511 struct sljit_jump *braminzerojump = NULL;
5512
5513 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5514
5515 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5516 {
5517 bra = *cc;
5518 cc++;
5519 opcode = *cc;
5520 }
5521
5522 opcode = *cc;
5523 ccbegin = cc;
5524 matchingpath = ccbegin + 1 + LINK_SIZE;
5525
5526 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5527 {
5528 /* Drop this bracket_backtrack. */
5529 parent->top = backtrack->prev;
5530 return bracketend(cc);
5531 }
5532
5533 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5534 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5535 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5536 cc += GET(cc, 1);
5537
5538 has_alternatives = *cc == OP_ALT;
5539 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5540 {
5541 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5542 if (*matchingpath == OP_NRREF)
5543 {
5544 stacksize = GET2(matchingpath, 1);
5545 if (common->currententry == NULL || stacksize == RREF_ANY)
5546 has_alternatives = FALSE;
5547 else if (common->currententry->start == 0)
5548 has_alternatives = stacksize != 0;
5549 else
5550 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5551 }
5552 }
5553
5554 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5555 opcode = OP_SCOND;
5556 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5557 opcode = OP_ONCE;
5558
5559 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5560 {
5561 /* Capturing brackets has a pre-allocated space. */
5562 offset = GET2(ccbegin, 1 + LINK_SIZE);
5563 if (common->optimized_cbracket[offset] == 0)
5564 {
5565 private_data_ptr = OVECTOR_PRIV(offset);
5566 offset <<= 1;
5567 }
5568 else
5569 {
5570 offset <<= 1;
5571 private_data_ptr = OVECTOR(offset);
5572 }
5573 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5574 matchingpath += IMM2_SIZE;
5575 }
5576 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5577 {
5578 /* Other brackets simply allocate the next entry. */
5579 private_data_ptr = PRIVATE_DATA(ccbegin);
5580 SLJIT_ASSERT(private_data_ptr != 0);
5581 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5582 if (opcode == OP_ONCE)
5583 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5584 }
5585
5586 /* Instructions before the first alternative. */
5587 stacksize = 0;
5588 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5589 stacksize++;
5590 if (bra == OP_BRAZERO)
5591 stacksize++;
5592
5593 if (stacksize > 0)
5594 allocate_stack(common, stacksize);
5595
5596 stacksize = 0;
5597 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5598 {
5599 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5600 stacksize++;
5601 }
5602
5603 if (bra == OP_BRAZERO)
5604 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5605
5606 if (bra == OP_BRAMINZERO)
5607 {
5608 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5609 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5610 if (ket != OP_KETRMIN)
5611 {
5612 free_stack(common, 1);
5613 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5614 }
5615 else
5616 {
5617 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5618 {
5619 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5620 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5621 /* Nothing stored during the first run. */
5622 skip = JUMP(SLJIT_JUMP);
5623 JUMPHERE(jump);
5624 /* Checking zero-length iteration. */
5625 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5626 {
5627 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5628 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5629 }
5630 else
5631 {
5632 /* Except when the whole stack frame must be saved. */
5633 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5634 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
5635 }
5636 JUMPHERE(skip);
5637 }
5638 else
5639 {
5640 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5641 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5642 JUMPHERE(jump);
5643 }
5644 }
5645 }
5646
5647 if (ket == OP_KETRMIN)
5648 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5649
5650 if (ket == OP_KETRMAX)
5651 {
5652 rmaxlabel = LABEL();
5653 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5654 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5655 }
5656
5657 /* Handling capturing brackets and alternatives. */
5658 if (opcode == OP_ONCE)
5659 {
5660 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5661 {
5662 /* Neither capturing brackets nor recursions are not found in the block. */
5663 if (ket == OP_KETRMIN)
5664 {
5665 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5666 allocate_stack(common, 2);
5667 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5668 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5669 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5670 }
5671 else if (ket == OP_KETRMAX || has_alternatives)
5672 {
5673 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5674 allocate_stack(common, 1);
5675 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5676 }
5677 else
5678 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5679 }
5680 else
5681 {
5682 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5683 {
5684 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5685 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5686 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5687 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5688 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5689 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5690 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5691 }
5692 else
5693 {
5694 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5695 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5696 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5697 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5698 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5699 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5700 }
5701 }
5702 }
5703 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5704 {
5705 /* Saving the previous values. */
5706 if (common->optimized_cbracket[offset >> 1] == 0)
5707 {
5708 allocate_stack(common, 3);
5709 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5710 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5712 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5713 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5714 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5715 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5716 }
5717 else
5718 {
5719 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5720 allocate_stack(common, 2);
5721 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5722 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_w));
5723 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5726 }
5727 }
5728 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5729 {
5730 /* Saving the previous value. */
5731 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5732 allocate_stack(common, 1);
5733 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5734 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5735 }
5736 else if (has_alternatives)
5737 {
5738 /* Pushing the starting string pointer. */
5739 allocate_stack(common, 1);
5740 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5741 }
5742
5743 /* Generating code for the first alternative. */
5744 if (opcode == OP_COND || opcode == OP_SCOND)
5745 {
5746 if (*matchingpath == OP_CREF)
5747 {
5748 SLJIT_ASSERT(has_alternatives);
5749 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5750 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5751 matchingpath += 1 + IMM2_SIZE;
5752 }
5753 else if (*matchingpath == OP_NCREF)
5754 {
5755 SLJIT_ASSERT(has_alternatives);
5756 stacksize = GET2(matchingpath, 1);
5757 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5758
5759 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5760 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5761 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5762 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
5763 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5764 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5765 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5766 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5767 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5768
5769 JUMPHERE(jump);
5770 matchingpath += 1 + IMM2_SIZE;
5771 }
5772 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
5773 {
5774 /* Never has other case. */
5775 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5776
5777 stacksize = GET2(matchingpath, 1);
5778 if (common->currententry == NULL)
5779 stacksize = 0;
5780 else if (stacksize == RREF_ANY)
5781 stacksize = 1;
5782 else if (common->currententry->start == 0)
5783 stacksize = stacksize == 0;
5784 else
5785 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5786
5787 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
5788 {
5789 SLJIT_ASSERT(!has_alternatives);
5790 if (stacksize != 0)
5791 matchingpath += 1 + IMM2_SIZE;
5792 else
5793 {
5794 if (*cc == OP_ALT)
5795 {
5796 matchingpath = cc + 1 + LINK_SIZE;
5797 cc += GET(cc, 1);
5798 }
5799 else
5800 matchingpath = cc;
5801 }
5802 }
5803 else
5804 {
5805 SLJIT_ASSERT(has_alternatives);
5806
5807 stacksize = GET2(matchingpath, 1);
5808 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5809 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5810 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5811 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5812 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
5813 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5814 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5815 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5816 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5817 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5818 matchingpath += 1 + IMM2_SIZE;
5819 }
5820 }
5821 else
5822 {
5823 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
5824 /* Similar code as PUSH_BACKTRACK macro. */
5825 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5826 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5827 return NULL;
5828 memset(assert, 0, sizeof(assert_backtrack));
5829 assert->common.cc = matchingpath;
5830 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5831 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
5832 }
5833 }
5834
5835 compile_matchingpath(common, matchingpath, cc, backtrack);
5836 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5837 return NULL;
5838
5839 if (opcode == OP_ONCE)
5840 {
5841 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5842 {
5843 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5844 /* TMP2 which is set here used by OP_KETRMAX below. */
5845 if (ket == OP_KETRMAX)
5846 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5847 else if (ket == OP_KETRMIN)
5848 {
5849 /* Move the STR_PTR to the private_data_ptr. */
5850 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5851 }
5852 }
5853 else
5854 {
5855 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5856 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
5857 if (ket == OP_KETRMAX)
5858 {
5859 /* TMP2 which is set here used by OP_KETRMAX below. */
5860 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5861 }
5862 }
5863 }
5864
5865 stacksize = 0;
5866 if (ket != OP_KET || bra != OP_BRA)
5867 stacksize++;
5868 if (has_alternatives && opcode != OP_ONCE)
5869 stacksize++;
5870
5871 if (stacksize > 0)
5872 allocate_stack(common, stacksize);
5873
5874 stacksize = 0;
5875 if (ket != OP_KET)
5876 {
5877 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5878 stacksize++;
5879 }
5880 else if (bra != OP_BRA)
5881 {
5882 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5883 stacksize++;
5884 }
5885
5886 if (has_alternatives)
5887 {
5888 if (opcode != OP_ONCE)
5889 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5890 if (ket != OP_KETRMAX)
5891 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5892 }
5893
5894 /* Must be after the matchingpath label. */
5895 if (offset != 0)
5896 {
5897 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5898 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5899 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5900 }
5901
5902 if (ket == OP_KETRMAX)
5903 {
5904 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5905 {
5906 if (has_alternatives)
5907 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5908 /* Checking zero-length iteration. */
5909 if (opcode != OP_ONCE)
5910 {
5911 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
5912 /* Drop STR_PTR for greedy plus quantifier. */
5913 if (bra != OP_BRAZERO)
5914 free_stack(common, 1);
5915 }
5916 else
5917 /* TMP2 must contain the starting STR_PTR. */
5918 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5919 }
5920 else
5921 JUMPTO(SLJIT_JUMP, rmaxlabel);
5922 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5923 }
5924
5925 if (bra == OP_BRAZERO)
5926 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
5927
5928 if (bra == OP_BRAMINZERO)
5929 {
5930 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5931 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
5932 if (braminzerojump != NULL)
5933 {
5934 JUMPHERE(braminzerojump);
5935 /* We need to release the end pointer to perform the
5936 backtrack for the zero-length iteration. When
5937 framesize is < 0, OP_ONCE will do the release itself. */
5938 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5939 {
5940 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5941 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5942 }
5943 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5944 free_stack(common, 1);
5945 }
5946 /* Continue to the normal backtrack. */
5947 }
5948
5949 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5950 decrease_call_count(common);
5951
5952 /* Skip the other alternatives. */
5953 while (*cc == OP_ALT)
5954 cc += GET(cc, 1);
5955 cc += 1 + LINK_SIZE;
5956 return cc;
5957 }
5958
5959 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5960 {
5961 DEFINE_COMPILER;
5962 backtrack_common *backtrack;
5963 pcre_uchar opcode;
5964 int private_data_ptr;
5965 int cbraprivptr = 0;
5966 int framesize;
5967 int stacksize;
5968 int offset = 0;
5969 BOOL zero = FALSE;
5970 pcre_uchar *ccbegin = NULL;
5971 int stack;
5972 struct sljit_label *loop = NULL;
5973 struct jump_list *emptymatch = NULL;
5974
5975 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
5976 if (*cc == OP_BRAPOSZERO)
5977 {
5978 zero = TRUE;
5979 cc++;
5980 }
5981
5982 opcode = *cc;
5983 private_data_ptr = PRIVATE_DATA(cc);
5984 SLJIT_ASSERT(private_data_ptr != 0);
5985 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
5986 switch(opcode)
5987 {
5988 case OP_BRAPOS:
5989 case OP_SBRAPOS:
5990 ccbegin = cc + 1 + LINK_SIZE;
5991 break;
5992
5993 case OP_CBRAPOS:
5994 case OP_SCBRAPOS:
5995 offset = GET2(cc, 1 + LINK_SIZE);
5996 /* This case cannot be optimized in the same was as
5997 normal capturing brackets. */
5998 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
5999 cbraprivptr = OVECTOR_PRIV(offset);
6000 offset <<= 1;
6001 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6002 break;
6003
6004 default:
6005 SLJIT_ASSERT_STOP();
6006 break;
6007 }
6008
6009 framesize = get_framesize(common, cc, FALSE);
6010 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6011 if (framesize < 0)
6012 {
6013 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
6014 if (!zero)
6015 stacksize++;
6016 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6017 allocate_stack(common, stacksize);
6018 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6019
6020 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6021 {
6022 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6023 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6024 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6025 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6026 }
6027 else
6028 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6029
6030 if (!zero)
6031 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6032 }
6033 else
6034 {
6035 stacksize = framesize + 1;
6036 if (!zero)
6037 stacksize++;
6038 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6039 stacksize++;
6040 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6041 allocate_stack(common, stacksize);
6042
6043 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6044 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6046 stack = 0;
6047 if (!zero)
6048 {
6049 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6050 stack++;
6051 }
6052 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6053 {
6054 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6055 stack++;
6056 }
6057 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6058 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6059 }
6060
6061 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6062 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6063
6064 loop = LABEL();
6065 while (*cc != OP_KETRPOS)
6066 {
6067 backtrack->top = NULL;
6068 backtrack->topbacktracks = NULL;
6069 cc += GET(cc, 1);
6070
6071 compile_matchingpath(common, ccbegin, cc, backtrack);
6072 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6073 return NULL;
6074
6075 if (framesize < 0)
6076 {
6077 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6078
6079 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6080 {
6081 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6082 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6083 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6084 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6085 }
6086 else
6087 {
6088 if (opcode == OP_SBRAPOS)
6089 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6090 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6091 }
6092
6093 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6094 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6095
6096 if (!zero)
6097 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6098 }
6099 else
6100 {
6101 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6102 {
6103 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
6104 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6105 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6106 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6108 }
6109 else
6110 {
6111 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6112 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
6113 if (opcode == OP_SBRAPOS)
6114 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6115 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
6116 }
6117
6118 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6119 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6120
6121 if (!zero)
6122 {
6123 if (framesize < 0)
6124 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6125 else
6126 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6127 }
6128 }
6129 JUMPTO(SLJIT_JUMP, loop);
6130 flush_stubs(common);
6131
6132 compile_backtrackingpath(common, backtrack->top);
6133 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6134 return NULL;
6135 set_jumps(backtrack->topbacktracks, LABEL());
6136
6137 if (framesize < 0)
6138 {
6139 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6140 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6141 else
6142 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6143 }
6144 else
6145 {
6146 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6147 {
6148 /* Last alternative. */
6149 if (*cc == OP_KETRPOS)
6150 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6151 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6152 }
6153 else
6154 {
6155 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6156 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6157 }
6158 }
6159
6160 if (*cc == OP_KETRPOS)
6161 break;
6162 ccbegin = cc + 1 + LINK_SIZE;
6163 }
6164
6165 backtrack->topbacktracks = NULL;
6166 if (!zero)
6167 {
6168 if (framesize < 0)
6169 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6170 else /* TMP2 is set to [private_data_ptr] above. */
6171 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
6172 }
6173
6174 /* None of them matched. */
6175 set_jumps(emptymatch, LABEL());
6176 decrease_call_count(common);
6177 return cc + 1 + LINK_SIZE;
6178 }
6179
6180 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6181 {
6182 int class_len;
6183
6184 *opcode = *cc;
6185 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6186 {
6187 cc++;
6188 *type = OP_CHAR;
6189 }
6190 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6191 {
6192 cc++;
6193 *type = OP_CHARI;
6194 *opcode -= OP_STARI - OP_STAR;
6195 }
6196 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6197 {
6198 cc++;
6199 *type = OP_NOT;
6200 *opcode -= OP_NOTSTAR - OP_STAR;
6201 }
6202 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6203 {
6204 cc++;
6205 *type = OP_NOTI;
6206 *opcode -= OP_NOTSTARI - OP_STAR;
6207 }
6208 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6209 {
6210 cc++;
6211 *opcode -= OP_TYPESTAR - OP_STAR;
6212 *type = 0;
6213 }
6214 else
6215 {
6216 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6217 *type = *opcode;
6218 cc++;
6219 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6220 *opcode = cc[class_len - 1];
6221 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6222 {
6223 *opcode -= OP_CRSTAR - OP_STAR;
6224 if (end != NULL)
6225 *end = cc + class_len;
6226 }
6227 else
6228 {
6229 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6230 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6231 *arg2 = GET2(cc, class_len);
6232
6233 if (*arg2 == 0)
6234 {
6235 SLJIT_ASSERT(*arg1 != 0);
6236 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6237 }
6238 if (*arg1 == *arg2)
6239 *opcode = OP_EXACT;
6240
6241 if (end != NULL)
6242 *end = cc + class_len + 2 * IMM2_SIZE;
6243 }
6244 return cc;
6245 }
6246
6247 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6248 {
6249 *arg1 = GET2(cc, 0);
6250 cc += IMM2_SIZE;
6251 }
6252
6253 if (*type == 0)
6254 {
6255 *type = *cc;
6256 if (end != NULL)
6257 *end = next_opcode(common, cc);
6258 cc++;
6259 return cc;
6260 }
6261
6262 if (end != NULL)
6263 {
6264 *end = cc + 1;
6265 #ifdef SUPPORT_UTF
6266 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6267 #endif
6268 }
6269 return cc;
6270 }
6271
/* Emits the matching path of a repeated single item (a character, a
   character type or a character class).

   The repeat is first decoded by get_iterator_parameters(); code is then
   generated according to the normalized repeat opcode. The value returned
   is the "end" address reported by get_iterator_parameters().
   NOTE(review): PUSH_BACKTRACK presumably returns its third argument (NULL)
   from this function when the allocation fails - confirm at the macro. */
6272 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6273 {
6274 DEFINE_COMPILER;
6275 backtrack_common *backtrack;
6276 pcre_uchar opcode;
6277 pcre_uchar type;
6278 int arg1 = -1, arg2 = -1;
6279 pcre_uchar* end;
6280 jump_list *nomatch = NULL;
6281 struct sljit_jump *jump = NULL;
6282 struct sljit_label *label;
/* Iterator state is kept in two words: on the backtrack stack (STACK(0) and
   STACK(1)) when no private data slot is assigned to this opcode, otherwise
   in the locals area at private_data_ptr and the word that follows it. */
6283 int private_data_ptr = PRIVATE_DATA(cc);
6284 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6285 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6286 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_w);
6287 int tmp_base, tmp_offset;
6288
6289 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6290
6291 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6292
/* Select the scratch location used by the EXACT and possessive forms: the
   TMP3 register for the item types listed first, the POSSESSIVE0 local for
   the rest. NOTE(review): presumably matching the latter types may clobber
   TMP3 - confirm against compile_char1_matchingpath. */
6293 switch (type)
6294 {
6295 case OP_NOT_DIGIT:
6296 case OP_DIGIT:
6297 case OP_NOT_WHITESPACE:
6298 case OP_WHITESPACE:
6299 case OP_NOT_WORDCHAR:
6300 case OP_WORDCHAR:
6301 case OP_ANY:
6302 case OP_ALLANY:
6303 case OP_ANYBYTE:
6304 case OP_ANYNL:
6305 case OP_NOT_HSPACE:
6306 case OP_HSPACE:
6307 case OP_NOT_VSPACE:
6308 case OP_VSPACE:
6309 case OP_CHAR:
6310 case OP_CHARI:
6311 case OP_NOT:
6312 case OP_NOTI:
6313 case OP_CLASS:
6314 case OP_NCLASS:
6315 tmp_base = TMP3;
6316 tmp_offset = 0;
6317 break;
6318
6319 default:
6320 SLJIT_ASSERT_STOP();
6321 /* Fall through. */
6322
6323 case OP_EXTUNI:
6324 case OP_XCLASS:
6325 case OP_NOTPROP:
6326 case OP_PROP:
6327 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6328 tmp_offset = POSSESSIVE0;
6329 break;
6330 }
6331
6332 switch(opcode)
6333 {
/* Greedy repeats. */
6334 case OP_STAR:
6335 case OP_PLUS:
6336 case OP_UPTO:
6337 case OP_CRRANGE:
6338 if (type == OP_ANYNL || type == OP_EXTUNI)
6339 {
/* For these two item types every position reached is pushed onto the
   backtrack stack (one allocate_stack(common, 1) per iteration below); a
   zero word is stored first, below the pushed positions. */
6340 SLJIT_ASSERT(private_data_ptr == 0);
6341 if (opcode == OP_STAR || opcode == OP_UPTO)
6342 {
6343 allocate_stack(common, 2);
6344 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6345 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6346 }
6347 else
6348 {
6349 allocate_stack(common, 1);
6350 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6351 }
6352
/* The bounded forms count the iterations in POSSESSIVE0, starting at 0. */
6353 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6354 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6355
6356 label = LABEL();
6357 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6358 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6359 {
6360 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6361 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6362 if (opcode == OP_CRRANGE && arg2 > 0)
6363 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6364 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6365 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6366 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6367 }
6368
6369 /* We cannot use TMP3 because of this allocate_stack. */
6370 allocate_stack(common, 1);
6371 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6372 JUMPTO(SLJIT_JUMP, label);
6373 if (jump != NULL)
6374 JUMPHERE(jump);
6375 }
6376 else
6377 {
/* All other item types need only two words of state: offset0 holds the
   position after the last successful match; offset1 holds the starting
   position (STAR / PLUS) or an iteration counter that starts at 1. */
6378 if (opcode == OP_PLUS)
6379 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6380 if (private_data_ptr == 0)
6381 allocate_stack(common, 2);
6382 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6383 if (opcode <= OP_PLUS)
6384 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6385 else
6386 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6387 label = LABEL();
6388 compile_char1_matchingpath(common, type, cc, &nomatch);
6389 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6390 if (opcode <= OP_PLUS)
6391 JUMPTO(SLJIT_JUMP, label);
6392 else if (opcode == OP_CRRANGE && arg1 == 0)
6393 {
/* arg1 == 0: no upper limit is tested, the counter is only incremented. */
6394 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6395 JUMPTO(SLJIT_JUMP, label);
6396 }
6397 else
6398 {
6399 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6400 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6401 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6402 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6403 }
6404 set_jumps(nomatch, LABEL());
/* The counter starts at 1, so a value below arg2 + 1 means that fewer than
   arg2 iterations matched: backtrack. */
6405 if (opcode == OP_CRRANGE)
6406 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6407 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6408 }
6409 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6410 break;
6411
/* Minimizing star / plus: only the current position is saved here; no
   repetition loop is emitted on the matching path. */
6412 case OP_MINSTAR:
6413 case OP_MINPLUS:
6414 if (opcode == OP_MINPLUS)
6415 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6416 if (private_data_ptr == 0)
6417 allocate_stack(common, 1);
6418 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6419 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6420 break;
6421
/* Minimizing bounded repeats: save the position and a counter set to 1.
   OP_CRMINRANGE jumps straight to the backtracking path. */
6422 case OP_MINUPTO:
6423 case OP_CRMINRANGE:
6424 if (private_data_ptr == 0)
6425 allocate_stack(common, 2);
6426 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6427 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6428 if (opcode == OP_CRMINRANGE)
6429 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6430 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6431 break;
6432
/* Optional item: the position is saved first; only the greedy form tries
   to match the item on the matching path. */
6433 case OP_QUERY:
6434 case OP_MINQUERY:
6435 if (private_data_ptr == 0)
6436 allocate_stack(common, 1);
6437 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6438 if (opcode == OP_QUERY)
6439 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6440 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6441 break;
6442
/* Exact count: the scratch location counts down from arg1 to zero. */
6443 case OP_EXACT:
6444 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6445 label = LABEL();
6446 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6447 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6448 JUMPTO(SLJIT_C_NOT_ZERO, label);
6449 break;
6450
/* Possessive repeats: the scratch location remembers the position after
   the last successful match, and STR_PTR is reloaded from it once an
   iteration fails. OP_POSUPTO additionally counts down from arg1 in
   POSSESSIVE1. */
6451 case OP_POSSTAR:
6452 case OP_POSPLUS:
6453 case OP_POSUPTO:
6454 if (opcode == OP_POSPLUS)
6455 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6456 if (opcode == OP_POSUPTO)
6457 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6458 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6459 label = LABEL();
6460 compile_char1_matchingpath(common, type, cc, &nomatch);
6461 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6462 if (opcode != OP_POSUPTO)
6463 JUMPTO(SLJIT_JUMP, label);
6464 else
6465 {
6466 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6467 JUMPTO(SLJIT_C_NOT_ZERO, label);
6468 }
6469 set_jumps(nomatch, LABEL());
6470 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6471 break;
6472
/* Possessive optional item: a single attempt, no loop. */
6473 case OP_POSQUERY:
6474 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6475 compile_char1_matchingpath(common, type, cc, &nomatch);
6476 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6477 set_jumps(nomatch, LABEL());
6478 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6479 break;
6480
6481 default:
6482 SLJIT_ASSERT_STOP();
6483 break;
6484 }
6485
6486 decrease_call_count(common);
6487 return end;
6488 }
6489
6490 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6491 {
6492 DEFINE_COMPILER;
6493 backtrack_common *backtrack;
6494
6495 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6496
6497 if (*cc == OP_FAIL)
6498 {
6499 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6500 return cc + 1;
6501 }
6502
6503 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6504 {
6505 /* No need to check notempty conditions. */
6506 if (common->acceptlabel == NULL)
6507 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6508 else
6509 JUMPTO(SLJIT_JUMP, common->acceptlabel);
6510 return cc + 1;
6511 }
6512
6513 if (common->acceptlabel == NULL)
6514 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6515 else
6516 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
6517 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6518 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6519 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6520 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6521 if (common->acceptlabel == NULL)
6522 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6523 else
6524 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
6525 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6526 if (common->acceptlabel == NULL)
6527 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6528 else
6529 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
6530 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6531 return cc + 1;
6532 }
6533
6534 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
6535 {
6536 DEFINE_COMPILER;
6537 int offset = GET2(cc, 1);
6538 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
6539
6540 /* Data will be discarded anyway... */
6541 if (common->currententry != NULL)
6542 return cc + 1 + IMM2_SIZE;
6543
6544 if (!optimized_cbracket)
6545 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6546 offset <<= 1;
6547 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6548 if (!optimized_cbracket)
6549 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6550 return cc + 1 + IMM2_SIZE;
6551 }
6552
6553 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6554 {
6555 DEFINE_COMPILER;
6556 backtrack_common *backtrack;
6557
6558 while (cc < ccend)
6559 {
6560 switch(*cc)
6561 {
6562 case OP_SOD:
6563 case OP_SOM:
6564 case OP_NOT_WORD_BOUNDARY:
6565 case OP_WORD_BOUNDARY:
6566 case OP_NOT_DIGIT:
6567 case OP_DIGIT:
6568 case OP_NOT_WHITESPACE:
6569 case OP_WHITESPACE:
6570 case OP_NOT_WORDCHAR:
6571 case OP_WORDCHAR:
6572 case OP_ANY:
6573 case OP_ALLANY:
6574 case OP_ANYBYTE:
6575 case OP_NOTPROP:
6576 case OP_PROP:
6577 case OP_ANYNL:
6578 case OP_NOT_HSPACE:
6579 case OP_HSPACE:
6580 case OP_NOT_VSPACE:
6581 case OP_VSPACE:
6582 case OP_EXTUNI:
6583 case OP_EODN:
6584 case OP_EOD:
6585 case OP_CIRC:
6586 case OP_CIRCM:
6587 case OP_DOLL:
6588 case OP_DOLLM:
6589 case OP_NOT:
6590 case OP_NOTI:
6591 case OP_REVERSE:
6592 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6593 break;
6594
6595 case OP_SET_SOM:
6596 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6597 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6598 allocate_stack(common, 1);
6599 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6600 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6601 cc++;
6602 break;
6603
6604 case OP_CHAR:
6605 case OP_CHARI:
6606 if (common->mode == JIT_COMPILE)
6607 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6608 else
6609 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6610 break;
6611
6612 case OP_STAR:
6613 case OP_MINSTAR:
6614 case OP_PLUS:
6615 case OP_MINPLUS:
6616 case OP_QUERY:
6617 case OP_MINQUERY:
6618 case OP_UPTO:
6619 case OP_MINUPTO:
6620 case OP_EXACT:
6621 case OP_POSSTAR:
6622 case OP_POSPLUS:
6623 case OP_POSQUERY:
6624 case OP_POSUPTO:
6625 case OP_STARI:
6626 case OP_MINSTARI:
6627 case OP_PLUSI:
6628 case OP_MINPLUSI:
6629 case OP_QUERYI:
6630 case OP_MINQUERYI:
6631 case OP_UPTOI:
6632 case OP_MINUPTOI:
6633 case OP_EXACTI:
6634 case OP_POSSTARI:
6635 case OP_POSPLUSI:
6636 case OP_POSQUERYI:
6637 case OP_POSUPTOI:
6638 case OP_NOTSTAR:
6639 case OP_NOTMINSTAR:
6640 case OP_NOTPLUS:
6641 case OP_NOTMINPLUS:
6642 case OP_NOTQUERY:
6643 case OP_NOTMINQUERY:
6644 case OP_NOTUPTO:
6645 case OP_NOTMINUPTO:
6646 case OP_NOTEXACT:
6647 case OP_NOTPOSSTAR:
6648 case OP_NOTPOSPLUS:
6649 case OP_NOTPOSQUERY:
6650 case OP_NOTPOSUPTO:
6651 case OP_NOTSTARI:
6652 case OP_NOTMINSTARI:
6653 case OP_NOTPLUSI:
6654 case OP_NOTMINPLUSI:
6655 case OP_NOTQUERYI:
6656 case OP_NOTMINQUERYI:
6657 case OP_NOTUPTOI:
6658 case OP_NOTMINUPTOI:
6659 case OP_NOTEXACTI:
6660 case OP_NOTPOSSTARI:
6661 case OP_NOTPOSPLUSI:
6662 case OP_NOTPOSQUERYI:
6663 case OP_NOTPOSUPTOI:
6664 case OP_TYPESTAR:
6665 case OP_TYPEMINSTAR:
6666 case OP_TYPEPLUS:
6667 case OP_TYPEMINPLUS:
6668 case OP_TYPEQUERY:
6669 case OP_TYPEMINQUERY:
6670 case OP_TYPEUPTO:
6671 case OP_TYPEMINUPTO:
6672 case OP_TYPEEXACT:
6673 case OP_TYPEPOSSTAR:
6674 case OP_TYPEPOSPLUS:
6675 case OP_TYPEPOSQUERY:
6676 case OP_TYPEPOSUPTO:
6677 cc = compile_iterator_matchingpath(common, cc, parent);
6678 break;
6679
6680 case OP_CLASS:
6681 case OP_NCLASS:
6682 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
6683 cc = compile_iterator_matchingpath(common, cc, parent);
6684 else
6685 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6686 break;
6687
6688 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6689 case OP_XCLASS:
6690 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
6691 cc = compile_iterator_matchingpath(common, cc, parent);
6692 else
6693 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6694 break;
6695 #endif
6696
6697 case OP_REF:
6698 case OP_REFI:
6699 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
6700 cc = compile_ref_iterator_matchingpath(common, cc, parent);
6701 else
6702 cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
6703 break;
6704
6705 case OP_RECURSE:
6706 cc = compile_recurse_matchingpath(common, cc, parent);
6707 break;
6708
6709 case OP_ASSERT:
6710 case OP_ASSERT_NOT:
6711 case OP_ASSERTBACK:
6712 case OP_ASSERTBACK_NOT:
6713 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6714 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6715 break;
6716
6717 case OP_BRAMINZERO:
6718 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
6719 cc = bracketend(cc + 1);
6720 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
6721 {
6722 allocate_stack(common, 1);
6723 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6724 }
6725 else
6726 {
6727 allocate_stack(common, 2);
6728 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
6730 }
6731 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
6732 if (cc[1] > OP_ASSERTBACK_NOT)
6733 decrease_call_count(common);
6734 break;
6735
6736 case OP_ONCE:
6737 case OP_ONCE_NC:
6738 case OP_BRA:
6739 case OP_CBRA:
6740 case OP_COND:
6741 case OP_SBRA:
6742 case OP_SCBRA:
6743 case OP_SCOND:
6744 cc = compile_bracket_matchingpath(common, cc, parent);
6745 break;
6746
6747 case OP_BRAZERO:
6748 if (cc[1] > OP_ASSERTBACK_NOT)
6749 cc = compile_bracket_matchingpath(common, cc, parent);
6750 else
6751 {
6752 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6753 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6754 }
6755 break;
6756
6757 case OP_BRAPOS:
6758 case OP_CBRAPOS:
6759 case OP_SBRAPOS:
6760 case OP_SCBRAPOS:
6761 case OP_BRAPOSZERO:
6762 cc = compile_bracketpos_matchingpath(common, cc, parent);