/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1249 - (show annotations)
Mon Feb 18 09:55:43 2013 UTC (6 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 276951 byte(s)
Error occurred while calculating annotation data.
Inlining subpatterns in recursions.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* Allocate memory for the regex stack on the real machine stack.
75 Fast, but limited size. */
76 #define MACHINE_STACK_SIZE 32768
77
78 /* Growth rate for stack allocated by the OS. Should be a multiple
79 of the page size. */
80 #define STACK_GROWTH_RATE 8192
81
82 /* Enable to check that the allocation could destroy temporaries. */
83 #if defined SLJIT_DEBUG && SLJIT_DEBUG
84 #define DESTROY_REGISTERS 1
85 #endif
86
87 /*
88 Short summary about the backtracking mechanism employed by the jit code generator:
89
90 The code generator follows the recursive nature of the PERL compatible regular
91 expressions. The basic blocks of regular expressions are condition checkers
92 which execute different commands depending on the result of the condition check.
93 The relationship between the operators can be horizontal (concatenation) and
94 vertical (sub-expression) (See struct backtrack_common for more details).
95
96 'ab' - 'a' and 'b' regexps are concatenated
97 'a+' - 'a' is the sub-expression of the '+' operator
98
99 The condition checkers are boolean (true/false) checkers. Machine code is generated
100 for the checker itself and for the actions depending on the result of the checker.
101 The 'true' case is called as the matching path (expected path), and the other is called as
102 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
103 branches on the matching path.
104
105 Greedy star operator (*) :
106 Matching path: match happens.
107 Backtrack path: match failed.
108 Non-greedy star operator (*?) :
109 Matching path: no need to perform a match.
110 Backtrack path: match is required.
111
112 The following example shows how the code is generated for a capturing bracket
113 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
114 we have the following regular expression:
115
116 A(B|C)D
117
118 The generated code will be the following:
119
120 A matching path
121 '(' matching path (pushing arguments to the stack)
122 B matching path
123 ')' matching path (pushing arguments to the stack)
124 D matching path
125 return with successful match
126
127 D backtrack path
128 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
129 B backtrack path
130 C expected path
131 jump to D matching path
132 C backtrack path
133 A backtrack path
134
135 Notice that the order of the backtrack code paths is the opposite of the fast
136 code paths. In this way the topmost value on the stack always belongs
137 to the current backtrack code path. The backtrack path must check
138 whether there is a next alternative. If so, it needs to jump back to
139 the matching path eventually. Otherwise it needs to clear out its own stack
140 frame and continue the execution on the backtrack code paths.
141 */
142
143 /*
144 Saved stack frames:
145
146 Atomic blocks and asserts require reloading the values of private data
147 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
148 are not necessarily known at compile time, thus we need a dynamic restore
149 mechanism.
150
151 The stack frames are stored in a chain list, and have the following format:
152 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
153
154 Thus we can restore the private data to a particular point in the stack.
155 */
156
/* Argument block handed to the generated matching code: the jit_function
pointer type declared further down takes a pointer to one of these.
Members are grouped so that all pointers come before the integer and
byte-sized fields.
NOTE(review): str / begin / end presumably delimit the subject string and
offsets / offset_count the output vector -- their use is not visible in this
part of the file; confirm against the caller. */
157 typedef struct jit_arguments {
158 /* Pointers first. */
159 struct sljit_stack *stack;
160 const pcre_uchar *str;
161 const pcre_uchar *begin;
162 const pcre_uchar *end;
163 int *offsets;
164 pcre_uchar *uchar_ptr;
165 pcre_uchar *mark_ptr;
166 void *callout_data;
167 /* Everything else after. */
168 int offset_count;
169 int call_limit;
170 pcre_uint8 notbol;
171 pcre_uint8 noteol;
172 pcre_uint8 notempty;
173 pcre_uint8 notempty_atstart;
174 } jit_arguments;
175
/* Per-pattern JIT result record. executable_funcs and executable_sizes each
have one slot per compile mode (JIT_NUMBER_OF_COMPILE_MODES entries).
NOTE(review): callback / userdata presumably select the JIT stack at match
time and top_bracket presumably records the highest capture number -- neither
use is visible here; confirm. */
176 typedef struct executable_functions {
177 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
178 PUBL(jit_callback) callback;
179 void *userdata;
180 pcre_uint32 top_bracket;
181 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
182 } executable_functions;
183
/* Node of a singly linked list of sljit jumps. compiler_common and the
backtrack structures keep such lists for jumps that share a target. */
184 typedef struct jump_list {
185 struct sljit_jump *jump;
186 struct jump_list *next;
187 } jump_list;
188
/* Node of a singly linked list of stubs; each entry pairs a jump (start)
with a label (quit).
NOTE(review): presumably start enters out-of-line code and quit is where
that code resumes -- confirm where the stubs are emitted. */
189 typedef struct stub_list {
190 struct sljit_jump *start;
191 struct sljit_label *quit;
192 struct stub_list *next;
193 } stub_list;
194
/* Negative sentinels used in place of a frame size; get_framesize is
documented as returning a frame_types value (always < 0) when no frame is
needed. */
195 enum frame_types { no_frame = -1, no_stack = -2 };
196
/* Pointer to a function that takes a jit_arguments block and returns int,
using the SLJIT_CALL calling convention. */
197 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
198
199 /* The following structure is the key data type for the recursive
200 code generator. It is allocated by compile_matchingpath, and contains
201 the arguments for compile_backtrackingpath. Must be the first member
202 of its descendants. */
203 typedef struct backtrack_common {
204 /* Concatenation stack. */
205 struct backtrack_common *prev;
206 jump_list *nextbacktracks;
207 /* Internal stack (for component operators). */
208 struct backtrack_common *top;
209 jump_list *topbacktracks;
210 /* Opcode pointer. */
211 pcre_uchar *cc;
212 } backtrack_common;
213
/* Backtrack record for assertions; extends backtrack_common, which is kept
as the first member as that structure requires. */
214 typedef struct assert_backtrack {
215 backtrack_common common;
/* Jumps collected for a failed condition (bracket_backtrack can reach this
record through its u.assert member). */
216 jump_list *condfailed;
217 /* Less than 0 (-1) if a frame is not needed. */
218 int framesize;
219 /* Points to our private memory word on the stack. */
220 int private_data_ptr;
221 /* For iterators. */
222 struct sljit_label *matchingpath;
223 } assert_backtrack;
224
/* Backtrack record for bracket opcodes; extends backtrack_common (first
member). The union member in use depends on the bracket kind, as noted on
each alternative. */
225 typedef struct bracket_backtrack {
226 backtrack_common common;
227 /* Where to continue if an alternative is successfully matched. */
228 struct sljit_label *alternative_matchingpath;
229 /* For rmin and rmax iterators. */
230 struct sljit_label *recursive_matchingpath;
231 /* For greedy ? operator. */
232 struct sljit_label *zero_matchingpath;
233 /* Contains the branches of a failed condition. */
234 union {
235 /* Both for OP_COND, OP_SCOND. */
236 jump_list *condfailed;
237 assert_backtrack *assert;
238 /* For OP_ONCE. -1 if not needed. */
239 int framesize;
240 } u;
241 /* Points to our private memory word on the stack. */
242 int private_data_ptr;
243 } bracket_backtrack;
244
/* Backtrack record extending backtrack_common (first member).
NOTE(review): judging by the name this serves the possessive bracket opcodes
(OP_BRAPOS, OP_CBRAPOS, ...) -- confirm at the allocation site. */
245 typedef struct bracketpos_backtrack {
246 backtrack_common common;
247 /* Points to our private memory word on the stack. */
248 int private_data_ptr;
249 /* Reverting stack is needed. */
250 int framesize;
251 /* Allocated stack size. */
252 int stacksize;
253 } bracketpos_backtrack;
254
/* Backtrack record extending backtrack_common (first member) with a single
matching-path label.
NOTE(review): presumably used for OP_BRAMINZERO -- confirm at the allocation
site. */
255 typedef struct braminzero_backtrack {
256 backtrack_common common;
257 struct sljit_label *matchingpath;
258 } braminzero_backtrack;
259
/* Backtrack record for iterators; extends backtrack_common (first member)
with the label at which the next iteration starts. */
260 typedef struct iterator_backtrack {
261 backtrack_common common;
262 /* Next iteration. */
263 struct sljit_label *matchingpath;
264 } iterator_backtrack;
265
/* Node of the singly linked list of recursion entry points (compiler_common
holds the list head in `entries` and a `currententry` pointer). */
266 typedef struct recurse_entry {
267 struct recurse_entry *next;
268 /* Contains the function entry. */
269 struct sljit_label *entry;
270 /* Collects the calls made before the function has been created. */
271 jump_list *calls;
272 /* Points to the starting opcode. */
273 int start;
274 } recurse_entry;
275
/* Backtrack record for recursion; extends backtrack_common (first member).
NOTE(review): inlined_pattern presumably records that the recursed
subpattern was emitted inline instead of being called -- confirm. */
276 typedef struct recurse_backtrack {
277 backtrack_common common;
278 BOOL inlined_pattern;
279 } recurse_backtrack;
280
/* Sizes the digits[] array in compiler_common (2 + MAX_RANGE_SIZE ints). */
281 #define MAX_RANGE_SIZE 6
282
/* State shared by every code generation routine for one compilation. The
int members whose comments say "pointer"/"offset" are offsets handed out from
ovector_start or the private data area: get_private_data_length reserves
recursive_head_ptr, capture_last_ptr and mark_ptr that way, and treats the
value 0 as "not reserved yet". */
283 typedef struct compiler_common {
284 struct sljit_compiler *compiler;
285 pcre_uchar *start;
286
287 /* Maps private data offset to each opcode. */
288 int *private_data_ptrs;
289 /* Tells whether the capturing bracket is optimized. */
290 pcre_uint8 *optimized_cbracket;
291 /* Starting offset of private data for capturing brackets. */
292 int cbraptr;
293 /* OVector starting point. Must be divisible by 2. */
294 int ovector_start;
295 /* Last known position of the requested byte. */
296 int req_char_ptr;
297 /* Head of the last recursion. */
298 int recursive_head_ptr;
299 /* First inspected character for partial matching. */
300 int start_used_ptr;
301 /* Starting pointer for partial soft matches. */
302 int hit_start;
303 /* End pointer of the first line. */
304 int first_line_end;
305 /* Points to the marked string. */
306 int mark_ptr;
307 /* Points to the last matched capture block index. */
308 int capture_last_ptr;
309
310 /* Flipped and lower case tables. */
311 const pcre_uint8 *fcc;
312 sljit_sw lcc;
313 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
314 int mode;
315 /* Newline control. */
316 int nltype;
317 int newline;
318 int bsr_nltype;
319 /* Dollar endonly. */
320 int endonly;
/* Set to TRUE by get_private_data_length when the pattern has OP_SET_SOM. */
321 BOOL has_set_som;
322 /* Tables. */
323 sljit_sw ctypes;
324 int digits[2 + MAX_RANGE_SIZE];
325 /* Named capturing brackets. */
326 sljit_uw name_table;
327 sljit_sw name_count;
328 sljit_sw name_entry_size;
329
330 /* Labels and jump lists. */
331 struct sljit_label *partialmatchlabel;
332 struct sljit_label *quit_label;
333 struct sljit_label *forced_quit_label;
334 struct sljit_label *accept_label;
335 stub_list *stubs;
336 recurse_entry *entries;
337 recurse_entry *currententry;
338 jump_list *partialmatch;
339 jump_list *quit;
340 jump_list *forced_quit;
341 jump_list *accept;
342 jump_list *calllimit;
343 jump_list *stackalloc;
344 jump_list *revertframes;
345 jump_list *wordboundary;
346 jump_list *anynewline;
347 jump_list *hspace;
348 jump_list *vspace;
349 jump_list *casefulcmp;
350 jump_list *caselesscmp;
351 BOOL jscript_compat;
/* The remaining members exist only in builds with the matching UTF / UCP /
code-unit-width support compiled in. */
352 #ifdef SUPPORT_UTF
353 BOOL utf;
354 #ifdef SUPPORT_UCP
355 BOOL use_ucp;
356 #endif
357 #ifndef COMPILE_PCRE32
358 jump_list *utfreadchar;
359 #endif
360 #ifdef COMPILE_PCRE8
361 jump_list *utfreadtype8;
362 #endif
363 #endif /* SUPPORT_UTF */
364 #ifdef SUPPORT_UCP
365 jump_list *getucd;
366 #endif
367 } compiler_common;
368
369 /* For byte_sequence_compare. */
370
/* length and sourcereg are always present. The remaining members exist only
when SLJIT_UNALIGNED is enabled: c and oc are two unions with the same shape
that overlay an int / unsigned short view on a small array of code units
(4 x 8-bit, 2 x 16-bit or 1 x 32-bit depending on the library width).
NOTE(review): c and oc presumably hold the characters and their other-case
counterparts, and ucharptr the fill position -- confirm in
byte_sequence_compare. */
371 typedef struct compare_context {
372 int length;
373 int sourcereg;
374 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
375 int ucharptr;
376 union {
377 sljit_si asint;
378 sljit_uh asushort;
379 #if defined COMPILE_PCRE8
380 sljit_ub asbyte;
381 sljit_ub asuchars[4];
382 #elif defined COMPILE_PCRE16
383 sljit_uh asuchars[2];
384 #elif defined COMPILE_PCRE32
385 sljit_ui asuchars[1];
386 #endif
387 } c;
388 union {
389 sljit_si asint;
390 sljit_uh asushort;
391 #if defined COMPILE_PCRE8
392 sljit_ub asbyte;
393 sljit_ub asuchars[4];
394 #elif defined COMPILE_PCRE16
395 sljit_uh asuchars[2];
396 #elif defined COMPILE_PCRE32
397 sljit_ui asuchars[1];
398 #endif
399 } oc;
400 #endif
401 } compare_context;
402
403 /* Undefine sljit macros. (CMP is redefined further down as a shortcut
around sljit_emit_cmp.) */
404 #undef CMP
405
406 /* Used for accessing the elements of the stack. Yields a negative byte
offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw),
and so on. */
407 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
408
/* Symbolic names for the sljit registers used by the code generator.
NOTE(review): the roles are inferred from the names only (string position
and end, stack top and limit, argument block, call counter, return address);
confirm against the emitting code. */
409 #define TMP1 SLJIT_SCRATCH_REG1
410 #define TMP2 SLJIT_SCRATCH_REG3
411 #define TMP3 SLJIT_TEMPORARY_EREG2
412 #define STR_PTR SLJIT_SAVED_REG1
413 #define STR_END SLJIT_SAVED_REG2
414 #define STACK_TOP SLJIT_SCRATCH_REG2
415 #define STACK_LIMIT SLJIT_SAVED_REG3
416 #define ARGUMENTS SLJIT_SAVED_EREG1
417 #define CALL_COUNT SLJIT_SAVED_EREG2
418 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
419
420 /* Local space layout. Each of the fixed slots below is a byte offset equal
to a whole number of sljit_sw words (slots 0 to 4). */
421 /* These two locals can be used by the current opcode. */
422 #define LOCALS0 (0 * sizeof(sljit_sw))
423 #define LOCALS1 (1 * sizeof(sljit_sw))
424 /* Two local variables for possessive quantifiers (char1 cannot use them). */
425 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
426 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
427 /* Max limit of recursions. */
428 #define CALL_LIMIT (4 * sizeof(sljit_sw))
429 /* The output vector is stored on the stack, and contains pointers
430 to characters. The vector data is divided into two groups: the first
431 group contains the start / end character pointers, and the second is
432 the start pointers when the end of the capturing group has not yet been
reached. The macros below expect a variable named `common` (a
compiler_common pointer) to be in scope. */
433 #define OVECTOR_START (common->ovector_start)
434 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
435 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_sw))
/* Looks up the private data offset recorded for the opcode at cc; the table
is indexed by the opcode's distance from common->start. */
436 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
437
/* Selects the sljit move opcodes used for one code unit, according to which
library width is being compiled (8, 16 or 32 bit); the build fails if none
of the COMPILE_PCRE* macros is defined.
NOTE(review): the MOVU_ variants are presumably the address-updating forms
of the same moves -- confirm in the sljit documentation. */
438 #if defined COMPILE_PCRE8
439 #define MOV_UCHAR SLJIT_MOV_UB
440 #define MOVU_UCHAR SLJIT_MOVU_UB
441 #elif defined COMPILE_PCRE16
442 #define MOV_UCHAR SLJIT_MOV_UH
443 #define MOVU_UCHAR SLJIT_MOVU_UH
444 #elif defined COMPILE_PCRE32
445 #define MOV_UCHAR SLJIT_MOV_UI
446 #define MOVU_UCHAR SLJIT_MOVU_UI
447 #else
448 #error Unsupported compiling mode
449 #endif
450
451 /* Shortcuts. Every macro below passes a local variable named `compiler`
to the underlying sljit call; DEFINE_COMPILER declares that variable from
common->compiler, so it must appear in each function that uses them. */
452 #define DEFINE_COMPILER \
453 struct sljit_compiler *compiler = common->compiler
454 #define OP1(op, dst, dstw, src, srcw) \
455 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
456 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
457 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
458 #define LABEL() \
459 sljit_emit_label(compiler)
460 #define JUMP(type) \
461 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
462 #define JUMPTO(type, label) \
463 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds `jump` to it. */
464 #define JUMPHERE(jump) \
465 sljit_set_label((jump), sljit_emit_label(compiler))
466 #define SET_LABEL(jump, label) \
467 sljit_set_label((jump), (label))
468 #define CMP(type, src1, src1w, src2, src2w) \
469 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare and binds its jump to an existing label. */
470 #define CMPTO(type, src1, src1w, src2, src2w, label) \
471 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
472 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
473 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
474 #define GET_LOCAL_BASE(dst, dstw, offset) \
475 sljit_get_local_base(compiler, (dst), (dstw), (offset))
476
477 static pcre_uchar* bracketend(pcre_uchar* cc)
478 {
479 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
480 do cc += GET(cc, 1); while (*cc == OP_ALT);
481 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
482 cc += 1 + LINK_SIZE;
483 return cc;
484 }
485
486 /* Functions which might need modification for all new supported opcodes:
487 next_opcode
488 get_private_data_length
489 set_private_data_ptrs
490 get_framesize
491 init_frame
492 get_private_data_length_for_copy
493 copy_private_data
494 compile_matchingpath
495 compile_backtrackingpath
496 */
497
498 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
499 {
500 SLJIT_UNUSED_ARG(common);
501 switch(*cc)
502 {
503 case OP_SOD:
504 case OP_SOM:
505 case OP_SET_SOM:
506 case OP_NOT_WORD_BOUNDARY:
507 case OP_WORD_BOUNDARY:
508 case OP_NOT_DIGIT:
509 case OP_DIGIT:
510 case OP_NOT_WHITESPACE:
511 case OP_WHITESPACE:
512 case OP_NOT_WORDCHAR:
513 case OP_WORDCHAR:
514 case OP_ANY:
515 case OP_ALLANY:
516 case OP_NOTPROP:
517 case OP_PROP:
518 case OP_ANYNL:
519 case OP_NOT_HSPACE:
520 case OP_HSPACE:
521 case OP_NOT_VSPACE:
522 case OP_VSPACE:
523 case OP_EXTUNI:
524 case OP_EODN:
525 case OP_EOD:
526 case OP_CIRC:
527 case OP_CIRCM:
528 case OP_DOLL:
529 case OP_DOLLM:
530 case OP_CRSTAR:
531 case OP_CRMINSTAR:
532 case OP_CRPLUS:
533 case OP_CRMINPLUS:
534 case OP_CRQUERY:
535 case OP_CRMINQUERY:
536 case OP_CRRANGE:
537 case OP_CRMINRANGE:
538 case OP_CLASS:
539 case OP_NCLASS:
540 case OP_REF:
541 case OP_REFI:
542 case OP_RECURSE:
543 case OP_CALLOUT:
544 case OP_ALT:
545 case OP_KET:
546 case OP_KETRMAX:
547 case OP_KETRMIN:
548 case OP_KETRPOS:
549 case OP_REVERSE:
550 case OP_ASSERT:
551 case OP_ASSERT_NOT:
552 case OP_ASSERTBACK:
553 case OP_ASSERTBACK_NOT:
554 case OP_ONCE:
555 case OP_ONCE_NC:
556 case OP_BRA:
557 case OP_BRAPOS:
558 case OP_CBRA:
559 case OP_CBRAPOS:
560 case OP_COND:
561 case OP_SBRA:
562 case OP_SBRAPOS:
563 case OP_SCBRA:
564 case OP_SCBRAPOS:
565 case OP_SCOND:
566 case OP_CREF:
567 case OP_NCREF:
568 case OP_RREF:
569 case OP_NRREF:
570 case OP_DEF:
571 case OP_BRAZERO:
572 case OP_BRAMINZERO:
573 case OP_BRAPOSZERO:
574 case OP_COMMIT:
575 case OP_FAIL:
576 case OP_ACCEPT:
577 case OP_ASSERT_ACCEPT:
578 case OP_CLOSE:
579 case OP_SKIPZERO:
580 return cc + PRIV(OP_lengths)[*cc];
581
582 case OP_CHAR:
583 case OP_CHARI:
584 case OP_NOT:
585 case OP_NOTI:
586 case OP_STAR:
587 case OP_MINSTAR:
588 case OP_PLUS:
589 case OP_MINPLUS:
590 case OP_QUERY:
591 case OP_MINQUERY:
592 case OP_UPTO:
593 case OP_MINUPTO:
594 case OP_EXACT:
595 case OP_POSSTAR:
596 case OP_POSPLUS:
597 case OP_POSQUERY:
598 case OP_POSUPTO:
599 case OP_STARI:
600 case OP_MINSTARI:
601 case OP_PLUSI:
602 case OP_MINPLUSI:
603 case OP_QUERYI:
604 case OP_MINQUERYI:
605 case OP_UPTOI:
606 case OP_MINUPTOI:
607 case OP_EXACTI:
608 case OP_POSSTARI:
609 case OP_POSPLUSI:
610 case OP_POSQUERYI:
611 case OP_POSUPTOI:
612 case OP_NOTSTAR:
613 case OP_NOTMINSTAR:
614 case OP_NOTPLUS:
615 case OP_NOTMINPLUS:
616 case OP_NOTQUERY:
617 case OP_NOTMINQUERY:
618 case OP_NOTUPTO:
619 case OP_NOTMINUPTO:
620 case OP_NOTEXACT:
621 case OP_NOTPOSSTAR:
622 case OP_NOTPOSPLUS:
623 case OP_NOTPOSQUERY:
624 case OP_NOTPOSUPTO:
625 case OP_NOTSTARI:
626 case OP_NOTMINSTARI:
627 case OP_NOTPLUSI:
628 case OP_NOTMINPLUSI:
629 case OP_NOTQUERYI:
630 case OP_NOTMINQUERYI:
631 case OP_NOTUPTOI:
632 case OP_NOTMINUPTOI:
633 case OP_NOTEXACTI:
634 case OP_NOTPOSSTARI:
635 case OP_NOTPOSPLUSI:
636 case OP_NOTPOSQUERYI:
637 case OP_NOTPOSUPTOI:
638 cc += PRIV(OP_lengths)[*cc];
639 #ifdef SUPPORT_UTF
640 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
641 #endif
642 return cc;
643
644 /* Special cases. */
645 case OP_TYPESTAR:
646 case OP_TYPEMINSTAR:
647 case OP_TYPEPLUS:
648 case OP_TYPEMINPLUS:
649 case OP_TYPEQUERY:
650 case OP_TYPEMINQUERY:
651 case OP_TYPEUPTO:
652 case OP_TYPEMINUPTO:
653 case OP_TYPEEXACT:
654 case OP_TYPEPOSSTAR:
655 case OP_TYPEPOSPLUS:
656 case OP_TYPEPOSQUERY:
657 case OP_TYPEPOSUPTO:
658 return cc + PRIV(OP_lengths)[*cc] - 1;
659
660 case OP_ANYBYTE:
661 #ifdef SUPPORT_UTF
662 if (common->utf) return NULL;
663 #endif
664 return cc + 1;
665
666 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
667 case OP_XCLASS:
668 return cc + GET(cc, 1);
669 #endif
670
671 case OP_MARK:
672 return cc + 1 + 2 + cc[1];
673
674 default:
675 return NULL;
676 }
677 }
678
/* Case-label groups shared by get_private_data_length and
set_private_data_ptrs so that both classify the single character and type
iterators identically. The numeric suffix is the number of sljit_sw words
those functions reserve for the opcode. */

/* Character iterators given one word. */
679 #define CASE_ITERATOR_PRIVATE_DATA_1 \
680 case OP_MINSTAR: \
681 case OP_MINPLUS: \
682 case OP_QUERY: \
683 case OP_MINQUERY: \
684 case OP_MINSTARI: \
685 case OP_MINPLUSI: \
686 case OP_QUERYI: \
687 case OP_MINQUERYI: \
688 case OP_NOTMINSTAR: \
689 case OP_NOTMINPLUS: \
690 case OP_NOTQUERY: \
691 case OP_NOTMINQUERY: \
692 case OP_NOTMINSTARI: \
693 case OP_NOTMINPLUSI: \
694 case OP_NOTQUERYI: \
695 case OP_NOTMINQUERYI:
696
/* Character iterators given two words. */
697 #define CASE_ITERATOR_PRIVATE_DATA_2A \
698 case OP_STAR: \
699 case OP_PLUS: \
700 case OP_STARI: \
701 case OP_PLUSI: \
702 case OP_NOTSTAR: \
703 case OP_NOTPLUS: \
704 case OP_NOTSTARI: \
705 case OP_NOTPLUSI:
706
/* Character iterators given two words whose opcode is IMM2_SIZE units longer
than the 2A group (the users skip 2 + IMM2_SIZE instead of 2). */
707 #define CASE_ITERATOR_PRIVATE_DATA_2B \
708 case OP_UPTO: \
709 case OP_MINUPTO: \
710 case OP_UPTOI: \
711 case OP_MINUPTOI: \
712 case OP_NOTUPTO: \
713 case OP_NOTMINUPTO: \
714 case OP_NOTUPTOI: \
715 case OP_NOTMINUPTOI:
716
/* Type iterators given one word. */
717 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
718 case OP_TYPEMINSTAR: \
719 case OP_TYPEMINPLUS: \
720 case OP_TYPEQUERY: \
721 case OP_TYPEMINQUERY:
722
/* Type iterators given two words, unless the repeated type is OP_ANYNL or
OP_EXTUNI, in which case the users reserve nothing. */
723 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
724 case OP_TYPESTAR: \
725 case OP_TYPEPLUS:
726
/* As 2A, but the repeated type sits IMM2_SIZE units further into the
opcode. */
727 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
728 case OP_TYPEUPTO: \
729 case OP_TYPEMINUPTO:
730
731 static int get_class_iterator_size(pcre_uchar *cc)
732 {
733 switch(*cc)
734 {
735 case OP_CRSTAR:
736 case OP_CRPLUS:
737 return 2;
738
739 case OP_CRMINSTAR:
740 case OP_CRMINPLUS:
741 case OP_CRQUERY:
742 case OP_CRMINQUERY:
743 return 1;
744
745 case OP_CRRANGE:
746 case OP_CRMINRANGE:
747 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
748 return 0;
749 return 2;
750
751 default:
752 return 0;
753 }
754 }
755
/* First pass over the compiled pattern in [cc, ccend).

Returns the number of bytes of private data required (always a multiple of
sizeof(sljit_sw)), or -1 if the pattern cannot be handled: next_opcode
rejected an opcode, or a conditional group starts with OP_CALLOUT.

Side effects on common:
 - has_set_som is set when OP_SET_SOM is seen;
 - optimized_cbracket[] is cleared for every group that is referenced
   (OP_REF, OP_REFI, OP_CREF, OP_NCREF) or possessive (OP_CBRAPOS,
   OP_SCBRAPOS);
 - recursive_head_ptr, capture_last_ptr and mark_ptr each receive one word
   taken from ovector_start the first time OP_RECURSE, OP_CALLOUT or OP_MARK
   is met (a zero value means "not yet assigned").

Each switch case either advances cc itself, or sets one of:
 space      - words of private data wanted by the opcode;
 size       - units to skip; a negative value marks an opcode ending in a
              literal character, which may be longer in UTF mode;
 bracketlen - header length of a bracket opcode. */
756 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
757 {
758 int private_data_length = 0;
759 pcre_uchar *alternative;
760 pcre_uchar *name;
761 pcre_uchar *end = NULL;
762 int space, size, i;
763 pcre_uint32 bracketlen;
764
765 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
766 while (cc < ccend)
767 {
768 space = 0;
769 size = 0;
770 bracketlen = 0;
771 switch(*cc)
772 {
773 case OP_SET_SOM:
774 common->has_set_som = TRUE;
775 cc += 1;
776 break;
777
778 case OP_REF:
779 case OP_REFI:
780 common->optimized_cbracket[GET2(cc, 1)] = 0;
781 cc += 1 + IMM2_SIZE;
782 break;
783
784 case OP_ASSERT:
785 case OP_ASSERT_NOT:
786 case OP_ASSERTBACK:
787 case OP_ASSERTBACK_NOT:
788 case OP_ONCE:
789 case OP_ONCE_NC:
790 case OP_BRAPOS:
791 case OP_SBRA:
792 case OP_SBRAPOS:
793 private_data_length += sizeof(sljit_sw);
794 bracketlen = 1 + LINK_SIZE;
795 break;
796
797 case OP_CBRAPOS:
798 case OP_SCBRAPOS:
799 private_data_length += sizeof(sljit_sw);
800 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
801 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
802 break;
803
804 case OP_COND:
805 case OP_SCOND:
806 /* Only AUTO_CALLOUT can insert this opcode. We do
807 not intend to support this case. */
808 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
809 return -1;
810
811 if (*cc == OP_COND)
812 {
813 /* Might be a hidden SCOND. */
814 alternative = cc + GET(cc, 1);
815 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
816 private_data_length += sizeof(sljit_sw);
817 }
818 else
819 private_data_length += sizeof(sljit_sw);
820 bracketlen = 1 + LINK_SIZE;
821 break;
822
823 case OP_CREF:
824 i = GET2(cc, 1);
825 common->optimized_cbracket[i] = 0;
826 cc += 1 + IMM2_SIZE;
827 break;
828
829 case OP_NCREF:
/* bracketlen is borrowed as a scratch variable for the referenced group
number; it is reset to 0 before leaving this case. */
830 bracketlen = GET2(cc, 1);
831 name = (pcre_uchar *)common->name_table;
832 alternative = name;
/* Locate the name table entry whose group number matches. */
833 for (i = 0; i < common->name_count; i++)
834 {
835 if (GET2(name, 0) == bracketlen) break;
836 name += common->name_entry_size;
837 }
838 SLJIT_ASSERT(i != common->name_count);
839
/* Un-optimize every group that carries the same name as that entry. */
840 for (i = 0; i < common->name_count; i++)
841 {
842 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
843 common->optimized_cbracket[GET2(alternative, 0)] = 0;
844 alternative += common->name_entry_size;
845 }
846 bracketlen = 0;
847 cc += 1 + IMM2_SIZE;
848 break;
849
850 case OP_BRA:
851 bracketlen = 1 + LINK_SIZE;
852 break;
853
854 case OP_CBRA:
855 case OP_SCBRA:
856 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
857 break;
858
859 CASE_ITERATOR_PRIVATE_DATA_1
860 space = 1;
861 size = -2;
862 break;
863
864 CASE_ITERATOR_PRIVATE_DATA_2A
865 space = 2;
866 size = -2;
867 break;
868
869 CASE_ITERATOR_PRIVATE_DATA_2B
870 space = 2;
871 size = -(2 + IMM2_SIZE);
872 break;
873
874 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
875 space = 1;
876 size = 1;
877 break;
878
879 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
880 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
881 space = 2;
882 size = 1;
883 break;
884
885 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
886 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
887 space = 2;
888 size = 1 + IMM2_SIZE;
889 break;
890
891 case OP_CLASS:
892 case OP_NCLASS:
/* The opcode unit plus a 32 byte bitmap; the repeat opcode, if any,
follows directly and decides how much space is needed. */
893 size += 1 + 32 / sizeof(pcre_uchar);
894 space = get_class_iterator_size(cc + size);
895 break;
896
897 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
898 case OP_XCLASS:
899 size = GET(cc, 1);
900 space = get_class_iterator_size(cc + size);
901 break;
902 #endif
903
904 case OP_RECURSE:
905 /* Set its value only once. */
906 if (common->recursive_head_ptr == 0)
907 {
908 common->recursive_head_ptr = common->ovector_start;
909 common->ovector_start += sizeof(sljit_sw);
910 }
911 cc += 1 + LINK_SIZE;
912 break;
913
914 case OP_CALLOUT:
915 if (common->capture_last_ptr == 0)
916 {
917 common->capture_last_ptr = common->ovector_start;
918 common->ovector_start += sizeof(sljit_sw);
919 }
920 cc += 2 + 2 * LINK_SIZE;
921 break;
922
923 case OP_MARK:
924 if (common->mark_ptr == 0)
925 {
926 common->mark_ptr = common->ovector_start;
927 common->ovector_start += sizeof(sljit_sw);
928 }
929 cc += 1 + 2 + cc[1];
930 break;
931
932 default:
933 cc = next_opcode(common, cc);
934 if (cc == NULL)
935 return -1;
936 break;
937 }
938
/* Iterator space is only counted outside the bracket remembered in `end`
(see below); while cc is still inside it nothing is added. */
939 if (space > 0 && cc >= end)
940 private_data_length += sizeof(sljit_sw) * space;
941
942 if (size != 0)
943 {
944 if (size < 0)
945 {
946 cc += -size;
947 #ifdef SUPPORT_UTF
948 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
949 #endif
950 }
951 else
952 cc += size;
953 }
954
955 if (bracketlen != 0)
956 {
/* Remember where this bracket ends unless it closes with a plain OP_KET
(i.e. keep `end` only for brackets closed by another OP_KET* variant).
Brackets nested inside a remembered one leave `end` untouched. */
957 if (cc >= end)
958 {
959 end = bracketend(cc);
960 if (end[-1 - LINK_SIZE] == OP_KET)
961 end = NULL;
962 }
963 cc += bracketlen;
964 }
965 }
966 return private_data_length;
967 }
968
/* Second pass over the compiled pattern, from common->start up to ccend.

Hands out private data offsets, starting at private_data_ptr, and stores
them in common->private_data_ptrs[] at the index of the owning opcode
(its distance from common->start). The walk classifies opcodes with the
same case groups and the same space / size / bracketlen / end bookkeeping as
get_private_data_length, so the offsets assigned here stay within the length
that function computed. Unsupported opcodes are not expected at this point:
a NULL from next_opcode only trips an assertion. */
969 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
970 {
971 pcre_uchar *cc = common->start;
972 pcre_uchar *alternative;
973 pcre_uchar *end = NULL;
974 int space, size, bracketlen;
975
976 while (cc < ccend)
977 {
978 space = 0;
979 size = 0;
980 bracketlen = 0;
981 switch(*cc)
982 {
983 case OP_ASSERT:
984 case OP_ASSERT_NOT:
985 case OP_ASSERTBACK:
986 case OP_ASSERTBACK_NOT:
987 case OP_ONCE:
988 case OP_ONCE_NC:
989 case OP_BRAPOS:
990 case OP_SBRA:
991 case OP_SBRAPOS:
992 case OP_SCOND:
993 common->private_data_ptrs[cc - common->start] = private_data_ptr;
994 private_data_ptr += sizeof(sljit_sw);
995 bracketlen = 1 + LINK_SIZE;
996 break;
997
998 case OP_CBRAPOS:
999 case OP_SCBRAPOS:
1000 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1001 private_data_ptr += sizeof(sljit_sw);
1002 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1003 break;
1004
1005 case OP_COND:
1006 /* Might be a hidden SCOND. */
1007 alternative = cc + GET(cc, 1);
1008 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1009 {
1010 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1011 private_data_ptr += sizeof(sljit_sw);
1012 }
1013 bracketlen = 1 + LINK_SIZE;
1014 break;
1015
1016 case OP_BRA:
1017 bracketlen = 1 + LINK_SIZE;
1018 break;
1019
1020 case OP_CBRA:
1021 case OP_SCBRA:
1022 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1023 break;
1024
1025 CASE_ITERATOR_PRIVATE_DATA_1
1026 space = 1;
1027 size = -2;
1028 break;
1029
1030 CASE_ITERATOR_PRIVATE_DATA_2A
1031 space = 2;
1032 size = -2;
1033 break;
1034
1035 CASE_ITERATOR_PRIVATE_DATA_2B
1036 space = 2;
1037 size = -(2 + IMM2_SIZE);
1038 break;
1039
1040 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1041 space = 1;
1042 size = 1;
1043 break;
1044
1045 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1046 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1047 space = 2;
1048 size = 1;
1049 break;
1050
1051 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1052 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1053 space = 2;
1054 size = 1 + IMM2_SIZE;
1055 break;
1056
1057 case OP_CLASS:
1058 case OP_NCLASS:
1059 size += 1 + 32 / sizeof(pcre_uchar);
1060 space = get_class_iterator_size(cc + size);
1061 break;
1062
1063 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1064 case OP_XCLASS:
1065 size = GET(cc, 1);
1066 space = get_class_iterator_size(cc + size);
1067 break;
1068 #endif
1069
1070 default:
1071 cc = next_opcode(common, cc);
1072 SLJIT_ASSERT(cc != NULL);
1073 break;
1074 }
1075
/* cc has not been advanced yet for the cases that set `space`, so the
offset is recorded against the iterator / class opcode itself. Nothing is
assigned while cc is inside the bracket remembered in `end`. */
1076 if (space > 0 && cc >= end)
1077 {
1078 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1079 private_data_ptr += sizeof(sljit_sw) * space;
1080 }
1081
1082 if (size != 0)
1083 {
1084 if (size < 0)
1085 {
1086 cc += -size;
1087 #ifdef SUPPORT_UTF
1088 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1089 #endif
1090 }
1091 else
1092 cc += size;
1093 }
1094
1095 if (bracketlen > 0)
1096 {
/* Keep `end` only for brackets that do not close with a plain OP_KET. */
1097 if (cc >= end)
1098 {
1099 end = bracketend(cc);
1100 if (end[-1 - LINK_SIZE] == OP_KET)
1101 end = NULL;
1102 }
1103 cc += bracketlen;
1104 }
1105 }
1106 }
1107
1108 /* Returns with a frame_types (always < 0) if no need for frame. */
1109 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1110 {
1111 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1112 int length = 0;
1113 int possessive = 0;
1114 BOOL stack_restore = FALSE;
1115 BOOL setsom_found = recursive;
1116 BOOL setmark_found = recursive;
1117 /* The last capture is a local variable even for recursions. */
1118 BOOL capture_last_found = FALSE;
1119
1120 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1121 {
1122 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1123 /* This is correct regardless of common->capture_last_ptr. */
1124 capture_last_found = TRUE;
1125 }
1126
1127 cc = next_opcode(common, cc);
1128 SLJIT_ASSERT(cc != NULL);
1129 while (cc < ccend)
1130 switch(*cc)
1131 {
1132 case OP_SET_SOM:
1133 SLJIT_ASSERT(common->has_set_som);
1134 stack_restore = TRUE;
1135 if (!setsom_found)
1136 {
1137 length += 2;
1138 setsom_found = TRUE;
1139 }
1140 cc += 1;
1141 break;
1142
1143 case OP_MARK:
1144 SLJIT_ASSERT(common->mark_ptr != 0);
1145 stack_restore = TRUE;
1146 if (!setmark_found)
1147 {
1148 length += 2;
1149 setmark_found = TRUE;
1150 }
1151 cc += 1 + 2 + cc[1];
1152 break;
1153
1154 case OP_RECURSE:
1155 stack_restore = TRUE;
1156 if (common->has_set_som && !setsom_found)
1157 {
1158 length += 2;
1159 setsom_found = TRUE;
1160 }
1161 if (common->mark_ptr != 0 && !setmark_found)
1162 {
1163 length += 2;
1164 setmark_found = TRUE;
1165 }
1166 if (common->capture_last_ptr != 0 && !capture_last_found)
1167 {
1168 length += 2;
1169 capture_last_found = TRUE;
1170 }
1171 cc += 1 + LINK_SIZE;
1172 break;
1173
1174 case OP_CBRA:
1175 case OP_CBRAPOS:
1176 case OP_SCBRA:
1177 case OP_SCBRAPOS:
1178 stack_restore = TRUE;
1179 if (common->capture_last_ptr != 0 && !capture_last_found)
1180 {
1181 length += 2;
1182 capture_last_found = TRUE;
1183 }
1184 length += 3;
1185 cc += 1 + LINK_SIZE + IMM2_SIZE;
1186 break;
1187
1188 default:
1189 stack_restore = TRUE;
1190 /* Fall through. */
1191
1192 case OP_NOT_WORD_BOUNDARY:
1193 case OP_WORD_BOUNDARY:
1194 case OP_NOT_DIGIT:
1195 case OP_DIGIT:
1196 case OP_NOT_WHITESPACE:
1197 case OP_WHITESPACE:
1198 case OP_NOT_WORDCHAR:
1199 case OP_WORDCHAR:
1200 case OP_ANY:
1201 case OP_ALLANY:
1202 case OP_ANYBYTE:
1203 case OP_NOTPROP:
1204 case OP_PROP:
1205 case OP_ANYNL:
1206 case OP_NOT_HSPACE:
1207 case OP_HSPACE:
1208 case OP_NOT_VSPACE:
1209 case OP_VSPACE:
1210 case OP_EXTUNI:
1211 case OP_EODN:
1212 case OP_EOD:
1213 case OP_CIRC:
1214 case OP_CIRCM:
1215 case OP_DOLL:
1216 case OP_DOLLM:
1217 case OP_CHAR:
1218 case OP_CHARI:
1219 case OP_NOT:
1220 case OP_NOTI:
1221
1222 case OP_EXACT:
1223 case OP_POSSTAR:
1224 case OP_POSPLUS:
1225 case OP_POSQUERY:
1226 case OP_POSUPTO:
1227
1228 case OP_EXACTI:
1229 case OP_POSSTARI:
1230 case OP_POSPLUSI:
1231 case OP_POSQUERYI:
1232 case OP_POSUPTOI:
1233
1234 case OP_NOTEXACT:
1235 case OP_NOTPOSSTAR:
1236 case OP_NOTPOSPLUS:
1237 case OP_NOTPOSQUERY:
1238 case OP_NOTPOSUPTO:
1239
1240 case OP_NOTEXACTI:
1241 case OP_NOTPOSSTARI:
1242 case OP_NOTPOSPLUSI:
1243 case OP_NOTPOSQUERYI:
1244 case OP_NOTPOSUPTOI:
1245
1246 case OP_TYPEEXACT:
1247 case OP_TYPEPOSSTAR:
1248 case OP_TYPEPOSPLUS:
1249 case OP_TYPEPOSQUERY:
1250 case OP_TYPEPOSUPTO:
1251
1252 case OP_CLASS:
1253 case OP_NCLASS:
1254 case OP_XCLASS:
1255
1256 cc = next_opcode(common, cc);
1257 SLJIT_ASSERT(cc != NULL);
1258 break;
1259 }
1260
1261 /* Possessive quantifiers can use a special case. */
1262 if (SLJIT_UNLIKELY(possessive == length))
1263 return stack_restore ? no_frame : no_stack;
1264
1265 if (length > 0)
1266 return length + 1;
1267 return stack_restore ? no_frame : no_stack;
1268 }
1269
1270 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1271 {
1272 DEFINE_COMPILER;
1273 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1274 BOOL setsom_found = recursive;
1275 BOOL setmark_found = recursive;
1276 /* The last capture is a local variable even for recursions. */
1277 BOOL capture_last_found = FALSE;
1278 int offset;
1279
1280 /* >= 1 + shortest item size (2) */
1281 SLJIT_UNUSED_ARG(stacktop);
1282 SLJIT_ASSERT(stackpos >= stacktop + 2);
1283
1284 stackpos = STACK(stackpos);
1285 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1286 cc = next_opcode(common, cc);
1287 SLJIT_ASSERT(cc != NULL);
1288 while (cc < ccend)
1289 switch(*cc)
1290 {
1291 case OP_SET_SOM:
1292 SLJIT_ASSERT(common->has_set_som);
1293 if (!setsom_found)
1294 {
1295 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1296 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1297 stackpos += (int)sizeof(sljit_sw);
1298 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1299 stackpos += (int)sizeof(sljit_sw);
1300 setsom_found = TRUE;
1301 }
1302 cc += 1;
1303 break;
1304
1305 case OP_MARK:
1306 SLJIT_ASSERT(common->mark_ptr != 0);
1307 if (!setmark_found)
1308 {
1309 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1310 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1311 stackpos += (int)sizeof(sljit_sw);
1312 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1313 stackpos += (int)sizeof(sljit_sw);
1314 setmark_found = TRUE;
1315 }
1316 cc += 1 + 2 + cc[1];
1317 break;
1318
1319 case OP_RECURSE:
1320 if (common->has_set_som && !setsom_found)
1321 {
1322 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1323 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1324 stackpos += (int)sizeof(sljit_sw);
1325 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1326 stackpos += (int)sizeof(sljit_sw);
1327 setsom_found = TRUE;
1328 }
1329 if (common->mark_ptr != 0 && !setmark_found)
1330 {
1331 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1332 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1333 stackpos += (int)sizeof(sljit_sw);
1334 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1335 stackpos += (int)sizeof(sljit_sw);
1336 setmark_found = TRUE;
1337 }
1338 if (common->capture_last_ptr != 0 && !capture_last_found)
1339 {
1340 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1341 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1342 stackpos += (int)sizeof(sljit_sw);
1343 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1344 stackpos += (int)sizeof(sljit_sw);
1345 capture_last_found = TRUE;
1346 }
1347 cc += 1 + LINK_SIZE;
1348 break;
1349
1350 case OP_CBRA:
1351 case OP_CBRAPOS:
1352 case OP_SCBRA:
1353 case OP_SCBRAPOS:
1354 if (common->capture_last_ptr != 0 && !capture_last_found)
1355 {
1356 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1357 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1358 stackpos += (int)sizeof(sljit_sw);
1359 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1360 stackpos += (int)sizeof(sljit_sw);
1361 capture_last_found = TRUE;
1362 }
1363 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1364 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1365 stackpos += (int)sizeof(sljit_sw);
1366 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1367 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1368 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1369 stackpos += (int)sizeof(sljit_sw);
1370 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1371 stackpos += (int)sizeof(sljit_sw);
1372
1373 cc += 1 + LINK_SIZE + IMM2_SIZE;
1374 break;
1375
1376 default:
1377 cc = next_opcode(common, cc);
1378 SLJIT_ASSERT(cc != NULL);
1379 break;
1380 }
1381
1382 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1383 SLJIT_ASSERT(stackpos == STACK(stacktop));
1384 }
1385
1386 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1387 {
1388 int private_data_length = 2;
1389 int size;
1390 pcre_uchar *alternative;
1391 /* Calculate the sum of the private machine words. */
1392 while (cc < ccend)
1393 {
1394 size = 0;
1395 switch(*cc)
1396 {
1397 case OP_ASSERT:
1398 case OP_ASSERT_NOT:
1399 case OP_ASSERTBACK:
1400 case OP_ASSERTBACK_NOT:
1401 case OP_ONCE:
1402 case OP_ONCE_NC:
1403 case OP_BRAPOS:
1404 case OP_SBRA:
1405 case OP_SBRAPOS:
1406 case OP_SCOND:
1407 private_data_length++;
1408 cc += 1 + LINK_SIZE;
1409 break;
1410
1411 case OP_CBRA:
1412 case OP_SCBRA:
1413 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1414 private_data_length++;
1415 cc += 1 + LINK_SIZE + IMM2_SIZE;
1416 break;
1417
1418 case OP_CBRAPOS:
1419 case OP_SCBRAPOS:
1420 private_data_length += 2;
1421 cc += 1 + LINK_SIZE + IMM2_SIZE;
1422 break;
1423
1424 case OP_COND:
1425 /* Might be a hidden SCOND. */
1426 alternative = cc + GET(cc, 1);
1427 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1428 private_data_length++;
1429 cc += 1 + LINK_SIZE;
1430 break;
1431
1432 CASE_ITERATOR_PRIVATE_DATA_1
1433 if (PRIVATE_DATA(cc))
1434 private_data_length++;
1435 cc += 2;
1436 #ifdef SUPPORT_UTF
1437 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1438 #endif
1439 break;
1440
1441 CASE_ITERATOR_PRIVATE_DATA_2A
1442 if (PRIVATE_DATA(cc))
1443 private_data_length += 2;
1444 cc += 2;
1445 #ifdef SUPPORT_UTF
1446 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1447 #endif
1448 break;
1449
1450 CASE_ITERATOR_PRIVATE_DATA_2B
1451 if (PRIVATE_DATA(cc))
1452 private_data_length += 2;
1453 cc += 2 + IMM2_SIZE;
1454 #ifdef SUPPORT_UTF
1455 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1456 #endif
1457 break;
1458
1459 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1460 if (PRIVATE_DATA(cc))
1461 private_data_length++;
1462 cc += 1;
1463 break;
1464
1465 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1466 if (PRIVATE_DATA(cc))
1467 private_data_length += 2;
1468 cc += 1;
1469 break;
1470
1471 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1472 if (PRIVATE_DATA(cc))
1473 private_data_length += 2;
1474 cc += 1 + IMM2_SIZE;
1475 break;
1476
1477 case OP_CLASS:
1478 case OP_NCLASS:
1479 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1480 case OP_XCLASS:
1481 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1482 #else
1483 size = 1 + 32 / (int)sizeof(pcre_uchar);
1484 #endif
1485 if (PRIVATE_DATA(cc))
1486 private_data_length += get_class_iterator_size(cc + size);
1487 cc += size;
1488 break;
1489
1490 default:
1491 cc = next_opcode(common, cc);
1492 SLJIT_ASSERT(cc != NULL);
1493 break;
1494 }
1495 }
1496 SLJIT_ASSERT(cc == ccend);
1497 return private_data_length;
1498 }
1499
1500 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1501 BOOL save, int stackptr, int stacktop)
1502 {
1503 DEFINE_COMPILER;
1504 int srcw[2];
1505 int count, size;
1506 BOOL tmp1next = TRUE;
1507 BOOL tmp1empty = TRUE;
1508 BOOL tmp2empty = TRUE;
1509 pcre_uchar *alternative;
1510 enum {
1511 start,
1512 loop,
1513 end
1514 } status;
1515
1516 status = save ? start : loop;
1517 stackptr = STACK(stackptr - 2);
1518 stacktop = STACK(stacktop - 1);
1519
1520 if (!save)
1521 {
1522 stackptr += sizeof(sljit_sw);
1523 if (stackptr < stacktop)
1524 {
1525 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1526 stackptr += sizeof(sljit_sw);
1527 tmp1empty = FALSE;
1528 }
1529 if (stackptr < stacktop)
1530 {
1531 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1532 stackptr += sizeof(sljit_sw);
1533 tmp2empty = FALSE;
1534 }
1535 /* The tmp1next must be TRUE in either way. */
1536 }
1537
1538 while (status != end)
1539 {
1540 count = 0;
1541 switch(status)
1542 {
1543 case start:
1544 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1545 count = 1;
1546 srcw[0] = common->recursive_head_ptr;
1547 status = loop;
1548 break;
1549
1550 case loop:
1551 if (cc >= ccend)
1552 {
1553 status = end;
1554 break;
1555 }
1556
1557 switch(*cc)
1558 {
1559 case OP_ASSERT:
1560 case OP_ASSERT_NOT:
1561 case OP_ASSERTBACK:
1562 case OP_ASSERTBACK_NOT:
1563 case OP_ONCE:
1564 case OP_ONCE_NC:
1565 case OP_BRAPOS:
1566 case OP_SBRA:
1567 case OP_SBRAPOS:
1568 case OP_SCOND:
1569 count = 1;
1570 srcw[0] = PRIVATE_DATA(cc);
1571 SLJIT_ASSERT(srcw[0] != 0);
1572 cc += 1 + LINK_SIZE;
1573 break;
1574
1575 case OP_CBRA:
1576 case OP_SCBRA:
1577 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1578 {
1579 count = 1;
1580 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1581 }
1582 cc += 1 + LINK_SIZE + IMM2_SIZE;
1583 break;
1584
1585 case OP_CBRAPOS:
1586 case OP_SCBRAPOS:
1587 count = 2;
1588 srcw[0] = PRIVATE_DATA(cc);
1589 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1590 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1591 cc += 1 + LINK_SIZE + IMM2_SIZE;
1592 break;
1593
1594 case OP_COND:
1595 /* Might be a hidden SCOND. */
1596 alternative = cc + GET(cc, 1);
1597 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1598 {
1599 count = 1;
1600 srcw[0] = PRIVATE_DATA(cc);
1601 SLJIT_ASSERT(srcw[0] != 0);
1602 }
1603 cc += 1 + LINK_SIZE;
1604 break;
1605
1606 CASE_ITERATOR_PRIVATE_DATA_1
1607 if (PRIVATE_DATA(cc))
1608 {
1609 count = 1;
1610 srcw[0] = PRIVATE_DATA(cc);
1611 }
1612 cc += 2;
1613 #ifdef SUPPORT_UTF
1614 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1615 #endif
1616 break;
1617
1618 CASE_ITERATOR_PRIVATE_DATA_2A
1619 if (PRIVATE_DATA(cc))
1620 {
1621 count = 2;
1622 srcw[0] = PRIVATE_DATA(cc);
1623 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1624 }
1625 cc += 2;
1626 #ifdef SUPPORT_UTF
1627 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1628 #endif
1629 break;
1630
1631 CASE_ITERATOR_PRIVATE_DATA_2B
1632 if (PRIVATE_DATA(cc))
1633 {
1634 count = 2;
1635 srcw[0] = PRIVATE_DATA(cc);
1636 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1637 }
1638 cc += 2 + IMM2_SIZE;
1639 #ifdef SUPPORT_UTF
1640 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1641 #endif
1642 break;
1643
1644 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1645 if (PRIVATE_DATA(cc))
1646 {
1647 count = 1;
1648 srcw[0] = PRIVATE_DATA(cc);
1649 }
1650 cc += 1;
1651 break;
1652
1653 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1654 if (PRIVATE_DATA(cc))
1655 {
1656 count = 2;
1657 srcw[0] = PRIVATE_DATA(cc);
1658 srcw[1] = srcw[0] + sizeof(sljit_sw);
1659 }
1660 cc += 1;
1661 break;
1662
1663 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1664 if (PRIVATE_DATA(cc))
1665 {
1666 count = 2;
1667 srcw[0] = PRIVATE_DATA(cc);
1668 srcw[1] = srcw[0] + sizeof(sljit_sw);
1669 }
1670 cc += 1 + IMM2_SIZE;
1671 break;
1672
1673 case OP_CLASS:
1674 case OP_NCLASS:
1675 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1676 case OP_XCLASS:
1677 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1678 #else
1679 size = 1 + 32 / (int)sizeof(pcre_uchar);
1680 #endif
1681 if (PRIVATE_DATA(cc))
1682 switch(get_class_iterator_size(cc + size))
1683 {
1684 case 1:
1685 count = 1;
1686 srcw[0] = PRIVATE_DATA(cc);
1687 break;
1688
1689 case 2:
1690 count = 2;
1691 srcw[0] = PRIVATE_DATA(cc);
1692 srcw[1] = srcw[0] + sizeof(sljit_sw);
1693 break;
1694
1695 default:
1696 SLJIT_ASSERT_STOP();
1697 break;
1698 }
1699 cc += size;
1700 break;
1701
1702 default:
1703 cc = next_opcode(common, cc);
1704 SLJIT_ASSERT(cc != NULL);
1705 break;
1706 }
1707 break;
1708
1709 case end:
1710 SLJIT_ASSERT_STOP();
1711 break;
1712 }
1713
1714 while (count > 0)
1715 {
1716 count--;
1717 if (save)
1718 {
1719 if (tmp1next)
1720 {
1721 if (!tmp1empty)
1722 {
1723 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1724 stackptr += sizeof(sljit_sw);
1725 }
1726 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1727 tmp1empty = FALSE;
1728 tmp1next = FALSE;
1729 }
1730 else
1731 {
1732 if (!tmp2empty)
1733 {
1734 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1735 stackptr += sizeof(sljit_sw);
1736 }
1737 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1738 tmp2empty = FALSE;
1739 tmp1next = TRUE;
1740 }
1741 }
1742 else
1743 {
1744 if (tmp1next)
1745 {
1746 SLJIT_ASSERT(!tmp1empty);
1747 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1748 tmp1empty = stackptr >= stacktop;
1749 if (!tmp1empty)
1750 {
1751 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1752 stackptr += sizeof(sljit_sw);
1753 }
1754 tmp1next = FALSE;
1755 }
1756 else
1757 {
1758 SLJIT_ASSERT(!tmp2empty);
1759 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1760 tmp2empty = stackptr >= stacktop;
1761 if (!tmp2empty)
1762 {
1763 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1764 stackptr += sizeof(sljit_sw);
1765 }
1766 tmp1next = TRUE;
1767 }
1768 }
1769 }
1770 }
1771
1772 if (save)
1773 {
1774 if (tmp1next)
1775 {
1776 if (!tmp1empty)
1777 {
1778 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1779 stackptr += sizeof(sljit_sw);
1780 }
1781 if (!tmp2empty)
1782 {
1783 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1784 stackptr += sizeof(sljit_sw);
1785 }
1786 }
1787 else
1788 {
1789 if (!tmp2empty)
1790 {
1791 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1792 stackptr += sizeof(sljit_sw);
1793 }
1794 if (!tmp1empty)
1795 {
1796 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1797 stackptr += sizeof(sljit_sw);
1798 }
1799 }
1800 }
1801 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1802 }
1803
1804 #undef CASE_ITERATOR_PRIVATE_DATA_1
1805 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1806 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1807 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1808 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1809 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1810
1811 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1812 {
1813 return (value & (value - 1)) == 0;
1814 }
1815
1816 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1817 {
1818 while (list)
1819 {
1820 /* sljit_set_label is clever enough to do nothing
1821 if either the jump or the label is NULL. */
1822 SET_LABEL(list->jump, label);
1823 list = list->next;
1824 }
1825 }
1826
1827 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1828 {
1829 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1830 if (list_item)
1831 {
1832 list_item->next = *list;
1833 list_item->jump = jump;
1834 *list = list_item;
1835 }
1836 }
1837
1838 static void add_stub(compiler_common *common, struct sljit_jump *start)
1839 {
1840 DEFINE_COMPILER;
1841 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1842
1843 if (list_item)
1844 {
1845 list_item->start = start;
1846 list_item->quit = LABEL();
1847 list_item->next = common->stubs;
1848 common->stubs = list_item;
1849 }
1850 }
1851
1852 static void flush_stubs(compiler_common *common)
1853 {
1854 DEFINE_COMPILER;
1855 stub_list* list_item = common->stubs;
1856
1857 while (list_item)
1858 {
1859 JUMPHERE(list_item->start);
1860 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1861 JUMPTO(SLJIT_JUMP, list_item->quit);
1862 list_item = list_item->next;
1863 }
1864 common->stubs = NULL;
1865 }
1866
1867 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1868 {
1869 DEFINE_COMPILER;
1870
1871 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1872 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1873 }
1874
1875 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1876 {
1877 /* May destroy all locals and registers except TMP2. */
1878 DEFINE_COMPILER;
1879
1880 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1881 #ifdef DESTROY_REGISTERS
1882 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1883 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1884 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1885 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1886 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1887 #endif
1888 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1889 }
1890
1891 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1892 {
1893 DEFINE_COMPILER;
1894 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1895 }
1896
1897 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1898 {
1899 DEFINE_COMPILER;
1900 struct sljit_label *loop;
1901 int i;
1902 /* At this point we can freely use all temporary registers. */
1903 /* TMP1 returns with begin - 1. */
1904 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1905 if (length < 8)
1906 {
1907 for (i = 0; i < length; i++)
1908 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1909 }
1910 else
1911 {
1912 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START - sizeof(sljit_sw));
1913 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length);
1914 loop = LABEL();
1915 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1916 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1917 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1918 }
1919 }
1920
1921 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1922 {
1923 DEFINE_COMPILER;
1924 struct sljit_label *loop;
1925 struct sljit_jump *early_quit;
1926
1927 /* At this point we can freely use all registers. */
1928 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1929 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1930
1931 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
1932 if (common->mark_ptr != 0)
1933 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1934 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
1935 if (common->mark_ptr != 0)
1936 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
1937 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1938 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1939 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1940 /* Unlikely, but possible */
1941 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
1942 loop = LABEL();
1943 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
1944 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
1945 /* Copy the integer value to the output buffer */
1946 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1947 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1948 #endif
1949 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1950 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1951 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1952 JUMPHERE(early_quit);
1953
1954 /* Calculate the return value, which is the maximum ovector value. */
1955 if (topbracket > 1)
1956 {
1957 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
1958 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
1959
1960 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1961 loop = LABEL();
1962 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
1963 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1964 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1965 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
1966 }
1967 else
1968 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1969 }
1970
1971 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1972 {
1973 DEFINE_COMPILER;
1974
1975 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1976 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1977
1978 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
1979 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1980 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offset_count));
1981 CMPTO(SLJIT_C_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
1982
1983 /* Store match begin and end. */
1984 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1985 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1986 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1987 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1988 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1989 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1990 #endif
1991 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1992
1993 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
1994 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1995 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
1996 #endif
1997 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
1998
1999 JUMPTO(SLJIT_JUMP, quit);
2000 }
2001
2002 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2003 {
2004 /* May destroy TMP1. */
2005 DEFINE_COMPILER;
2006 struct sljit_jump *jump;
2007
2008 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2009 {
2010 /* The value of -1 must be kept for start_used_ptr! */
2011 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2012 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2013 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2014 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2015 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2016 JUMPHERE(jump);
2017 }
2018 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2019 {
2020 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2021 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2022 JUMPHERE(jump);
2023 }
2024 }
2025
2026 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2027 {
2028 /* Detects if the character has an othercase. */
2029 unsigned int c;
2030
2031 #ifdef SUPPORT_UTF
2032 if (common->utf)
2033 {
2034 GETCHAR(c, cc);
2035 if (c > 127)
2036 {
2037 #ifdef SUPPORT_UCP
2038 return c != UCD_OTHERCASE(c);
2039 #else
2040 return FALSE;
2041 #endif
2042 }
2043 #ifndef COMPILE_PCRE8
2044 return common->fcc[c] != c;
2045 #endif
2046 }
2047 else
2048 #endif
2049 c = *cc;
2050 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2051 }
2052
2053 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2054 {
2055 /* Returns with the othercase. */
2056 #ifdef SUPPORT_UTF
2057 if (common->utf && c > 127)
2058 {
2059 #ifdef SUPPORT_UCP
2060 return UCD_OTHERCASE(c);
2061 #else
2062 return c;
2063 #endif
2064 }
2065 #endif
2066 return TABLE_GET(c, common->fcc, c);
2067 }
2068
2069 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2070 {
2071 /* Detects if the character and its othercase has only 1 bit difference. */
2072 unsigned int c, oc, bit;
2073 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2074 int n;
2075 #endif
2076
2077 #ifdef SUPPORT_UTF
2078 if (common->utf)
2079 {
2080 GETCHAR(c, cc);
2081 if (c <= 127)
2082 oc = common->fcc[c];
2083 else
2084 {
2085 #ifdef SUPPORT_UCP
2086 oc = UCD_OTHERCASE(c);
2087 #else
2088 oc = c;
2089 #endif
2090 }
2091 }
2092 else
2093 {
2094 c = *cc;
2095 oc = TABLE_GET(c, common->fcc, c);
2096 }
2097 #else
2098 c = *cc;
2099 oc = TABLE_GET(c, common->fcc, c);
2100 #endif
2101
2102 SLJIT_ASSERT(c != oc);
2103
2104 bit = c ^ oc;
2105 /* Optimized for English alphabet. */
2106 if (c <= 127 && bit == 0x20)
2107 return (0 << 8) | 0x20;
2108
2109 /* Since c != oc, they must have at least 1 bit difference. */
2110 if (!is_powerof2(bit))
2111 return 0;
2112
2113 #if defined COMPILE_PCRE8
2114
2115 #ifdef SUPPORT_UTF
2116 if (common->utf && c > 127)
2117 {
2118 n = GET_EXTRALEN(*cc);
2119 while ((bit & 0x3f) == 0)
2120 {
2121 n--;
2122 bit >>= 6;
2123 }
2124 return (n << 8) | bit;
2125 }
2126 #endif /* SUPPORT_UTF */
2127 return (0 << 8) | bit;
2128
2129 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2130
2131 #ifdef SUPPORT_UTF
2132 if (common->utf && c > 65535)
2133 {
2134 if (bit >= (1 << 10))
2135 bit >>= 10;
2136 else
2137 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2138 }
2139 #endif /* SUPPORT_UTF */
2140 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2141
2142 #endif /* COMPILE_PCRE[8|16|32] */
2143 }
2144
2145 static void check_partial(compiler_common *common, BOOL force)
2146 {
2147 /* Checks whether a partial matching is occured. Does not modify registers. */
2148 DEFINE_COMPILER;
2149 struct sljit_jump *jump = NULL;
2150
2151 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2152
2153 if (common->mode == JIT_COMPILE)
2154 return;
2155
2156 if (!force)
2157 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2158 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2159 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2160
2161 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2162 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2163 else
2164 {
2165 if (common->partialmatchlabel != NULL)
2166 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2167 else
2168 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2169 }
2170
2171 if (jump != NULL)
2172 JUMPHERE(jump);
2173 }
2174
2175 static struct sljit_jump *check_str_end(compiler_common *common)
2176 {
2177 /* Does not affect registers. Usually used in a tight spot. */
2178 DEFINE_COMPILER;
2179 struct sljit_jump *jump;
2180 struct sljit_jump *nohit;
2181 struct sljit_jump *return_value;
2182
2183 if (common->mode == JIT_COMPILE)
2184 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2185
2186 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2187 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2188 {
2189 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2191 JUMPHERE(nohit);
2192 return_value = JUMP(SLJIT_JUMP);
2193 }
2194 else
2195 {
2196 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2197 if (common->partialmatchlabel != NULL)
2198 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2199 else
2200 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2201 }
2202 JUMPHERE(jump);
2203 return return_value;
2204 }
2205
2206 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2207 {
2208 DEFINE_COMPILER;
2209 struct sljit_jump *jump;
2210
2211 if (common->mode == JIT_COMPILE)
2212 {
2213 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2214 return;
2215 }
2216
2217 /* Partial matching mode. */
2218 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2219 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2220 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2221 {
2222 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2223 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2224 }
2225 else
2226 {
2227 if (common->partialmatchlabel != NULL)
2228 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2229 else
2230 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2231 }
2232 JUMPHERE(jump);
2233 }
2234
2235 static void read_char(compiler_common *common)
2236 {
2237 /* Reads the character into TMP1, updates STR_PTR.
2238 Does not check STR_END. TMP2 Destroyed. */
2239 DEFINE_COMPILER;
2240 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2241 struct sljit_jump *jump;
2242 #endif
2243
2244 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2245 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2246 if (common->utf)
2247 {
2248 #if defined COMPILE_PCRE8
2249 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2250 #elif defined COMPILE_PCRE16
2251 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2252 #endif /* COMPILE_PCRE[8|16] */
2253 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2254 JUMPHERE(jump);
2255 }
2256 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2257 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2258 }
2259
2260 static void peek_char(compiler_common *common)
2261 {
2262 /* Reads the character into TMP1, keeps STR_PTR.
2263 Does not check STR_END. TMP2 Destroyed. */
2264 DEFINE_COMPILER;
2265 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2266 struct sljit_jump *jump;
2267 #endif
2268
2269 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2270 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2271 if (common->utf)
2272 {
2273 #if defined COMPILE_PCRE8
2274 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2275 #elif defined COMPILE_PCRE16
2276 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2277 #endif /* COMPILE_PCRE[8|16] */
2278 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2279 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2280 JUMPHERE(jump);
2281 }
2282 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2283 }
2284
2285 static void read_char8_type(compiler_common *common)
2286 {
2287 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2288 DEFINE_COMPILER;
2289 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2290 struct sljit_jump *jump;
2291 #endif
2292
2293 #ifdef SUPPORT_UTF
2294 if (common->utf)
2295 {
2296 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2297 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2298 #if defined COMPILE_PCRE8
2299 /* This can be an extra read in some situations, but hopefully
2300 it is needed in most cases. */
2301 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2302 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2303 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2304 JUMPHERE(jump);
2305 #elif defined COMPILE_PCRE16
2306 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2307 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2308 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2309 JUMPHERE(jump);
2310 /* Skip low surrogate if necessary. */
2311 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2312 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2313 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2314 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2315 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2316 #elif defined COMPILE_PCRE32
2317 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2318 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2319 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2320 JUMPHERE(jump);
2321 #endif /* COMPILE_PCRE[8|16|32] */
2322 return;
2323 }
2324 #endif /* SUPPORT_UTF */
2325 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2326 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2327 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2328 /* The ctypes array contains only 256 values. */
2329 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2330 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2331 #endif
2332 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2333 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2334 JUMPHERE(jump);
2335 #endif
2336 }
2337
2338 static void skip_char_back(compiler_common *common)
2339 {
2340 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2341 DEFINE_COMPILER;
2342 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2343 #if defined COMPILE_PCRE8
2344 struct sljit_label *label;
2345
2346 if (common->utf)
2347 {
2348 label = LABEL();
2349 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2350 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2351 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2352 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2353 return;
2354 }
2355 #elif defined COMPILE_PCRE16
2356 if (common->utf)
2357 {
2358 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2359 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2360 /* Skip low surrogate if necessary. */
2361 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2362 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2363 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2364 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2365 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2366 return;
2367 }
2368 #endif /* COMPILE_PCRE[8|16] */
2369 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2370 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2371 }
2372
2373 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2374 {
2375 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2376 DEFINE_COMPILER;
2377
2378 if (nltype == NLTYPE_ANY)
2379 {
2380 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2381 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2382 }
2383 else if (nltype == NLTYPE_ANYCRLF)
2384 {
2385 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2386 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2387 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2388 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2389 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2390 }
2391 else
2392 {
2393 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2394 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2395 }
2396 }
2397
2398 #ifdef SUPPORT_UTF
2399
2400 #if defined COMPILE_PCRE8
2401 static void do_utfreadchar(compiler_common *common)
2402 {
2403 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2404 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2405 DEFINE_COMPILER;
2406 struct sljit_jump *jump;
2407
2408 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2409 /* Searching for the first zero. */
2410 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2411 jump = JUMP(SLJIT_C_NOT_ZERO);
2412 /* Two byte sequence. */
2413 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2414 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2415 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2416 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2417 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2418 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2419 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2420 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2421 JUMPHERE(jump);
2422
2423 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2424 jump = JUMP(SLJIT_C_NOT_ZERO);
2425 /* Three byte sequence. */
2426 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2427 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2428 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2429 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2430 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2431 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2432 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2433 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2434 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2435 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2436 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2437 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2438 JUMPHERE(jump);
2439
2440 /* Four byte sequence. */
2441 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2442 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2443 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2444 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2445 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2446 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2447 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2448 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2449 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2450 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2451 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2452 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2453 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2454 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2455 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2456 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2457 }
2458
2459 static void do_utfreadtype8(compiler_common *common)
2460 {
2461 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2462 of the character (>= 0xc0). Return value in TMP1. */
2463 DEFINE_COMPILER;
2464 struct sljit_jump *jump;
2465 struct sljit_jump *compare;
2466
2467 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2468
2469 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2470 jump = JUMP(SLJIT_C_NOT_ZERO);
2471 /* Two byte sequence. */
2472 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2473 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2474 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2475 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2476 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2477 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2478 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2479 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2480 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2481
2482 JUMPHERE(compare);
2483 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2484 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2485 JUMPHERE(jump);
2486
2487 /* We only have types for characters less than 256. */
2488 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2489 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2490 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2491 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2492 }
2493
2494 #elif defined COMPILE_PCRE16
2495
2496 static void do_utfreadchar(compiler_common *common)
2497 {
2498 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2499 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2500 DEFINE_COMPILER;
2501 struct sljit_jump *jump;
2502
2503 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2504 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2505 /* Do nothing, only return. */
2506 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2507
2508 JUMPHERE(jump);
2509 /* Combine two 16 bit characters. */
2510 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2511 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2512 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2513 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2514 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2515 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2516 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2517 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2518 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2519 }
2520
2521 #endif /* COMPILE_PCRE[8|16] */
2522
2523 #endif /* SUPPORT_UTF */
2524
2525 #ifdef SUPPORT_UCP
2526
2527 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2528 #define UCD_BLOCK_MASK 127
2529 #define UCD_BLOCK_SHIFT 7
2530
2531 static void do_getucd(compiler_common *common)
2532 {
2533 /* Search the UCD record for the character comes in TMP1.
2534 Returns chartype in TMP1 and UCD offset in TMP2. */
2535 DEFINE_COMPILER;
2536
2537 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2538
2539 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2540 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2541 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2542 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2543 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2544 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2545 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2546 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2547 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2548 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2549 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2550 }
2551 #endif
2552
2553 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2554 {
2555 DEFINE_COMPILER;
2556 struct sljit_label *mainloop;
2557 struct sljit_label *newlinelabel = NULL;
2558 struct sljit_jump *start;
2559 struct sljit_jump *end = NULL;
2560 struct sljit_jump *nl = NULL;
2561 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2562 struct sljit_jump *singlechar;
2563 #endif
2564 jump_list *newline = NULL;
2565 BOOL newlinecheck = FALSE;
2566 BOOL readuchar = FALSE;
2567
2568 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2569 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2570 newlinecheck = TRUE;
2571
2572 if (firstline)
2573 {
2574 /* Search for the end of the first line. */
2575 SLJIT_ASSERT(common->first_line_end != 0);
2576 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2577
2578 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2579 {
2580 mainloop = LABEL();
2581 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2582 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2583 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2584 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2585 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2586 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2587 JUMPHERE(end);
2588 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2589 }
2590 else
2591 {
2592 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2593 mainloop = LABEL();
2594 /* Continual stores does not cause data dependency. */
2595 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2596 read_char(common);
2597 check_newlinechar(common, common->nltype, &newline, TRUE);
2598 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2599 JUMPHERE(end);
2600 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2601 set_jumps(newline, LABEL());
2602 }
2603
2604 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2605 }
2606
2607 start = JUMP(SLJIT_JUMP);
2608
2609 if (newlinecheck)
2610 {
2611 newlinelabel = LABEL();
2612 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2613 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2614 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2615 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2616 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2617 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2618 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2619 #endif
2620 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2621 nl = JUMP(SLJIT_JUMP);
2622 }
2623
2624 mainloop = LABEL();
2625
2626 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2627 #ifdef SUPPORT_UTF
2628 if (common->utf) readuchar = TRUE;
2629 #endif
2630 if (newlinecheck) readuchar = TRUE;
2631
2632 if (readuchar)
2633 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2634
2635 if (newlinecheck)
2636 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2637
2638 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2639 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2640 #if defined COMPILE_PCRE8
2641 if (common->utf)
2642 {
2643 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2644 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2645 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2646 JUMPHERE(singlechar);
2647 }
2648 #elif defined COMPILE_PCRE16
2649 if (common->utf)
2650 {
2651 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2652 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2653 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2654 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2655 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2656 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2657 JUMPHERE(singlechar);
2658 }
2659 #endif /* COMPILE_PCRE[8|16] */
2660 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2661 JUMPHERE(start);
2662
2663 if (newlinecheck)
2664 {
2665 JUMPHERE(end);
2666 JUMPHERE(nl);
2667 }
2668
2669 return mainloop;
2670 }
2671
2672 #define MAX_N_CHARS 3
2673
2674 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2675 {
2676 DEFINE_COMPILER;
2677 struct sljit_label *start;
2678 struct sljit_jump *quit;
2679 pcre_uint32 chars[MAX_N_CHARS * 2];
2680 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2681 int location = 0;
2682 pcre_int32 len, c, bit, caseless;
2683 int must_stop;
2684
2685 /* We do not support alternatives now. */
2686 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2687 return FALSE;
2688
2689 while (TRUE)
2690 {
2691 caseless = 0;
2692 must_stop = 1;
2693 switch(*cc)
2694 {
2695 case OP_CHAR:
2696 must_stop = 0;
2697 cc++;
2698 break;
2699
2700 case OP_CHARI:
2701 caseless = 1;
2702 must_stop = 0;
2703 cc++;
2704 break;
2705
2706 case OP_SOD:
2707 case OP_SOM:
2708 case OP_SET_SOM:
2709 case OP_NOT_WORD_BOUNDARY:
2710 case OP_WORD_BOUNDARY:
2711 case OP_EODN:
2712 case OP_EOD:
2713 case OP_CIRC:
2714 case OP_CIRCM:
2715 case OP_DOLL:
2716 case OP_DOLLM:
2717 /* Zero width assertions. */
2718 cc++;
2719 continue;
2720
2721 case OP_PLUS:
2722 case OP_MINPLUS:
2723 case OP_POSPLUS:
2724 cc++;
2725 break;
2726
2727 case OP_EXACT:
2728 cc += 1 + IMM2_SIZE;
2729 break;
2730
2731 case OP_PLUSI:
2732 case OP_MINPLUSI:
2733 case OP_POSPLUSI:
2734 caseless = 1;
2735 cc++;
2736 break;
2737
2738 case OP_EXACTI:
2739 caseless = 1;
2740 cc += 1 + IMM2_SIZE;
2741 break;
2742
2743 default:
2744 must_stop = 2;
2745 break;
2746 }
2747
2748 if (must_stop == 2)
2749 break;
2750
2751 len = 1;
2752 #ifdef SUPPORT_UTF
2753 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2754 #endif
2755
2756 if (caseless && char_has_othercase(common, cc))
2757 {
2758 caseless = char_get_othercase_bit(common, cc);
2759 if (caseless == 0)
2760 return FALSE;
2761 #ifdef COMPILE_PCRE8
2762 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2763 #else
2764 if ((caseless & 0x100) != 0)
2765 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2766 else
2767 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2768 #endif
2769 }
2770 else
2771 caseless = 0;
2772
2773 while (len > 0 && location < MAX_N_CHARS * 2)
2774 {
2775 c = *cc;
2776 bit = 0;
2777 if (len == (caseless & 0xff))
2778 {
2779 bit = caseless >> 8;
2780 c |= bit;
2781 }
2782
2783 chars[location] = c;
2784 chars[location + 1] = bit;
2785
2786 len--;
2787 location += 2;
2788 cc++;
2789 }
2790
2791 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2792 break;
2793 }
2794
2795 /* At least two characters are required. */
2796 if (location < 2 * 2)
2797 return FALSE;
2798
2799 if (firstline)
2800 {
2801 SLJIT_ASSERT(common->first_line_end != 0);
2802 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2803 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2804 }
2805 else
2806 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2807
2808 start = LABEL();
2809 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2810
2811 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2812 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2813 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2814 if (chars[1] != 0)
2815 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2816 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2817 if (location > 2 * 2)
2818 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2819 if (chars[3] != 0)
2820 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2821 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2822 if (location > 2 * 2)
2823 {
2824 if (chars[5] != 0)
2825 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2826 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2827 }
2828 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2829
2830 JUMPHERE(quit);
2831
2832 if (firstline)
2833 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2834 else
2835 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2836 return TRUE;
2837 }
2838
2839 #undef MAX_N_CHARS
2840
2841 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2842 {
2843 DEFINE_COMPILER;
2844 struct sljit_label *start;
2845 struct sljit_jump *quit;
2846 struct sljit_jump *found;
2847 pcre_uchar oc, bit;
2848
2849 if (firstline)
2850 {
2851 SLJIT_ASSERT(common->first_line_end != 0);
2852 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2853 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2854 }
2855
2856 start = LABEL();
2857 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2858 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2859
2860 oc = first_char;
2861 if (caseless)
2862 {
2863 oc = TABLE_GET(first_char, common->fcc, first_char);
2864 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2865 if (first_char > 127 && common->utf)
2866 oc = UCD_OTHERCASE(first_char);
2867 #endif
2868 }
2869 if (first_char == oc)
2870 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2871 else
2872 {
2873 bit = first_char ^ oc;
2874 if (is_powerof2(bit))
2875 {
2876 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2877 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2878 }
2879 else
2880 {
2881 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2882 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2883 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2884 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2885 found = JUMP(SLJIT_C_NOT_ZERO);
2886 }
2887 }
2888
2889 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2890 JUMPTO(SLJIT_JUMP, start);
2891 JUMPHERE(found);
2892 JUMPHERE(quit);
2893
2894 if (firstline)
2895 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2896 }
2897
2898 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2899 {
2900 DEFINE_COMPILER;
2901 struct sljit_label *loop;
2902 struct sljit_jump *lastchar;
2903 struct sljit_jump *firstchar;
2904 struct sljit_jump *quit;
2905 struct sljit_jump *foundcr = NULL;
2906 struct sljit_jump *notfoundnl;
2907 jump_list *newline = NULL;
2908
2909 if (firstline)
2910 {
2911 SLJIT_ASSERT(common->first_line_end != 0);
2912 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2913 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2914 }
2915
2916 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2917 {
2918 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2919 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2920 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2921 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2922 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2923
2924 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2925 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2926 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
2927 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2928 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
2929 #endif
2930 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2931
2932 loop = LABEL();
2933 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2934 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2935 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2936 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2937 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2938 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2939
2940 JUMPHERE(quit);
2941 JUMPHERE(firstchar);
2942 JUMPHERE(lastchar);
2943
2944 if (firstline)
2945 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2946 return;
2947 }
2948
2949 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2950 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2951 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2952 skip_char_back(common);
2953
2954 loop = LABEL();
2955 read_char(common);
2956 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2957 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2958 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2959 check_newlinechar(common, common->nltype, &newline, FALSE);
2960 set_jumps(newline, loop);
2961
2962 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2963 {
2964 quit = JUMP(SLJIT_JUMP);
2965 JUMPHERE(foundcr);
2966 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2967 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2968 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2969 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2970 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2971 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2972 #endif
2973 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2974 JUMPHERE(notfoundnl);
2975 JUMPHERE(quit);
2976 }
2977 JUMPHERE(lastchar);
2978 JUMPHERE(firstchar);
2979
2980 if (firstline)
2981 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2982 }
2983
2984 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
2985
2986 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2987 {
2988 DEFINE_COMPILER;
2989 struct sljit_label *start;
2990 struct sljit_jump *quit;
2991 struct sljit_jump *found = NULL;
2992 jump_list *matches = NULL;
2993 pcre_uint8 inverted_start_bits[32];
2994 int i;
2995 #ifndef COMPILE_PCRE8
2996 struct sljit_jump *jump;
2997 #endif
2998
2999 for (i = 0; i < 32; ++i)
3000 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3001
3002 if (firstline)
3003 {
3004 SLJIT_ASSERT(common->first_line_end != 0);
3005 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3006 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3007 }
3008
3009 start = LABEL();
3010 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3011 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3012 #ifdef SUPPORT_UTF
3013 if (common->utf)
3014 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3015 #endif
3016
3017 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3018 {
3019 #ifndef COMPILE_PCRE8
3020 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3021 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3022 JUMPHERE(jump);
3023 #endif
3024 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3025 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3026 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3027 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3028 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3029 found = JUMP(SLJIT_C_NOT_ZERO);
3030 }
3031
3032 #ifdef SUPPORT_UTF
3033 if (common->utf)
3034 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3035 #endif
3036 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3037 #ifdef SUPPORT_UTF
3038 #if defined COMPILE_PCRE8
3039 if (common->utf)
3040 {
3041 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3042 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3043 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3044 }
3045 #elif defined COMPILE_PCRE16
3046 if (common->utf)
3047 {
3048 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3049 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3050 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3051 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3052 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3053 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3054 }
3055 #endif /* COMPILE_PCRE[8|16] */
3056 #endif /* SUPPORT_UTF */
3057 JUMPTO(SLJIT_JUMP, start);
3058 if (found != NULL)
3059 JUMPHERE(found);
3060 if (matches != NULL)
3061 set_jumps(matches, LABEL());
3062 JUMPHERE(quit);
3063
3064 if (firstline)
3065 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3066 }
3067
3068 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3069 {
3070 DEFINE_COMPILER;
3071 struct sljit_label *loop;
3072 struct sljit_jump *toolong;
3073 struct sljit_jump *alreadyfound;
3074 struct sljit_jump *found;
3075 struct sljit_jump *foundoc = NULL;
3076 struct sljit_jump *notfound;
3077 pcre_uint32 oc, bit;
3078
3079 SLJIT_ASSERT(common->req_char_ptr != 0);
3080 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3081 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3082 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3083 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3084
3085 if (has_firstchar)
3086 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3087 else
3088 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3089
3090 loop = LABEL();
3091 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3092
3093 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3094 oc = req_char;
3095 if (caseless)
3096 {
3097 oc = TABLE_GET(req_char, common->fcc, req_char);
3098 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3099 if (req_char > 127 && common->utf)
3100 oc = UCD_OTHERCASE(req_char);
3101 #endif
3102 }
3103 if (req_char == oc)
3104 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3105 else
3106 {
3107 bit = req_char ^ oc;
3108 if (is_powerof2(bit))
3109 {
3110 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3111 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3112 }
3113 else
3114 {
3115 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3116 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3117 }
3118 }
3119 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3120 JUMPTO(SLJIT_JUMP, loop);
3121
3122 JUMPHERE(found);
3123 if (foundoc)
3124 JUMPHERE(foundoc);
3125 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3126 JUMPHERE(alreadyfound);
3127 JUMPHERE(toolong);
3128 return notfound;
3129 }
3130
3131 static void do_revertframes(compiler_common *common)
3132 {
3133 DEFINE_COMPILER;
3134 struct sljit_jump *jump;
3135 struct sljit_label *mainloop;
3136
3137 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3138 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3139 GET_LOCAL_BASE(TMP3, 0, 0);
3140
3141 /* Drop frames until we reach STACK_TOP. */
3142 mainloop = LABEL();
3143 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3144 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3145 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3146
3147 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3148 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3149 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3150 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3151 JUMPTO(SLJIT_JUMP, mainloop);
3152
3153 JUMPHERE(jump);
3154 jump = JUMP(SLJIT_C_SIG_LESS);
3155 /* End of dropping frames. */
3156 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3157
3158 JUMPHERE(jump);
3159 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3160 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3161 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3162 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3163 JUMPTO(SLJIT_JUMP, mainloop);
3164 }
3165
3166 static void check_wordboundary(compiler_common *common)
3167 {
3168 DEFINE_COMPILER;
3169 struct sljit_jump *skipread;
3170 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3171 struct sljit_jump *jump;
3172 #endif
3173
3174 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3175
3176 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3177 /* Get type of the previous char, and put it to LOCALS1. */
3178 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3179 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3181 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3182 skip_char_back(common);
3183 check_start_used_ptr(common);
3184 read_char(common);
3185
3186 /* Testing char type. */
3187 #ifdef SUPPORT_UCP
3188 if (common->use_ucp)
3189 {
3190 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3191 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3192 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3193 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3194 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3195 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3196 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3197 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3198 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3199 JUMPHERE(jump);
3200 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3201 }
3202 else
3203 #endif
3204 {
3205 #ifndef COMPILE_PCRE8
3206 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3207 #elif defined SUPPORT_UTF
3208 /* Here LOCALS1 has already been zeroed. */
3209 jump = NULL;
3210 if (common->utf)
3211 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3212 #endif /* COMPILE_PCRE8 */
3213 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3214 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3215 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3216 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3217 #ifndef COMPILE_PCRE8
3218 JUMPHERE(jump);
3219 #elif defined SUPPORT_UTF
3220 if (jump != NULL)
3221 JUMPHERE(jump);
3222 #endif /* COMPILE_PCRE8 */
3223 }
3224 JUMPHERE(skipread);
3225
3226 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3227 skipread = check_str_end(common);
3228 peek_char(common);
3229
3230 /* Testing char type. This is a code duplication. */
3231 #ifdef SUPPORT_UCP
3232 if (common->use_ucp)
3233 {
3234 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3235 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3236 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3237 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3238 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3239 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3240 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3241 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3242 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3243 JUMPHERE(jump);
3244 }
3245 else
3246 #endif
3247 {
3248 #ifndef COMPILE_PCRE8
3249 /* TMP2 may be destroyed by peek_char. */
3250 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3251 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3252 #elif defined SUPPORT_UTF
3253 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3254 jump = NULL;
3255 if (common->utf)
3256 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3257 #endif
3258 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3259 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3260 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3261 #ifndef COMPILE_PCRE8
3262 JUMPHERE(jump);
3263 #elif defined SUPPORT_UTF
3264 if (jump != NULL)
3265 JUMPHERE(jump);
3266 #endif /* COMPILE_PCRE8 */
3267 }
3268 JUMPHERE(skipread);
3269
3270 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3271 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3272 }
3273
3274 /*
3275 range format:
3276
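3277 ranges[0] = number of bit change positions stored (max MAX_RANGE_SIZE, -1 means invalid range).
3278 ranges[1] = first bit (0 or 1), i.e. the bit belonging to character 0
3279 ranges[2] .. ranges[ranges[0] + 1] = positions of the bit changes, in increasing order (a position is stored when the current bit is not equal to the previous one)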
3280 */
3281
3282 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3283 {
3284 DEFINE_COMPILER;
3285 struct sljit_jump *jump;
3286
3287 if (ranges[0] < 0)
3288 return FALSE;
3289
3290 switch(ranges[0])
3291 {
3292 case 1:
3293 if (readch)
3294 read_char(common);
3295 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3296 return TRUE;
3297
3298 case 2:
3299 if (readch)
3300 read_char(common);
3301 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3302 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3303 return TRUE;
3304
3305 case 4:
3306 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3307 {
3308 if (readch)
3309 read_char(common);
3310 if (ranges[1] != 0)
3311 {
3312 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3313 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3314 }
3315 else
3316 {
3317 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3318 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3319 JUMPHERE(jump);
3320 }
3321 return TRUE;
3322 }
3323 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3324 {
3325 if (readch)
3326 read_char(common);
3327 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3328 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3329 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3330 return TRUE;
3331 }
3332 return FALSE;
3333
3334 default:
3335 return FALSE;
3336 }
3337 }
3338
3339 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3340 {
3341 int i, bit, length;
3342 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3343
3344 bit = ctypes[0] & flag;
3345 ranges[0] = -1;
3346 ranges[1] = bit != 0 ? 1 : 0;
3347 length = 0;
3348
3349 for (i = 1; i < 256; i++)
3350 if ((ctypes[i] & flag) != bit)
3351 {
3352 if (length >= MAX_RANGE_SIZE)
3353 return;
3354 ranges[2 + length] = i;
3355 length++;
3356 bit ^= flag;
3357 }
3358
3359 if (bit != 0)
3360 {
3361 if (length >= MAX_RANGE_SIZE)
3362 return;
3363 ranges[2 + length] = 256;
3364 length++;
3365 }
3366 ranges[0] = length;
3367 }
3368
3369 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3370 {
3371 int ranges[2 + MAX_RANGE_SIZE];
3372 pcre_uint8 bit, cbit, all;
3373 int i, byte, length = 0;
3374
3375 bit = bits[0] & 0x1;
3376 ranges[1] = bit;
3377 /* Can be 0 or 255. */
3378 all = -bit;
3379
3380 for (i = 0; i < 256; )
3381 {
3382 byte = i >> 3;
3383 if ((i & 0x7) == 0 && bits[byte] == all)
3384 i += 8;
3385 else
3386 {
3387 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3388 if (cbit != bit)
3389 {
3390 if (length >= MAX_RANGE_SIZE)
3391 return FALSE;
3392 ranges[2 + length] = i;
3393 length++;
3394 bit = cbit;
3395 all = -cbit;
3396 }
3397 i++;
3398 }
3399 }
3400
3401 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3402 {
3403 if (length >= MAX_RANGE_SIZE)
3404 return FALSE;
3405 ranges[2 + length] = 256;
3406 length++;
3407 }
3408 ranges[0] = length;
3409
3410 return check_ranges(common, ranges, backtracks, FALSE);
3411 }
3412
3413 static void check_anynewline(compiler_common *common)
3414 {
3415 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3416 DEFINE_COMPILER;
3417
3418 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3419
3420 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3421 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3422 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3423 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3424 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3425 #ifdef COMPILE_PCRE8
3426 if (common->utf)
3427 {
3428 #endif
3429 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3430 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3431 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3432 #ifdef COMPILE_PCRE8
3433 }
3434 #endif
3435 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3436 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3437 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3438 }
3439
3440 static void check_hspace(compiler_common *common)
3441 {
3442 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3443 DEFINE_COMPILER;
3444
3445 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3446
3447 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3448 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3449 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3450 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3451 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3452 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3453 #ifdef COMPILE_PCRE8
3454 if (common->utf)
3455 {
3456 #endif
3457 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3458 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3459 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3460 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3461 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3462 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3463 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3464 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3465 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3466 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3467 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3468 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3469 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3470 #ifdef COMPILE_PCRE8
3471 }
3472 #endif
3473 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3474 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3475
3476 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3477 }
3478
3479 static void check_vspace(compiler_common *common)
3480 {
3481 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3482 DEFINE_COMPILER;
3483
3484 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3485
3486 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3487 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3488 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3489 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3490 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3491 #ifdef COMPILE_PCRE8
3492 if (common->utf)
3493 {
3494 #endif
3495 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3496 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3497 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3498 #ifdef COMPILE_PCRE8
3499 }
3500 #endif
3501 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3502 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3503
3504 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3505 }
3506
3507 #define CHAR1 STR_END
3508 #define CHAR2 STACK_TOP
3509
3510 static void do_casefulcmp(compiler_common *common)
3511 {
3512 DEFINE_COMPILER;
3513 struct sljit_jump *jump;
3514 struct sljit_label *label;
3515
3516 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3517 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3518 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3519 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3520 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3521 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3522
3523 label = LABEL();
3524 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3525 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3526 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3527 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3528 JUMPTO(SLJIT_C_NOT_ZERO, label);
3529
3530 JUMPHERE(jump);
3531 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3532 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3533 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3534 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3535 }
3536
3537 #define LCC_TABLE STACK_LIMIT
3538
3539 static void do_caselesscmp(compiler_common *common)
3540 {
3541 DEFINE_COMPILER;
3542 struct sljit_jump *jump;
3543 struct sljit_label *label;
3544
3545 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3546 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3547
3548 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3549 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3550 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3551 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3552 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3553 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3554
3555 label = LABEL();
3556 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3557 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3558 #ifndef COMPILE_PCRE8
3559 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3560 #endif
3561 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3562 #ifndef COMPILE_PCRE8
3563 JUMPHERE(jump);
3564 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3565 #endif
3566 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3567 #ifndef COMPILE_PCRE8
3568 JUMPHERE(jump);
3569 #endif
3570 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3571 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3572 JUMPTO(SLJIT_C_NOT_ZERO, label);
3573
3574 JUMPHERE(jump);
3575 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3576 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3577 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3578 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3579 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3580 }
3581
3582 #undef LCC_TABLE
3583 #undef CHAR1
3584 #undef CHAR2
3585
3586 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3587
3588 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3589 {
3590 /* This function would be ineffective to do in JIT level. */
3591 pcre_uint32 c1, c2;
3592 const pcre_uchar *src2 = args->uchar_ptr;
3593 const pcre_uchar *end2 = args->end;
3594 const ucd_record *ur;
3595 const pcre_uint32 *pp;
3596
3597 while (src1 < end1)
3598 {
3599 if (src2 >= end2)
3600 return (pcre_uchar*)1;
3601 GETCHARINC(c1, src1);
3602 GETCHARINC(c2, src2);
3603 ur = GET_UCD(c2);
3604 if (c1 != c2 && c1 != c2 + ur->other_case)
3605 {
3606 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3607 for (;;)
3608 {
3609 if (c1 < *pp) return NULL;
3610 if (c1 == *pp++) break;
3611 }
3612 }
3613 }
3614 return src2;
3615 }
3616
3617 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3618
/* Emits the comparison of one pattern character (all of its code units in
UTF mode) against the subject text located context->length bytes before
STR_PTR. Every mismatch jump is added to backtracks. The context carries the
state between consecutive calls (remaining length, the register holding
preloaded subject data, and the code units collected for a combined
compare). Returns cc advanced past the code units that were processed. */
3619 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3620 compare_context* context, jump_list **backtracks)
3621 {
3622 DEFINE_COMPILER;
3623 unsigned int othercasebit = 0;
3624 pcre_uchar *othercasechar = NULL;
3625 #ifdef SUPPORT_UTF
3626 int utflength;
3627 #endif
3628
/* For a caseless character with an other case, the value returned by
char_get_othercase_bit() is split below into the code unit that differs
(othercasechar) and the bit mask to OR into that unit (othercasebit). */
3629 if (caseless && char_has_othercase(common, cc))
3630 {
3631 othercasebit = char_get_othercase_bit(common, cc);
3632 SLJIT_ASSERT(othercasebit);
3633 /* Extracting bit difference info. */
3634 #if defined COMPILE_PCRE8
/* Bits above the low byte select the code unit, the low byte is the mask. */
3635 othercasechar = cc + (othercasebit >> 8);
3636 othercasebit &= 0xff;
3637 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3638 /* Note that this code only handles characters in the BMP. If there
3639 ever are characters outside the BMP whose othercase differs in only one
3640 bit from itself (there currently are none), this code will need to be
3641 revised for COMPILE_PCRE32. */
/* Bits from bit 9 upwards select the code unit; bit 0x100 tells that the
mask belongs to the high byte of that unit. */
3642 othercasechar = cc + (othercasebit >> 9);
3643 if ((othercasebit & 0x100) != 0)
3644 othercasebit = (othercasebit & 0xff) << 8;
3645 else
3646 othercasebit &= 0xff;
3647 #endif /* COMPILE_PCRE[8|16|32] */
3648 }
3649
/* A sourcereg of -1 triggers the initial load of subject data into TMP1
(presumably the caller's marker for "nothing loaded yet" - confirm at the
call sites). The load width follows the remaining length when unaligned
access is available. Afterwards TMP2 becomes the register for the next load. */
3650 if (context->sourcereg == -1)
3651 {
3652 #if defined COMPILE_PCRE8
3653 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3654 if (context->length >= 4)
3655 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3656 else if (context->length >= 2)
3657 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3658 else
3659 #endif
3660 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3661 #elif defined COMPILE_PCRE16
3662 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3663 if (context->length >= 4)
3664 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3665 else
3666 #endif
3667 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3668 #elif defined COMPILE_PCRE32
3669 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3670 #endif /* COMPILE_PCRE[8|16|32] */
3671 context->sourcereg = TMP2;
3672 }
3673
/* In UTF mode the loop below runs once per code unit of the character;
without SUPPORT_UTF the body is executed exactly once. */
3674 #ifdef SUPPORT_UTF
3675 utflength = 1;
3676 if (common->utf && HAS_EXTRALEN(*cc))
3677 utflength += GET_EXTRALEN(*cc);
3678
3679 do
3680 {
3681 #endif
3682
3683 context->length -= IN_UCHARS(1);
3684 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3685
3686 /* Unaligned read is supported. */
/* The expected unit (with the case bit already set, if any) is collected in
context->c and the case bit mask in context->oc, so that several units can be
checked with one compare. */
3687 if (othercasebit != 0 && othercasechar == cc)
3688 {
3689 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3690 context->oc.asuchars[context->ucharptr] = othercasebit;
3691 }
3692 else
3693 {
3694 context->c.asuchars[context->ucharptr] = *cc;
3695 context->oc.asuchars[context->ucharptr] = 0;
3696 }
3697 context->ucharptr++;
3698
/* Emit the compare once the collected units fill a chunk or the sequence
has ended. */
3699 #if defined COMPILE_PCRE8
3700 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3701 #else
3702 if (context->ucharptr >= 2 || context->length == 0)
3703 #endif
3704 {
/* The load for the following chunk is emitted first, into the current
sourcereg; the registers are then swapped so that the compare below works on
the data that was loaded earlier. */
3705 if (context->length >= 4)
3706 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3707 else if (context->length >= 2)
3708 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3709 #if defined COMPILE_PCRE8
3710 else if (context->length >= 1)
3711 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3712 #endif /* COMPILE_PCRE8 */
3713 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3714
/* The case labels are expressed in code units: a 4 byte chunk, a 2 byte
chunk and (8 bit library only) a single byte. When a case bit mask is
present it is OR-ed into the subject data before the compare. */
3715 switch(context->ucharptr)
3716 {
3717 case 4 / sizeof(pcre_uchar):
3718 if (context->oc.asint != 0)
3719 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3720 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3721 break;
3722
3723 case 2 / sizeof(pcre_uchar):
3724 if (context->oc.asushort != 0)
3725 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3726 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3727 break;
3728
3729 #ifdef COMPILE_PCRE8
3730 case 1:
3731 if (context->oc.asbyte != 0)
3732 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3733 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3734 break;
3735 #endif
3736
3737 default:
3738 SLJIT_ASSERT_STOP();
3739 break;
3740 }
3741 context->ucharptr = 0;
3742 }
3743
3744 #else
3745
3746 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit per compare: the next unit is loaded into the current
sourcereg, the registers are swapped, and the previously loaded unit is
compared (after OR-ing in the case bit when this unit carries it). */
3747 if (context->length >= 1)
3748 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3749
3750 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3751
3752 if (othercasebit != 0 && othercasechar == cc)
3753 {
3754 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3755 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3756 }
3757 else
3758 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3759
3760 #endif
3761
3762 cc++;
3763 #ifdef SUPPORT_UTF
3764 utflength--;
3765 }
3766 while (utflength > 0);
3767 #endif
3768
3769 return cc;
3770 }
3771
3772 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3773
/* The two macros below keep track of a constant that has already been
subtracted from a register, so that consecutive range checks can reuse the
shifted value. They rely on names from the calling scope: typereg and
typeoffset for SET_TYPE_OFFSET, TMP1 and charoffset for SET_CHAR_OFFSET.

When the requested offset differs from the current one, a single ADD or SUB
adjusting the register by the difference is emitted; the bookkeeping variable
is then set to the new offset.

Each macro is wrapped in do { } while (0) so that it expands to exactly one
statement: it can be guarded by an if (with or without an else) and must be
followed by a semicolon. The argument is evaluated more than once and must
not have side effects. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
3793
3794 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3795 {
3796 DEFINE_COMPILER;
3797 jump_list *found = NULL;
3798 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3799 pcre_int32 c, charoffset;
3800 const pcre_uint32 *other_cases;
3801 struct sljit_jump *jump = NULL;
3802 pcre_uchar *ccbegin;
3803 int compares, invertcmp, numberofcmps;
3804 #ifdef SUPPORT_UCP
3805 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3806 BOOL charsaved = FALSE;
3807 int typereg = TMP1, scriptreg = TMP1;
3808 pcre_int32 typeoffset;
3809 #endif
3810
3811 /* Although SUPPORT_UTF must be defined, we are
3812 not necessary in utf mode even in 8 bit mode. */
3813 detect_partial_match(common, backtracks);
3814 read_char(common);
3815
3816 if ((*cc++ & XCL_MAP) != 0)
3817 {
3818 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3819 #ifndef COMPILE_PCRE8
3820 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3821 #elif defined SUPPORT_UTF
3822 if (common->utf)
3823 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3824 #endif
3825
3826 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3827 {
3828 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3829 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3830 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3831 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3832 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3833 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3834 }
3835
3836 #ifndef COMPILE_PCRE8
3837 JUMPHERE(jump);
3838 #elif defined SUPPORT_UTF
3839 if (common->utf)
3840 JUMPHERE(jump);
3841 #endif
3842 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3843 #ifdef SUPPORT_UCP
3844 charsaved = TRUE;
3845 #endif
3846 cc += 32 / sizeof(pcre_uchar);
3847 }
3848
3849 /* Scanning the necessary info. */
3850 ccbegin = cc;
3851 compares = 0;
3852 while (*cc != XCL_END)
3853 {
3854 compares++;
3855 if (*cc == XCL_SINGLE)
3856 {
3857 cc += 2;
3858 #ifdef SUPPORT_UTF
3859 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3860 #endif
3861 #ifdef SUPPORT_UCP
3862 needschar = TRUE;
3863 #endif
3864 }
3865 else if (*cc == XCL_RANGE)
3866 {
3867 cc += 2;
3868 #ifdef SUPPORT_UTF
3869 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3870 #endif
3871 cc++;
3872 #ifdef SUPPORT_UTF
3873 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3874 #endif
3875 #ifdef SUPPORT_UCP
3876 needschar = TRUE;
3877 #endif
3878 }
3879 #ifdef SUPPORT_UCP
3880 else
3881 {
3882 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3883 cc++;
3884 switch(*cc)
3885 {
3886 case PT_ANY:
3887 break;
3888
3889 case PT_LAMP:
3890 case PT_GC:
3891 case PT_PC:
3892 case PT_ALNUM:
3893 needstype = TRUE;
3894 break;
3895
3896 case PT_SC:
3897 needsscript = TRUE;
3898 break;
3899
3900 case PT_SPACE:
3901 case PT_PXSPACE:
3902 case PT_WORD:
3903 needstype = TRUE;
3904 needschar = TRUE;
3905 break;
3906
3907 case PT_CLIST:
3908 needschar = TRUE;
3909 break;
3910
3911 default:
3912 SLJIT_ASSERT_STOP();
3913 break;
3914 }
3915 cc += 2;
3916 }
3917 #endif
3918 }
3919
3920 #ifdef SUPPORT_UCP
3921 /* Simple register allocation. TMP1 is preferred if possible. */
3922 if (needstype || needsscript)
3923 {
3924 if (needschar && !charsaved)
3925 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3926 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3927 if (needschar)
3928 {
3929 if (needstype)
3930 {
3931 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3932 typereg = RETURN_ADDR;
3933 }
3934
3935 if (needsscript)
3936 scriptreg = TMP3;
3937 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3938 }
3939 else if (needstype && needsscript)
3940 scriptreg = TMP3;
3941 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3942
3943 if (needsscript)
3944 {
3945 if (scriptreg == TMP1)
3946 {
3947 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3948 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3949 }
3950 else
3951 {
3952 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3953 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3954 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3955 }
3956 }
3957 }
3958 #endif
3959
3960 /* Generating code. */
3961 cc = ccbegin;
3962 charoffset = 0;
3963 numberofcmps = 0;
3964 #ifdef SUPPORT_UCP
3965 typeoffset = 0;
3966 #endif
3967
3968 while (*cc != XCL_END)
3969 {
3970 compares--;
3971 invertcmp = (compares == 0 && list != backtracks);
3972 jump = NULL;
3973
3974 if (*cc == XCL_SINGLE)
3975 {
3976 cc ++;
3977 #ifdef SUPPORT_UTF
3978 if (common->utf)
3979 {
3980 GETCHARINC(c, cc);
3981 }
3982 else
3983 #endif
3984 c = *cc++;
3985
3986 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3987 {
3988 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3989 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
3990 numberofcmps++;
3991 }
3992 else if (numberofcmps > 0)
3993 {
3994 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3995 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3996 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3997 numberofcmps = 0;
3998 }
3999 else
4000 {
4001 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4002 numberofcmps = 0;
4003 }
4004 }
4005 else if (*cc == XCL_RANGE)
4006 {
4007 cc ++;
4008 #ifdef SUPPORT_UTF
4009 if (common->utf)
4010 {
4011 GETCHARINC(c, cc);
4012 }
4013 else
4014 #endif
4015 c = *cc++;
4016 SET_CHAR_OFFSET(c);
4017 #ifdef SUPPORT_UTF
4018 if (common->utf)
4019 {
4020 GETCHARINC(c, cc);
4021 }
4022 else
4023 #endif
4024 c = *cc++;
4025 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4026 {
4027 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4028 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4029 numberofcmps++;
4030 }
4031 else if (numberofcmps > 0)
4032 {
4033 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4034 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4035 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4036 numberofcmps = 0;
4037 }
4038 else
4039 {
4040 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4041 numberofcmps = 0;
4042 }
4043 }
4044 #ifdef SUPPORT_UCP
4045 else
4046 {
4047 if (*cc == XCL_NOTPROP)
4048 invertcmp ^= 0x1;
4049 cc++;
4050 switch(*cc)
4051 {
4052 case PT_ANY:
4053 if (list != backtracks)
4054 {
4055 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4056 continue;
4057 }
4058 else if (cc[-1] == XCL_NOTPROP)
4059 continue;
4060 jump = JUMP(SLJIT_JUMP);
4061 break;
4062
4063 case PT_LAMP:
4064 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4065 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4066 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4067 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4068 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4069 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4070 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4071 break;
4072
4073 case PT_GC:
4074 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4075 SET_TYPE_OFFSET(c);
4076 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4077 break;
4078
4079 case PT_PC:
4080 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4081 break;
4082
4083 case PT_SC:
4084 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4085 break;
4086
4087 case PT_SPACE:
4088 case PT_PXSPACE:
4089 if (*cc == PT_SPACE)
4090 {
4091 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4092 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4093 }
4094 SET_CHAR_OFFSET(9);
4095 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4096 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4097 if (*cc == PT_SPACE)
4098 JUMPHERE(jump);
4099
4100 SET_TYPE_OFFSET(ucp_Zl);
4101 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4102 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4103 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4104 break;
4105
4106 case PT_WORD:
4107 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4108 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4109 /* Fall through. */
4110
4111 case PT_ALNUM:
4112 SET_TYPE_OFFSET(ucp_Ll);
4113 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4114 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4115 SET_TYPE_OFFSET(ucp_Nd);
4116 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4117 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4118 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4119 break;
4120
4121 case PT_CLIST:
4122 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4123
4124 /* At least three characters are required.
4125 Otherwise this case would be handled by the normal code path. */
4126 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4127 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4128
4129 /* Optimizing character pairs, if their difference is power of 2. */
4130 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4131 {
4132 if (charoffset == 0)
4133 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4134 else
4135 {
4136 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4137 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4138 }
4139 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4140 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4141 other_cases += 2;
4142 }
4143 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4144 {
4145 if (charoffset == 0)
4146 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4147 else
4148 {
4149 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4150 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4151 }
4152 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4153 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4154
4155 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4156 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4157
4158 other_cases += 3;
4159 }
4160 else
4161 {
4162 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4163 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4164 }
4165
4166 while (*other_cases != NOTACHAR)
4167 {
4168 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4169 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4170 }
4171 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4172 break;
4173 }
4174 cc += 2;
4175 }
4176 #endif
4177
4178 if (jump != NULL)
4179 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4180 }
4181
4182 if (found != NULL)
4183 set_jumps(found, LABEL());
4184 }
4185
4186 #undef SET_TYPE_OFFSET
4187 #undef SET_CHAR_OFFSET
4188
4189 #endif
4190
/* Emits the matching-path code for one single-item opcode.

   type       - the opcode being compiled.
   cc         - pattern pointer positioned just after the opcode (the caller in
                compile_charn_matchingpath passes *cc as type and cc + 1 here).
   backtracks - list that receives every jump taken when the item cannot match.

   Returns the pattern pointer advanced past whatever operand data the opcode
   owns; opcodes without operands return cc unchanged. An opcode that no case
   handles falls out of the switch into SLJIT_ASSERT_STOP(). */
4191 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4192 {
4193 DEFINE_COMPILER;
4194 int length;
4195 unsigned int c, oc, bit;
4196 compare_context context;
/* Scratch slots for forward jumps; each case uses them independently. */
4197 struct sljit_jump *jump[4];
4198 #ifdef SUPPORT_UTF
4199 struct sljit_label *label;
4200 #ifdef SUPPORT_UCP
/* Temporary one-property xclass descriptor built for OP_PROP / OP_NOTPROP. */
4201 pcre_uchar propdata[5];
4202 #endif
4203 #endif
4204
4205 switch(type)
4206 {
/* Backtrack unless STR_PTR equals the `begin` field of jit_arguments. */
4207 case OP_SOD:
4208 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4209 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4210 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4211 return cc;
4212
/* Backtrack unless STR_PTR equals the `str` field of jit_arguments. */
4213 case OP_SOM:
4214 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4215 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4216 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4217 return cc;
4218
/* Fast-calls the shared wordboundary routine, then backtracks on NOT_ZERO for
   the negated opcode and on ZERO for the plain one.
   NOTE(review): presumably the routine leaves its verdict in the flags - confirm. */
4219 case OP_NOT_WORD_BOUNDARY:
4220 case OP_WORD_BOUNDARY:
4221 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4222 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4223 return cc;
4224
4225 case OP_NOT_DIGIT:
4226 case OP_DIGIT:
4227 /* Digits are usually 0-9, so it is worth optimizing them. */
/* NOTE(review): -2 in digits[0] looks like the "ranges not computed yet"
   marker; the table is filled on first use - confirm. */
4228 if (common->digits[0] == -2)
4229 get_ctype_ranges(common, ctype_digit, common->digits);
4230 detect_partial_match(common, backtracks);
4231 /* Flip the starting bit in the negative case. */
4232 if (type == OP_NOT_DIGIT)
4233 common->digits[1] ^= 1;
/* check_ranges() returned FALSE: fall back to testing the ctype_digit bit of
   the value that read_char8_type leaves in TMP1. */
4234 if (!check_ranges(common, common->digits, backtracks, TRUE))
4235 {
4236 read_char8_type(common);
4237 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4238 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4239 }
/* Undo the flip above so the shared table is left as it was. */
4240 if (type == OP_NOT_DIGIT)
4241 common->digits[1] ^= 1;
4242 return cc;
4243
/* Tests the ctype_space bit: the positive opcode backtracks when the bit is
   clear, the negated opcode when it is set. */
4244 case OP_NOT_WHITESPACE:
4245 case OP_WHITESPACE:
4246 detect_partial_match(common, backtracks);
4247 read_char8_type(common);
4248 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4249 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4250 return cc;
4251
/* Same scheme as the whitespace case, using the ctype_word bit. */
4252 case OP_NOT_WORDCHAR:
4253 case OP_WORDCHAR:
4254 detect_partial_match(common, backtracks);
4255 read_char8_type(common);
4256 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4257 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4258 return cc;
4259
/* Reads one character and backtracks if it starts a newline. */
4260 case OP_ANY:
4261 detect_partial_match(common, backtracks);
4262 read_char(common);
/* Fixed newline stored as two values packed into common->newline. */
4263 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4264 {
/* Not the first newline value: accepted without looking further. */
4265 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
/* Nothing follows the first value: no complete newline, so it is accepted. */
4266 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4267 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4268 else
4269 jump[1] = check_str_end(common);
4270
/* Loads the following unit without advancing STR_PTR; backtrack if it is the
   second newline value. */
4271 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4272 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
/* check_str_end() may hand back NULL, hence the guard. */
4273 if (jump[1] != NULL)
4274 JUMPHERE(jump[1]);
4275 JUMPHERE(jump[0]);
4276 }
4277 else
4278 check_newlinechar(common, common->nltype, backtracks, TRUE);
4279 return cc;
4280
/* Steps over one whole character without inspecting its value. */
4281 case OP_ALLANY:
4282 detect_partial_match(common, backtracks);
4283 #ifdef SUPPORT_UTF
4284 if (common->utf)
4285 {
/* Load the first code unit and step over it. */
4286 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4287 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4288 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4289 #if defined COMPILE_PCRE8
/* Units below 0xc0 need no extra skip; otherwise add the value read from
   utf8_table4[unit - 0xc0] to STR_PTR. */
4290 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4291 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4292 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4293 #elif defined COMPILE_PCRE16
/* Units below 0xd800 need no extra skip; otherwise TMP1 becomes 1 when
   (unit & 0xfc00) == 0xd800, and (TMP1 << 1) is added to STR_PTR. */
4294 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4295 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4296 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4297 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4298 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4299 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4300 #endif
4301 JUMPHERE(jump[0]);
4302 #endif /* COMPILE_PCRE[8|16] */
4303 return cc;
4304 }
4305 #endif
/* Non-UTF: every character is exactly one code unit. */
4306 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4307 return cc;
4308
/* Always advances by exactly one code unit, regardless of common->utf. */
4309 case OP_ANYBYTE:
4310 detect_partial_match(common, backtracks);
4311 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4312 return cc;
4313
4314 #ifdef SUPPORT_UTF
4315 #ifdef SUPPORT_UCP
/* Wraps the two operand units in a five-element descriptor (leading 0, the
   XCL_PROP / XCL_NOTPROP marker, both operands, XCL_END) and reuses the
   xclass generator. Two operand units are consumed. */
4316 case OP_NOTPROP:
4317 case OP_PROP:
4318 propdata[0] = 0;
4319 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4320 propdata[2] = cc[0];
4321 propdata[3] = cc[1];
4322 propdata[4] = XCL_END;
4323 compile_xclass_matchingpath(common, propdata, backtracks);
4324 return cc + 2;
4325 #endif
4326 #endif
4327
/* A CR directly followed by NL is consumed as a pair; a CR followed by
   anything else (or by the end of the subject) is accepted on its own. Any
   other first character goes through check_newlinechar with bsr_nltype. */
4328 case OP_ANYNL:
4329 detect_partial_match(common, backtracks);
4330 read_char(common);
4331 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4332 /* We don't need to handle soft partial matching case. */
4333 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4334 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4335 else
4336 jump[1] = check_str_end(common);
4337 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4338 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
/* The unit after CR is NL: step over it as well. */
4339 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4340 jump[3] = JUMP(SLJIT_JUMP);
4341 JUMPHERE(jump[0]);
4342 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4343 JUMPHERE(jump[1]);
4344 JUMPHERE(jump[2]);
4345 JUMPHERE(jump[3]);
4346 return cc;
4347
/* Reads a character, fast-calls the shared hspace routine and branches on the
   resulting flag: the negated opcode backtracks on NOT_ZERO, the plain one on
   ZERO. */
4348 case OP_NOT_HSPACE:
4349 case OP_HSPACE:
4350 detect_partial_match(common, backtracks);
4351 read_char(common);
4352 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4353 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4354 return cc;
4355
/* Same scheme as the hspace case, using the shared vspace routine. */
4356 case OP_NOT_VSPACE:
4357 case OP_VSPACE:
4358 detect_partial_match(common, backtracks);
4359 read_char(common);
4360 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4361 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4362 return cc;
4363
4364 #ifdef SUPPORT_UCP
/* Consumes a first character, then keeps consuming following characters for
   as long as ucp_gbtable, indexed by the previous character's gbprop, has the
   bit for the next character's gbprop set. */
4365 case OP_EXTUNI:
4366 detect_partial_match(common, backtracks);
4367 read_char(common);
4368 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4369 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4370 /* Optimize register allocation: use a real register. */
/* STACK_TOP is parked in LOCALS0 and reused to hold the previous gbprop. */
4371 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4372 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4373
4374 label = LABEL();
/* End of subject: stop extending. */
4375 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* Remember where the candidate character starts so it can be un-read. */
4376 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4377 read_char(common);
4378 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4379 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4380 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4381
/* TMP1 = 32-bit ucp_gbtable entry at byte offset (previous gbprop << 2);
   the new gbprop becomes the previous one; test bit (1 << new gbprop). */
4382 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4383 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4384 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4385 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4386 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4387 JUMPTO(SLJIT_C_NOT_ZERO, label);
4388
/* Bit clear: the last character read is not part of the sequence, rewind. */
4389 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4390 JUMPHERE(jump[0]);
4391 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4392
4393 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4394 {
4395 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4396 /* Since we successfully read a char above, partial matching must occur. */
4397 check_partial(common, TRUE);
4398 JUMPHERE(jump[0]);
4399 }
4400 return cc;
4401 #endif
4402
/* Succeeds at the very end of the subject, or immediately before a newline
   that is the last thing in the subject. */
4403 case OP_EODN:
4404 /* Requires rather complex checks. */
/* Already at the end: jump straight to the final check_partial call. */
4405 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4406 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4407 {
/* Two-value fixed newline: exactly two units must remain and both must match. */
4408 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4409 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4410 if (common->mode == JIT_COMPILE)
4411 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4412 else
4413 {
/* Partial modes. Exactly two units left: go on to the normal comparison.
   Otherwise TMP2 = (more than two units left) | (first unit mismatches);
   non-zero backtracks. Zero means a single remaining unit equal to the first
   newline value: check_partial(TRUE) runs and then we backtrack anyway. */
4414 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4415 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4416 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4417 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4418 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4419 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4420 check_partial(common, TRUE);
4421 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4422 JUMPHERE(jump[1]);
4423 }
4424 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4425 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4426 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4427 }
4428 else if (common->nltype == NLTYPE_FIXED)
4429 {
/* Single-value fixed newline: exactly one unit must remain and it must match. */
4430 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4431 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4432 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4433 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4434 }
4435 else
4436 {
/* Remaining newline types. First unit is CR: compare STR_PTR + 2 units with
   STR_END. Greater (CR is the last unit) succeeds, less (more than two units
   left) backtracks, equal requires the second unit to be NL. */
4437 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4438 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4439 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4440 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4441 jump[2] = JUMP(SLJIT_C_GREATER);
4442 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4443 /* Equal. */
4444 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4445 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4446 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4447
/* First unit is not CR. */
4448 JUMPHERE(jump[1]);
4449 if (common->nltype == NLTYPE_ANYCRLF)
4450 {
/* It must be the last unit of the subject and it must be NL. */
4451 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4452 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4453 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4454 }
4455 else
4456 {
/* Read one character with STR_PTR saved in LOCALS1; the read must land on
   STR_END and the shared anynewline routine must not report ZERO. STR_PTR is
   restored afterwards, so nothing is consumed. */
4457 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4458 read_char(common);
4459 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4460 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4461 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4462 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4463 }
4464 JUMPHERE(jump[2]);
4465 JUMPHERE(jump[3]);
4466 }
4467 JUMPHERE(jump[0]);
4468 check_partial(common, FALSE);
4469 return cc;
4470
/* Backtrack while any input remains; otherwise run check_partial. */
4471 case OP_EOD:
4472 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4473 check_partial(common, FALSE);
4474 return cc;
4475
/* Backtrack if STR_PTR is past jit_arguments.begin or if the notbol field is
   non-zero. */
4476 case OP_CIRC:
4477 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4478 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4479 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4480 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4481 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4482 return cc;
4483
/* At jit_arguments.begin this behaves like OP_CIRC. Past it, the position
   must not be the end of the subject and must directly follow a newline. */
4484 case OP_CIRCM:
4485 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4486 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4487 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4488 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4489 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4490 jump[0] = JUMP(SLJIT_JUMP);
4491 JUMPHERE(jump[1]);
4492
4493 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4494 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4495 {
/* Two-value newline: both preceding units must exist at or after `begin`
   (still held in TMP1 for the bound check) and must match. */
4496 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4497 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4498 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4499 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4500 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4501 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4502 }
4503 else
4504 {
/* NOTE(review): presumably steps back one character and re-reads it so that
   STR_PTR ends up where it started - confirm against the two helpers. */
4505 skip_char_back(common);
4506 read_char(common);
4507 check_newlinechar(common, common->nltype, backtracks, FALSE);
4508 }
4509 JUMPHERE(jump[0]);
4510 return cc;
4511
/* Backtrack when the noteol field is non-zero. Otherwise behave like OP_EODN,
   or like OP_EOD when common->endonly is set. */
4512 case OP_DOLL:
4513 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4514 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4515 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4516
4517 if (!common->endonly)
4518 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4519 else
4520 {
4521 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4522 check_partial(common, FALSE);
4523 }
4524 return cc;
4525
/* At the end of the subject: backtrack when noteol is non-zero, otherwise run
   check_partial and succeed. Before the end: a newline must start here. */
4526 case OP_DOLLM:
4527 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4528 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4529 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4530 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4531 check_partial(common, FALSE);
4532 jump[0] = JUMP(SLJIT_JUMP);
4533 JUMPHERE(jump[1]);
4534
4535 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4536 {
4537 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4538 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
/* Fewer than two units left cannot hold the two-value newline. */
4539 if (common->mode == JIT_COMPILE)
4540 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4541 else
4542 {
/* Partial modes: with a single unit left, a match of the first newline value
   triggers check_partial(TRUE) before the unconditional backtrack. */
4543 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4544 /* STR_PTR = STR_END - IN_UCHARS(1) */
4545 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4546 check_partial(common, TRUE);
4547 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4548 JUMPHERE(jump[1]);
4549 }
4550
4551 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4552 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4553 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4554 }
4555 else
4556 {
4557 peek_char(common);
4558 check_newlinechar(common, common->nltype, backtracks, FALSE);
4559 }
4560 JUMPHERE(jump[0]);
4561 return cc;
4562
/* Single literal character. length counts the code units the character
   occupies in the pattern, including UTF extra units. */
4563 case OP_CHAR:
4564 case OP_CHARI:
4565 length = 1;
4566 #ifdef SUPPORT_UTF
4567 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4568 #endif
/* Fast path, non-partial mode only: taken for a caseful character, one with
   no other case, or one for which char_get_othercase_bit() is non-zero.
   STR_PTR is advanced first, bounds-checked, and the comparison is delegated
   to byte_sequence_compare. */
4569 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4570 {
4571 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4572 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4573
4574 context.length = IN_UCHARS(length);
4575 context.sourcereg = -1;
4576 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4577 context.ucharptr = 0;
4578 #endif
4579 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4580 }
/* General path: read the subject character into TMP1 and compare it with the
   pattern character c. */
4581 detect_partial_match(common, backtracks);
4582 read_char(common);
4583 #ifdef SUPPORT_UTF
4584 if (common->utf)
4585 {
4586 GETCHAR(c, cc);
4587 }
4588 else
4589 #endif
4590 c = *cc;
4591 if (type == OP_CHAR || !char_has_othercase(common, cc))
4592 {
4593 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4594 return cc + length;
4595 }
4596 oc = char_othercase(common, c);
4597 bit = c ^ oc;
/* Both cases differ in a single bit: force that bit on and compare once. */
4598 if (is_powerof2(bit))
4599 {
4600 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4601 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4602 return cc + length;
4603 }
/* Otherwise TMP2 = (TMP1 == c) | (TMP1 == oc); backtrack when it is zero. */
4604 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4605 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4606 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4607 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4608 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4609 return cc + length;
4610
/* Negated single character: consumes one subject character and backtracks if
   it equals c (or, for OP_NOTI, the other case of c). */
4611 case OP_NOT:
4612 case OP_NOTI:
4613 detect_partial_match(common, backtracks);
4614 length = 1;
4615 #ifdef SUPPORT_UTF
4616 if (common->utf)
4617 {
4618 #ifdef COMPILE_PCRE8
4619 c = *cc;
/* Pattern character below 128: compare against the first subject unit only,
   then skip the rest of the subject character by hand. */
4620 if (c < 128)
4621 {
4622 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4623 if (type == OP_NOT || !char_has_othercase(common, cc))
4624 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4625 else
4626 {
4627 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4628 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4629 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4630 }
4631 /* Skip the variable-length character. */
4632 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4633 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4634 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4635 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4636 JUMPHERE(jump[0]);
4637 return cc + 1;
4638 }
4639 else
4640 #endif /* COMPILE_PCRE8 */
4641 {
/* GETCHARLEN is given both c and length to fill in from the pattern. */
4642 GETCHARLEN(c, cc, length);
4643 read_char(common);
4644 }
4645 }
4646 else
4647 #endif /* SUPPORT_UTF */
4648 {
4649 read_char(common);
4650 c = *cc;
4651 }
4652
4653 if (type == OP_NOT || !char_has_othercase(common, cc))
4654 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4655 else
4656 {
4657 oc = char_othercase(common, c);
4658 bit = c ^ oc;
/* Single differing bit: one masked comparison; otherwise two comparisons. */
4659 if (is_powerof2(bit))
4660 {
4661 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4662 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4663 }
4664 else
4665 {
4666 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4667 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4668 }
4669 }
4670 return cc + length;
4671
/* Bitmap character class. The operand is a bitmap indexed by character value
   (byte c >> 3, bit c & 7), and 32 bytes of pattern are skipped on return. */
4672 case OP_CLASS:
4673 case OP_NCLASS:
4674 detect_partial_match(common, backtracks);
4675 read_char(common);
/* check_class_ranges() returning non-zero means no bitmap lookup is needed. */
4676 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4677 return cc + 32 / sizeof(pcre_uchar);
4678
4679 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4680 jump[0] = NULL;
4681 #ifdef COMPILE_PCRE8
4682 /* This check only affects 8 bit mode. In other modes, we
4683 always need to compare the value with 255. */
4684 if (common->utf)
4685 #endif /* COMPILE_PCRE8 */
4686 {
/* Characters above 255 are outside the bitmap: OP_CLASS backtracks on them,
   OP_NCLASS jumps past the bitmap test (jump[0] lands after it). */
4687 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4688 if (type == OP_CLASS)
4689 {
4690 add_jump(compiler, backtracks, jump[0]);
4691 jump[0] = NULL;
4692 }
4693 }
4694 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* TMP2 = 1 << (c & 7); TMP1 = bitmap byte at cc[c >> 3]; backtrack when the
   selected bit is clear. */
4695 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4696 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4697 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4698 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4699 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4700 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4701 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4702 if (jump[0] != NULL)
4703 JUMPHERE(jump[0]);
4704 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4705 return cc + 32 / sizeof(pcre_uchar);
4706
4707 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Delegates to the xclass generator, passing the data that follows the
   LINK_SIZE-wide field; the value stored in that field (minus one) is the
   distance to the next opcode. */
4708 case OP_XCLASS:
4709 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4710 return cc + GET(cc, 0) - 1;
4711 #endif
4712
/* Moves STR_PTR backwards by the count stored in the operand, backtracking
   if that would go before jit_arguments.begin. A count of zero emits nothing. */
4713 case OP_REVERSE:
4714 length = GET(cc, 0);
4715 if (length == 0)
4716 return cc + LINK_SIZE;
4717 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4718 #ifdef SUPPORT_UTF
4719 if (common->utf)
4720 {
/* UTF: loop `length` times, one skip_char_back() call per iteration, with
   TMP2 as the counter and TMP3 holding `begin`. */
4721 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4722 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4723 label = LABEL();
4724 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4725 skip_char_back(common);
4726 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4727 JUMPTO(SLJIT_C_NOT_ZERO, label);
4728 }
4729 else
4730 #endif
4731 {
/* Non-UTF: a single subtraction of `length` code units followed by one
   bound check. */
4732 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4733 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4734 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4735 }
4736 check_start_used_ptr(common);
4737 return cc + LINK_SIZE;
4738 }
/* Reached only for an opcode that none of the cases above returned from. */
4739 SLJIT_ASSERT_STOP();
4740 return cc;
4741 }
4742
4743 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4744 {
4745 /* This function consumes at least one input character. */
4746 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4747 DEFINE_COMPILER;
4748 pcre_uchar *ccbegin = cc;
4749 compare_context context;
4750 int size;
4751
4752 context.length = 0;
4753 do
4754 {
4755 if (cc >= ccend)
4756 break;
4757
4758 if (*cc == OP_CHAR)
4759 {
4760 size = 1;
4761 #ifdef SUPPORT_UTF
4762 if (common->utf && HAS_EXTRALEN(cc[1]))
4763 size += GET_EXTRALEN(cc[1]);
4764 #endif
4765 }
4766 else if (*cc == OP_CHARI)
4767 {
4768 size = 1;
4769 #ifdef SUPPORT_UTF
4770 if (common->utf)
4771 {
4772 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4773 size = 0;
4774 else if (HAS_EXTRALEN(cc[1]))
4775 size += GET_EXTRALEN(cc[1]);
4776 }
4777 else
4778 #endif
4779 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4780 size = 0;
4781 }
4782 else
4783 size = 0;
4784
4785 cc += 1 + size;
4786 context.length += IN_UCHARS(size);
4787 }
4788 while (size > 0 && context.length <= 128);
4789
4790 cc = ccbegin;
4791 if (context.length > 0)
4792 {
4793 /* We have a fixed-length byte sequence. */
4794 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4795 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4796
4797 context.sourcereg = -1;
4798 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4799 context.ucharptr = 0;
4800 #endif
4801 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4802 return cc;
4803 }
4804
4805 /* A non-fixed length character will be checked if length == 0. */
4806 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4807 }
4808
4809 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4810 {
4811 DEFINE_COMPILER;
4812 int offset = GET2(cc, 1) << 1;
4813
4814 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4815 if (!common->jscript_compat)
4816 {
4817 if (backtracks == NULL)
4818 {
4819 /* OVECTOR(1) contains the "string begin - 1" constant. */
4820 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4821 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4822 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4823 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4824 return JUMP(SLJIT_C_NOT_ZERO);
4825 }
4826 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4827 }
4828 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4829 }
4830
4831 /* Forward declarations: prototypes only, the bodies are defined elsewhere in this file. */
4832 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4833 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4834
/* Allocates `size` bytes with sljit_alloc_memory(), zero-fills them and
   pushes the new record onto the parent's chain: its prev link takes the old
   parent->top, its cc field takes `ccstart`, and parent->top is updated.
   If the compiler reports an error after the allocation, the enclosing
   function returns `error`.
   The expansion site must have `compiler`, `backtrack` and `parent` in scope. */
4835 #define PUSH_BACKTRACK(size, ccstart, error) \
4836 do \
4837 { \
4838 backtrack = sljit_alloc_memory(compiler, (size)); \
4839 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4840 return error; \
4841 memset(backtrack, 0, size); \
4842 backtrack->prev = parent->top; \
4843 backtrack->cc = (ccstart); \
4844 parent->top = backtrack; \
4845 } \
4846 while (0)
4847
/* Variant of PUSH_BACKTRACK for functions returning void: the steps are the
   same, but on a compiler error the enclosing function returns without a
   value. Also needs `compiler`, `backtrack` and `parent` in scope. */
4848 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
4849 do \
4850 { \
4851 backtrack = sljit_alloc_memory(compiler, (size)); \
4852 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4853 return; \
4854 memset(backtrack, 0, size); \
4855 backtrack->prev = parent->top; \
4856 backtrack->cc = (ccstart); \
4857 parent->top = backtrack; \
4858 } \
4859 while (0)
4860
/* Casts the local `backtrack` pointer to a pointer to `type`. */
4861 #define BACKTRACK_AS(type) ((type *)backtrack)
4862
4863 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4864 {
4865 DEFINE_COMPILER;
4866 int offset = GET2(cc, 1) << 1;
4867 struct sljit_jump *jump = NULL;
4868 struct sljit_jump *partial;
4869 struct sljit_jump *nopartial;
4870
4871 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4872 /* OVECTOR(1) contains the "string begin - 1" constant. */
4873 if (withchecks && !common->jscript_compat)
4874 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4875
4876 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4877 if (common->utf && *cc == OP_REFI)
4878 {
4879 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
4880 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4881 if (withchecks)
4882 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4883
4884 /* Needed to save important temporary registers. */
4885 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4886 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
4887 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4888 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4889 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4890 if (common->mode == JIT_COMPILE)
4891 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4892 else
4893 {
4894 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4895 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4896 check_partial(common, FALSE);
4897 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4898 JUMPHERE(nopartial);
4899 }
4900 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4901 }
4902 else
4903 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4904 {
4905 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4906 if (withchecks)
4907 jump = JUMP(SLJIT_C_ZERO);
4908
4909 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4910 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4911 if (common->mode == JIT_COMPILE)
4912 add_jump(compiler, backtracks, partial);
4913
4914 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4915 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4916
4917 if (common->mode != JIT_COMPILE)
4918 {
4919 nopartial = JUMP(SLJIT_JUMP);
4920 JUMPHERE(partial);
4921 /* TMP2 -= STR_END - STR_PTR */
4922 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4923 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
4924 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4925 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4926 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4927 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4928 JUMPHERE(partial);
4929 check_partial(common, FALSE);
4930 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4931 JUMPHERE(nopartial);
4932 }
4933 }
4934
4935 if (jump != NULL)
4936 {
4937 if (emptyfail)
4938 add_jump(compiler, backtracks, jump);
4939 else
4940 JUMPHERE(jump);
4941 }
4942 return cc + 1 + IMM2_SIZE;
4943 }
4944
4945 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4946 {
4947 DEFINE_COMPILER;
4948 backtrack_common *backtrack;
4949 pcre_uchar type;
4950 struct sljit_label *label;
4951 struct sljit_jump *zerolength;
4952 struct sljit_jump *jump = NULL;
4953 pcre_uchar *ccbegin = cc;
4954 int min = 0, max = 0;
4955 BOOL minimize;
4956
4957 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4958
4959 type = cc[1 + IMM2_SIZE];
4960 minimize = (type & 0x1) != 0;
4961 switch(type)
4962 {
4963 case OP_CRSTAR:
4964 case OP_CRMINSTAR:
4965 min = 0;
4966 max = 0;
4967 cc += 1 + IMM2_SIZE + 1;
4968 break;
4969 case OP_CRPLUS:
4970 case OP_CRMINPLUS:
4971 min = 1;
4972 max = 0;
4973 cc += 1 + IMM2_SIZE + 1;
4974 break;
4975 case OP_CRQUERY:
4976 case OP_CRMINQUERY:
4977 min = 0;
4978 max = 1;
4979 cc += 1 + IMM2_SIZE + 1;
4980 break;
4981 case OP_CRRANGE:
4982 case OP_CRMINRANGE:
4983 min = GET2(cc, 1 + IMM2_SIZE + 1);
4984 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4985 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4986 break;
4987 default:
4988 SLJIT_ASSERT_STOP();
4989 break;
4990 }
4991
4992 if (!minimize)
4993 {
4994 if (min == 0)
4995 {
4996 allocate_stack(common, 2);
4997 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4998 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4999 /* Temporary release of STR_PTR. */
5000 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5001 zerolength = compile_ref_checks(common, ccbegin, NULL);
5002 /* Restore if not zero length. */
5003 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5004 }
5005 else
5006 {
5007 allocate_stack(common, 1);
5008 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5009 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5010 }
5011
5012 if (min > 1 || max > 1)
5013 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5014
5015 label = LABEL();
5016 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5017
5018 if (min > 1 || max > 1)
5019 {
5020 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5021 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5022 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5023 if (min > 1)
5024 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5025 if (max > 1)
5026 {
5027 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5028 allocate_stack(common, 1);
5029 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5030 JUMPTO(SLJIT_JUMP, label);
5031 JUMPHERE(jump);
5032 }
5033 }
5034
5035 if (max == 0)
5036 {
5037 /* Includes min > 1 case as well. */
5038 allocate_stack(common, 1);
5039 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5040 JUMPTO(SLJIT_JUMP, label);
5041 }
5042
5043 JUMPHERE(zerolength);
5044 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5045
5046 decrease_call_count(common);
5047 return cc;
5048 }
5049
5050 allocate_stack(common, 2);
5051 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5052 if (type != OP_CRMINSTAR)
5053 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5054
5055 if (min == 0)
5056 {
5057 zerolength = compile_ref_checks(common, ccbegin, NULL);
5058 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5059 jump = JUMP(SLJIT_JUMP);
5060 }
5061 else
5062 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5063
5064 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5065 if (max > 0)
5066 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5067
5068 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5069 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5070
5071 if (min > 1)
5072 {
5073 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5074 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5075 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5076 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5077 }
5078 else if (max > 0)
5079 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5080
5081 if (jump != NULL)
5082 JUMPHERE(jump);
5083 JUMPHERE(zerolength);
5084
5085 decrease_call_count(common);
5086 return cc;
5087 }
5088
5089 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5090 {
5091 DEFINE_COMPILER;
5092 backtrack_common *backtrack;
5093 recurse_entry *entry = common->entries;
5094 recurse_entry *prev = NULL;
5095 int start = GET(cc, 1);
5096 pcre_uchar *start_cc;
5097
5098 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5099
5100 /* Inlining simple patterns. */
5101 if (get_framesize(common, common->start + start, TRUE) == no_stack)
5102 {
5103 start_cc = common->start + start;
5104 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5105 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5106 return cc + 1 + LINK_SIZE;
5107 }
5108
5109 while (entry != NULL)
5110 {
5111 if (entry->start == start)
5112 break;
5113 prev = entry;
5114 entry = entry->next;
5115 }
5116
5117 if (entry == NULL)
5118 {
5119 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5120 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5121 return NULL;
5122 entry->next = NULL;
5123 entry->entry = NULL;
5124 entry->calls = NULL;
5125 entry->start = start;
5126
5127 if (prev != NULL)
5128 prev->next = entry;
5129 else
5130 common->entries = entry;
5131 }
5132
5133 if (common->has_set_som && common->mark_ptr != 0)
5134 {
5135 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5136 allocate_stack(common, 2);
5137 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5138 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5139 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5140 }
5141 else if (common->has_set_som || common->mark_ptr != 0)
5142 {
5143 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5144 allocate_stack(common, 1);
5145 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5146 }
5147
5148 if (entry->entry == NULL)
5149 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5150 else
5151 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5152 /* Leave if the match is failed. */
5153 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5154 return cc + 1 + LINK_SIZE;
5155 }
5156
5157 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5158 {
5159 const pcre_uchar *begin = arguments->begin;
5160 int *offset_vector = arguments->offsets;
5161 int offset_count = arguments->offset_count;
5162 int i;
5163
5164 if (PUBL(callout) == NULL)
5165 return 0;
5166
5167 callout_block->version = 2;
5168 callout_block->callout_data = arguments->callout_data;
5169
5170 /* Offsets in subject. */
5171 callout_block->subject_length = arguments->end - arguments->begin;
5172 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5173 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5174 #if defined COMPILE_PCRE8
5175 callout_block->subject = (PCRE_SPTR)begin;
5176 #elif defined COMPILE_PCRE16
5177 callout_block->subject = (PCRE_SPTR16)begin;
5178 #elif defined COMPILE_PCRE32
5179 callout_block->subject = (PCRE_SPTR32)begin;
5180 #endif
5181
5182 /* Convert and copy the JIT offset vector to the offset_vector array. */
5183 callout_block->capture_top = 0;
5184 callout_block->offset_vector = offset_vector;
5185 for (i = 2; i < offset_count; i += 2)
5186 {
5187 offset_vector[i] = jit_ovector[i] - begin;
5188 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5189 if (jit_ovector[i] >= begin)
5190 callout_block->capture_top = i;
5191 }
5192
5193 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5194 if (offset_count > 0)
5195 offset_vector[0] = -1;
5196 if (offset_count > 1)
5197 offset_vector[1] = -1;
5198 return (*PUBL(callout))(callout_block);
5199 }
5200
5201 /* Aligning to 8 byte. */
5202 #define CALLOUT_ARG_SIZE \
5203 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5204
5205 #define CALLOUT_ARG_OFFSET(arg) \
5206 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5207
5208 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5209 {
5210 DEFINE_COMPILER;
5211 backtrack_common *backtrack;
5212
5213 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5214
5215 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5216
5217 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5218 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5219 SLJIT_ASSERT(common->capture_last_ptr != 0);
5220 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5221 OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5222
5223 /* These pointer sized fields temporarly stores internal variables. */
5224 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5227
5228 if (common->mark_ptr != 0)
5229 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5231 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5232 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5233
5234 /* Needed to save important temporary registers. */
5235 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5236 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5237 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5238 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5239 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5240 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5241 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5242
5243 /* Check return value. */
5244 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5245 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5246 if (common->forced_quit_label == NULL)
5247 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5248 else
5249 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5250 return cc + 2 + 2 * LINK_SIZE;
5251 }
5252
5253 #undef CALLOUT_ARG_SIZE
5254 #undef CALLOUT_ARG_OFFSET
5255
5256 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5257 {
5258 DEFINE_COMPILER;
5259 int framesize;
5260 int private_data_ptr;
5261 backtrack_common altbacktrack;
5262 pcre_uchar *ccbegin;
5263 pcre_uchar opcode;
5264 pcre_uchar bra = OP_BRA;
5265 jump_list *tmp = NULL;
5266 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5267 jump_list **found;
5268 /* Saving previous accept variables. */
5269 struct sljit_label *save_quit_label = common->quit_label;
5270 struct sljit_label *save_accept_label = common->accept_label;
5271 jump_list *save_quit = common->quit;
5272 jump_list *save_accept = common->accept;
5273 struct sljit_jump *jump;
5274 struct sljit_jump *brajump = NULL;
5275
5276 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5277 {
5278 SLJIT_ASSERT(!conditional);
5279 bra = *cc;
5280 cc++;
5281 }
5282 private_data_ptr = PRIVATE_DATA(cc);
5283 SLJIT_ASSERT(private_data_ptr != 0);
5284 framesize = get_framesize(common, cc, FALSE);
5285 backtrack->framesize = framesize;
5286 backtrack->private_data_ptr = private_data_ptr;
5287 opcode = *cc;
5288 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
5289 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5290 ccbegin = cc;
5291 cc += GET(cc, 1);
5292
5293 if (bra == OP_BRAMINZERO)
5294 {
5295 /* This is a braminzero backtrack path. */
5296 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5297 free_stack(common, 1);
5298 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5299 }
5300
5301 if (framesize < 0)
5302 {
5303 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5304 allocate_stack(common, 1);
5305 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5306 }
5307 else
5308 {
5309 allocate_stack(common, framesize + 2);
5310 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5311 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5313 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5314 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5315 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5316 }
5317
5318 memset(&altbacktrack, 0, sizeof(backtrack_common));
5319 common->quit_label = NULL;
5320 common->quit = NULL;
5321 while (1)
5322 {
5323 common->accept_label = NULL;
5324 common->accept = NULL;
5325 altbacktrack.top = NULL;
5326 altbacktrack.topbacktracks = NULL;
5327
5328 if (*ccbegin == OP_ALT)
5329 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5330
5331 altbacktrack.cc = ccbegin;
5332 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5333 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5334 {
5335 common->quit_label = save_quit_label;
5336 common->accept_label = save_accept_label;
5337 common->quit = save_quit;
5338 common->accept = save_accept;
5339 return NULL;
5340 }
5341 common->accept_label = LABEL();
5342 if (common->accept != NULL)
5343 set_jumps(common->accept, common->accept_label);
5344
5345 /* Reset stack. */
5346 if (framesize < 0)
5347 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5348 else {
5349 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5350 {
5351 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5352 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5353 }
5354 else
5355 {
5356 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5357 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5358 }
5359 }
5360
5361 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5362 {
5363 /* We know that STR_PTR was stored on the top of the stack. */
5364 if (conditional)
5365 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5366 else if (bra == OP_BRAZERO)
5367 {
5368 if (framesize < 0)
5369 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5370 else
5371 {
5372 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5373 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5374 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5375 }
5376 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5377 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5378 }
5379 else if (framesize >= 0)
5380 {
5381 /* For OP_BRA and OP_BRAMINZERO. */
5382 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5383 }
5384 }
5385 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5386
5387 compile_backtrackingpath(common, altbacktrack.top);
5388 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5389 {
5390 common->quit_label = save_quit_label;
5391 common->accept_label = save_accept_label;
5392 common->quit = save_quit;
5393 common->accept = save_accept;
5394 return NULL;
5395 }
5396 set_jumps(altbacktrack.topbacktracks, LABEL());
5397
5398 if (*cc != OP_ALT)
5399 break;
5400
5401 ccbegin = cc;
5402 cc += GET(cc, 1);
5403 }
5404 /* None of them matched. */
5405 if (common->quit != NULL)
5406 set_jumps(common->quit, LABEL());
5407
5408 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5409 {
5410 /* Assert is failed. */
5411 if (conditional || bra == OP_BRAZERO)
5412 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5413
5414 if (framesize < 0)
5415 {
5416 /* The topmost item should be 0. */
5417 if (bra == OP_BRAZERO)
5418 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5419 else
5420 free_stack(common, 1);
5421 }
5422 else
5423 {
5424 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5425 /* The topmost item should be 0. */
5426 if (bra == OP_BRAZERO)
5427 {
5428 free_stack(common, framesize + 1);
5429 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5430 }
5431 else
5432 free_stack(common, framesize + 2);
5433 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5434 }
5435 jump = JUMP(SLJIT_JUMP);
5436 if (bra != OP_BRAZERO)
5437 add_jump(compiler, target, jump);
5438
5439 /* Assert is successful. */
5440 set_jumps(tmp, LABEL());
5441 if (framesize < 0)
5442 {
5443 /* We know that STR_PTR was stored on the top of the stack. */
5444 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5445 /* Keep the STR_PTR on the top of the stack. */
5446 if (bra == OP_BRAZERO)
5447 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5448 else if (bra == OP_BRAMINZERO)
5449 {
5450 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5451 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5452 }
5453 }
5454 else
5455 {
5456 if (bra == OP_BRA)
5457 {
5458 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5459 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5460 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5461 }
5462 else
5463 {
5464 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5465 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5466 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5468 }
5469 }
5470
5471 if (bra == OP_BRAZERO)
5472 {
5473 backtrack->matchingpath = LABEL();
5474 SET_LABEL(jump, backtrack->matchingpath);
5475 }
5476 else if (bra == OP_BRAMINZERO)
5477 {
5478 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5479 JUMPHERE(brajump);
5480 if (framesize >= 0)
5481 {
5482 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5483 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5484 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5485 }
5486 set_jumps(backtrack->common.topbacktracks, LABEL());
5487 }
5488 }
5489 else
5490 {
5491 /* AssertNot is successful. */
5492 if (framesize < 0)
5493 {
5494 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5495 if (bra != OP_BRA)
5496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5497 else
5498 free_stack(common, 1);
5499 }
5500 else
5501 {
5502 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5503 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5504 /* The topmost item should be 0. */
5505 if (bra != OP_BRA)
5506 {
5507 free_stack(common, framesize + 1);
5508 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5509 }
5510 else
5511 free_stack(common, framesize + 2);
5512 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5513 }
5514
5515 if (bra == OP_BRAZERO)
5516 backtrack->matchingpath = LABEL();
5517 else if (bra == OP_BRAMINZERO)
5518 {
5519 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5520 JUMPHERE(brajump);
5521 }
5522
5523 if (bra != OP_BRA)
5524 {
5525 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5526 set_jumps(backtrack->common.topbacktracks, LABEL());
5527 backtrack->common.topbacktracks = NULL;
5528 }
5529 }
5530
5531 common->quit_label = save_quit_label;
5532 common->accept_label = save_accept_label;
5533 common->quit = save_quit;
5534 common->accept = save_accept;
5535 return cc + 1 + LINK_SIZE;
5536 }
5537
5538 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5539 {
5540 int condition = FALSE;
5541 pcre_uchar *slotA = name_table;
5542 pcre_uchar *slotB;
5543 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5544 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5545 sljit_sw no_capture;
5546 int i;
5547
5548 locals += refno & 0xff;
5549 refno >>= 8;
5550 no_capture = locals[1];
5551
5552 for (i = 0; i < name_count; i++)
5553 {
5554 if (GET2(slotA, 0) == refno) break;
5555 slotA += name_entry_size;
5556 }
5557
5558 if (i < name_count)
5559 {
5560 /* Found a name for the number - there can be only one; duplicate names
5561 for different numbers are allowed, but not vice versa. First scan down
5562 for duplicates. */
5563
5564 slotB = slotA;
5565 while (slotB > name_table)
5566 {
5567 slotB -= name_entry_size;
5568 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5569 {
5570 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5571 if (condition) break;
5572 }
5573 else break;
5574 }
5575
5576 /* Scan up for duplicates */
5577 if (!condition)
5578 {
5579 slotB = slotA;
5580 for (i++; i < name_count; i++)
5581 {
5582 slotB += name_entry_size;
5583 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5584 {
5585 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5586 if (condition) break;
5587 }
5588 else break;
5589 }
5590 }
5591 }
5592 return condition;
5593 }
5594
5595 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5596 {
5597 int condition = FALSE;
5598 pcre_uchar *slotA = name_table;
5599 pcre_uchar *slotB;
5600 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5601 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5602 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5603 sljit_uw i;
5604
5605 for (i = 0; i < name_count; i++)
5606 {
5607 if (GET2(slotA, 0) == recno) break;
5608 slotA += name_entry_size;
5609 }
5610
5611 if (i < name_count)
5612 {
5613 /* Found a name for the number - there can be only one; duplicate
5614 names for different numbers are allowed, but not vice versa. First
5615 scan down for duplicates. */
5616
5617 slotB = slotA;
5618 while (slotB > name_table)
5619 {
5620 slotB -= name_entry_size;
5621 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5622 {
5623 condition = GET2(slotB, 0) == group_num;
5624 if (condition) break;
5625 }
5626 else break;
5627 }
5628
5629 /* Scan up for duplicates */
5630 if (!condition)
5631 {
5632 slotB = slotA;
5633 for (i++; i < name_count; i++)
5634 {
5635 slotB += name_entry_size;
5636 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5637 {
5638 condition = GET2(slotB, 0) == group_num;
5639 if (condition) break;
5640 }
5641 else break;
5642 }
5643 }
5644 }
5645 return condition;
5646 }
5647
5648 /*
5649 Handling bracketed expressions is probably the most complex part.
5650
5651 Stack layout naming characters:
5652 S - Push the current STR_PTR
5653 0 - Push a 0 (NULL)
5654 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5655 before the next alternative. Not pushed if there are no alternatives.
5656 M - Any values pushed by the current alternative. Can be empty, or anything.
5657 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5658 L - Push the previous local (pointed to by localptr) to the stack
5659 () - optional values stored on the stack
5660 ()* - optional, can be stored multiple times
5661
5662 The following list shows the regular expression templates, their PCRE byte codes
5663 and stack layout supported by pcre-sljit.
5664
5665 (?:) OP_BRA | OP_KET A M
5666 () OP_CBRA | OP_KET C M
5667 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5668 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5669 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5670 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5671 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5672 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5673 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5674 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5675 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5676 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5677 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5678 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5679 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5680 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5681 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5682 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5683 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5684 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5685 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5686 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5687
5688
5689 Stack layout naming characters:
5690 A - Push the alternative index (starting from 0) on the stack.
5691 Not pushed if there are no alternatives.
5692 M - Any values pushed by the current alternative. Can be empty, or anything.
5693
5694 The next list shows the possible content of a bracket:
5695 (|) OP_*BRA | OP_ALT ... M A
5696 (?()|) OP_*COND | OP_ALT M A
5697 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5698 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5699 Or nothing, if trace is unnecessary
5700 */
5701
5702 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5703 {
5704 DEFINE_COMPILER;
5705 backtrack_common *backtrack;
5706 pcre_uchar opcode;
5707 int private_data_ptr = 0;
5708 int offset = 0;
5709 int stacksize;
5710 pcre_uchar *ccbegin;
5711 pcre_uchar *matchingpath;
5712 pcre_uchar bra = OP_BRA;
5713 pcre_uchar ket;
5714 assert_backtrack *assert;
5715 BOOL has_alternatives;
5716 struct sljit_jump *jump;
5717 struct sljit_jump *skip;
5718 struct sljit_label *rmaxlabel = NULL;
5719 struct sljit_jump *braminzerojump = NULL;
5720
5721 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5722
5723 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5724 {
5725 bra = *cc;
5726 cc++;
5727 opcode = *cc;
5728 }
5729
5730 opcode = *cc;
5731 ccbegin = cc;
5732 matchingpath = ccbegin + 1 + LINK_SIZE;
5733
5734 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5735 {
5736 /* Drop this bracket_backtrack. */
5737 parent->top = backtrack->prev;
5738 return bracketend(cc);
5739 }
5740
5741 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5742 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5743 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5744 cc += GET(cc, 1);
5745
5746 has_alternatives = *cc == OP_ALT;
5747 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5748 {
5749 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5750 if (*matchingpath == OP_NRREF)
5751 {
5752 stacksize = GET2(matchingpath, 1);
5753 if (common->currententry == NULL || stacksize == RREF_ANY)
5754 has_alternatives = FALSE;
5755 else if (common->currententry->start == 0)
5756 has_alternatives = stacksize != 0;
5757 else
5758 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5759 }
5760 }
5761
5762 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5763 opcode = OP_SCOND;
5764 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5765 opcode = OP_ONCE;
5766
5767 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5768 {
5769 /* Capturing brackets has a pre-allocated space. */
5770 offset = GET2(ccbegin, 1 + LINK_SIZE);
5771 if (common->optimized_cbracket[offset] == 0)
5772 {
5773 private_data_ptr = OVECTOR_PRIV(offset);
5774 offset <<= 1;
5775 }
5776 else
5777 {
5778 offset <<= 1;
5779 private_data_ptr = OVECTOR(offset);
5780 }
5781 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5782 matchingpath += IMM2_SIZE;
5783 }
5784 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5785 {
5786 /* Other brackets simply allocate the next entry. */
5787 private_data_ptr = PRIVATE_DATA(ccbegin);
5788 SLJIT_ASSERT(private_data_ptr != 0);
5789 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5790 if (opcode == OP_ONCE)
5791 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5792 }
5793
5794 /* Instructions before the first alternative. */
5795 stacksize = 0;
5796 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5797 stacksize++;
5798 if (bra == OP_BRAZERO)
5799 stacksize++;
5800
5801 if (stacksize > 0)
5802 allocate_stack(common, stacksize);
5803
5804 stacksize = 0;
5805 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5806 {
5807 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5808 stacksize++;
5809 }
5810
5811 if (bra == OP_BRAZERO)
5812 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5813
5814 if (bra == OP_BRAMINZERO)
5815 {
5816 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5817 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5818 if (ket != OP_KETRMIN)
5819 {
5820 free_stack(common, 1);
5821 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5822 }
5823 else
5824 {
5825 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5826 {
5827 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5828 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5829 /* Nothing stored during the first run. */
5830 skip = JUMP(SLJIT_JUMP);
5831 JUMPHERE(jump);
5832 /* Checking zero-length iteration. */
5833 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5834 {
5835 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5836 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5837 }
5838 else
5839 {
5840 /* Except when the whole stack frame must be saved. */
5841 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5842 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
5843 }
5844 JUMPHERE(skip);
5845 }
5846 else
5847 {
5848 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5849 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5850 JUMPHERE(jump);
5851 }
5852 }
5853 }
5854
5855 if (ket == OP_KETRMIN)
5856 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5857
5858 if (ket == OP_KETRMAX)
5859 {
5860 rmaxlabel = LABEL();
5861 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5862 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5863 }
5864
5865 /* Handling capturing brackets and alternatives. */
5866 if (opcode == OP_ONCE)
5867 {
5868 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5869 {
5870 /* Neither capturing brackets nor recursions are not found in the block. */
5871 if (ket == OP_KETRMIN)
5872 {
5873 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5874 allocate_stack(common, 2);
5875 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5876 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5877 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5878 }
5879 else if (ket == OP_KETRMAX || has_alternatives)
5880 {
5881 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5882 allocate_stack(common, 1);
5883 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5884 }
5885 else
5886 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5887 }
5888 else
5889 {
5890 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5891 {
5892 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5893 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5894 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5895 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5896 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5897 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5898 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5899 }
5900 else
5901 {
5902 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5903 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5904 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5905 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5907 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5908 }
5909 }
5910 }
5911 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5912 {
5913 /* Saving the previous values. */
5914 if (common->optimized_cbracket[offset >> 1] != 0)
5915 {
5916 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5917 allocate_stack(common, 2);
5918 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5919 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
5920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5921 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5923 }
5924 else
5925 {
5926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5927 allocate_stack(common, 1);
5928 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5929 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5930 }
5931 }
5932 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5933 {
5934 /* Saving the previous value. */
5935 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5936 allocate_stack(common, 1);
5937 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5938 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5939 }
5940 else if (has_alternatives)
5941 {
5942 /* Pushing the starting string pointer. */
5943 allocate_stack(common, 1);
5944 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5945 }
5946
5947 /* Generating code for the first alternative. */
5948 if (opcode == OP_COND || opcode == OP_SCOND)
5949 {
5950 if (*matchingpath == OP_CREF)
5951 {
5952 SLJIT_ASSERT(has_alternatives);
5953 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5954 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5955 matchingpath += 1 + IMM2_SIZE;
5956 }
5957 else if (*matchingpath == OP_NCREF)
5958 {
5959 SLJIT_ASSERT(has_alternatives);
5960 stacksize = GET2(matchingpath, 1);
5961 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5962
5963 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5964 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5965 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5966 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
5967 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5968 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
5969 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5970 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5971 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
5972
5973 JUMPHERE(jump);
5974 matchingpath += 1 + IMM2_SIZE;
5975 }
5976 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
5977 {
5978 /* Never has other case. */
5979 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5980
5981 stacksize = GET2(matchingpath, 1);
5982 if (common->currententry == NULL)
5983 stacksize = 0;
5984 else if (stacksize == RREF_ANY)
5985 stacksize = 1;
5986 else if (common->currententry->start == 0)
5987 stacksize = stacksize == 0;
5988 else
5989 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5990
5991 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
5992 {
5993 SLJIT_ASSERT(!has_alternatives);
5994 if (stacksize != 0)
5995 matchingpath += 1 + IMM2_SIZE;
5996 else
5997 {
5998 if (*cc == OP_ALT)
5999 {
6000 matchingpath = cc + 1 + LINK_SIZE;
6001 cc += GET(cc, 1);
6002 }
6003 else
6004 matchingpath = cc;
6005 }
6006 }
6007 else
6008 {
6009 SLJIT_ASSERT(has_alternatives);
6010
6011 stacksize = GET2(matchingpath, 1);
6012 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6013 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6014 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6015 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6016 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6017 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6018 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6019 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6020 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6021 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6022 matchingpath += 1 + IMM2_SIZE;
6023 }
6024 }
6025 else
6026 {
6027 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6028 /* Similar code as PUSH_BACKTRACK macro. */
6029 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6030 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6031 return NULL;
6032 memset(assert, 0, sizeof(assert_backtrack));
6033 assert->common.cc = matchingpath;
6034 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6035 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6036 }
6037 }
6038
6039 compile_matchingpath(common, matchingpath, cc, backtrack);
6040 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6041 return NULL;
6042
6043 if (opcode == OP_ONCE)
6044 {
6045 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6046 {
6047 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6048 /* TMP2 which is set here used by OP_KETRMAX below. */
6049 if (ket == OP_KETRMAX)
6050 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6051 else if (ket == OP_KETRMIN)
6052 {
6053 /* Move the STR_PTR to the private_data_ptr. */
6054 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6055 }
6056 }
6057 else
6058 {
6059 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
6060 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw));
6061 if (ket == OP_KETRMAX)
6062 {
6063 /* TMP2 which is set here used by OP_KETRMAX below. */
6064 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6065 }
6066 }
6067 }
6068
6069 stacksize = 0;
6070 if (ket != OP_KET || bra != OP_BRA)
6071 stacksize++;
6072 if (offset != 0)
6073 {
6074 if (common->capture_last_ptr != 0)
6075 stacksize++;
6076 if (common->optimized_cbracket[offset >> 1] == 0)
6077 stacksize += 2;
6078 }
6079 if (has_alternatives && opcode != OP_ONCE)
6080 stacksize++;
6081
6082 if (stacksize > 0)
6083 allocate_stack(common, stacksize);
6084
6085 stacksize = 0;
6086 if (ket != OP_KET || bra != OP_BRA)
6087 {
6088 if (ket != OP_KET)
6089 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6090 else
6091 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6092 stacksize++;
6093 }
6094
6095 if (offset != 0)
6096 {
6097 if (common->capture_last_ptr != 0)
6098 {
6099 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6101 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0);
6102 stacksize++;
6103 }
6104 if (common->optimized_cbracket[offset >> 1] == 0)
6105 {
6106 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6107 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6108 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6109 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6110 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6111 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6113 stacksize += 2;
6114 }
6115 }
6116
6117 if (has_alternatives)
6118 {
6119 if (opcode != OP_ONCE)
6120 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6121 if (ket != OP_KETRMAX)
6122 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6123 }
6124
6125 /* Must be after the matchingpath label. */
6126 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6127 {
6128 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6129 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6130 }
6131
6132 if (ket == OP_KETRMAX)
6133 {
6134 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6135 {
6136 if (has_alternatives)
6137 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6138 /* Checking zero-length iteration. */
6139 if (opcode != OP_ONCE)
6140 {
6141 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6142 /* Drop STR_PTR for greedy plus quantifier. */
6143 if (bra != OP_BRAZERO)
6144 free_stack(common, 1);
6145 }
6146 else
6147 /* TMP2 must contain the starting STR_PTR. */
6148 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6149 }
6150 else
6151 JUMPTO(SLJIT_JUMP, rmaxlabel);
6152 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6153 }
6154
6155 if (bra == OP_BRAZERO)
6156 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6157
6158 if (bra == OP_BRAMINZERO)
6159 {
6160 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6161 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6162 if (braminzerojump != NULL)
6163 {
6164 JUMPHERE(braminzerojump);
6165 /* We need to release the end pointer to perform the
6166 backtrack for the zero-length iteration. When
6167 framesize is < 0, OP_ONCE will do the release itself. */
6168 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6169 {
6170 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6171 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6172 }
6173 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6174 free_stack(common, 1);
6175 }
6176 /* Continue to the normal backtrack. */
6177 }
6178
6179 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6180 decrease_call_count(common);
6181
6182 /* Skip the other alternatives. */
6183 while (*cc == OP_ALT)
6184 cc += GET(cc, 1);
6185 cc += 1 + LINK_SIZE;
6186 return cc;
6187 }
6188
6189 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6190 {
6191 DEFINE_COMPILER;
6192 backtrack_common *backtrack;
6193 pcre_uchar opcode;
6194 int private_data_ptr;
6195 int cbraprivptr = 0;
6196 int framesize;
6197 int stacksize;
6198 int offset = 0;
6199 BOOL zero = FALSE;
6200 pcre_uchar *ccbegin = NULL;
6201 int stack;
6202 struct sljit_label *loop = NULL;
6203 struct jump_list *emptymatch = NULL;
6204
6205 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6206 if (*cc == OP_BRAPOSZERO)
6207 {
6208 zero = TRUE;
6209 cc++;
6210 }
6211
6212 opcode = *cc;
6213 private_data_ptr = PRIVATE_DATA(cc);
6214 SLJIT_ASSERT(private_data_ptr != 0);
6215 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6216 switch(opcode)
6217 {
6218 case OP_BRAPOS:
6219 case OP_SBRAPOS:
6220 ccbegin = cc + 1 + LINK_SIZE;
6221 break;
6222
6223 case OP_CBRAPOS:
6224 case OP_SCBRAPOS:
6225 offset = GET2(cc, 1 + LINK_SIZE);
6226 /* This case cannot be optimized in the same was as
6227 normal capturing brackets. */
6228 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6229 cbraprivptr = OVECTOR_PRIV(offset);
6230 offset <<= 1;
6231 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6232 break;
6233
6234 default:
6235 SLJIT_ASSERT_STOP();
6236 break;
6237 }
6238
6239 framesize = get_framesize(common, cc, FALSE);
6240 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6241 if (framesize < 0)
6242 {
6243 if (offset != 0)
6244 {
6245 stacksize = 2;
6246 if (common->capture_last_ptr != 0)
6247 stacksize++;
6248 }
6249 else
6250 stacksize = 1;
6251
6252 if (!zero)
6253 stacksize++;
6254
6255 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6256 allocate_stack(common, stacksize);
6257 if (framesize == no_frame)
6258 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6259
6260 if (offset != 0)
6261 {
6262 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6263 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6264 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6265 if (common->capture_last_ptr != 0)
6266 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6267 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6268 if (common->capture_last_ptr != 0)
6269 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6270 }
6271 else
6272 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6273
6274 if (!zero)
6275 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6276 }
6277 else
6278 {
6279 stacksize = framesize + 1;
6280 if (!zero)
6281 stacksize++;
6282 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6283 stacksize++;
6284 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6285
6286 allocate_stack(common, stacksize);
6287 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6288 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6289 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6290
6291 stack = 0;
6292 if (!zero)
6293 {
6294 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6295 stack++;
6296 }
6297 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6298 {
6299 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6300 stack++;
6301 }
6302 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6303 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6304 }
6305
6306 if (offset != 0)
6307 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6308
6309 loop = LABEL();
6310 while (*cc != OP_KETRPOS)
6311 {
6312 backtrack->top = NULL;
6313 backtrack->topbacktracks = NULL;
6314 cc += GET(cc, 1);
6315
6316 compile_matchingpath(common, ccbegin, cc, backtrack);
6317 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6318 return NULL;
6319
6320 if (framesize < 0)
6321 {
6322 if (framesize == no_frame)
6323 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6324
6325 if (offset != 0)
6326 {
6327 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6328 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6329 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6330 if (common->capture_last_ptr != 0)
6331 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6332 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6333 }
6334 else
6335 {
6336 if (opcode == OP_SBRAPOS)
6337 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6338 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6339 }
6340
6341 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6342 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6343
6344 if (!zero)
6345 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6346 }
6347 else
6348 {
6349 if (offset != 0)
6350 {
6351 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6352 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6353 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6354 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6355 if (common->capture_last_ptr != 0)
6356 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6357 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6358 }
6359 else
6360 {
6361 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6362 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6363 if (opcode == OP_SBRAPOS)
6364 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6365 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6366 }
6367
6368 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6369 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6370
6371 if (!zero)
6372 {
6373 if (framesize < 0)
6374 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6375 else
6376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6377 }
6378 }
6379 JUMPTO(SLJIT_JUMP, loop);
6380 flush_stubs(common);
6381
6382 compile_backtrackingpath(common, backtrack->top);
6383 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6384 return NULL;
6385 set_jumps(backtrack->topbacktracks, LABEL());
6386
6387 if (framesize < 0)
6388 {
6389 if (offset != 0)
6390 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6391 else
6392 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6393 }
6394 else
6395 {
6396 if (offset != 0)
6397 {
6398 /* Last alternative. */
6399 if (*cc == OP_KETRPOS)
6400 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6401 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6402 }
6403 else
6404 {
6405 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6406 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6407 }
6408 }
6409
6410 if (*cc == OP_KETRPOS)
6411 break;
6412 ccbegin = cc + 1 + LINK_SIZE;
6413 }
6414
6415 backtrack->topbacktracks = NULL;
6416 if (!zero)
6417 {
6418 if (framesize < 0)
6419 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6420 else /* TMP2 is set to [private_data_ptr] above. */
6421 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
6422 }
6423
6424 /* None of them matched. */
6425 set_jumps(emptymatch, LABEL());
6426 decrease_call_count(common);
6427 return cc + 1 + LINK_SIZE;
6428 }
6429
6430 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6431 {
6432 int class_len;
6433
6434 *opcode = *cc;
6435 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6436 {
6437 cc++;
6438 *type = OP_CHAR;
6439 }
6440 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6441 {
6442 cc++;
6443 *type = OP_CHARI;
6444 *opcode -= OP_STARI - OP_STAR;
6445 }
6446 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6447 {
6448 cc++;
6449 *type = OP_NOT;
6450 *opcode -= OP_NOTSTAR - OP_STAR;
6451 }
6452 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6453 {
6454 cc++;
6455 *type = OP_NOTI;
6456 *opcode -= OP_NOTSTARI - OP_STAR;
6457 }
6458 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6459 {
6460 cc++;
6461 *opcode -= OP_TYPESTAR - OP_STAR;
6462 *type = 0;
6463 }
6464 else
6465 {
6466 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6467 *type = *opcode;
6468 cc++;
6469 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6470 *opcode = cc[class_len - 1];
6471 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6472 {
6473 *opcode -= OP_CRSTAR - OP_STAR;
6474 if (end != NULL)
6475 *end = cc + class_len;
6476 }
6477 else
6478 {
6479 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6480 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6481 *arg2 = GET2(cc, class_len);
6482
6483 if (*arg2 == 0)
6484 {
6485 SLJIT_ASSERT(*arg1 != 0);
6486 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6487 }
6488 if (*arg1 == *arg2)
6489 *opcode = OP_EXACT;
6490
6491 if (end != NULL)
6492 *end = cc + class_len + 2 * IMM2_SIZE;
6493 }
6494 return cc;
6495 }
6496
6497 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6498 {
6499 *arg1 = GET2(cc, 0);
6500 cc += IMM2_SIZE;
6501 }
6502
6503 if (*type == 0)
6504 {
6505 *type = *cc;
6506 if (end != NULL)
6507 *end = next_opcode(common, cc);
6508 cc++;
6509 return cc;
6510 }
6511
6512 if (end != NULL)
6513 {
6514 *end = cc + 1;
6515 #ifdef SUPPORT_UTF
6516 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6517 #endif
6518 }
6519 return cc;
6520 }
6521
6522 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6523 {
6524 DEFINE_COMPILER;
6525 backtrack_common *backtrack;
6526 pcre_uchar opcode;
6527 pcre_uchar type;
6528 int arg1 = -1, arg2 = -1;
6529 pcre_uchar* end;
6530 jump_list *nomatch = NULL;
6531 struct sljit_jump *jump = NULL;
6532 struct sljit_label *label;
6533 int private_data_ptr = PRIVATE_DATA(cc);
6534 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6535 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6536 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
6537 int tmp_base, tmp_offset;
6538
6539 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6540
6541 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6542
6543 switch(type)
6544 {
6545 case OP_NOT_DIGIT:
6546 case OP_DIGIT:
6547 case OP_NOT_WHITESPACE:
6548 case OP_WHITESPACE:
6549 case OP_NOT_WORDCHAR:
6550 case OP_WORDCHAR:
6551 case OP_ANY:
6552 case OP_ALLANY:
6553 case OP_ANYBYTE:
6554 case OP_ANYNL:
6555 case OP_NOT_HSPACE:
6556 case OP_HSPACE:
6557 case OP_NOT_VSPACE:
6558 case OP_VSPACE:
6559 case OP_CHAR:
6560 case OP_CHARI:
6561 case OP_NOT:
6562 case OP_NOTI:
6563 case OP_CLASS:
6564 case OP_NCLASS:
6565 tmp_base = TMP3;
6566 tmp_offset = 0;
6567 break;
6568
6569 default:
6570 SLJIT_ASSERT_STOP();
6571 /* Fall through. */
6572
6573 case OP_EXTUNI:
6574 case OP_XCLASS:
6575 case OP_NOTPROP:
6576 case OP_PROP:
6577 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6578 tmp_offset = POSSESSIVE0;
6579 break;
6580 }
6581
6582 switch(opcode)
6583 {
6584 case OP_STAR:
6585 case OP_PLUS:
6586 case OP_UPTO:
6587 case OP_CRRANGE:
6588 if (type == OP_ANYNL || type == OP_EXTUNI)
6589 {
6590 SLJIT_ASSERT(private_data_ptr == 0);
6591 if (opcode == OP_STAR || opcode == OP_UPTO)
6592 {
6593 allocate_stack(common, 2);
6594 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6595 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6596 }
6597 else
6598 {
6599 allocate_stack(common, 1);
6600 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6601 }
6602
6603 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6604 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6605
6606 label = LABEL();
6607 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6608 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6609 {
6610 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6611 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6612 if (opcode == OP_CRRANGE && arg2 > 0)
6613 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6614 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6615 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6616 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6617 }
6618
6619 /* We cannot use TMP3 because of this allocate_stack. */
6620 allocate_stack(common, 1);
6621 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6622 JUMPTO(SLJIT_JUMP, label);
6623 if (jump != NULL)
6624 JUMPHERE(jump);
6625 }
6626 else
6627 {
6628 if (opcode == OP_PLUS)
6629 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6630 if (private_data_ptr == 0)
6631 allocate_stack(common, 2);
6632 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6633 if (opcode <= OP_PLUS)
6634 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6635 else
6636 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6637 label = LABEL();
6638 compile_char1_matchingpath(common, type, cc, &nomatch);
6639 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6640 if (opcode <= OP_PLUS)
6641 JUMPTO(SLJIT_JUMP, label);
6642 else if (opcode == OP_CRRANGE && arg1 == 0)
6643 {
6644 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6645 JUMPTO(SLJIT_JUMP, label);
6646 }
6647 else
6648 {
6649 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6650 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6651 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6652 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6653 }
6654 set_jumps(nomatch, LABEL());
6655 if (opcode == OP_CRRANGE)
6656 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6657 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6658 }
6659 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6660 break;
6661
6662 case OP_MINSTAR:
6663 case OP_MINPLUS:
6664 if (opcode == OP_MINPLUS)
6665 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6666 if (private_data_ptr == 0)
6667 allocate_stack(common, 1);
6668 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6669 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6670 break;
6671
6672 case OP_MINUPTO:
6673 case OP_CRMINRANGE:
6674 if (private_data_ptr == 0)
6675 allocate_stack(common, 2);
6676 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6677 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6678 if (opcode == OP_CRMINRANGE)
6679 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6680 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6681 break;
6682
6683 case OP_QUERY:
6684 case OP_MINQUERY:
6685 if (private_data_ptr == 0)
6686 allocate_stack(common, 1);
6687 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6688 if (opcode == OP_QUERY)
6689 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6690 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6691 break;
6692
6693 case OP_EXACT:
6694 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6695 label = LABEL();
6696 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6697 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6698 JUMPTO(SLJIT_C_NOT_ZERO, label);
6699 break;
6700
6701 case OP_POSSTAR:
6702 case OP_POSPLUS:
6703 case OP_POSUPTO:
6704 if (opcode == OP_POSPLUS)
6705 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6706 if (opcode == OP_POSUPTO)
6707 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6708 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6709 label = LABEL();
6710 compile_char1_matchingpath(common, type, cc, &nomatch);
6711 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6712 if (opcode != OP_POSUPTO)
6713 JUMPTO(SLJIT_JUMP, label);
6714 else
6715 {
6716 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6717 JUMPTO(SLJIT_C_NOT_ZERO, label);
6718 }
6719 set_jumps(nomatch, LABEL());
6720 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6721 break;
6722
6723 case OP_POSQUERY:
6724 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6725 compile_char1_matchingpath(common, type, cc, &nomatch);
6726 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6727 set_jumps(nomatch, LABEL());
6728 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6729 break;
6730
6731 default:
6732 SLJIT_ASSERT_STOP();
6733 break;
6734 }
6735
6736 decrease_call_count(common);
6737 return end;
6738 }
6739
6740 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6741 {
6742 DEFINE_COMPILER;
6743 backtrack_common *backtrack;
6744
6745 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6746
6747 if (*cc == OP_FAIL)
6748 {
6749 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6750 return cc + 1;
6751 }