/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1268 - (show annotations)
Mon Mar 4 08:42:15 2013 UTC (6 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 278756 byte(s)
Support \p{Xuc} in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined to a non-zero value. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
86
87 /*
Short summary of the backtracking mechanism employed by the JIT code generator:
89
The code generator follows the recursive nature of the Perl-compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) or
vertical (sub-expression) (see struct backtrack_common for more details).
95
96 'ab' - 'a' and 'b' regexps are concatenated
97 'a+' - 'a' is the sub-expression of the '+' operator
98
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive on all CPUs, so we avoid taken
branches on the matching path.
104
105 Greedy star operator (*) :
106 Matching path: match happens.
107 Backtrack path: match failed.
108 Non-greedy star operator (*?) :
109 Matching path: no need to perform a match.
110 Backtrack path: match is required.
111
The following example shows how code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
suppose we have the following regular expression:
115
116 A(B|C)D
117
118 The generated code will be the following:
119
120 A matching path
121 '(' matching path (pushing arguments to the stack)
122 B matching path
123 ')' matching path (pushing arguments to the stack)
124 D matching path
125 return with successful match
126
127 D backtrack path
128 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
129 B backtrack path
130 C expected path
131 jump to D matching path
132 C backtrack path
133 A backtrack path
134
Notice that the order of the backtrack code paths is the opposite of the order
of the fast code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
141 */
142
143 /*
144 Saved stack frames:
145
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
150
151 The stack frames are stored in a chain list, and have the following format:
152 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
153
154 Thus we can restore the private data to a particular point in the stack.
155 */
156
/* Argument block whose address is the single parameter of a jit_function.
Pointer members are declared first; the int and byte-sized members follow. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  const pcre_uchar *str;
  const pcre_uchar *begin;
  const pcre_uchar *end;
  int *offsets;
  pcre_uchar *uchar_ptr;
  pcre_uchar *mark_ptr;
  void *callout_data;
  /* Everything else after. */
  int real_offset_count;
  int offset_count;
  int call_limit;
  pcre_uint8 notbol;
  pcre_uint8 noteol;
  pcre_uint8 notempty;
  pcre_uint8 notempty_atstart;
} jit_arguments;
176
/* Holds one code pointer and one size for each JIT compile mode (both arrays
have JIT_NUMBER_OF_COMPILE_MODES entries), together with a callback, an
opaque userdata pointer and the top_bracket value. */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  PUBL(jit_callback) callback;
  void *userdata;
  pcre_uint32 top_bracket;
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
} executable_functions;
184
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
189
/* Node of a singly linked list; each node pairs a jump (start) with a
label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
195
/* Negative sentinel values; get_framesize is documented as returning a
frame_types value (always < 0) when no frame is needed. */
enum frame_types { no_frame = -1, no_stack = -2 };
197
/* Pointer to a function, declared with the SLJIT_CALL attribute macro, that
takes a jit_arguments block and returns an int. */
typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
199
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants. */
/* Common header of every backtrack record. The more specific *_backtrack
structures embed it as their first member. */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  pcre_uchar *cc;
} backtrack_common;
214
/* Backtrack record that extends backtrack_common (kept as the first member)
with a jump list, a frame size, a private data offset and a label. */
typedef struct assert_backtrack {
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 (-1) if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
225
/* Backtrack record for brackets; extends backtrack_common (kept as the
first member). The members of u share storage, so only one of them is
meaningful for a given record. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. -1 if not needed. */
    int framesize;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
245
/* Backtrack record that extends backtrack_common (kept as the first member)
with a private data offset, a frame size and a stack size. */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
255
/* Backtrack record that extends backtrack_common (kept as the first member)
with a single matching-path label. */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
260
/* Backtrack record for iterators; extends backtrack_common (kept as the
first member) with the label of the next iteration. */
typedef struct iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} iterator_backtrack;
266
/* Node of a singly linked list describing one recursion target. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry. */
  struct sljit_label *entry;
  /* Collects the calls until the function is created. */
  jump_list *calls;
  /* Points to the starting opcode. */
  int start;
} recurse_entry;
276
/* Backtrack record that extends backtrack_common (kept as the first member)
with an inlined_pattern flag. */
typedef struct recurse_backtrack {
  backtrack_common common;
  BOOL inlined_pattern;
} recurse_backtrack;
281
/* Used to size the digits array in compiler_common, which has
2 + MAX_RANGE_SIZE entries. */
#define MAX_RANGE_SIZE 6
283
/* State shared by the code generator functions, which receive a pointer to
it as their `common` argument. Which optional members exist depends on
SUPPORT_UTF, SUPPORT_UCP and the COMPILE_PCRE8/16/32 build mode. */
typedef struct compiler_common {
  struct sljit_compiler *compiler;
  pcre_uchar *start;

  /* Maps private data offset to each opcode. Indexed by the opcode's
  distance from start. */
  int *private_data_ptrs;
  /* Tells whether the capturing bracket is optimized. */
  pcre_uint8 *optimized_cbracket;
  /* Starting offset of private data for capturing brackets. */
  int cbraptr;
  /* OVector starting point. Must be divisible by 2. */
  int ovector_start;
  /* Last known position of the requested byte. */
  int req_char_ptr;
  /* Head of the last recursion. get_private_data_length assigns it while
  it is still 0. */
  int recursive_head_ptr;
  /* First inspected character for partial matching. */
  int start_used_ptr;
  /* Starting pointer for partial soft matches. */
  int hit_start;
  /* End pointer of the first line. */
  int first_line_end;
  /* Points to the marked string. get_private_data_length assigns it while
  it is still 0. */
  int mark_ptr;
  /* Points to the last matched capture block index.
  get_private_data_length assigns it while it is still 0. */
  int capture_last_ptr;

  /* Flipped and lower case tables. */
  const pcre_uint8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
  int mode;
  /* Newline control. */
  int nltype;
  int newline;
  int bsr_nltype;
  /* Dollar endonly. */
  int endonly;
  /* Set by get_private_data_length when it meets OP_SET_SOM. */
  BOOL has_set_som;
  /* Tables. */
  sljit_sw ctypes;
  int digits[2 + MAX_RANGE_SIZE];
  /* Named capturing brackets. */
  sljit_uw name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *forced_quit_label;
  struct sljit_label *accept_label;
  stub_list *stubs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *forced_quit;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  BOOL jscript_compat;
#ifdef SUPPORT_UTF
  BOOL utf;
#ifdef SUPPORT_UCP
  BOOL use_ucp;
#endif
#ifndef COMPILE_PCRE32
  jump_list *utfreadchar;
#endif
#ifdef COMPILE_PCRE8
  jump_list *utfreadtype8;
#endif
#endif /* SUPPORT_UTF */
#ifdef SUPPORT_UCP
  jump_list *getucd;
#endif
} compiler_common;
369
370 /* For byte_sequence_compare. */
371
/* Context for byte_sequence_compare. The ucharptr member and the c / oc
unions exist only when SLJIT_UNALIGNED is defined to a non-zero value.
The two unions have the same layout; their asuchars array has 4, 2 or 1
elements in the 8-, 16- and 32-bit builds respectively. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
403
/* Undefine sljit macros. CMP is redefined below as a shortcut. */
#undef CMP

/* Used for accessing the elements of the stack. Yields a negative byte
offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw),
and so on. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Symbolic names for the sljit registers used by this file. */
#define TMP1 SLJIT_SCRATCH_REG1
#define TMP2 SLJIT_SCRATCH_REG3
#define TMP3 SLJIT_TEMPORARY_EREG2
#define STR_PTR SLJIT_SAVED_REG1
#define STR_END SLJIT_SAVED_REG2
#define STACK_TOP SLJIT_SCRATCH_REG2
#define STACK_LIMIT SLJIT_SAVED_REG3
#define ARGUMENTS SLJIT_SAVED_EREG1
#define CALL_COUNT SLJIT_SAVED_EREG2
#define RETURN_ADDR SLJIT_TEMPORARY_EREG1

/* Local space layout. Each value is a byte offset counted in
sizeof(sljit_sw) units. */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define CALL_LIMIT (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. These macros expect a compiler_common pointer named `common`
to be in scope. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])

/* Move operations matching the code unit width of the build mode. */
#if defined COMPILE_PCRE8
#define MOV_UCHAR SLJIT_MOV_UB
#define MOVU_UCHAR SLJIT_MOVU_UB
#elif defined COMPILE_PCRE16
#define MOV_UCHAR SLJIT_MOV_UH
#define MOVU_UCHAR SLJIT_MOVU_UH
#elif defined COMPILE_PCRE32
#define MOV_UCHAR SLJIT_MOV_UI
#define MOVU_UCHAR SLJIT_MOVU_UI
#else
#error Unsupported compiling mode
#endif

/* Shortcuts. DEFINE_COMPILER declares the local variable `compiler` (taken
from common->compiler) that every other shortcut passes to the sljit call
it wraps. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label and binds an existing jump to it. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare and binds it to an already existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
477
478 static pcre_uchar* bracketend(pcre_uchar* cc)
479 {
480 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
481 do cc += GET(cc, 1); while (*cc == OP_ALT);
482 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
483 cc += 1 + LINK_SIZE;
484 return cc;
485 }
486
/* Functions which might need modification whenever a new opcode is supported:
488 next_opcode
489 get_private_data_length
490 set_private_data_ptrs
491 get_framesize
492 init_frame
493 get_private_data_length_for_copy
494 copy_private_data
495 compile_matchingpath
496 compile_backtrackingpath
497 */
498
499 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
500 {
501 SLJIT_UNUSED_ARG(common);
502 switch(*cc)
503 {
504 case OP_SOD:
505 case OP_SOM:
506 case OP_SET_SOM:
507 case OP_NOT_WORD_BOUNDARY:
508 case OP_WORD_BOUNDARY:
509 case OP_NOT_DIGIT:
510 case OP_DIGIT:
511 case OP_NOT_WHITESPACE:
512 case OP_WHITESPACE:
513 case OP_NOT_WORDCHAR:
514 case OP_WORDCHAR:
515 case OP_ANY:
516 case OP_ALLANY:
517 case OP_NOTPROP:
518 case OP_PROP:
519 case OP_ANYNL:
520 case OP_NOT_HSPACE:
521 case OP_HSPACE:
522 case OP_NOT_VSPACE:
523 case OP_VSPACE:
524 case OP_EXTUNI:
525 case OP_EODN:
526 case OP_EOD:
527 case OP_CIRC:
528 case OP_CIRCM:
529 case OP_DOLL:
530 case OP_DOLLM:
531 case OP_CRSTAR:
532 case OP_CRMINSTAR:
533 case OP_CRPLUS:
534 case OP_CRMINPLUS:
535 case OP_CRQUERY:
536 case OP_CRMINQUERY:
537 case OP_CRRANGE:
538 case OP_CRMINRANGE:
539 case OP_CLASS:
540 case OP_NCLASS:
541 case OP_REF:
542 case OP_REFI:
543 case OP_RECURSE:
544 case OP_CALLOUT:
545 case OP_ALT:
546 case OP_KET:
547 case OP_KETRMAX:
548 case OP_KETRMIN:
549 case OP_KETRPOS:
550 case OP_REVERSE:
551 case OP_ASSERT:
552 case OP_ASSERT_NOT:
553 case OP_ASSERTBACK:
554 case OP_ASSERTBACK_NOT:
555 case OP_ONCE:
556 case OP_ONCE_NC:
557 case OP_BRA:
558 case OP_BRAPOS:
559 case OP_CBRA:
560 case OP_CBRAPOS:
561 case OP_COND:
562 case OP_SBRA:
563 case OP_SBRAPOS:
564 case OP_SCBRA:
565 case OP_SCBRAPOS:
566 case OP_SCOND:
567 case OP_CREF:
568 case OP_NCREF:
569 case OP_RREF:
570 case OP_NRREF:
571 case OP_DEF:
572 case OP_BRAZERO:
573 case OP_BRAMINZERO:
574 case OP_BRAPOSZERO:
575 case OP_COMMIT:
576 case OP_FAIL:
577 case OP_ACCEPT:
578 case OP_ASSERT_ACCEPT:
579 case OP_CLOSE:
580 case OP_SKIPZERO:
581 return cc + PRIV(OP_lengths)[*cc];
582
583 case OP_CHAR:
584 case OP_CHARI:
585 case OP_NOT:
586 case OP_NOTI:
587 case OP_STAR:
588 case OP_MINSTAR:
589 case OP_PLUS:
590 case OP_MINPLUS:
591 case OP_QUERY:
592 case OP_MINQUERY:
593 case OP_UPTO:
594 case OP_MINUPTO:
595 case OP_EXACT:
596 case OP_POSSTAR:
597 case OP_POSPLUS:
598 case OP_POSQUERY:
599 case OP_POSUPTO:
600 case OP_STARI:
601 case OP_MINSTARI:
602 case OP_PLUSI:
603 case OP_MINPLUSI:
604 case OP_QUERYI:
605 case OP_MINQUERYI:
606 case OP_UPTOI:
607 case OP_MINUPTOI:
608 case OP_EXACTI:
609 case OP_POSSTARI:
610 case OP_POSPLUSI:
611 case OP_POSQUERYI:
612 case OP_POSUPTOI:
613 case OP_NOTSTAR:
614 case OP_NOTMINSTAR:
615 case OP_NOTPLUS:
616 case OP_NOTMINPLUS:
617 case OP_NOTQUERY:
618 case OP_NOTMINQUERY:
619 case OP_NOTUPTO:
620 case OP_NOTMINUPTO:
621 case OP_NOTEXACT:
622 case OP_NOTPOSSTAR:
623 case OP_NOTPOSPLUS:
624 case OP_NOTPOSQUERY:
625 case OP_NOTPOSUPTO:
626 case OP_NOTSTARI:
627 case OP_NOTMINSTARI:
628 case OP_NOTPLUSI:
629 case OP_NOTMINPLUSI:
630 case OP_NOTQUERYI:
631 case OP_NOTMINQUERYI:
632 case OP_NOTUPTOI:
633 case OP_NOTMINUPTOI:
634 case OP_NOTEXACTI:
635 case OP_NOTPOSSTARI:
636 case OP_NOTPOSPLUSI:
637 case OP_NOTPOSQUERYI:
638 case OP_NOTPOSUPTOI:
639 cc += PRIV(OP_lengths)[*cc];
640 #ifdef SUPPORT_UTF
641 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
642 #endif
643 return cc;
644
645 /* Special cases. */
646 case OP_TYPESTAR:
647 case OP_TYPEMINSTAR:
648 case OP_TYPEPLUS:
649 case OP_TYPEMINPLUS:
650 case OP_TYPEQUERY:
651 case OP_TYPEMINQUERY:
652 case OP_TYPEUPTO:
653 case OP_TYPEMINUPTO:
654 case OP_TYPEEXACT:
655 case OP_TYPEPOSSTAR:
656 case OP_TYPEPOSPLUS:
657 case OP_TYPEPOSQUERY:
658 case OP_TYPEPOSUPTO:
659 return cc + PRIV(OP_lengths)[*cc] - 1;
660
661 case OP_ANYBYTE:
662 #ifdef SUPPORT_UTF
663 if (common->utf) return NULL;
664 #endif
665 return cc + 1;
666
667 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
668 case OP_XCLASS:
669 return cc + GET(cc, 1);
670 #endif
671
672 case OP_MARK:
673 return cc + 1 + 2 + cc[1];
674
675 default:
676 return NULL;
677 }
678 }
679
/* Groups of case labels shared by get_private_data_length and
set_private_data_ptrs. In both functions the *_1 groups request one private
data word (space = 1), and the *_2A / *_2B groups request two (space = 2;
for the TYPE variants only when the repeated type is neither OP_ANYNL nor
OP_EXTUNI). */

/* Single character iterators, one word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
  case OP_MINSTAR: \
  case OP_MINPLUS: \
  case OP_QUERY: \
  case OP_MINQUERY: \
  case OP_MINSTARI: \
  case OP_MINPLUSI: \
  case OP_QUERYI: \
  case OP_MINQUERYI: \
  case OP_NOTMINSTAR: \
  case OP_NOTMINPLUS: \
  case OP_NOTQUERY: \
  case OP_NOTMINQUERY: \
  case OP_NOTMINSTARI: \
  case OP_NOTMINPLUSI: \
  case OP_NOTQUERYI: \
  case OP_NOTMINQUERYI:

/* Single character iterators, two words, no repeat count operand. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
  case OP_STAR: \
  case OP_PLUS: \
  case OP_STARI: \
  case OP_PLUSI: \
  case OP_NOTSTAR: \
  case OP_NOTPLUS: \
  case OP_NOTSTARI: \
  case OP_NOTPLUSI:

/* Single character iterators, two words, skipped with an extra IMM2_SIZE. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
  case OP_UPTO: \
  case OP_MINUPTO: \
  case OP_UPTOI: \
  case OP_MINUPTOI: \
  case OP_NOTUPTO: \
  case OP_NOTMINUPTO: \
  case OP_NOTUPTOI: \
  case OP_NOTMINUPTOI:

/* Character type iterators, one word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
  case OP_TYPEMINSTAR: \
  case OP_TYPEMINPLUS: \
  case OP_TYPEQUERY: \
  case OP_TYPEMINQUERY:

/* Character type iterators, two words, no repeat count operand. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
  case OP_TYPESTAR: \
  case OP_TYPEPLUS:

/* Character type iterators, two words, skipped with an extra IMM2_SIZE. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
  case OP_TYPEUPTO: \
  case OP_TYPEMINUPTO:
731
732 static int get_class_iterator_size(pcre_uchar *cc)
733 {
734 switch(*cc)
735 {
736 case OP_CRSTAR:
737 case OP_CRPLUS:
738 return 2;
739
740 case OP_CRMINSTAR:
741 case OP_CRMINPLUS:
742 case OP_CRQUERY:
743 case OP_CRMINQUERY:
744 return 1;
745
746 case OP_CRRANGE:
747 case OP_CRMINRANGE:
748 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
749 return 0;
750 return 2;
751
752 default:
753 return 0;
754 }
755 }
756
757 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
758 {
759 int private_data_length = 0;
760 pcre_uchar *alternative;
761 pcre_uchar *name;
762 pcre_uchar *end = NULL;
763 int space, size, i;
764 pcre_uint32 bracketlen;
765
766 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
767 while (cc < ccend)
768 {
769 space = 0;
770 size = 0;
771 bracketlen = 0;
772 switch(*cc)
773 {
774 case OP_SET_SOM:
775 common->has_set_som = TRUE;
776 cc += 1;
777 break;
778
779 case OP_REF:
780 case OP_REFI:
781 common->optimized_cbracket[GET2(cc, 1)] = 0;
782 cc += 1 + IMM2_SIZE;
783 break;
784
785 case OP_ASSERT:
786 case OP_ASSERT_NOT:
787 case OP_ASSERTBACK:
788 case OP_ASSERTBACK_NOT:
789 case OP_ONCE:
790 case OP_ONCE_NC:
791 case OP_BRAPOS:
792 case OP_SBRA:
793 case OP_SBRAPOS:
794 private_data_length += sizeof(sljit_sw);
795 bracketlen = 1 + LINK_SIZE;
796 break;
797
798 case OP_CBRAPOS:
799 case OP_SCBRAPOS:
800 private_data_length += sizeof(sljit_sw);
801 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
802 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
803 break;
804
805 case OP_COND:
806 case OP_SCOND:
807 /* Only AUTO_CALLOUT can insert this opcode. We do
808 not intend to support this case. */
809 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
810 return -1;
811
812 if (*cc == OP_COND)
813 {
814 /* Might be a hidden SCOND. */
815 alternative = cc + GET(cc, 1);
816 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
817 private_data_length += sizeof(sljit_sw);
818 }
819 else
820 private_data_length += sizeof(sljit_sw);
821 bracketlen = 1 + LINK_SIZE;
822 break;
823
824 case OP_CREF:
825 i = GET2(cc, 1);
826 common->optimized_cbracket[i] = 0;
827 cc += 1 + IMM2_SIZE;
828 break;
829
830 case OP_NCREF:
831 bracketlen = GET2(cc, 1);
832 name = (pcre_uchar *)common->name_table;
833 alternative = name;
834 for (i = 0; i < common->name_count; i++)
835 {
836 if (GET2(name, 0) == bracketlen) break;
837 name += common->name_entry_size;
838 }
839 SLJIT_ASSERT(i != common->name_count);
840
841 for (i = 0; i < common->name_count; i++)
842 {
843 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
844 common->optimized_cbracket[GET2(alternative, 0)] = 0;
845 alternative += common->name_entry_size;
846 }
847 bracketlen = 0;
848 cc += 1 + IMM2_SIZE;
849 break;
850
851 case OP_BRA:
852 bracketlen = 1 + LINK_SIZE;
853 break;
854
855 case OP_CBRA:
856 case OP_SCBRA:
857 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
858 break;
859
860 CASE_ITERATOR_PRIVATE_DATA_1
861 space = 1;
862 size = -2;
863 break;
864
865 CASE_ITERATOR_PRIVATE_DATA_2A
866 space = 2;
867 size = -2;
868 break;
869
870 CASE_ITERATOR_PRIVATE_DATA_2B
871 space = 2;
872 size = -(2 + IMM2_SIZE);
873 break;
874
875 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
876 space = 1;
877 size = 1;
878 break;
879
880 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
881 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
882 space = 2;
883 size = 1;
884 break;
885
886 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
887 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
888 space = 2;
889 size = 1 + IMM2_SIZE;
890 break;
891
892 case OP_CLASS:
893 case OP_NCLASS:
894 size += 1 + 32 / sizeof(pcre_uchar);
895 space = get_class_iterator_size(cc + size);
896 break;
897
898 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
899 case OP_XCLASS:
900 size = GET(cc, 1);
901 space = get_class_iterator_size(cc + size);
902 break;
903 #endif
904
905 case OP_RECURSE:
906 /* Set its value only once. */
907 if (common->recursive_head_ptr == 0)
908 {
909 common->recursive_head_ptr = common->ovector_start;
910 common->ovector_start += sizeof(sljit_sw);
911 }
912 cc += 1 + LINK_SIZE;
913 break;
914
915 case OP_CALLOUT:
916 if (common->capture_last_ptr == 0)
917 {
918 common->capture_last_ptr = common->ovector_start;
919 common->ovector_start += sizeof(sljit_sw);
920 }
921 cc += 2 + 2 * LINK_SIZE;
922 break;
923
924 case OP_MARK:
925 if (common->mark_ptr == 0)
926 {
927 common->mark_ptr = common->ovector_start;
928 common->ovector_start += sizeof(sljit_sw);
929 }
930 cc += 1 + 2 + cc[1];
931 break;
932
933 default:
934 cc = next_opcode(common, cc);
935 if (cc == NULL)
936 return -1;
937 break;
938 }
939
940 if (space > 0 && cc >= end)
941 private_data_length += sizeof(sljit_sw) * space;
942
943 if (size != 0)
944 {
945 if (size < 0)
946 {
947 cc += -size;
948 #ifdef SUPPORT_UTF
949 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
950 #endif
951 }
952 else
953 cc += size;
954 }
955
956 if (bracketlen != 0)
957 {
958 if (cc >= end)
959 {
960 end = bracketend(cc);
961 if (end[-1 - LINK_SIZE] == OP_KET)
962 end = NULL;
963 }
964 cc += bracketlen;
965 }
966 }
967 return private_data_length;
968 }
969
970 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
971 {
972 pcre_uchar *cc = common->start;
973 pcre_uchar *alternative;
974 pcre_uchar *end = NULL;
975 int space, size, bracketlen;
976
977 while (cc < ccend)
978 {
979 space = 0;
980 size = 0;
981 bracketlen = 0;
982 switch(*cc)
983 {
984 case OP_ASSERT:
985 case OP_ASSERT_NOT:
986 case OP_ASSERTBACK:
987 case OP_ASSERTBACK_NOT:
988 case OP_ONCE:
989 case OP_ONCE_NC:
990 case OP_BRAPOS:
991 case OP_SBRA:
992 case OP_SBRAPOS:
993 case OP_SCOND:
994 common->private_data_ptrs[cc - common->start] = private_data_ptr;
995 private_data_ptr += sizeof(sljit_sw);
996 bracketlen = 1 + LINK_SIZE;
997 break;
998
999 case OP_CBRAPOS:
1000 case OP_SCBRAPOS:
1001 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1002 private_data_ptr += sizeof(sljit_sw);
1003 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1004 break;
1005
1006 case OP_COND:
1007 /* Might be a hidden SCOND. */
1008 alternative = cc + GET(cc, 1);
1009 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1010 {
1011 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1012 private_data_ptr += sizeof(sljit_sw);
1013 }
1014 bracketlen = 1 + LINK_SIZE;
1015 break;
1016
1017 case OP_BRA:
1018 bracketlen = 1 + LINK_SIZE;
1019 break;
1020
1021 case OP_CBRA:
1022 case OP_SCBRA:
1023 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1024 break;
1025
1026 CASE_ITERATOR_PRIVATE_DATA_1
1027 space = 1;
1028 size = -2;
1029 break;
1030
1031 CASE_ITERATOR_PRIVATE_DATA_2A
1032 space = 2;
1033 size = -2;
1034 break;
1035
1036 CASE_ITERATOR_PRIVATE_DATA_2B
1037 space = 2;
1038 size = -(2 + IMM2_SIZE);
1039 break;
1040
1041 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1042 space = 1;
1043 size = 1;
1044 break;
1045
1046 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1047 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1048 space = 2;
1049 size = 1;
1050 break;
1051
1052 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1053 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1054 space = 2;
1055 size = 1 + IMM2_SIZE;
1056 break;
1057
1058 case OP_CLASS:
1059 case OP_NCLASS:
1060 size += 1 + 32 / sizeof(pcre_uchar);
1061 space = get_class_iterator_size(cc + size);
1062 break;
1063
1064 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1065 case OP_XCLASS:
1066 size = GET(cc, 1);
1067 space = get_class_iterator_size(cc + size);
1068 break;
1069 #endif
1070
1071 default:
1072 cc = next_opcode(common, cc);
1073 SLJIT_ASSERT(cc != NULL);
1074 break;
1075 }
1076
1077 if (space > 0 && cc >= end)
1078 {
1079 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1080 private_data_ptr += sizeof(sljit_sw) * space;
1081 }
1082
1083 if (size != 0)
1084 {
1085 if (size < 0)
1086 {
1087 cc += -size;
1088 #ifdef SUPPORT_UTF
1089 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1090 #endif
1091 }
1092 else
1093 cc += size;
1094 }
1095
1096 if (bracketlen > 0)
1097 {
1098 if (cc >= end)
1099 {
1100 end = bracketend(cc);
1101 if (end[-1 - LINK_SIZE] == OP_KET)
1102 end = NULL;
1103 }
1104 cc += bracketlen;
1105 }
1106 }
1107 }
1108
1109 /* Returns with a frame_types (always < 0) if no need for frame. */
1110 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1111 {
1112 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1113 int length = 0;
1114 int possessive = 0;
1115 BOOL stack_restore = FALSE;
1116 BOOL setsom_found = recursive;
1117 BOOL setmark_found = recursive;
1118 /* The last capture is a local variable even for recursions. */
1119 BOOL capture_last_found = FALSE;
1120
1121 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1122 {
1123 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1124 /* This is correct regardless of common->capture_last_ptr. */
1125 capture_last_found = TRUE;
1126 }
1127
1128 cc = next_opcode(common, cc);
1129 SLJIT_ASSERT(cc != NULL);
1130 while (cc < ccend)
1131 switch(*cc)
1132 {
1133 case OP_SET_SOM:
1134 SLJIT_ASSERT(common->has_set_som);
1135 stack_restore = TRUE;
1136 if (!setsom_found)
1137 {
1138 length += 2;
1139 setsom_found = TRUE;
1140 }
1141 cc += 1;
1142 break;
1143
1144 case OP_MARK:
1145 SLJIT_ASSERT(common->mark_ptr != 0);
1146 stack_restore = TRUE;
1147 if (!setmark_found)
1148 {
1149 length += 2;
1150 setmark_found = TRUE;
1151 }
1152 cc += 1 + 2 + cc[1];
1153 break;
1154
1155 case OP_RECURSE:
1156 stack_restore = TRUE;
1157 if (common->has_set_som && !setsom_found)
1158 {
1159 length += 2;
1160 setsom_found = TRUE;
1161 }
1162 if (common->mark_ptr != 0 && !setmark_found)
1163 {
1164 length += 2;
1165 setmark_found = TRUE;
1166 }
1167 if (common->capture_last_ptr != 0 && !capture_last_found)
1168 {
1169 length += 2;
1170 capture_last_found = TRUE;
1171 }
1172 cc += 1 + LINK_SIZE;
1173 break;
1174
1175 case OP_CBRA:
1176 case OP_CBRAPOS:
1177 case OP_SCBRA:
1178 case OP_SCBRAPOS:
1179 stack_restore = TRUE;
1180 if (common->capture_last_ptr != 0 && !capture_last_found)
1181 {
1182 length += 2;
1183 capture_last_found = TRUE;
1184 }
1185 length += 3;
1186 cc += 1 + LINK_SIZE + IMM2_SIZE;
1187 break;
1188
1189 default:
1190 stack_restore = TRUE;
1191 /* Fall through. */
1192
1193 case OP_NOT_WORD_BOUNDARY:
1194 case OP_WORD_BOUNDARY:
1195 case OP_NOT_DIGIT:
1196 case OP_DIGIT:
1197 case OP_NOT_WHITESPACE:
1198 case OP_WHITESPACE:
1199 case OP_NOT_WORDCHAR:
1200 case OP_WORDCHAR:
1201 case OP_ANY:
1202 case OP_ALLANY:
1203 case OP_ANYBYTE:
1204 case OP_NOTPROP:
1205 case OP_PROP:
1206 case OP_ANYNL:
1207 case OP_NOT_HSPACE:
1208 case OP_HSPACE:
1209 case OP_NOT_VSPACE:
1210 case OP_VSPACE:
1211 case OP_EXTUNI:
1212 case OP_EODN:
1213 case OP_EOD:
1214 case OP_CIRC:
1215 case OP_CIRCM:
1216 case OP_DOLL:
1217 case OP_DOLLM:
1218 case OP_CHAR:
1219 case OP_CHARI:
1220 case OP_NOT:
1221 case OP_NOTI:
1222
1223 case OP_EXACT:
1224 case OP_POSSTAR:
1225 case OP_POSPLUS:
1226 case OP_POSQUERY:
1227 case OP_POSUPTO:
1228
1229 case OP_EXACTI:
1230 case OP_POSSTARI:
1231 case OP_POSPLUSI:
1232 case OP_POSQUERYI:
1233 case OP_POSUPTOI:
1234
1235 case OP_NOTEXACT:
1236 case OP_NOTPOSSTAR:
1237 case OP_NOTPOSPLUS:
1238 case OP_NOTPOSQUERY:
1239 case OP_NOTPOSUPTO:
1240
1241 case OP_NOTEXACTI:
1242 case OP_NOTPOSSTARI:
1243 case OP_NOTPOSPLUSI:
1244 case OP_NOTPOSQUERYI:
1245 case OP_NOTPOSUPTOI:
1246
1247 case OP_TYPEEXACT:
1248 case OP_TYPEPOSSTAR:
1249 case OP_TYPEPOSPLUS:
1250 case OP_TYPEPOSQUERY:
1251 case OP_TYPEPOSUPTO:
1252
1253 case OP_CLASS:
1254 case OP_NCLASS:
1255 case OP_XCLASS:
1256
1257 cc = next_opcode(common, cc);
1258 SLJIT_ASSERT(cc != NULL);
1259 break;
1260 }
1261
1262 /* Possessive quantifiers can use a special case. */
1263 if (SLJIT_UNLIKELY(possessive == length))
1264 return stack_restore ? no_frame : no_stack;
1265
1266 if (length > 0)
1267 return length + 1;
1268 return stack_restore ? no_frame : no_stack;
1269 }
1270
/* Emits the code that writes a backtracking frame for the bracket starting
at cc onto the machine stack, beginning at slot stackpos. The opcode walk
mirrors get_framesize(): the same opcodes are visited and the same "found"
flags ensure each local is recorded only once.

Records written (one machine word each, at increasing offsets):
  - start of match / mark / last capture: the negated offset of the local,
    followed by the current value of that local;
  - capturing bracket: OVECTOR(offset) as an immediate (not negated),
    followed by the two current ovector values of that bracket;
  - finally a single 0 word that terminates the frame.

The closing assertion checks that the final write position equals
STACK(stacktop). stacktop is otherwise unused, hence SLJIT_UNUSED_ARG. */
1271 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1272 {
1273 DEFINE_COMPILER;
1274 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
/* In recursive mode start-of-match and mark are treated as already saved. */
1275 BOOL setsom_found = recursive;
1276 BOOL setmark_found = recursive;
1277 /* The last capture is a local variable even for recursions. */
1278 BOOL capture_last_found = FALSE;
1279 int offset;
1280
1281 /* >= 1 + shortest item size (2) */
1282 SLJIT_UNUSED_ARG(stacktop);
1283 SLJIT_ASSERT(stackpos >= stacktop + 2);
1284
/* From here on stackpos is the value produced by STACK() and is advanced
by sizeof(sljit_sw) after every word written. */
1285 stackpos = STACK(stackpos);
/* A non-recursive (S)CBRAPOS bracket is not skipped, so the loop below
records its own capture as well. */
1286 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1287 cc = next_opcode(common, cc);
1288 SLJIT_ASSERT(cc != NULL);
1289 while (cc < ccend)
1290 switch(*cc)
1291 {
1292 case OP_SET_SOM:
1293 SLJIT_ASSERT(common->has_set_som);
1294 if (!setsom_found)
1295 {
/* Record: -OVECTOR(0), then the current OVECTOR(0) value. */
1296 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1297 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1298 stackpos += (int)sizeof(sljit_sw);
1299 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1300 stackpos += (int)sizeof(sljit_sw);
1301 setsom_found = TRUE;
1302 }
1303 cc += 1;
1304 break;
1305
1306 case OP_MARK:
1307 SLJIT_ASSERT(common->mark_ptr != 0);
1308 if (!setmark_found)
1309 {
/* Record: -mark_ptr, then the current value of the mark local. */
1310 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1311 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1312 stackpos += (int)sizeof(sljit_sw);
1313 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1314 stackpos += (int)sizeof(sljit_sw);
1315 setmark_found = TRUE;
1316 }
1317 cc += 1 + 2 + cc[1];
1318 break;
1319
/* A recursion records every tracked local that has not been recorded yet
and that the pattern actually uses. */
1320 case OP_RECURSE:
1321 if (common->has_set_som && !setsom_found)
1322 {
1323 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1324 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1325 stackpos += (int)sizeof(sljit_sw);
1326 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1327 stackpos += (int)sizeof(sljit_sw);
1328 setsom_found = TRUE;
1329 }
1330 if (common->mark_ptr != 0 && !setmark_found)
1331 {
1332 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1333 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1334 stackpos += (int)sizeof(sljit_sw);
1335 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1336 stackpos += (int)sizeof(sljit_sw);
1337 setmark_found = TRUE;
1338 }
1339 if (common->capture_last_ptr != 0 && !capture_last_found)
1340 {
1341 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1342 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1343 stackpos += (int)sizeof(sljit_sw);
1344 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1345 stackpos += (int)sizeof(sljit_sw);
1346 capture_last_found = TRUE;
1347 }
1348 cc += 1 + LINK_SIZE;
1349 break;
1350
1351 case OP_CBRA:
1352 case OP_CBRAPOS:
1353 case OP_SCBRA:
1354 case OP_SCBRAPOS:
1355 if (common->capture_last_ptr != 0 && !capture_last_found)
1356 {
1357 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1358 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1359 stackpos += (int)sizeof(sljit_sw);
1360 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1361 stackpos += (int)sizeof(sljit_sw);
1362 capture_last_found = TRUE;
1363 }
/* Three-word capture record: the ovector offset of the pair, then both
current values of the pair. offset is twice the bracket number. */
1364 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1365 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1366 stackpos += (int)sizeof(sljit_sw);
1367 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1368 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1369 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1370 stackpos += (int)sizeof(sljit_sw);
1371 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1372 stackpos += (int)sizeof(sljit_sw);
1373
1374 cc += 1 + LINK_SIZE + IMM2_SIZE;
1375 break;
1376
/* Any other opcode writes nothing to the frame. */
1377 default:
1378 cc = next_opcode(common, cc);
1379 SLJIT_ASSERT(cc != NULL);
1380 break;
1381 }
1382
/* Frame terminator. */
1383 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1384 SLJIT_ASSERT(stackpos == STACK(stacktop));
1385 }
1386
1387 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1388 {
1389 int private_data_length = 2;
1390 int size;
1391 pcre_uchar *alternative;
1392 /* Calculate the sum of the private machine words. */
1393 while (cc < ccend)
1394 {
1395 size = 0;
1396 switch(*cc)
1397 {
1398 case OP_ASSERT:
1399 case OP_ASSERT_NOT:
1400 case OP_ASSERTBACK:
1401 case OP_ASSERTBACK_NOT:
1402 case OP_ONCE:
1403 case OP_ONCE_NC:
1404 case OP_BRAPOS:
1405 case OP_SBRA:
1406 case OP_SBRAPOS:
1407 case OP_SCOND:
1408 private_data_length++;
1409 cc += 1 + LINK_SIZE;
1410 break;
1411
1412 case OP_CBRA:
1413 case OP_SCBRA:
1414 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1415 private_data_length++;
1416 cc += 1 + LINK_SIZE + IMM2_SIZE;
1417 break;
1418
1419 case OP_CBRAPOS:
1420 case OP_SCBRAPOS:
1421 private_data_length += 2;
1422 cc += 1 + LINK_SIZE + IMM2_SIZE;
1423 break;
1424
1425 case OP_COND:
1426 /* Might be a hidden SCOND. */
1427 alternative = cc + GET(cc, 1);
1428 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1429 private_data_length++;
1430 cc += 1 + LINK_SIZE;
1431 break;
1432
1433 CASE_ITERATOR_PRIVATE_DATA_1
1434 if (PRIVATE_DATA(cc))
1435 private_data_length++;
1436 cc += 2;
1437 #ifdef SUPPORT_UTF
1438 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1439 #endif
1440 break;
1441
1442 CASE_ITERATOR_PRIVATE_DATA_2A
1443 if (PRIVATE_DATA(cc))
1444 private_data_length += 2;
1445 cc += 2;
1446 #ifdef SUPPORT_UTF
1447 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1448 #endif
1449 break;
1450
1451 CASE_ITERATOR_PRIVATE_DATA_2B
1452 if (PRIVATE_DATA(cc))
1453 private_data_length += 2;
1454 cc += 2 + IMM2_SIZE;
1455 #ifdef SUPPORT_UTF
1456 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1457 #endif
1458 break;
1459
1460 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1461 if (PRIVATE_DATA(cc))
1462 private_data_length++;
1463 cc += 1;
1464 break;
1465
1466 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1467 if (PRIVATE_DATA(cc))
1468 private_data_length += 2;
1469 cc += 1;
1470 break;
1471
1472 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1473 if (PRIVATE_DATA(cc))
1474 private_data_length += 2;
1475 cc += 1 + IMM2_SIZE;
1476 break;
1477
1478 case OP_CLASS:
1479 case OP_NCLASS:
1480 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1481 case OP_XCLASS:
1482 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1483 #else
1484 size = 1 + 32 / (int)sizeof(pcre_uchar);
1485 #endif
1486 if (PRIVATE_DATA(cc))
1487 private_data_length += get_class_iterator_size(cc + size);
1488 cc += size;
1489 break;
1490
1491 default:
1492 cc = next_opcode(common, cc);
1493 SLJIT_ASSERT(cc != NULL);
1494 break;
1495 }
1496 }
1497 SLJIT_ASSERT(cc == ccend);
1498 return private_data_length;
1499 }
1500
/* Emits the code that transfers the private data words of the opcodes in
[cc, ccend) between the locals area and the machine stack.

  save == TRUE:  locals are copied to the stack. The first word transferred
                 is the local at common->recursive_head_ptr.
  save == FALSE: the words are copied back from the stack into the locals.

The opcode walk is the same one used by get_private_data_length_for_copy().
Values travel through TMP1 and TMP2 alternately (tmp1next selects the
register for the next word), so a register's previous content is flushed to
the stack (or refilled from it) only when that register is needed again.
tmp1empty / tmp2empty tell whether a register currently holds a pending
value. The final assertion checks that the walk ended exactly at ccend and
that the stack position reached stacktop. */
1501 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1502 BOOL save, int stackptr, int stacktop)
1503 {
1504 DEFINE_COMPILER;
/* Local offsets of the (at most two) words produced by the current step. */
1505 int srcw[2];
1506 int count, size;
1507 BOOL tmp1next = TRUE;
1508 BOOL tmp1empty = TRUE;
1509 BOOL tmp2empty = TRUE;
1510 pcre_uchar *alternative;
1511 enum {
1512 start,
1513 loop,
1514 end
1515 } status;
1516
/* The extra "start" step exists only when saving. */
1517 status = save ? start : loop;
/* NOTE(review): STACK() presumably converts a slot index into an offset
relative to STACK_TOP - confirm against the macro definition. */
1518 stackptr = STACK(stackptr - 2);
1519 stacktop = STACK(stacktop - 1);
1520
1521 if (!save)
1522 {
/* Restoring: skip one word, then preload up to two words so that TMP1 and
TMP2 already hold the next values to be written into the locals. */
1523 stackptr += sizeof(sljit_sw);
1524 if (stackptr < stacktop)
1525 {
1526 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1527 stackptr += sizeof(sljit_sw);
1528 tmp1empty = FALSE;
1529 }
1530 if (stackptr < stacktop)
1531 {
1532 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1533 stackptr += sizeof(sljit_sw);
1534 tmp2empty = FALSE;
1535 }
1536 /* The tmp1next must be TRUE in either way. */
1537 }
1538
/* Each iteration determines "count" local offsets in srcw[] and then
transfers them in the second half of the loop body. */
1539 while (status != end)
1540 {
1541 count = 0;
1542 switch(status)
1543 {
1544 case start:
1545 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1546 count = 1;
1547 srcw[0] = common->recursive_head_ptr;
1548 status = loop;
1549 break;
1550
1551 case loop:
1552 if (cc >= ccend)
1553 {
1554 status = end;
1555 break;
1556 }
1557
1558 switch(*cc)
1559 {
/* Brackets and assertions with exactly one private word. */
1560 case OP_ASSERT:
1561 case OP_ASSERT_NOT:
1562 case OP_ASSERTBACK:
1563 case OP_ASSERTBACK_NOT:
1564 case OP_ONCE:
1565 case OP_ONCE_NC:
1566 case OP_BRAPOS:
1567 case OP_SBRA:
1568 case OP_SBRAPOS:
1569 case OP_SCOND:
1570 count = 1;
1571 srcw[0] = PRIVATE_DATA(cc);
1572 SLJIT_ASSERT(srcw[0] != 0);
1573 cc += 1 + LINK_SIZE;
1574 break;
1575
/* A capturing bracket contributes its OVECTOR_PRIV word only when its
optimized_cbracket entry is zero. */
1576 case OP_CBRA:
1577 case OP_SCBRA:
1578 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1579 {
1580 count = 1;
1581 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1582 }
1583 cc += 1 + LINK_SIZE + IMM2_SIZE;
1584 break;
1585
1586 case OP_CBRAPOS:
1587 case OP_SCBRAPOS:
1588 count = 2;
1589 srcw[0] = PRIVATE_DATA(cc);
1590 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1591 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1592 cc += 1 + LINK_SIZE + IMM2_SIZE;
1593 break;
1594
1595 case OP_COND:
1596 /* Might be a hidden SCOND. */
1597 alternative = cc + GET(cc, 1);
1598 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1599 {
1600 count = 1;
1601 srcw[0] = PRIVATE_DATA(cc);
1602 SLJIT_ASSERT(srcw[0] != 0);
1603 }
1604 cc += 1 + LINK_SIZE;
1605 break;
1606
/* Character iterators: one or two consecutive words, but only if private
data was assigned. The character may occupy extra code units in UTF mode. */
1607 CASE_ITERATOR_PRIVATE_DATA_1
1608 if (PRIVATE_DATA(cc))
1609 {
1610 count = 1;
1611 srcw[0] = PRIVATE_DATA(cc);
1612 }
1613 cc += 2;
1614 #ifdef SUPPORT_UTF
1615 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1616 #endif
1617 break;
1618
1619 CASE_ITERATOR_PRIVATE_DATA_2A
1620 if (PRIVATE_DATA(cc))
1621 {
1622 count = 2;
1623 srcw[0] = PRIVATE_DATA(cc);
1624 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1625 }
1626 cc += 2;
1627 #ifdef SUPPORT_UTF
1628 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1629 #endif
1630 break;
1631
1632 CASE_ITERATOR_PRIVATE_DATA_2B
1633 if (PRIVATE_DATA(cc))
1634 {
1635 count = 2;
1636 srcw[0] = PRIVATE_DATA(cc);
1637 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1638 }
1639 cc += 2 + IMM2_SIZE;
1640 #ifdef SUPPORT_UTF
1641 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1642 #endif
1643 break;
1644
/* Type iterators: same word counts, fixed opcode lengths. */
1645 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1646 if (PRIVATE_DATA(cc))
1647 {
1648 count = 1;
1649 srcw[0] = PRIVATE_DATA(cc);
1650 }
1651 cc += 1;
1652 break;
1653
1654 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1655 if (PRIVATE_DATA(cc))
1656 {
1657 count = 2;
1658 srcw[0] = PRIVATE_DATA(cc);
1659 srcw[1] = srcw[0] + sizeof(sljit_sw);
1660 }
1661 cc += 1;
1662 break;
1663
1664 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1665 if (PRIVATE_DATA(cc))
1666 {
1667 count = 2;
1668 srcw[0] = PRIVATE_DATA(cc);
1669 srcw[1] = srcw[0] + sizeof(sljit_sw);
1670 }
1671 cc += 1 + IMM2_SIZE;
1672 break;
1673
/* Character classes: size is the length of the class opcode. The word
count is decided by the iterator that follows the class data. */
1674 case OP_CLASS:
1675 case OP_NCLASS:
1676 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1677 case OP_XCLASS:
1678 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1679 #else
1680 size = 1 + 32 / (int)sizeof(pcre_uchar);
1681 #endif
1682 if (PRIVATE_DATA(cc))
1683 switch(get_class_iterator_size(cc + size))
1684 {
1685 case 1:
1686 count = 1;
1687 srcw[0] = PRIVATE_DATA(cc);
1688 break;
1689
1690 case 2:
1691 count = 2;
1692 srcw[0] = PRIVATE_DATA(cc);
1693 srcw[1] = srcw[0] + sizeof(sljit_sw);
1694 break;
1695
1696 default:
1697 SLJIT_ASSERT_STOP();
1698 break;
1699 }
1700 cc += size;
1701 break;
1702
/* Opcodes without private data are skipped. */
1703 default:
1704 cc = next_opcode(common, cc);
1705 SLJIT_ASSERT(cc != NULL);
1706 break;
1707 }
1708 break;
1709
/* Never reached: the enclosing while loop stops at "end". */
1710 case end:
1711 SLJIT_ASSERT_STOP();
1712 break;
1713 }
1714
/* Transfer the collected words. srcw[] is consumed from the last entry
to the first. */
1715 while (count > 0)
1716 {
1717 count--;
1718 if (save)
1719 {
/* Saving: flush the register's pending value to the stack (if any), then
load the next local into it. */
1720 if (tmp1next)
1721 {
1722 if (!tmp1empty)
1723 {
1724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1725 stackptr += sizeof(sljit_sw);
1726 }
1727 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1728 tmp1empty = FALSE;
1729 tmp1next = FALSE;
1730 }
1731 else
1732 {
1733 if (!tmp2empty)
1734 {
1735 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1736 stackptr += sizeof(sljit_sw);
1737 }
1738 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1739 tmp2empty = FALSE;
1740 tmp1next = TRUE;
1741 }
1742 }
1743 else
1744 {
/* Restoring: store the preloaded register into the local, then refill the
register from the stack unless the stack data is exhausted. */
1745 if (tmp1next)
1746 {
1747 SLJIT_ASSERT(!tmp1empty);
1748 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1749 tmp1empty = stackptr >= stacktop;
1750 if (!tmp1empty)
1751 {
1752 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1753 stackptr += sizeof(sljit_sw);
1754 }
1755 tmp1next = FALSE;
1756 }
1757 else
1758 {
1759 SLJIT_ASSERT(!tmp2empty);
1760 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1761 tmp2empty = stackptr >= stacktop;
1762 if (!tmp2empty)
1763 {
1764 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1765 stackptr += sizeof(sljit_sw);
1766 }
1767 tmp1next = TRUE;
1768 }
1769 }
1770 }
1771 }
1772
/* Saving: flush whatever is still pending in the registers. The register
that would have been used next holds the older value, so it goes first. */
1773 if (save)
1774 {
1775 if (tmp1next)
1776 {
1777 if (!tmp1empty)
1778 {
1779 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1780 stackptr += sizeof(sljit_sw);
1781 }
1782 if (!tmp2empty)
1783 {
1784 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1785 stackptr += sizeof(sljit_sw);
1786 }
1787 }
1788 else
1789 {
1790 if (!tmp2empty)
1791 {
1792 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1793 stackptr += sizeof(sljit_sw);
1794 }
1795 if (!tmp1empty)
1796 {
1797 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1798 stackptr += sizeof(sljit_sw);
1799 }
1800 }
1801 }
1802 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1803 }
1804
1805 #undef CASE_ITERATOR_PRIVATE_DATA_1
1806 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1807 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1808 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1809 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1810 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1811
1812 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1813 {
1814 return (value & (value - 1)) == 0;
1815 }
1816
1817 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1818 {
1819 while (list)
1820 {
1821 /* sljit_set_label is clever enough to do nothing
1822 if either the jump or the label is NULL. */
1823 SET_LABEL(list->jump, label);
1824 list = list->next;
1825 }
1826 }
1827
1828 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1829 {
1830 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1831 if (list_item)
1832 {
1833 list_item->next = *list;
1834 list_item->jump = jump;
1835 *list = list_item;
1836 }
1837 }
1838
1839 static void add_stub(compiler_common *common, struct sljit_jump *start)
1840 {
1841 DEFINE_COMPILER;
1842 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1843
1844 if (list_item)
1845 {
1846 list_item->start = start;
1847 list_item->quit = LABEL();
1848 list_item->next = common->stubs;
1849 common->stubs = list_item;
1850 }
1851 }
1852
1853 static void flush_stubs(compiler_common *common)
1854 {
1855 DEFINE_COMPILER;
1856 stub_list* list_item = common->stubs;
1857
1858 while (list_item)
1859 {
1860 JUMPHERE(list_item->start);
1861 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1862 JUMPTO(SLJIT_JUMP, list_item->quit);
1863 list_item = list_item->next;
1864 }
1865 common->stubs = NULL;
1866 }
1867
1868 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1869 {
1870 DEFINE_COMPILER;
1871
1872 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1873 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1874 }
1875
1876 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1877 {
1878 /* May destroy all locals and registers except TMP2. */
1879 DEFINE_COMPILER;
1880
1881 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1882 #ifdef DESTROY_REGISTERS
1883 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1884 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1885 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1886 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1887 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1888 #endif
1889 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1890 }
1891
/* Emits code that moves STACK_TOP back by size machine words; the inverse of
the STACK_TOP adjustment made by allocate_stack(). No limit check is emitted. */
1892 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1893 {
1894 DEFINE_COMPILER;
1895 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1896 }
1897
1898 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1899 {
1900 DEFINE_COMPILER;
1901 struct sljit_label *loop;
1902 int i;
1903 /* At this point we can freely use all temporary registers. */
1904 /* TMP1 returns with begin - 1. */
1905 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1906 if (length < 8)
1907 {
1908 for (i = 0; i < length; i++)
1909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1910 }
1911 else
1912 {
1913 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START - sizeof(sljit_sw));
1914 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length);
1915 loop = LABEL();
1916 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1917 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1918 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1919 }
1920 }
1921
/* Emits the code that publishes a successful match:
  1. OVECTOR(1) is overwritten with STR_PTR (its previous value is kept in
     SLJIT_SAVED_REG3 for step 4);
  2. if the pattern uses marks, the mark local is copied into the mark_ptr
     field of the jit_arguments structure;
  3. offset_count ovector entries are converted from pointers into offsets
     relative to the begin field and stored as ints into the offsets array;
  4. the return value is computed: 1 when topbracket <= 1, otherwise the
     result of a backwards scan over the ovector pairs (see below).
NOTE(review): topbracket is presumably the number of the highest capturing
bracket of the pattern - confirm against the caller. */
1922 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1923 {
1924 DEFINE_COMPILER;
1925 struct sljit_label *loop;
1926 struct sljit_jump *early_quit;
1927
1928 /* At this point we can freely use all registers. */
1929 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1930 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1931
/* SCRATCH_REG1 = arguments, SCRATCH_REG2 = offset_count (loop counter),
SCRATCH_REG3 = output pointer, starting one int before the offsets array
because the store in the loop addresses the int after the current base. */
1932 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
1933 if (common->mark_ptr != 0)
1934 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1935 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
1936 if (common->mark_ptr != 0)
1937 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
1938 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
/* SCRATCH_REG1 is reused: from here on it holds the begin pointer. */
1939 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1940 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1941 /* Unlikely, but possible */
1942 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
/* Copy loop: one ovector word per iteration. */
1943 loop = LABEL();
1944 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
1945 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
1946 /* Copy the integer value to the output buffer */
/* In the 16 and 32 bit libraries the pointer difference is shifted right
by UCHAR_SHIFT before it is stored. */
1947 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1948 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1949 #endif
1950 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1951 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1952 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1953 JUMPHERE(early_quit);
1954
1955 /* Calculate the return value, which is the maximum ovector value. */
1956 if (topbracket > 1)
1957 {
/* Scan the ovector pairs downwards, starting with pair "topbracket". The
counter starts at topbracket + 1 and is decremented once per pair examined.
The scan continues while the examined word equals the value saved from
OVECTOR(1) above.
NOTE(review): the loop relies on SLJIT_MOVU updating its base register,
and the saved value is presumably the "unset" marker - confirm both. */
1958 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
1959 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
1960
1961 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1962 loop = LABEL();
1963 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
1964 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1965 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1966 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
1967 }
1968 else
1969 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1970 }
1971
/* Emits the code that returns PCRE_ERROR_PARTIAL and, if the caller's
offsets array is large enough, fills it in before jumping to quit:
  offsets[0] = start of the partial match (start_used_ptr local in hard
               partial mode, hit_start local otherwise),
  offsets[1] = end of the subject (STR_END),
  offsets[2] = the local stored one word after start_used_ptr (written only
               when real_offset_count >= 3).
All three are stored relative to the begin field of jit_arguments, and in
the 16 and 32 bit libraries they are shifted right by UCHAR_SHIFT. When
real_offset_count < 2 nothing is stored. */
1972 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1973 {
1974 DEFINE_COMPILER;
1975 struct sljit_jump *jump;
1976
/* STR_END shares SLJIT_SAVED_REG2, which is used as a scratch destination
further down; the compile-time assertion pins that aliasing. */
1977 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1978 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1979
1980 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
1981 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1982 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Fewer than two output slots: return without storing anything. */
1983 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
1984
1985 /* Store match begin and end. */
/* SAVED_REG1 = begin pointer; SCRATCH_REG2 is reused for the offsets array. */
1986 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1987 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1988
/* offsets[2] is written only when at least three slots are available. */
1989 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
1990 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr + sizeof(sljit_sw), SLJIT_SAVED_REG1, 0);
1991 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1992 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
1993 #endif
1994 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
1995 JUMPHERE(jump);
1996
/* Load the start position first, then compute and store offsets[1]. */
1997 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1998 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1999 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2000 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2001 #endif
2002 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2003
/* offsets[0] = start position relative to begin. */
2004 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2005 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2006 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2007 #endif
2008 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2009
2010 JUMPTO(SLJIT_JUMP, quit);
2011 }
2012
2013 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2014 {
2015 /* May destroy TMP1. */
2016 DEFINE_COMPILER;
2017 struct sljit_jump *jump;
2018
2019 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2020 {
2021 /* The value of -1 must be kept for start_used_ptr! */
2022 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2023 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2024 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2025 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2026 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2027 JUMPHERE(jump);
2028 }
2029 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2030 {
2031 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2032 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2033 JUMPHERE(jump);
2034 }
2035 }
2036
2037 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2038 {
2039 /* Detects if the character has an othercase. */
2040 unsigned int c;
2041
2042 #ifdef SUPPORT_UTF
2043 if (common->utf)
2044 {
2045 GETCHAR(c, cc);
2046 if (c > 127)
2047 {
2048 #ifdef SUPPORT_UCP
2049 return c != UCD_OTHERCASE(c);
2050 #else
2051 return FALSE;
2052 #endif
2053 }
2054 #ifndef COMPILE_PCRE8
2055 return common->fcc[c] != c;
2056 #endif
2057 }
2058 else
2059 #endif
2060 c = *cc;
2061 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2062 }
2063
2064 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2065 {
2066 /* Returns with the othercase. */
2067 #ifdef SUPPORT_UTF
2068 if (common->utf && c > 127)
2069 {
2070 #ifdef SUPPORT_UCP
2071 return UCD_OTHERCASE(c);
2072 #else
2073 return c;
2074 #endif
2075 }
2076 #endif
2077 return TABLE_GET(c, common->fcc, c);
2078 }
2079
2080 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2081 {
2082 /* Detects if the character and its othercase has only 1 bit difference. */
2083 unsigned int c, oc, bit;
2084 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2085 int n;
2086 #endif
2087
2088 #ifdef SUPPORT_UTF
2089 if (common->utf)
2090 {
2091 GETCHAR(c, cc);
2092 if (c <= 127)
2093 oc = common->fcc[c];
2094 else
2095 {
2096 #ifdef SUPPORT_UCP
2097 oc = UCD_OTHERCASE(c);
2098 #else
2099 oc = c;
2100 #endif
2101 }
2102 }
2103 else
2104 {
2105 c = *cc;
2106 oc = TABLE_GET(c, common->fcc, c);
2107 }
2108 #else
2109 c = *cc;
2110 oc = TABLE_GET(c, common->fcc, c);
2111 #endif
2112
2113 SLJIT_ASSERT(c != oc);
2114
2115 bit = c ^ oc;
2116 /* Optimized for English alphabet. */
2117 if (c <= 127 && bit == 0x20)
2118 return (0 << 8) | 0x20;
2119
2120 /* Since c != oc, they must have at least 1 bit difference. */
2121 if (!is_powerof2(bit))
2122 return 0;
2123
2124 #if defined COMPILE_PCRE8
2125
2126 #ifdef SUPPORT_UTF
2127 if (common->utf && c > 127)
2128 {
2129 n = GET_EXTRALEN(*cc);
2130 while ((bit & 0x3f) == 0)
2131 {
2132 n--;
2133 bit >>= 6;
2134 }
2135 return (n << 8) | bit;
2136 }
2137 #endif /* SUPPORT_UTF */
2138 return (0 << 8) | bit;
2139
2140 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2141
2142 #ifdef SUPPORT_UTF
2143 if (common->utf && c > 65535)
2144 {
2145 if (bit >= (1 << 10))
2146 bit >>= 10;
2147 else
2148 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2149 }
2150 #endif /* SUPPORT_UTF */
2151 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2152
2153 #endif /* COMPILE_PCRE[8|16|32] */
2154 }
2155
2156 static void check_partial(compiler_common *common, BOOL force)
2157 {
2158 /* Checks whether a partial matching is occured. Does not modify registers. */
2159 DEFINE_COMPILER;
2160 struct sljit_jump *jump = NULL;
2161
2162 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2163
2164 if (common->mode == JIT_COMPILE)
2165 return;
2166
2167 if (!force)
2168 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2169 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2170 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2171
2172 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2173 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2174 else
2175 {
2176 if (common->partialmatchlabel != NULL)
2177 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2178 else
2179 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2180 }
2181
2182 if (jump != NULL)
2183 JUMPHERE(jump);
2184 }
2185
2186 static struct sljit_jump *check_str_end(compiler_common *common)
2187 {
2188 /* Does not affect registers. Usually used in a tight spot. */
2189 DEFINE_COMPILER;
2190 struct sljit_jump *jump;
2191 struct sljit_jump *nohit;
2192 struct sljit_jump *return_value;
2193
2194 if (common->mode == JIT_COMPILE)
2195 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2196
2197 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2198 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2199 {
2200 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2201 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2202 JUMPHERE(nohit);
2203 return_value = JUMP(SLJIT_JUMP);
2204 }
2205 else
2206 {
2207 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2208 if (common->partialmatchlabel != NULL)
2209 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2210 else
2211 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2212 }
2213 JUMPHERE(jump);
2214 return return_value;
2215 }
2216
2217 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2218 {
2219 DEFINE_COMPILER;
2220 struct sljit_jump *jump;
2221
2222 if (common->mode == JIT_COMPILE)
2223 {
2224 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2225 return;
2226 }
2227
2228 /* Partial matching mode. */
2229 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2230 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2231 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2232 {
2233 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2234 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2235 }
2236 else
2237 {
2238 if (common->partialmatchlabel != NULL)
2239 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2240 else
2241 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2242 }
2243 JUMPHERE(jump);
2244 }
2245
2246 static void read_char(compiler_common *common)
2247 {
2248 /* Reads the character into TMP1, updates STR_PTR.
2249 Does not check STR_END. TMP2 Destroyed. */
2250 DEFINE_COMPILER;
2251 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2252 struct sljit_jump *jump;
2253 #endif
2254
2255 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2256 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2257 if (common->utf)
2258 {
2259 #if defined COMPILE_PCRE8
2260 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2261 #elif defined COMPILE_PCRE16
2262 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2263 #endif /* COMPILE_PCRE[8|16] */
2264 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2265 JUMPHERE(jump);
2266 }
2267 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2268 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2269 }
2270
2271 static void peek_char(compiler_common *common)
2272 {
2273 /* Reads the character into TMP1, keeps STR_PTR.
2274 Does not check STR_END. TMP2 Destroyed. */
2275 DEFINE_COMPILER;
2276 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2277 struct sljit_jump *jump;
2278 #endif
2279
2280 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2281 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2282 if (common->utf)
2283 {
2284 #if defined COMPILE_PCRE8
2285 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2286 #elif defined COMPILE_PCRE16
2287 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2288 #endif /* COMPILE_PCRE[8|16] */
2289 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2290 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2291 JUMPHERE(jump);
2292 }
2293 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2294 }
2295
2296 static void read_char8_type(compiler_common *common)
2297 {
2298 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2299 DEFINE_COMPILER;
2300 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2301 struct sljit_jump *jump;
2302 #endif
2303
2304 #ifdef SUPPORT_UTF
2305 if (common->utf)
2306 {
2307 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2308 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2309 #if defined COMPILE_PCRE8
2310 /* This can be an extra read in some situations, but hopefully
2311 it is needed in most cases. */
2312 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2313 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2314 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2315 JUMPHERE(jump);
2316 #elif defined COMPILE_PCRE16
2317 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2318 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2319 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2320 JUMPHERE(jump);
2321 /* Skip low surrogate if necessary. */
2322 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2323 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2324 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2325 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2326 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2327 #elif defined COMPILE_PCRE32
2328 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2329 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2330 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2331 JUMPHERE(jump);
2332 #endif /* COMPILE_PCRE[8|16|32] */
2333 return;
2334 }
2335 #endif /* SUPPORT_UTF */
2336 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2337 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2338 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2339 /* The ctypes array contains only 256 values. */
2340 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2341 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2342 #endif
2343 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2344 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2345 JUMPHERE(jump);
2346 #endif
2347 }
2348
2349 static void skip_char_back(compiler_common *common)
2350 {
2351 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2352 DEFINE_COMPILER;
2353 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2354 #if defined COMPILE_PCRE8
2355 struct sljit_label *label;
2356
2357 if (common->utf)
2358 {
2359 label = LABEL();
2360 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2361 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2362 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2363 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2364 return;
2365 }
2366 #elif defined COMPILE_PCRE16
2367 if (common->utf)
2368 {
2369 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2370 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2371 /* Skip low surrogate if necessary. */
2372 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2373 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2374 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2375 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2376 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2377 return;
2378 }
2379 #endif /* COMPILE_PCRE[8|16] */
2380 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2381 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2382 }
2383
2384 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2385 {
2386 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2387 DEFINE_COMPILER;
2388
2389 if (nltype == NLTYPE_ANY)
2390 {
2391 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2392 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2393 }
2394 else if (nltype == NLTYPE_ANYCRLF)
2395 {
2396 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2397 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2398 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2399 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2400 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2401 }
2402 else
2403 {
2404 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2405 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2406 }
2407 }
2408
2409 #ifdef SUPPORT_UTF
2410
2411 #if defined COMPILE_PCRE8
2412 static void do_utfreadchar(compiler_common *common)
2413 {
2414 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2415 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2416 DEFINE_COMPILER;
2417 struct sljit_jump *jump;
2418
2419 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2420 /* Searching for the first zero. */
2421 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2422 jump = JUMP(SLJIT_C_NOT_ZERO);
2423 /* Two byte sequence. */
2424 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2425 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2426 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2427 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2428 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2429 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2430 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2431 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2432 JUMPHERE(jump);
2433
2434 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2435 jump = JUMP(SLJIT_C_NOT_ZERO);
2436 /* Three byte sequence. */
2437 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2438 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2439 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2440 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2441 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2442 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2443 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2444 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2445 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2446 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2447 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2448 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2449 JUMPHERE(jump);
2450
2451 /* Four byte sequence. */
2452 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2453 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2454 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2455 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2456 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2457 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2458 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2459 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2460 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2461 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2462 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2463 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2464 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2465 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2466 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2467 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2468 }
2469
2470 static void do_utfreadtype8(compiler_common *common)
2471 {
2472 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2473 of the character (>= 0xc0). Return value in TMP1. */
2474 DEFINE_COMPILER;
2475 struct sljit_jump *jump;
2476 struct sljit_jump *compare;
2477
2478 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2479
2480 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2481 jump = JUMP(SLJIT_C_NOT_ZERO);
2482 /* Two byte sequence. */
2483 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2484 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2485 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2486 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2487 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2488 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2489 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2490 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2491 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2492
2493 JUMPHERE(compare);
2494 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2495 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2496 JUMPHERE(jump);
2497
2498 /* We only have types for characters less than 256. */
2499 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2500 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2501 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2502 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2503 }
2504
2505 #elif defined COMPILE_PCRE16
2506
2507 static void do_utfreadchar(compiler_common *common)
2508 {
2509 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2510 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2511 DEFINE_COMPILER;
2512 struct sljit_jump *jump;
2513
2514 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2515 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2516 /* Do nothing, only return. */
2517 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2518
2519 JUMPHERE(jump);
2520 /* Combine two 16 bit characters. */
2521 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2522 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2523 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2524 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2525 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2526 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2527 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2528 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2529 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2530 }
2531
2532 #endif /* COMPILE_PCRE[8|16] */
2533
2534 #endif /* SUPPORT_UTF */
2535
2536 #ifdef SUPPORT_UCP
2537
2538 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2539 #define UCD_BLOCK_MASK 127
2540 #define UCD_BLOCK_SHIFT 7
2541
2542 static void do_getucd(compiler_common *common)
2543 {
2544 /* Search the UCD record for the character comes in TMP1.
2545 Returns chartype in TMP1 and UCD offset in TMP2. */
2546 DEFINE_COMPILER;
2547
2548 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2549
2550 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2551 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2552 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2553 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2554 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2555 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2556 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2557 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2558 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2559 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2560 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2561 }
2562 #endif
2563
2564 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2565 {
2566 DEFINE_COMPILER;
2567 struct sljit_label *mainloop;
2568 struct sljit_label *newlinelabel = NULL;
2569 struct sljit_jump *start;
2570 struct sljit_jump *end = NULL;
2571 struct sljit_jump *nl = NULL;
2572 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2573 struct sljit_jump *singlechar;
2574 #endif
2575 jump_list *newline = NULL;
2576 BOOL newlinecheck = FALSE;
2577 BOOL readuchar = FALSE;
2578
2579 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2580 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2581 newlinecheck = TRUE;
2582
2583 if (firstline)
2584 {
2585 /* Search for the end of the first line. */
2586 SLJIT_ASSERT(common->first_line_end != 0);
2587 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2588
2589 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2590 {
2591 mainloop = LABEL();
2592 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2593 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2594 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2595 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2596 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2597 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2598 JUMPHERE(end);
2599 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2600 }
2601 else
2602 {
2603 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2604 mainloop = LABEL();
2605 /* Continual stores does not cause data dependency. */
2606 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2607 read_char(common);
2608 check_newlinechar(common, common->nltype, &newline, TRUE);
2609 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2610 JUMPHERE(end);
2611 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2612 set_jumps(newline, LABEL());
2613 }
2614
2615 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2616 }
2617
2618 start = JUMP(SLJIT_JUMP);
2619
2620 if (newlinecheck)
2621 {
2622 newlinelabel = LABEL();
2623 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2624 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2625 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2626 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2627 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2628 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2629 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2630 #endif
2631 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2632 nl = JUMP(SLJIT_JUMP);
2633 }
2634
2635 mainloop = LABEL();
2636
2637 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2638 #ifdef SUPPORT_UTF
2639 if (common->utf) readuchar = TRUE;
2640 #endif
2641 if (newlinecheck) readuchar = TRUE;
2642
2643 if (readuchar)
2644 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2645
2646 if (newlinecheck)
2647 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2648
2649 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2650 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2651 #if defined COMPILE_PCRE8
2652 if (common->utf)
2653 {
2654 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2655 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2656 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2657 JUMPHERE(singlechar);
2658 }
2659 #elif defined COMPILE_PCRE16
2660 if (common->utf)
2661 {
2662 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2663 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2665 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2666 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2667 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2668 JUMPHERE(singlechar);
2669 }
2670 #endif /* COMPILE_PCRE[8|16] */
2671 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2672 JUMPHERE(start);
2673
2674 if (newlinecheck)
2675 {
2676 JUMPHERE(end);
2677 JUMPHERE(nl);
2678 }
2679
2680 return mainloop;
2681 }
2682
2683 #define MAX_N_CHARS 3
2684
2685 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2686 {
2687 DEFINE_COMPILER;
2688 struct sljit_label *start;
2689 struct sljit_jump *quit;
2690 pcre_uint32 chars[MAX_N_CHARS * 2];
2691 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2692 int location = 0;
2693 pcre_int32 len, c, bit, caseless;
2694 int must_stop;
2695
2696 /* We do not support alternatives now. */
2697 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2698 return FALSE;
2699
2700 while (TRUE)
2701 {
2702 caseless = 0;
2703 must_stop = 1;
2704 switch(*cc)
2705 {
2706 case OP_CHAR:
2707 must_stop = 0;
2708 cc++;
2709 break;
2710
2711 case OP_CHARI:
2712 caseless = 1;
2713 must_stop = 0;
2714 cc++;
2715 break;
2716
2717 case OP_SOD:
2718 case OP_SOM:
2719 case OP_SET_SOM:
2720 case OP_NOT_WORD_BOUNDARY:
2721 case OP_WORD_BOUNDARY:
2722 case OP_EODN:
2723 case OP_EOD:
2724 case OP_CIRC:
2725 case OP_CIRCM:
2726 case OP_DOLL:
2727 case OP_DOLLM:
2728 /* Zero width assertions. */
2729 cc++;
2730 continue;
2731
2732 case OP_PLUS:
2733 case OP_MINPLUS:
2734 case OP_POSPLUS:
2735 cc++;
2736 break;
2737
2738 case OP_EXACT:
2739 cc += 1 + IMM2_SIZE;
2740 break;
2741
2742 case OP_PLUSI:
2743 case OP_MINPLUSI:
2744 case OP_POSPLUSI:
2745 caseless = 1;
2746 cc++;
2747 break;
2748
2749 case OP_EXACTI:
2750 caseless = 1;
2751 cc += 1 + IMM2_SIZE;
2752 break;
2753
2754 default:
2755 must_stop = 2;
2756 break;
2757 }
2758
2759 if (must_stop == 2)
2760 break;
2761
2762 len = 1;
2763 #ifdef SUPPORT_UTF
2764 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2765 #endif
2766
2767 if (caseless && char_has_othercase(common, cc))
2768 {
2769 caseless = char_get_othercase_bit(common, cc);
2770 if (caseless == 0)
2771 return FALSE;
2772 #ifdef COMPILE_PCRE8
2773 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2774 #else
2775 if ((caseless & 0x100) != 0)
2776 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2777 else
2778 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2779 #endif
2780 }
2781 else
2782 caseless = 0;
2783
2784 while (len > 0 && location < MAX_N_CHARS * 2)
2785 {
2786 c = *cc;
2787 bit = 0;
2788 if (len == (caseless & 0xff))
2789 {
2790 bit = caseless >> 8;
2791 c |= bit;
2792 }
2793
2794 chars[location] = c;
2795 chars[location + 1] = bit;
2796
2797 len--;
2798 location += 2;
2799 cc++;
2800 }
2801
2802 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2803 break;
2804 }
2805
2806 /* At least two characters are required. */
2807 if (location < 2 * 2)
2808 return FALSE;
2809
2810 if (firstline)
2811 {
2812 SLJIT_ASSERT(common->first_line_end != 0);
2813 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2814 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2815 }
2816 else
2817 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2818
2819 start = LABEL();
2820 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2821
2822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2823 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2824 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2825 if (chars[1] != 0)
2826 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2827 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2828 if (location > 2 * 2)
2829 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2830 if (chars[3] != 0)
2831 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2832 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2833 if (location > 2 * 2)
2834 {
2835 if (chars[5] != 0)
2836 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2837 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2838 }
2839 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2840
2841 JUMPHERE(quit);
2842
2843 if (firstline)
2844 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2845 else
2846 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2847 return TRUE;
2848 }
2849
2850 #undef MAX_N_CHARS
2851
2852 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2853 {
2854 DEFINE_COMPILER;
2855 struct sljit_label *start;
2856 struct sljit_jump *quit;
2857 struct sljit_jump *found;
2858 pcre_uchar oc, bit;
2859
2860 if (firstline)
2861 {
2862 SLJIT_ASSERT(common->first_line_end != 0);
2863 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2864 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2865 }
2866
2867 start = LABEL();
2868 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2869 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2870
2871 oc = first_char;
2872 if (caseless)
2873 {
2874 oc = TABLE_GET(first_char, common->fcc, first_char);
2875 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2876 if (first_char > 127 && common->utf)
2877 oc = UCD_OTHERCASE(first_char);
2878 #endif
2879 }
2880 if (first_char == oc)
2881 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2882 else
2883 {
2884 bit = first_char ^ oc;
2885 if (is_powerof2(bit))
2886 {
2887 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2888 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2889 }
2890 else
2891 {
2892 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2893 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2894 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2895 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2896 found = JUMP(SLJIT_C_NOT_ZERO);
2897 }
2898 }
2899
2900 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2901 JUMPTO(SLJIT_JUMP, start);
2902 JUMPHERE(found);
2903 JUMPHERE(quit);
2904
2905 if (firstline)
2906 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2907 }
2908
2909 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2910 {
2911 DEFINE_COMPILER;
2912 struct sljit_label *loop;
2913 struct sljit_jump *lastchar;
2914 struct sljit_jump *firstchar;
2915 struct sljit_jump *quit;
2916 struct sljit_jump *foundcr = NULL;
2917 struct sljit_jump *notfoundnl;
2918 jump_list *newline = NULL;
2919
2920 if (firstline)
2921 {
2922 SLJIT_ASSERT(common->first_line_end != 0);
2923 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2924 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2925 }
2926
2927 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2928 {
2929 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2930 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2931 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2932 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2933 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2934
2935 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2936 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2937 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
2938 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2939 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
2940 #endif
2941 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2942
2943 loop = LABEL();
2944 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2945 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2946 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2947 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2948 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2949 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2950
2951 JUMPHERE(quit);
2952 JUMPHERE(firstchar);
2953 JUMPHERE(lastchar);
2954
2955 if (firstline)
2956 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2957 return;
2958 }
2959
2960 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2961 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2962 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2963 skip_char_back(common);
2964
2965 loop = LABEL();
2966 read_char(common);
2967 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2968 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2969 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2970 check_newlinechar(common, common->nltype, &newline, FALSE);
2971 set_jumps(newline, loop);
2972
2973 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2974 {
2975 quit = JUMP(SLJIT_JUMP);
2976 JUMPHERE(foundcr);
2977 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2978 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2979 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2980 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2981 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2982 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2983 #endif
2984 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2985 JUMPHERE(notfoundnl);
2986 JUMPHERE(quit);
2987 }
2988 JUMPHERE(lastchar);
2989 JUMPHERE(firstchar);
2990
2991 if (firstline)
2992 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2993 }
2994
2995 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
2996
2997 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2998 {
2999 DEFINE_COMPILER;
3000 struct sljit_label *start;
3001 struct sljit_jump *quit;
3002 struct sljit_jump *found = NULL;
3003 jump_list *matches = NULL;
3004 pcre_uint8 inverted_start_bits[32];
3005 int i;
3006 #ifndef COMPILE_PCRE8
3007 struct sljit_jump *jump;
3008 #endif
3009
3010 for (i = 0; i < 32; ++i)
3011 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3012
3013 if (firstline)
3014 {
3015 SLJIT_ASSERT(common->first_line_end != 0);
3016 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3017 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3018 }
3019
3020 start = LABEL();
3021 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3022 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3023 #ifdef SUPPORT_UTF
3024 if (common->utf)
3025 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3026 #endif
3027
3028 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3029 {
3030 #ifndef COMPILE_PCRE8
3031 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3032 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3033 JUMPHERE(jump);
3034 #endif
3035 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3036 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3037 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3038 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3039 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3040 found = JUMP(SLJIT_C_NOT_ZERO);
3041 }
3042
3043 #ifdef SUPPORT_UTF
3044 if (common->utf)
3045 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3046 #endif
3047 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3048 #ifdef SUPPORT_UTF
3049 #if defined COMPILE_PCRE8
3050 if (common->utf)
3051 {
3052 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3053 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3054 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3055 }
3056 #elif defined COMPILE_PCRE16
3057 if (common->utf)
3058 {
3059 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3060 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3061 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3062 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3063 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3064 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3065 }
3066 #endif /* COMPILE_PCRE[8|16] */
3067 #endif /* SUPPORT_UTF */
3068 JUMPTO(SLJIT_JUMP, start);
3069 if (found != NULL)
3070 JUMPHERE(found);
3071 if (matches != NULL)
3072 set_jumps(matches, LABEL());
3073 JUMPHERE(quit);
3074
3075 if (firstline)
3076 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3077 }
3078
/* Emits code that scans forward for the required character req_char (or for
its other case as well when caseless is set) and remembers where it was found.

The scan is skipped when STR_PTR + REQ_BYTE_MAX is still below STR_END, and
also when the position cached in the common->req_char_ptr local slot is
already ahead of STR_PTR. Otherwise the scan starts at STR_PTR, or one
character later when has_firstchar is set. On a hit the address of the
matching character is stored into the common->req_char_ptr slot.

Returns the jump taken when the scan reaches STR_END without a hit. It is not
bound to a label here, so the caller has to give it a target. */
3079 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3080 {
3081 DEFINE_COMPILER;
3082 struct sljit_label *loop;
3083 struct sljit_jump *toolong;
3084 struct sljit_jump *alreadyfound;
3085 struct sljit_jump *found;
3086 struct sljit_jump *foundoc = NULL;
3087 struct sljit_jump *notfound;
3088 pcre_uint32 oc, bit;
3089
3090 SLJIT_ASSERT(common->req_char_ptr != 0);
/* TMP2 = cached position of the previous hit, TMP1 = STR_PTR + REQ_BYTE_MAX. */
3091 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3092 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3093 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3094 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3095
/* TMP1 becomes the scan cursor. */
3096 if (has_firstchar)
3097 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3098 else
3099 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3100
3101 loop = LABEL();
3102 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3103
3104 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char; it stays equal to req_char when the
search is caseful. */
3105 oc = req_char;
3106 if (caseless)
3107 {
3108 oc = TABLE_GET(req_char, common->fcc, req_char);
3109 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3110 if (req_char > 127 && common->utf)
3111 oc = UCD_OTHERCASE(req_char);
3112 #endif
3113 }
3114 if (req_char == oc)
3115 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3116 else
3117 {
3118 bit = req_char ^ oc;
3119 if (is_powerof2(bit))
3120 {
/* The two cases differ in exactly one bit: force that bit on in the loaded
character and compare once. */
3121 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3122 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3123 }
3124 else
3125 {
/* Otherwise both cases need their own comparison. */
3126 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3127 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3128 }
3129 }
3130 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3131 JUMPTO(SLJIT_JUMP, loop);
3132
/* Hit: cache the position. The two skip jumps land after the store, so they
leave the cached position untouched. */
3133 JUMPHERE(found);
3134 if (foundoc)
3135 JUMPHERE(foundoc);
3136 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3137 JUMPHERE(alreadyfound);
3138 JUMPHERE(toolong);
3139 return notfound;
3140 }
3141
/* Emits the fast-called helper that walks the records stored from STACK_TOP
upwards and copies the saved words back into local slots.

TMP1 is the read cursor. The first word of every record selects its layout:
  > 0   an offset, relative to the base that GET_LOCAL_BASE puts into TMP3;
        the next two words are copied to that slot and to the slot after it,
        and the cursor advances by three words
  == 0  end marker: the helper returns
  < 0   a negated offset; the next word is copied to that single slot and the
        cursor advances by two words
TMP2 is used as a scratch register. */
3142 static void do_revertframes(compiler_common *common)
3143 {
3144 DEFINE_COMPILER;
3145 struct sljit_jump *jump;
3146 struct sljit_label *mainloop;
3147
3148 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3149 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3150 GET_LOCAL_BASE(TMP3, 0, 0);
3151
3152 /* Drop frames until we reach STACK_TOP. */
3153 mainloop = LABEL();
3154 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Compare the first word with zero; both conditional jumps below use the
flags set here. */
3155 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3156 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3157
/* Positive offset: restore two consecutive words. */
3158 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3159 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3160 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3161 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3162 JUMPTO(SLJIT_JUMP, mainloop);
3163
3164 JUMPHERE(jump);
3165 jump = JUMP(SLJIT_C_SIG_LESS);
3166 /* End of dropping frames. */
3167 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3168
/* Negative offset: negate it and restore a single word. */
3169 JUMPHERE(jump);
3170 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3171 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3172 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3173 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3174 JUMPTO(SLJIT_JUMP, mainloop);
3175 }
3176
/* Emits the fast-called helper behind the word boundary tests.

It classifies the character before STR_PTR and the character at STR_PTR as
word (1) or non-word (0), and finishes with an XOR of the two results that
only sets the equality flag. The caller can therefore tell from the flag
whether the two classifications agree.

The return address is kept in LOCALS0 and the result for the previous
character in LOCALS1. The previous character counts as non-word when STR_PTR
is not above the begin pointer of the arguments; the current one counts as
non-word when the jump returned by check_str_end is taken (presumably at the
end of the subject). */
3177 static void check_wordboundary(compiler_common *common)
3178 {
3179 DEFINE_COMPILER;
3180 struct sljit_jump *skipread;
3181 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3182 struct sljit_jump *jump;
3183 #endif
3184
/* The ctypes lookups below shift right by 4, which only works for 0x10. */
3185 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3186
3187 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3188 /* Get type of the previous char, and put it to LOCALS1. */
3189 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3190 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3191 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3192 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3193 skip_char_back(common);
3194 check_start_used_ptr(common);
3195 read_char(common);
3196
3197 /* Testing char type. */
3198 #ifdef SUPPORT_UCP
3199 if (common->use_ucp)
3200 {
/* TMP2 = 1 is the final answer for an underscore. For any other character
the value produced by the getucd helper (presumably the Unicode character
type, left in TMP1) is tested against the ranges ucp_Ll..ucp_Lu and
ucp_Nd..ucp_No. */
3201 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3202 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3203 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3204 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3205 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3206 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3207 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3208 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3209 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3210 JUMPHERE(jump);
3211 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3212 }
3213 else
3214 #endif
3215 {
/* Table based test: characters above 255 skip the lookup, so LOCALS1 keeps
the zero stored above. In the 8 bit library this guard is only emitted in
UTF mode. */
3216 #ifndef COMPILE_PCRE8
3217 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3218 #elif defined SUPPORT_UTF
3219 /* Here LOCALS1 has already been zeroed. */
3220 jump = NULL;
3221 if (common->utf)
3222 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3223 #endif /* COMPILE_PCRE8 */
3224 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3225 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3226 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3227 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3228 #ifndef COMPILE_PCRE8
3229 JUMPHERE(jump);
3230 #elif defined SUPPORT_UTF
3231 if (jump != NULL)
3232 JUMPHERE(jump);
3233 #endif /* COMPILE_PCRE8 */
3234 }
3235 JUMPHERE(skipread);
3236
/* Second half: classify the character at STR_PTR into TMP2. */
3237 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3238 skipread = check_str_end(common);
3239 peek_char(common);
3240
3241 /* Testing char type. This is a code duplication. */
3242 #ifdef SUPPORT_UCP
3243 if (common->use_ucp)
3244 {
3245 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3246 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3247 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3248 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3249 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3250 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3251 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3252 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3253 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3254 JUMPHERE(jump);
3255 }
3256 else
3257 #endif
3258 {
3259 #ifndef COMPILE_PCRE8
3260 /* TMP2 may be destroyed by peek_char. */
3261 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3262 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3263 #elif defined SUPPORT_UTF
3264 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3265 jump = NULL;
3266 if (common->utf)
3267 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3268 #endif
3269 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3270 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3271 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3272 #ifndef COMPILE_PCRE8
3273 JUMPHERE(jump);
3274 #elif defined SUPPORT_UTF
3275 if (jump != NULL)
3276 JUMPHERE(jump);
3277 #endif /* COMPILE_PCRE8 */
3278 }
3279 JUMPHERE(skipread);
3280
/* Only the flag is produced: the XOR result itself is discarded. */
3281 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3282 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3283 }
3284
3285 /*
3286 range format:
3287
3288 ranges[0] = number of bit changes stored in the list (max MAX_RANGE_SIZE, -1 means invalid range).
3289 ranges[1] = first bit (0 or 1), i.e. the bit of character 0
3290 ranges[2 .. ranges[0] + 1] = positions of the bit changes (each one is the first character whose bit is not equal to the previous one)
3291 */
3292
/* Emits a short compare sequence for a character set given as a range list
(ranges[0] = number of bit changes, ranges[1] = first bit, ranges[2...] =
positions of the changes).

Only lists with 1 or 2 changes, and two special shapes with 4 changes, are
handled. For those the function emits the code and returns TRUE; the emitted
jumps are added to backtracks and are taken for characters whose bit is 0.
For every other list (including an invalid one, ranges[0] < 0) nothing is
emitted and FALSE is returned, so the caller needs another way to do the test.

The character is expected in TMP1; when readch is set, read_char() is emitted
first to load it. TMP1 is modified by some of the sequences. */
3293 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3294 {
3295 DEFINE_COMPILER;
3296 struct sljit_jump *jump;
3297
3298 if (ranges[0] < 0)
3299 return FALSE;
3300
3301 switch(ranges[0])
3302 {
3303 case 1:
/* One change: a single comparison against the change position. */
3304 if (readch)
3305 read_char(common);
3306 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3307 return TRUE;
3308
3309 case 2:
/* Two changes: one interval [ranges[2], ranges[3]), tested by subtracting
its start and comparing with its length. */
3310 if (readch)
3311 read_char(common);
3312 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3313 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3314 return TRUE;
3315
3316 case 4:
/* Four changes, first shape: two intervals of one character each, so two
equality tests are enough. */
3317 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3318 {
3319 if (readch)
3320 read_char(common);
3321 if (ranges[1] != 0)
3322 {
3323 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3324 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3325 }
3326 else
3327 {
3328 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3329 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3330 JUMPHERE(jump);
3331 }
3332 return TRUE;
3333 }
/* Four changes, second shape: two intervals of equal length whose start
positions differ by a power of two. That difference is OR-ed into the
character, after which a single interval test against the second interval
is emitted. */
3334 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3335 {
3336 if (readch)
3337 read_char(common);
3338 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3339 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3340 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3341 return TRUE;
3342 }
3343 return FALSE;
3344
3345 default:
3346 return FALSE;
3347 }
3348 }
3349
/* Builds a range list (ranges[0] = number of bit changes, ranges[1] = first
bit, ranges[2...] = positions of the changes) from the 256 entries of the
common->ctypes table, using (ctypes[i] & flag) as the bit of character i.

ranges must have room for 2 + MAX_RANGE_SIZE entries. When the table needs
more than MAX_RANGE_SIZE changes the function returns early and ranges[0]
keeps the value -1 (invalid range). */
3350 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3351 {
3352 int i, bit, length;
3353 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3354
/* bit holds either 0 or flag, never a normalized 0/1 value. */
3355 bit = ctypes[0] & flag;
3356 ranges[0] = -1;
3357 ranges[1] = bit != 0 ? 1 : 0;
3358 length = 0;
3359
3360 for (i = 1; i < 256; i++)
3361 if ((ctypes[i] & flag) != bit)
3362 {
3363 if (length >= MAX_RANGE_SIZE)
3364 return;
3365 ranges[2 + length] = i;
3366 length++;
3367 bit ^= flag;
3368 }
3369
/* If the last table entry has the flag set, close the run at 256. */
3370 if (bit != 0)
3371 {
3372 if (length >= MAX_RANGE_SIZE)
3373 return;
3374 ranges[2 + length] = 256;
3375 length++;
3376 }
3377 ranges[0] = length;
3378 }
3379
/* Converts a 256 bit character class bitmap (bits, 32 bytes, least significant
bit first within each byte) into a range list and passes it to check_ranges()
with readch set to FALSE, so the character must already be in TMP1.

Returns FALSE, without emitting anything, when the bitmap needs more than
MAX_RANGE_SIZE bit changes; otherwise returns the result of check_ranges().

nclass decides whether a final change at position 256 is appended: it is
added when the last bit is 0 and nclass is set, or when the last bit is 1 and
nclass is not set. NOTE(review): nclass presumably tells how characters above
255 are to be treated - confirm against the callers. */
3380 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3381 {
3382 int ranges[2 + MAX_RANGE_SIZE];
3383 pcre_uint8 bit, cbit, all;
3384 int i, byte, length = 0;
3385
3386 bit = bits[0] & 0x1;
3387 ranges[1] = bit;
3388 /* Can be 0 or 255. */
3389 all = -bit;
3390
3391 for (i = 0; i < 256; )
3392 {
3393 byte = i >> 3;
/* A whole byte equal to 'all' contains no bit change: skip 8 characters. */
3394 if ((i & 0x7) == 0 && bits[byte] == all)
3395 i += 8;
3396 else
3397 {
3398 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3399 if (cbit != bit)
3400 {
3401 if (length >= MAX_RANGE_SIZE)
3402 return FALSE;
3403 ranges[2 + length] = i;
3404 length++;
3405 bit = cbit;
3406 all = -cbit;
3407 }
3408 i++;
3409 }
3410 }
3411
3412 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3413 {
3414 if (length >= MAX_RANGE_SIZE)
3415 return FALSE;
3416 ranges[2 + length] = 256;
3417 length++;
3418 }
3419 ranges[0] = length;
3420
3421 return check_ranges(common, ranges, backtracks, FALSE);
3422 }
3423
/* Emits the fast-called helper that tests the character in TMP1 against the
newline characters 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when wide
characters are possible (always in the 16 and 32 bit libraries, only in UTF
mode in the 8 bit library).

The result is accumulated in TMP2 (non-zero on a hit) and the last OR also
sets the equality flag from it before the helper returns. */
3424 static void check_anynewline(compiler_common *common)
3425 {
3426 /* Check whether TMP1 contains a newline character. TMP2 destroyed.
TMP1 is modified as well: 0x0a is subtracted from it and its lowest bit may be set. */
3427 DEFINE_COMPILER;
3428
3429 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3430
/* All later constants are relative to 0x0a because of this subtraction. */
3431 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3432 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3433 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3434 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3435 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3436 #ifdef COMPILE_PCRE8
3437 if (common->utf)
3438 {
3439 #endif
3440 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029, so one comparison covers
both characters. */
3441 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3442 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3443 #ifdef COMPILE_PCRE8
3444 }
3445 #endif
3446 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3447 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3448 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3449 }
3450
/* Emits the fast-called helper that tests the character in TMP1 against the
horizontal whitespace characters 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
0x2000-0x200A, 0x202f, 0x205f and 0x3000 when wide characters are possible
(always in the 16 and 32 bit libraries, only in UTF mode in the 8 bit
library).

The result is accumulated in TMP2 (non-zero on a hit) and the last OR also
sets the equality flag from it before the helper returns. */
3451 static void check_hspace(compiler_common *common)
3452 {
3453 /* Check whether TMP1 contains a horizontal whitespace character. TMP2 destroyed.
On the wide character path TMP1 is modified as well (0x2000 is subtracted from it). */
3454 DEFINE_COMPILER;
3455
3456 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3457
3458 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3459 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3460 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3461 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3462 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3463 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3464 #ifdef COMPILE_PCRE8
3465 if (common->utf)
3466 {
3467 #endif
3468 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3469 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3470 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3471 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3472 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is relative to 0x2000, which turns the 0x2000-0x200A
test into a single unsigned comparison. */
3473 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3474 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3475 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3476 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3477 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3478 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3479 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3480 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3481 #ifdef COMPILE_PCRE8
3482 }
3483 #endif
3484 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3485 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3486
3487 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3488 }
3489
/* Emits the fast-called helper that tests the character in TMP1 against the
vertical whitespace characters 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029
when wide characters are possible (always in the 16 and 32 bit libraries,
only in UTF mode in the 8 bit library).

The result is accumulated in TMP2 (non-zero on a hit) and the last OR also
sets the equality flag from it before the helper returns. */
3490 static void check_vspace(compiler_common *common)
3491 {
3492 /* Check whether TMP1 contains a vertical whitespace character. TMP2 destroyed.
TMP1 is modified as well: 0x0a is subtracted from it and its lowest bit may be set. */
3493 DEFINE_COMPILER;
3494
3495 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3496
/* All later constants are relative to 0x0a because of this subtraction. */
3497 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3498 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3499 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3500 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3501 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3502 #ifdef COMPILE_PCRE8
3503 if (common->utf)
3504 {
3505 #endif
/* NOTE(review): this OR also requests the equality flag, unlike the
corresponding line of check_anynewline(); the flag is set again by the
comparison two lines below before anything reads it. */
3506 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029, so one comparison covers
both characters. */
3507 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3508 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3509 #ifdef COMPILE_PCRE8
3510 }
3511 #endif
3512 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3513 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3514
3515 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3516 }
3517
/* The two compare helpers below borrow the STR_END and STACK_TOP registers to
hold the characters being compared, which is why they save and restore them. */
3518 #define CHAR1 STR_END
3519 #define CHAR2 STACK_TOP
3520
/* Emits the fast-called helper that compares two character sequences for
exact equality.

As the code uses them: TMP1 points to the first sequence, STR_PTR is moved
back by TMP2 to reach the second one, and TMP2 is counted down by
IN_UCHARS(1) per compared character. The loop ends at the first differing
pair or when TMP2 reaches zero. CHAR1 is preserved in TMP3 and CHAR2 in
LOCALS0, and both are restored before returning. */
3521 static void do_casefulcmp(compiler_common *common)
3522 {
3523 DEFINE_COMPILER;
3524 struct sljit_jump *jump;
3525 struct sljit_label *label;
3526
3527 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3528 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3529 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3530 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one character because the loads in the
loop address IN_UCHARS(1) past the pointer. MOVU_UCHAR is presumably the
load form that also updates the base register - confirm in the sljit docs. */
3531 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3532 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3533
3534 label = LABEL();
3535 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3536 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3537 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3538 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3539 JUMPTO(SLJIT_C_NOT_ZERO, label);
3540
3541 JUMPHERE(jump);
/* Undo the one character bias applied to STR_PTR before the loop. */
3542 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3543 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3544 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3545 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3546 }
3547
/* do_caselesscmp() borrows the STACK_LIMIT register for the address of the
lower casing table. */
3548 #define LCC_TABLE STACK_LIMIT
3549
/* Emits the fast-called helper that compares two character sequences after
mapping each character through the common->lcc table.

Register usage matches do_casefulcmp(): TMP1 points to the first sequence,
STR_PTR is moved back by TMP2 to reach the second one, and TMP2 is counted
down by IN_UCHARS(1) per compared character. In the 16 and 32 bit libraries
characters above 255 are compared unchanged, without a table lookup.
LCC_TABLE is preserved in TMP3, CHAR1 in LOCALS0 and CHAR2 in LOCALS1; all
three are restored before returning. */
3550 static void do_caselesscmp(compiler_common *common)
3551 {
3552 DEFINE_COMPILER;
3553 struct sljit_jump *jump;
3554 struct sljit_label *label;
3555
3556 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3557 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3558
3559 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3560 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3561 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3562 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one character because the loads in the
loop address IN_UCHARS(1) past the pointer. */
3563 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3564 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3565
3566 label = LABEL();
3567 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3568 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3569 #ifndef COMPILE_PCRE8
3570 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3571 #endif
3572 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3573 #ifndef COMPILE_PCRE8
3574 JUMPHERE(jump);
3575 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3576 #endif
3577 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3578 #ifndef COMPILE_PCRE8
3579 JUMPHERE(jump);
3580 #endif
3581 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3582 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3583 JUMPTO(SLJIT_C_NOT_ZERO, label);
3584
3585 JUMPHERE(jump);
/* Undo the one character bias applied to STR_PTR before the loop. */
3586 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3587 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3588 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3589 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3590 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3591 }
3592
/* The register aliases are private to the two helpers above. */
3593 #undef LCC_TABLE
3594 #undef CHAR1
3595 #undef CHAR2
3596
3597 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3598
/* Caseless comparison of the character sequence [src1, end1) with the text
that starts at args->uchar_ptr and is limited by args->end. This is an
ordinary C function, not emitted code.

Returns:
  (pcre_uchar*)1  when the second text runs out before the first sequence
                  has been fully compared
  NULL            when a pair of characters does not match
  otherwise       the position in the second text just after the matched part

Two characters match when they are equal, when the first one equals the
second one plus its other_case offset, or when the first one is found in the
caseless set of the second one. */
3599 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3600 {
3601 /* Implementing this function at the JIT level would be inefficient. */
3602 pcre_uint32 c1, c2;
3603 const pcre_uchar *src2 = args->uchar_ptr;
3604 const pcre_uchar *end2 = args->end;
3605 const ucd_record *ur;
3606 const pcre_uint32 *pp;
3607
3608 while (src1 < end1)
3609 {
3610 if (src2 >= end2)
3611 return (pcre_uchar*)1;
3612 GETCHARINC(c1, src1);
3613 GETCHARINC(c2, src2);
3614 ur = GET_UCD(c2);
3615 if (c1 != c2 && c1 != c2 + ur->other_case)
3616 {
3617 pp = PRIV(ucd_caseless_sets) + ur->caseset;
/* Scan the set until c1 is found or an entry greater than c1 is reached.
NOTE(review): this relies on each set being in ascending order and ending
with a value above every character - confirm against the table generator. */
3618 for (;;)
3619 {
3620 if (c1 < *pp) return NULL;
3621 if (c1 == *pp++) break;
3622 }
3623 }
3624 }
3625 return src2;
3626 }
3627
3628 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3629
/* Emits the comparison code for one character of a literal, located at cc,
and returns cc advanced past that character (several code units when
common->utf is set and the first unit announces extra ones). A failed
comparison jumps to backtracks.

The subject data is read at negative offsets from STR_PTR. context carries
the state shared by successive calls:
  length     offset, counted back from STR_PTR, of the data not yet loaded;
             decreased by IN_UCHARS(1) for every code unit handled
  sourcereg  register (TMP1 or TMP2) that receives the next load; -1 before
             the first call, in which case the first load is emitted here
  c, oc, ucharptr
             only used when unaligned reads are available in the 8 and 16
             bit libraries: code units and their case bits are collected so
             that several units can be checked with one wider compare

For a caseless compare of a character that has another case, the bit
returned by char_get_othercase_bit() is OR-ed into the loaded data and into
the expected value, so both cases compare equal. */
3630 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3631 compare_context* context, jump_list **backtracks)
3632 {
3633 DEFINE_COMPILER;
3634 unsigned int othercasebit = 0;
3635 pcre_uchar *othercasechar = NULL;
3636 #ifdef SUPPORT_UTF
3637 int utflength;
3638 #endif
3639
3640 if (caseless && char_has_othercase(common, cc))
3641 {
3642 othercasebit = char_get_othercase_bit(common, cc);
3643 SLJIT_ASSERT(othercasebit);
3644 /* Extracting bit difference info. */
/* The upper part of the value selects the code unit (as an index from cc)
that carries the differing bit, the low 8 bits are the bit mask. In the 16
and 32 bit libraries bit 0x100 additionally says that the mask belongs to
the second byte of the unit. */
3645 #if defined COMPILE_PCRE8
3646 othercasechar = cc + (othercasebit >> 8);
3647 othercasebit &= 0xff;
3648 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3649 /* Note that this code only handles characters in the BMP. If there
3650 ever are characters outside the BMP whose othercase differs in only one
3651 bit from itself (there currently are none), this code will need to be
3652 revised for COMPILE_PCRE32. */
3653 othercasechar = cc + (othercasebit >> 9);
3654 if ((othercasebit & 0x100) != 0)
3655 othercasebit = (othercasebit & 0xff) << 8;
3656 else
3657 othercasebit &= 0xff;
3658 #endif /* COMPILE_PCRE[8|16|32] */
3659 }
3660
/* First call for this context: emit the initial load into TMP1, using the
widest access that the remaining length allows, and let the next load go to
TMP2. */
3661 if (context->sourcereg == -1)
3662 {
3663 #if defined COMPILE_PCRE8
3664 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3665 if (context->length >= 4)
3666 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3667 else if (context->length >= 2)
3668 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3669 else
3670 #endif
3671 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3672 #elif defined COMPILE_PCRE16
3673 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3674 if (context->length >= 4)
3675 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3676 else
3677 #endif
3678 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3679 #elif defined COMPILE_PCRE32
3680 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3681 #endif /* COMPILE_PCRE[8|16|32] */
3682 context->sourcereg = TMP2;
3683 }
3684
3685 #ifdef SUPPORT_UTF
3686 utflength = 1;
3687 if (common->utf && HAS_EXTRALEN(*cc))
3688 utflength += GET_EXTRALEN(*cc);
3689
3690 do
3691 {
3692 #endif
3693
3694 context->length -= IN_UCHARS(1);
3695 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3696
3697 /* Unaligned read is supported. */
3698 if (othercasebit != 0 && othercasechar == cc)
3699 {
3700 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3701 context->oc.asuchars[context->ucharptr] = othercasebit;
3702 }
3703 else
3704 {
3705 context->c.asuchars[context->ucharptr] = *cc;
3706 context->oc.asuchars[context->ucharptr] = 0;
3707 }
3708 context->ucharptr++;
3709
/* Emit a compare once the collected units fill the load that was already
issued, or when no data is left. */
3710 #if defined COMPILE_PCRE8
3711 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3712 #else
3713 if (context->ucharptr >= 2 || context->length == 0)
3714 #endif
3715 {
/* Issue the load for the following group first, then switch sourcereg to
the register filled by the previous load and compare that one. */
3716 if (context->length >= 4)
3717 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3718 else if (context->length >= 2)
3719 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3720 #if defined COMPILE_PCRE8
3721 else if (context->length >= 1)
3722 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3723 #endif /* COMPILE_PCRE8 */
3724 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3725
3726 switch(context->ucharptr)
3727 {
3728 case 4 / sizeof(pcre_uchar):
3729 if (context->oc.asint != 0)
3730 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3731 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3732 break;
3733
3734 case 2 / sizeof(pcre_uchar):
3735 if (context->oc.asushort != 0)
3736 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3737 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3738 break;
3739
3740 #ifdef COMPILE_PCRE8
3741 case 1:
3742 if (context->oc.asbyte != 0)
3743 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3744 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3745 break;
3746 #endif
3747
3748 default:
3749 SLJIT_ASSERT_STOP();
3750 break;
3751 }
3752 context->ucharptr = 0;
3753 }
3754
3755 #else
3756
3757 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit per compare: load the next unit into sourcereg, then switch
to the register filled by the previous load and compare it with *cc. */
3758 if (context->length >= 1)
3759 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3760
3761 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3762
3763 if (othercasebit != 0 && othercasechar == cc)
3764 {
3765 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3766 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3767 }
3768 else
3769 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3770
3771 #endif
3772
3773 cc++;
3774 #ifdef SUPPORT_UTF
3775 utflength--;
3776 }
3777 while (utflength > 0);
3778 #endif
3779
3780 return cc;
3781 }
3782
3783 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3784
/* SET_TYPE_OFFSET(value) emits, if needed, one add or subtract so that typereg
holds its original content minus value, and records value in typeoffset.
Later comparisons can then use constants relative to typeoffset.

The macro expects the variables typereg and typeoffset, and whatever OP2
needs, to be in scope at the point of use.

NOTE(review): the expansion is several statements that are not wrapped in
do { } while (0), and value is evaluated more than once. Do not use it as
the unbraced body of an if/else or loop, and do not pass an argument with
side effects. */
3785 #define SET_TYPE_OFFSET(value) \
3786 if ((value) != typeoffset) \
3787 { \
3788 if ((value) > typeoffset) \
3789 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3790 else \
3791 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3792 } \
3793 typeoffset = (value);
3794
/* SET_CHAR_OFFSET(value) does the same for the character kept in TMP1: TMP1
is adjusted to hold the character minus value, and value is recorded in
charoffset. It expects charoffset to be in scope, and the same usage
restrictions apply. */
3795 #define SET_CHAR_OFFSET(value) \
3796 if ((value) != charoffset) \
3797 { \
3798 if ((value) > charoffset) \
3799 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
3800 else \
3801 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
3802 } \
3803 charoffset = (value);
3804
3805 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3806 {
3807 DEFINE_COMPILER;
3808 jump_list *found = NULL;
3809 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3810 pcre_int32 c, charoffset;
3811 const pcre_uint32 *other_cases;
3812 struct sljit_jump *jump = NULL;
3813 pcre_uchar *ccbegin;
3814 int compares, invertcmp, numberofcmps;
3815 #ifdef SUPPORT_UCP
3816 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3817 BOOL charsaved = FALSE;
3818 int typereg = TMP1, scriptreg = TMP1;
3819 pcre_int32 typeoffset;
3820 #endif
3821
3822 /* Although SUPPORT_UTF must be defined, we are
3823 not necessarily in UTF mode, even in 8 bit mode. */
3824 detect_partial_match(common, backtracks);
3825 read_char(common);
3826
3827 if ((*cc++ & XCL_MAP) != 0)
3828 {
3829 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3830 #ifndef COMPILE_PCRE8
3831 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3832 #elif defined SUPPORT_UTF
3833 if (common->utf)
3834 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3835 #endif
3836
3837 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3838 {
3839 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3840 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3841 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3842 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3843 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3844 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3845 }
3846
3847 #ifndef COMPILE_PCRE8
3848 JUMPHERE(jump);
3849 #elif defined SUPPORT_UTF
3850 if (common->utf)
3851 JUMPHERE(jump);
3852 #endif
3853 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3854 #ifdef SUPPORT_UCP
3855 charsaved = TRUE;
3856 #endif
3857 cc += 32 / sizeof(pcre_uchar);
3858 }
3859
3860 /* Scanning the necessary info. */
3861 ccbegin = cc;
3862 compares = 0;
3863 while (*cc != XCL_END)
3864 {
3865 compares++;
3866 if (*cc == XCL_SINGLE)
3867 {
3868 cc += 2;
3869 #ifdef SUPPORT_UTF
3870 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3871 #endif
3872 #ifdef SUPPORT_UCP
3873 needschar = TRUE;
3874 #endif
3875 }
3876 else if (*cc == XCL_RANGE)
3877 {
3878 cc += 2;
3879 #ifdef SUPPORT_UTF
3880 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3881 #endif
3882 cc++;
3883 #ifdef SUPPORT_UTF
3884 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3885 #endif
3886 #ifdef SUPPORT_UCP
3887 needschar = TRUE;
3888 #endif
3889 }
3890 #ifdef SUPPORT_UCP
3891 else
3892 {
3893 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3894 cc++;
3895 switch(*cc)
3896 {
3897 case PT_ANY:
3898 break;
3899
3900 case PT_LAMP:
3901 case PT_GC:
3902 case PT_PC:
3903 case PT_ALNUM:
3904 needstype = TRUE;
3905 break;
3906
3907 case PT_SC:
3908 needsscript = TRUE;
3909 break;
3910
3911 case PT_SPACE:
3912 case PT_PXSPACE:
3913 case PT_WORD:
3914 needstype = TRUE;
3915 needschar = TRUE;
3916 break;
3917
3918 case PT_CLIST:
3919 case PT_UCNC:
3920 needschar = TRUE;
3921 break;
3922
3923 default:
3924 SLJIT_ASSERT_STOP();
3925 break;
3926 }
3927 cc += 2;
3928 }
3929 #endif
3930 }
3931
3932 #ifdef SUPPORT_UCP
3933 /* Simple register allocation. TMP1 is preferred if possible. */
3934 if (needstype || needsscript)
3935 {
3936 if (needschar && !charsaved)
3937 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3938 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3939 if (needschar)
3940 {
3941 if (needstype)
3942 {
3943 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3944 typereg = RETURN_ADDR;
3945 }
3946
3947 if (needsscript)
3948 scriptreg = TMP3;
3949 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3950 }
3951 else if (needstype && needsscript)
3952 scriptreg = TMP3;
3953 /* In all other cases only one of them was specified, and that one can go to TMP1. */
3954
3955 if (needsscript)
3956 {
3957 if (scriptreg == TMP1)
3958 {
3959 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3960 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3961 }
3962 else
3963 {
3964 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3965 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3966 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3967 }
3968 }
3969 }
3970 #endif
3971
3972 /* Generating code. */
3973 cc = ccbegin;
3974 charoffset = 0;
3975 numberofcmps = 0;
3976 #ifdef SUPPORT_UCP
3977 typeoffset = 0;
3978 #endif
3979
3980 while (*cc != XCL_END)
3981 {
3982 compares--;
3983 invertcmp = (compares == 0 && list != backtracks);
3984 jump = NULL;
3985
3986 if (*cc == XCL_SINGLE)
3987 {
3988 cc ++;
3989 #ifdef SUPPORT_UTF
3990 if (common->utf)
3991 {
3992 GETCHARINC(c, cc);
3993 }
3994 else
3995 #endif
3996 c = *cc++;
3997
3998 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3999 {
4000 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4001 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4002 numberofcmps++;
4003 }
4004 else if (numberofcmps > 0)
4005 {
4006 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4007 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4008 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4009 numberofcmps = 0;
4010 }
4011 else
4012 {
4013 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4014 numberofcmps = 0;
4015 }
4016 }
4017 else if (*cc == XCL_RANGE)
4018 {
4019 cc ++;
4020 #ifdef SUPPORT_UTF
4021 if (common->utf)
4022 {
4023 GETCHARINC(c, cc);
4024 }
4025 else
4026 #endif
4027 c = *cc++;
4028 SET_CHAR_OFFSET(c);
4029 #ifdef SUPPORT_UTF
4030 if (common->utf)
4031 {
4032 GETCHARINC(c, cc);
4033 }
4034 else
4035 #endif
4036 c = *cc++;
4037 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4038 {
4039 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4040 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4041 numberofcmps++;
4042 }
4043 else if (numberofcmps > 0)
4044 {
4045 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4046 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4047 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4048 numberofcmps = 0;
4049 }
4050 else
4051 {
4052 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4053 numberofcmps = 0;
4054 }
4055 }
4056 #ifdef SUPPORT_UCP
4057 else
4058 {
4059 if (*cc == XCL_NOTPROP)
4060 invertcmp ^= 0x1;
4061 cc++;
4062 switch(*cc)
4063 {
4064 case PT_ANY:
4065 if (list != backtracks)
4066 {
4067 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4068 continue;
4069 }
4070 else if (cc[-1] == XCL_NOTPROP)
4071 continue;
4072 jump = JUMP(SLJIT_JUMP);
4073 break;
4074
4075 case PT_LAMP:
4076 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4077 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4078 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4079 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4080 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4081 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4082 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4083 break;
4084
4085 case PT_GC:
4086 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4087 SET_TYPE_OFFSET(c);
4088 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4089 break;
4090
4091 case PT_PC:
4092 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4093 break;
4094
4095 case PT_SC:
4096 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4097 break;
4098
4099 case PT_SPACE:
4100 case PT_PXSPACE:
4101 if (*cc == PT_SPACE)
4102 {
4103 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4104 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4105 }
4106 SET_CHAR_OFFSET(9);
4107 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4108 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4109 if (*cc == PT_SPACE)
4110 JUMPHERE(jump);
4111
4112 SET_TYPE_OFFSET(ucp_Zl);
4113 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4114 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4115 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4116 break;
4117
4118 case PT_WORD:
4119 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4120 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4121 /* Fall through. */
4122
4123 case PT_ALNUM:
4124 SET_TYPE_OFFSET(ucp_Ll);
4125 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4126 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4127 SET_TYPE_OFFSET(ucp_Nd);
4128 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4129 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4130 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4131 break;
4132
4133 case PT_CLIST:
4134 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4135
4136 /* At least three characters are required.
4137 Otherwise this case would be handled by the normal code path. */
4138 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4139 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4140
4141 /* Optimizing character pairs, if their difference is power of 2. */
4142 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4143 {
4144 if (charoffset == 0)
4145 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4146 else
4147 {
4148 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4149 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4150 }
4151 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4152 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4153 other_cases += 2;
4154 }
4155 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4156 {
4157 if (charoffset == 0)
4158 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4159 else
4160 {
4161 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4162 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4163 }
4164 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4165 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4166
4167 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4168 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4169
4170 other_cases += 3;
4171 }
4172 else
4173 {
4174 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4175 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4176 }
4177
4178 while (*other_cases != NOTACHAR)
4179 {
4180 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4181 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4182 }
4183 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4184 break;
4185
4186 case PT_UCNC:
4187 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4188 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4189 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4190 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4191 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4192 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4193
4194 SET_CHAR_OFFSET(0xa0);
4195 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4196 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4197 SET_CHAR_OFFSET(0);
4198 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4199 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4200 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4201 break;
4202 }
4203 cc += 2;
4204 }
4205 #endif
4206
4207 if (jump != NULL)
4208 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4209 }
4210
4211 if (found != NULL)
4212 set_jumps(found, LABEL());
4213 }
4214
4215 #undef SET_TYPE_OFFSET
4216 #undef SET_CHAR_OFFSET
4217
4218 #endif
4219
/* Emits the matching-path machine code for one opcode.

   common      shared JIT compiler state (DEFINE_COMPILER derives "compiler" from it)
   type        the opcode to compile
   cc          points at the opcode's operand data (the caller has already
               stepped over the opcode itself)
   backtracks  list that receives every jump taken when the match fails

   Returns the pattern position just after the operand data consumed by this
   opcode (plain "cc" for opcodes handled here without operands). An opcode
   that is not handled by the switch reaches SLJIT_ASSERT_STOP(). */
4220 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4221 {
4222 DEFINE_COMPILER;
4223 int length;
4224 unsigned int c, oc, bit;
4225 compare_context context;
4226 struct sljit_jump *jump[4];
4227 #ifdef SUPPORT_UTF
4228 struct sljit_label *label;
4229 #ifdef SUPPORT_UCP
4230 pcre_uchar propdata[5];
4231 #endif
4232 #endif
4233
4234 switch(type)
4235 {
/* Backtrack unless STR_PTR equals the "begin" field of jit_arguments. */
4236 case OP_SOD:
4237 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4238 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4239 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4240 return cc;
4241
/* Backtrack unless STR_PTR equals the "str" field of jit_arguments. */
4242 case OP_SOM:
4243 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4244 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4245 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4246 return cc;
4247
/* Fast-call the shared word boundary helper, then branch on the zero flag
   state after the call; the NOT_ variant uses the inverse condition. */
4248 case OP_NOT_WORD_BOUNDARY:
4249 case OP_WORD_BOUNDARY:
4250 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4251 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4252 return cc;
4253
4254 case OP_NOT_DIGIT:
4255 case OP_DIGIT:
4256 /* Digits are usually 0-9, so it is worth optimizing them. */
/* common->digits[0] == -2 marks the range cache as not yet computed. */
4257 if (common->digits[0] == -2)
4258 get_ctype_ranges(common, ctype_digit, common->digits);
4259 detect_partial_match(common, backtracks);
4260 /* Flip the starting bit in the negative case. */
4261 if (type == OP_NOT_DIGIT)
4262 common->digits[1] ^= 1;
/* Fall back to a ctype table test when check_ranges() reports failure. */
4263 if (!check_ranges(common, common->digits, backtracks, TRUE))
4264 {
4265 read_char8_type(common);
4266 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4267 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4268 }
/* Undo the flip above so the cached ranges are unchanged on exit. */
4269 if (type == OP_NOT_DIGIT)
4270 common->digits[1] ^= 1;
4271 return cc;
4272
/* Test the ctype_space bit in TMP1 after read_char8_type(). */
4273 case OP_NOT_WHITESPACE:
4274 case OP_WHITESPACE:
4275 detect_partial_match(common, backtracks);
4276 read_char8_type(common);
4277 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4278 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4279 return cc;
4280
/* Test the ctype_word bit in TMP1 after read_char8_type(). */
4281 case OP_NOT_WORDCHAR:
4282 case OP_WORDCHAR:
4283 detect_partial_match(common, backtracks);
4284 read_char8_type(common);
4285 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4286 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4287 return cc;
4288
/* With a fixed newline value above 255 (two units packed as high/low byte)
   the character just read is compared with the high byte and, if input
   remains, the next unit with the low byte; backtrack only when both match.
   All other newline conventions are delegated to check_newlinechar(). */
4289 case OP_ANY:
4290 detect_partial_match(common, backtracks);
4291 read_char(common);
4292 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4293 {
4294 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4295 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4296 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4297 else
4298 jump[1] = check_str_end(common);
4299
4300 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4301 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
/* check_str_end() may hand back NULL, hence the guard. */
4302 if (jump[1] != NULL)
4303 JUMPHERE(jump[1]);
4304 JUMPHERE(jump[0]);
4305 }
4306 else
4307 check_newlinechar(common, common->nltype, backtracks, TRUE);
4308 return cc;
4309
/* Consume one character without inspecting its value. In UTF mode the
   additional code units of a multi-unit character are skipped as well:
   8-bit via the PRIV(utf8_table4) lookup for lead bytes >= 0xc0, 16-bit by
   advancing two more bytes when (unit & 0xfc00) == 0xd800. */
4310 case OP_ALLANY:
4311 detect_partial_match(common, backtracks);
4312 #ifdef SUPPORT_UTF
4313 if (common->utf)
4314 {
4315 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4316 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4317 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4318 #if defined COMPILE_PCRE8
4319 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4320 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4321 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4322 #elif defined COMPILE_PCRE16
4323 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4324 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4325 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4326 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4327 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4328 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4329 #endif
4330 JUMPHERE(jump[0]);
4331 #endif /* COMPILE_PCRE[8|16] */
4332 return cc;
4333 }
4334 #endif
4335 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4336 return cc;
4337
/* Advance by exactly one code unit. */
4338 case OP_ANYBYTE:
4339 detect_partial_match(common, backtracks);
4340 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4341 return cc;
4342
4343 #ifdef SUPPORT_UTF
4344 #ifdef SUPPORT_UCP
/* Wrap the two operand units in a minimal item list (XCL_PROP or
   XCL_NOTPROP item, terminated by XCL_END) and reuse
   compile_xclass_matchingpath(). propdata[0] is presumably the xclass
   flags unit - TODO confirm against compile_xclass_matchingpath(). */
4345 case OP_NOTPROP:
4346 case OP_PROP:
4347 propdata[0] = 0;
4348 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4349 propdata[2] = cc[0];
4350 propdata[3] = cc[1];
4351 propdata[4] = XCL_END;
4352 compile_xclass_matchingpath(common, propdata, backtracks);
4353 return cc + 2;
4354 #endif
4355 #endif
4356
/* If the character read is CR, an immediately following NL is consumed too
   (when input remains). Any other character is validated by
   check_newlinechar() using common->bsr_nltype. */
4357 case OP_ANYNL:
4358 detect_partial_match(common, backtracks);
4359 read_char(common);
4360 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4361 /* We don't need to handle soft partial matching case. */
4362 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4363 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4364 else
4365 jump[1] = check_str_end(common);
4366 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4367 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4368 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4369 jump[3] = JUMP(SLJIT_JUMP);
4370 JUMPHERE(jump[0]);
4371 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4372 JUMPHERE(jump[1]);
4373 JUMPHERE(jump[2]);
4374 JUMPHERE(jump[3]);
4375 return cc;
4376
/* Read a character, fast-call the shared hspace helper and branch on the
   zero flag state after the call. */
4377 case OP_NOT_HSPACE:
4378 case OP_HSPACE:
4379 detect_partial_match(common, backtracks);
4380 read_char(common);
4381 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4382 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4383 return cc;
4384
/* Same scheme as the hspace case above, using the vspace helper. */
4385 case OP_NOT_VSPACE:
4386 case OP_VSPACE:
4387 detect_partial_match(common, backtracks);
4388 read_char(common);
4389 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4390 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4391 return cc;
4392
4393 #ifdef SUPPORT_UCP
/* Read one character, then keep consuming characters for as long as the
   PRIV(ucp_gbtable) entry selected by the previous character's gbprop has
   the bit numbered by the next character's gbprop set. STACK_TOP is
   borrowed to hold the previous gbprop and is restored from LOCALS0. */
4394 case OP_EXTUNI:
4395 detect_partial_match(common, backtracks);
4396 read_char(common);
4397 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4398 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4399 /* Optimize register allocation: use a real register. */
4400 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4401 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4402
4403 label = LABEL();
4404 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* Remember the position before the next character so it can be un-read
   if it turns out not to extend the sequence. */
4405 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4406 read_char(common);
4407 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4408 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4409 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4410
/* TMP1 = ucp_gbtable[previous gbprop] (4-byte entries); the new gbprop
   becomes "previous" for the next round; TMP2 = 1 << new gbprop. */
4411 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4412 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4413 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4414 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4415 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4416 JUMPTO(SLJIT_C_NOT_ZERO, label);
4417
4418 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4419 JUMPHERE(jump[0]);
4420 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4421
4422 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4423 {
4424 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4425 /* Since we successfully read a char above, partial matching must occur. */
4426 check_partial(common, TRUE);
4427 JUMPHERE(jump[0]);
4428 }
4429 return cc;
4430 #endif
4431
/* Succeeds when STR_PTR is at (or past) STR_END, or when exactly one
   newline sequence remains before STR_END. */
4432 case OP_EODN:
4433 /* Requires rather complex checks. */
4434 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4435 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4436 {
4437 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4438 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4439 if (common->mode == JIT_COMPILE)
4440 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4441 else
4442 {
/* Partial modes: when STR_PTR + 2 units is beyond STR_END and the one
   available unit equals the first newline unit, report a partial match. */
4443 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4444 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4445 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4446 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4447 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4448 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4449 check_partial(common, TRUE);
4450 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4451 JUMPHERE(jump[1]);
4452 }
4453 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4454 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4455 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4456 }
4457 else if (common->nltype == NLTYPE_FIXED)
4458 {
/* Single-unit fixed newline: it must be the last unit of the subject. */
4459 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4460 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4461 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4462 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4463 }
4464 else
4465 {
/* Remaining newline types: a leading CR is examined first (CR NL must end
   exactly at STR_END), everything else goes through the code after
   jump[1]. */
4466 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4467 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4468 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4469 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4470 jump[2] = JUMP(SLJIT_C_GREATER);
4471 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4472 /* Equal. */
4473 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4474 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4475 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4476
4477 JUMPHERE(jump[1]);
4478 if (common->nltype == NLTYPE_ANYCRLF)
4479 {
4480 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4481 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4482 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4483 }
4484 else
4485 {
/* STR_PTR is saved in LOCALS1 around read_char() and restored once the
   anynewline helper has accepted the character. */
4486 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4487 read_char(common);
4488 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4489 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4490 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4491 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4492 }
4493 JUMPHERE(jump[2]);
4494 JUMPHERE(jump[3]);
4495 }
4496 JUMPHERE(jump[0]);
4497 check_partial(common, FALSE);
4498 return cc;
4499
/* Backtrack if any input remains before STR_END. */
4500 case OP_EOD:
4501 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4502 check_partial(common, FALSE);
4503 return cc;
4504
/* Backtrack if STR_PTR is beyond "begin" or the notbol argument is set. */
4505 case OP_CIRC:
4506 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4507 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4508 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4509 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4510 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4511 return cc;
4512
/* At "begin": only the notbol argument is checked. Beyond "begin": input
   must remain and the character(s) just before STR_PTR must be a newline. */
4513 case OP_CIRCM:
4514 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4515 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4516 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4517 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4518 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4519 jump[0] = JUMP(SLJIT_JUMP);
4520 JUMPHERE(jump[1]);
4521
4522 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4523 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4524 {
/* TMP1 still holds "begin": make sure two units exist before STR_PTR. */
4525 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4526 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4527 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4528 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4529 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4530 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4531 }
4532 else
4533 {
4534 skip_char_back(common);
4535 read_char(common);
4536 check_newlinechar(common, common->nltype, backtracks, FALSE);
4537 }
4538 JUMPHERE(jump[0]);
4539 return cc;
4540
/* Backtrack if the noteol argument is set; then either reuse the OP_EODN
   code or, when common->endonly is set, require that no input remains. */
4541 case OP_DOLL:
4542 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4543 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4544 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4545
4546 if (!common->endonly)
4547 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4548 else
4549 {
4550 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4551 check_partial(common, FALSE);
4552 }
4553 return cc;
4554
/* At STR_END: only the noteol argument is checked. Before STR_END: the
   character(s) at STR_PTR must be a newline (STR_PTR is not advanced by
   the code emitted directly here). */
4555 case OP_DOLLM:
4556 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4557 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4558 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4559 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4560 check_partial(common, FALSE);
4561 jump[0] = JUMP(SLJIT_JUMP);
4562 JUMPHERE(jump[1]);
4563
4564 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4565 {
4566 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4567 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4568 if (common->mode == JIT_COMPILE)
4569 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4570 else
4571 {
4572 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4573 /* STR_PTR = STR_END - IN_UCHARS(1) */
4574 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4575 check_partial(common, TRUE);
4576 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4577 JUMPHERE(jump[1]);
4578 }
4579
4580 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4581 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4582 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4583 }
4584 else
4585 {
4586 peek_char(common);
4587 check_newlinechar(common, common->nltype, backtracks, FALSE);
4588 }
4589 JUMPHERE(jump[0]);
4590 return cc;
4591
4592 case OP_CHAR:
4593 case OP_CHARI:
4594 length = 1;
4595 #ifdef SUPPORT_UTF
4596 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4597 #endif
/* Fast path (JIT_COMPILE mode only): OP_CHAR, characters without another
   case, and characters for which char_get_othercase_bit() is non-zero are
   compared through byte_sequence_compare() after a single length check. */
4598 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4599 {
4600 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4601 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4602
4603 context.length = IN_UCHARS(length);
4604 context.sourcereg = -1;
4605 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4606 context.ucharptr = 0;
4607 #endif
4608 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4609 }
/* General path: read the subject character and compare it with the
   pattern character c (and with its other case oc where required). */
4610 detect_partial_match(common, backtracks);
4611 read_char(common);
4612 #ifdef SUPPORT_UTF
4613 if (common->utf)
4614 {
4615 GETCHAR(c, cc);
4616 }
4617 else
4618 #endif
4619 c = *cc;
4620 if (type == OP_CHAR || !char_has_othercase(common, cc))
4621 {
4622 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4623 return cc + length;
4624 }
4625 oc = char_othercase(common, c);
4626 bit = c ^ oc;
/* When the two cases differ in a single bit, force that bit on and do
   one comparison instead of two. */
4627 if (is_powerof2(bit))
4628 {
4629 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4630 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4631 return cc + length;
4632 }
4633 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4634 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4635 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4636 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4637 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4638 return cc + length;
4639
/* Negated single character: backtrack when the subject character equals
   c (or, for OP_NOTI, either case of c); otherwise consume it. */
4640 case OP_NOT:
4641 case OP_NOTI:
4642 detect_partial_match(common, backtracks);
4643 length = 1;
4644 #ifdef SUPPORT_UTF
4645 if (common->utf)
4646 {
4647 #ifdef COMPILE_PCRE8
4648 c = *cc;
/* A pattern character below 128 can be tested on the first subject byte
   alone, without decoding the whole character. */
4649 if (c < 128)
4650 {
4651 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4652 if (type == OP_NOT || !char_has_othercase(common, cc))
4653 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4654 else
4655 {
4656 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4657 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4658 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4659 }
4660 /* Skip the variable-length character. */
4661 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4662 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4663 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4665 JUMPHERE(jump[0]);
4666 return cc + 1;
4667 }
4668 else
4669 #endif /* COMPILE_PCRE8 */
4670 {
4671 GETCHARLEN(c, cc, length);
4672 read_char(common);
4673 }
4674 }
4675 else
4676 #endif /* SUPPORT_UTF */
4677 {
4678 read_char(common);
4679 c = *cc;
4680 }
4681
4682 if (type == OP_NOT || !char_has_othercase(common, cc))
4683 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4684 else
4685 {
4686 oc = char_othercase(common, c);
4687 bit = c ^ oc;
4688 if (is_powerof2(bit))
4689 {
4690 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4691 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4692 }
4693 else
4694 {
4695 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4696 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4697 }
4698 }
4699 return cc + length;
4700
/* Bitmap class: cc points at a 32-byte bit map. check_class_ranges() is
   tried first; otherwise the bit for the character is tested directly
   (byte index = c >> 3, bit number = c & 7). */
4701 case OP_CLASS:
4702 case OP_NCLASS:
4703 detect_partial_match(common, backtracks);
4704 read_char(common);
4705 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4706 return cc + 32 / sizeof(pcre_uchar);
4707
4708 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4709 jump[0] = NULL;
4710 #ifdef COMPILE_PCRE8
4711 /* This check only affects 8 bit mode. In other modes, we
4712 always need to compare the value with 255. */
4713 if (common->utf)
4714 #endif /* COMPILE_PCRE8 */
4715 {
/* Characters above 255 are outside the bit map: they fail OP_CLASS and
   skip the bitmap test (i.e. match) for OP_NCLASS. */
4716 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4717 if (type == OP_CLASS)
4718 {
4719 add_jump(compiler, backtracks, jump[0]);
4720 jump[0] = NULL;
4721 }
4722 }
4723 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4724 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4725 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4726 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4727 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4728 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4729 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4730 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4731 if (jump[0] != NULL)
4732 JUMPHERE(jump[0]);
4733 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4734 return cc + 32 / sizeof(pcre_uchar);
4735
4736 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended class: the item data follows the LINK_SIZE length field, and
   GET(cc, 0) supplies the distance used to step over the whole item. */
4737 case OP_XCLASS:
4738 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4739 return cc + GET(cc, 0) - 1;
4740 #endif
4741
/* Move STR_PTR back by "length" characters (code units when not in UTF
   mode), backtracking if that would go before "begin". */
4742 case OP_REVERSE:
4743 length = GET(cc, 0);
4744 if (length == 0)
4745 return cc + LINK_SIZE;
4746 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4747 #ifdef SUPPORT_UTF
4748 if (common->utf)
4749 {
/* Emitted loop: step back one character per iteration, TMP2 counts down. */
4750 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4751 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4752 label = LABEL();
4753 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4754 skip_char_back(common);
4755 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4756 JUMPTO(SLJIT_C_NOT_ZERO, label);
4757 }
4758 else
4759 #endif
4760 {
4761 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4762 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4763 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4764 }
4765 check_start_used_ptr(common);
4766 return cc + LINK_SIZE;
4767 }
/* No case matched: the opcode is not supported by this function. */
4768 SLJIT_ASSERT_STOP();
4769 return cc;
4770 }
4771
4772 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4773 {
4774 /* This function consumes at least one input character. */
4775 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4776 DEFINE_COMPILER;
4777 pcre_uchar *ccbegin = cc;
4778 compare_context context;
4779 int size;
4780
4781 context.length = 0;
4782 do
4783 {
4784 if (cc >= ccend)
4785 break;
4786
4787 if (*cc == OP_CHAR)
4788 {
4789 size = 1;
4790 #ifdef SUPPORT_UTF
4791 if (common->utf && HAS_EXTRALEN(cc[1]))
4792 size += GET_EXTRALEN(cc[1]);
4793 #endif
4794 }
4795 else if (*cc == OP_CHARI)
4796 {
4797 size = 1;
4798 #ifdef SUPPORT_UTF
4799 if (common->utf)
4800 {
4801 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4802 size = 0;
4803 else if (HAS_EXTRALEN(cc[1]))
4804 size += GET_EXTRALEN(cc[1]);
4805 }
4806 else
4807 #endif
4808 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4809 size = 0;
4810 }
4811 else
4812 size = 0;
4813
4814 cc += 1 + size;
4815 context.length += IN_UCHARS(size);
4816 }
4817 while (size > 0 && context.length <= 128);
4818
4819 cc = ccbegin;
4820 if (context.length > 0)
4821 {
4822 /* We have a fixed-length byte sequence. */
4823 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4824 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4825
4826 context.sourcereg = -1;
4827 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4828 context.ucharptr = 0;
4829 #endif
4830 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4831 return cc;
4832 }
4833
4834 /* A non-fixed length character will be checked if length == 0. */
4835 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4836 }
4837
4838 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4839 {
4840 DEFINE_COMPILER;
4841 int offset = GET2(cc, 1) << 1;
4842
4843 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4844 if (!common->jscript_compat)
4845 {
4846 if (backtracks == NULL)
4847 {
4848 /* OVECTOR(1) contains the "string begin - 1" constant. */
4849 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4850 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4851 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4852 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4853 return JUMP(SLJIT_C_NOT_ZERO);
4854 }
4855 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4856 }
4857 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4858 }
4859
4860 /* Forward declarations of functions that are defined later in the file. */
4861 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4862 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4863
/* Allocates a zero-filled backtrack record of "size" bytes with
   sljit_alloc_memory(), stores "ccstart" in it and pushes it on top of
   parent's backtrack list. The macro relies on the names "compiler",
   "backtrack" and "parent" being in scope where it is used. If the compiler
   reports an error after the allocation, the enclosing function returns
   "error" immediately. */
4864 #define PUSH_BACKTRACK(size, ccstart, error) \
4865 do \
4866 { \
4867 backtrack = sljit_alloc_memory(compiler, (size)); \
4868 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4869 return error; \
4870 memset(backtrack, 0, size); \
4871 backtrack->prev = parent->top; \
4872 backtrack->cc = (ccstart); \
4873 parent->top = backtrack; \
4874 } \
4875 while (0)
4876
/* Same as PUSH_BACKTRACK, for use inside functions returning void: on a
   compiler error the enclosing function simply returns. */
4877 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
4878 do \
4879 { \
4880 backtrack = sljit_alloc_memory(compiler, (size)); \
4881 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4882 return; \
4883 memset(backtrack, 0, size); \
4884 backtrack->prev = parent->top; \
4885 backtrack->cc = (ccstart); \
4886 parent->top = backtrack; \
4887 } \
4888 while (0)
4889
/* Views the in-scope "backtrack" pointer as a pointer to "type". */
4890 #define BACKTRACK_AS(type) ((type *)backtrack)
4891
/* Emits code that compares the subject at STR_PTR with the range stored in
   OVECTOR(offset) .. OVECTOR(offset + 1), where offset is twice the 2-byte
   group number found at cc + 1. *cc selects the comparison: OP_REF uses the
   caseful helper, anything else the caseless one (OP_REFI in UTF mode with
   UCP support calls do_utf_caselesscmp instead).

   backtracks  receives the jumps taken when the comparison fails
   withchecks  when TRUE, also test for the OVECTOR(1) marker value (skipped
               if common->jscript_compat is set) and for an empty range
   emptyfail   only meaningful with withchecks: TRUE sends the empty-range
               case to backtracks, FALSE lets it continue after the emitted
               comparison code

   Returns cc + 1 + IMM2_SIZE. */
4892 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4893 {
4894 DEFINE_COMPILER;
4895 int offset = GET2(cc, 1) << 1;
4896 struct sljit_jump *jump = NULL;
4897 struct sljit_jump *partial;
4898 struct sljit_jump *nopartial;
4899
4900 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4901 /* OVECTOR(1) contains the "string begin - 1" constant. */
4902 if (withchecks && !common->jscript_compat)
4903 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4904
4905 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4906 if (common->utf && *cc == OP_REFI)
4907 {
/* The three-argument call below takes its arguments from the scratch
   registers, so the register assignment is asserted first. */
4908 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
4909 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
/* Empty range: start equals end. */
4910 if (withchecks)
4911 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4912
4913 /* Needed to save important temporary registers. */
4914 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4915 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
4916 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4917 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4918 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return value as used here: 0 backtracks, 1 backtracks in normal mode
   but goes through check_partial() in the partial modes, and any other
   value becomes the new STR_PTR. */
4919 if (common->mode == JIT_COMPILE)
4920 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4921 else
4922 {
4923 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4924 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4925 check_partial(common, FALSE);
4926 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4927 JUMPHERE(nopartial);
4928 }
4929 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4930 }
4931 else
4932 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4933 {
/* TMP2 = length of the stored range in bytes; zero means empty. */
4934 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4935 if (withchecks)
4936 jump = JUMP(SLJIT_C_ZERO);
4937
/* Advance STR_PTR over the whole range up front; running past STR_END is
   a plain failure in normal mode and handled further down otherwise. */
4938 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4939 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4940 if (common->mode == JIT_COMPILE)
4941 add_jump(compiler, backtracks, partial);
4942
/* The comparison helper leaves a non-zero TMP2 when the texts differ. */
4943 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4944 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4945
4946 if (common->mode != JIT_COMPILE)
4947 {
4948 nopartial = JUMP(SLJIT_JUMP);
4949 JUMPHERE(partial);
4950 /* TMP2 -= STR_PTR - STR_END, i.e. shorten the length by the overshoot. */
4951 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4952 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* Nothing left to compare: go straight to the partial match check. */
4953 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4954 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4955 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4956 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4957 JUMPHERE(partial);
4958 check_partial(common, FALSE);
4959 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4960 JUMPHERE(nopartial);
4961 }
4962 }
4963
/* Resolve the empty-range jump recorded above (only set with withchecks). */
4964 if (jump != NULL)
4965 {
4966 if (emptyfail)
4967 add_jump(compiler, backtracks, jump);
4968 else
4969 JUMPHERE(jump);
4970 }
4971 return cc + 1 + IMM2_SIZE;
4972 }
4973
/* Emits the matching path code of a back reference that is followed by a
repeat opcode. The repeat opcode is read from cc[1 + IMM2_SIZE]; its lowest
bit selects the minimizing variants. min and max are decoded from the
opcode (and from its operands for OP_CRRANGE / OP_CRMINRANGE); max == 0 is
handled as "no upper limit" by the code below. The matchingpath label of
the item's iterator_backtrack is set here. Returns the position following
the repeat opcode and its operands. */
4974 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4975 {
4976 DEFINE_COMPILER;
4977 backtrack_common *backtrack;
4978 pcre_uchar type;
4979 struct sljit_label *label;
4980 struct sljit_jump *zerolength;
4981 struct sljit_jump *jump = NULL;
4982 pcre_uchar *ccbegin = cc;
4983 int min = 0, max = 0;
4984 BOOL minimize;
4985
4986 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4987
/* Decode the repeat limits and step cc over the whole item. ccbegin keeps
pointing to the reference opcode itself. */
4988 type = cc[1 + IMM2_SIZE];
4989 minimize = (type & 0x1) != 0;
4990 switch(type)
4991 {
4992 case OP_CRSTAR:
4993 case OP_CRMINSTAR:
4994 min = 0;
4995 max = 0;
4996 cc += 1 + IMM2_SIZE + 1;
4997 break;
4998 case OP_CRPLUS:
4999 case OP_CRMINPLUS:
5000 min = 1;
5001 max = 0;
5002 cc += 1 + IMM2_SIZE + 1;
5003 break;
5004 case OP_CRQUERY:
5005 case OP_CRMINQUERY:
5006 min = 0;
5007 max = 1;
5008 cc += 1 + IMM2_SIZE + 1;
5009 break;
5010 case OP_CRRANGE:
5011 case OP_CRMINRANGE:
5012 min = GET2(cc, 1 + IMM2_SIZE + 1);
5013 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5014 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5015 break;
5016 default:
5017 SLJIT_ASSERT_STOP();
5018 break;
5019 }
5020
/* Non-minimizing variants: the reference is matched in a loop right away. */
5021 if (!minimize)
5022 {
5023 if (min == 0)
5024 {
/* Two slots are pushed: STACK(0) = current STR_PTR, STACK(1) = 0.
NOTE(review): compile_ref_checks is defined elsewhere; the jump it returns
is resolved at the end of this path. Judging by the variable name it is
taken for a zero length reference - confirm against the helper. */
5025 allocate_stack(common, 2);
5026 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5027 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5028 /* Temporary release of STR_PTR. */
5029 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5030 zerolength = compile_ref_checks(common, ccbegin, NULL);
5031 /* Restore if not zero length. */
5032 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5033 }
5034 else
5035 {
/* At least one match is required: only a 0 is pushed, and the backtrack
list is handed to compile_ref_checks. */
5036 allocate_stack(common, 1);
5037 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5038 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5039 }
5040
/* POSSESSIVE0 serves as the iteration counter; it is only maintained when
a limit greater than 1 has to be checked. */
5041 if (min > 1 || max > 1)
5042 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5043
/* Start of the repeat loop. */
5044 label = LABEL();
5045 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5046
5047 if (min > 1 || max > 1)
5048 {
5049 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5050 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: loop again without saving a backtrack position. */
5052 if (min > 1)
5053 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5054 if (max > 1)
5055 {
/* Below the maximum: push STR_PTR and loop again; leave the loop once the
counter reaches max. */
5056 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5057 allocate_stack(common, 1);
5058 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5059 JUMPTO(SLJIT_JUMP, label);
5060 JUMPHERE(jump);
5061 }
5062 }
5063
5064 if (max == 0)
5065 {
5066 /* Includes min > 1 case as well. */
5067 allocate_stack(common, 1);
5068 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5069 JUMPTO(SLJIT_JUMP, label);
5070 }
5071
5072 JUMPHERE(zerolength);
5073 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5074
5075 decrease_call_count(common);
5076 return cc;
5077 }
5078
/* Minimizing variants. Two slots are pushed: STACK(0) later receives
STR_PTR, STACK(1) is the iteration counter. The counter is not initialised
for OP_CRMINSTAR, which does not read it in this function. */
5079 allocate_stack(common, 2);
5080 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5081 if (type != OP_CRMINSTAR)
5082 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5083
5084 if (min == 0)
5085 {
/* Zero repetitions are acceptable: store STR_PTR and skip the first match
attempt entirely. */
5086 zerolength = compile_ref_checks(common, ccbegin, NULL);
5087 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5088 jump = JUMP(SLJIT_JUMP);
5089 }
5090 else
5091 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5092
/* Matching of one more repetition starts at this label. */
5093 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5094 if (max > 0)
5095 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5096
5097 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5098 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5099
5100 if (min > 1)
5101 {
/* Count the repetition and keep matching until min is reached. */
5102 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5103 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5104 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5105 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5106 }
5107 else if (max > 0)
5108 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5109
5110 if (jump != NULL)
5111 JUMPHERE(jump);
5112 JUMPHERE(zerolength);
5113
5114 decrease_call_count(common);
5115 return cc;
5116 }
5117
5118 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5119 {
5120 DEFINE_COMPILER;
5121 backtrack_common *backtrack;
5122 recurse_entry *entry = common->entries;
5123 recurse_entry *prev = NULL;
5124 int start = GET(cc, 1);
5125 pcre_uchar *start_cc;
5126
5127 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5128
5129 /* Inlining simple patterns. */
5130 if (get_framesize(common, common->start + start, TRUE) == no_stack)
5131 {
5132 start_cc = common->start + start;
5133 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5134 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5135 return cc + 1 + LINK_SIZE;
5136 }
5137
5138 while (entry != NULL)
5139 {
5140 if (entry->start == start)
5141 break;
5142 prev = entry;
5143 entry = entry->next;
5144 }
5145
5146 if (entry == NULL)
5147 {
5148 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5149 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5150 return NULL;
5151 entry->next = NULL;
5152 entry->entry = NULL;
5153 entry->calls = NULL;
5154 entry->start = start;
5155
5156 if (prev != NULL)
5157 prev->next = entry;
5158 else
5159 common->entries = entry;
5160 }
5161
5162 if (common->has_set_som && common->mark_ptr != 0)
5163 {
5164 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5165 allocate_stack(common, 2);
5166 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5167 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5168 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5169 }
5170 else if (common->has_set_som || common->mark_ptr != 0)
5171 {
5172 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5173 allocate_stack(common, 1);
5174 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5175 }
5176
5177 if (entry->entry == NULL)
5178 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5179 else
5180 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5181 /* Leave if the match is failed. */
5182 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5183 return cc + 1 + LINK_SIZE;
5184 }
5185
5186 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5187 {
5188 const pcre_uchar *begin = arguments->begin;
5189 int *offset_vector = arguments->offsets;
5190 int offset_count = arguments->offset_count;
5191 int i;
5192
5193 if (PUBL(callout) == NULL)
5194 return 0;
5195
5196 callout_block->version = 2;
5197 callout_block->callout_data = arguments->callout_data;
5198
5199 /* Offsets in subject. */
5200 callout_block->subject_length = arguments->end - arguments->begin;
5201 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5202 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5203 #if defined COMPILE_PCRE8
5204 callout_block->subject = (PCRE_SPTR)begin;
5205 #elif defined COMPILE_PCRE16
5206 callout_block->subject = (PCRE_SPTR16)begin;
5207 #elif defined COMPILE_PCRE32
5208 callout_block->subject = (PCRE_SPTR32)begin;
5209 #endif
5210
5211 /* Convert and copy the JIT offset vector to the offset_vector array. */
5212 callout_block->capture_top = 0;
5213 callout_block->offset_vector = offset_vector;
5214 for (i = 2; i < offset_count; i += 2)
5215 {
5216 offset_vector[i] = jit_ovector[i] - begin;
5217 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5218 if (jit_ovector[i] >= begin)
5219 callout_block->capture_top = i;
5220 }
5221
5222 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5223 if (offset_count > 0)
5224 offset_vector[0] = -1;
5225 if (offset_count > 1)
5226 offset_vector[1] = -1;
5227 return (*PUBL(callout))(callout_block);
5228 }
5229
5230 /* Size of the callout block rounded up to a multiple of 8 bytes. */
5231 #define CALLOUT_ARG_SIZE \
5232 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5233
/* Offset of the callout block field `arg` relative to an address that is
CALLOUT_ARG_SIZE bytes past the start of the block (used below with
STACK_TOP as the base). */
5234 #define CALLOUT_ARG_OFFSET(arg) \
5235 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5236
/* Emits the matching path code of a callout item. Space for a callout
block is allocated on the JIT stack and partly filled in, then the C helper
do_callout is called with three arguments. After the call the space is
released and the return value is checked: a positive value backtracks, a
negative value jumps to the forced quit target, zero continues matching.
Returns the position following the callout item. */
5237 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5238 {
5239 DEFINE_COMPILER;
5240 backtrack_common *backtrack;
5241
5242 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5243
5244 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5245
/* callout_number comes from cc[1]; capture_last from the local variable at
common->capture_last_ptr. TMP1 keeps the arguments pointer for the mark
load below. */
5246 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5247 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5248 SLJIT_ASSERT(common->capture_last_ptr != 0);
5249 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5250 OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5251
5252 /* These pointer sized fields temporarily store internal variables:
offset_vector holds STR_PTR and subject holds the value of OVECTOR(0). */
5253 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5254 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5255 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5256
/* The mark field is loaded from jit_arguments.mark_ptr when marks are in
use (common->mark_ptr != 0), otherwise it is set to 0. */
5257 if (common->mark_ptr != 0)
5258 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5259 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5260 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5261 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5262
5263 /* Needed to save important temporary registers. */
5264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* Second argument: start of the callout block. Third argument: address of
the ovector inside the locals area. */
5265 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5266 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5267 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5268 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5269 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5270 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5271
5272 /* Check return value. */
5273 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5274 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5275 if (common->forced_quit_label == NULL)
5276 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5277 else
5278 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5279 return cc + 2 + 2 * LINK_SIZE;
5280 }
5281
5282 #undef CALLOUT_ARG_SIZE
5283 #undef CALLOUT_ARG_OFFSET
5284
/* Emits the code of an assertion (OP_ASSERT, OP_ASSERT_NOT, OP_ASSERTBACK
or OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO / OP_BRAMINZERO.

  backtrack   - receives framesize, private_data_ptr and, for the zero
                prefixed forms, the matchingpath label
  conditional - when TRUE, a failed assertion jumps to
                backtrack->condfailed instead of
                backtrack->common.topbacktracks; the zero prefixes are not
                allowed in this case (asserted)

Every alternative is compiled with its own matching and backtracking path.
The quit / accept labels and lists of `common` are saved on entry and
restored before every return. Returns the position following the
assertion, or NULL on a compiler error. */
5285 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5286 {
5287 DEFINE_COMPILER;
5288 int framesize;
5289 int private_data_ptr;
5290 backtrack_common altbacktrack;
5291 pcre_uchar *ccbegin;
5292 pcre_uchar opcode;
5293 pcre_uchar bra = OP_BRA;
5294 jump_list *tmp = NULL;
5295 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5296 jump_list **found;
5297 /* Saving previous accept variables. */
5298 struct sljit_label *save_quit_label = common->quit_label;
5299 struct sljit_label *save_accept_label = common->accept_label;
5300 jump_list *save_quit = common->quit;
5301 jump_list *save_accept = common->accept;
5302 struct sljit_jump *jump;
5303 struct sljit_jump *brajump = NULL;
5304
/* An optional OP_BRAZERO / OP_BRAMINZERO prefix is remembered in bra and
skipped. */
5305 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5306 {
5307 SLJIT_ASSERT(!conditional);
5308 bra = *cc;
5309 cc++;
5310 }
5311 private_data_ptr = PRIVATE_DATA(cc);
5312 SLJIT_ASSERT(private_data_ptr != 0);
5313 framesize = get_framesize(common, cc, FALSE);
5314 backtrack->framesize = framesize;
5315 backtrack->private_data_ptr = private_data_ptr;
5316 opcode = *cc;
5317 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A matching alternative jumps to `found`: a local list (tmp) for the
positive assertions, the failure target itself for the negative ones. */
5318 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin marks the start of the current alternative; cc is advanced by
the link value stored after the opcode and is compared with OP_ALT at the
end of the loop below. */
5319 ccbegin = cc;
5320 cc += GET(cc, 1);
5321
5322 if (bra == OP_BRAMINZERO)
5323 {
5324 /* This is a braminzero backtrack path. */
5325 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5326 free_stack(common, 1);
5327 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5328 }
5329
5330 if (framesize < 0)
5331 {
/* No frame is needed: remember STACK_TOP in the private data slot and
push STR_PTR. */
5332 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5333 allocate_stack(common, 1);
5334 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5335 }
5336 else
5337 {
/* A frame is needed: push STR_PTR and the previous private data value,
store the new frame position in the private data slot, then let init_frame
fill in the rest. */
5338 allocate_stack(common, framesize + 2);
5339 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5340 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5341 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5342 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5343 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5344 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5345 }
5346
/* The quit and accept targets are cleared so that jumps created inside the
assertion are collected locally. One loop iteration per alternative. */
5347 memset(&altbacktrack, 0, sizeof(backtrack_common));
5348 common->quit_label = NULL;
5349 common->quit = NULL;
5350 while (1)
5351 {
5352 common->accept_label = NULL;
5353 common->accept = NULL;
5354 altbacktrack.top = NULL;
5355 altbacktrack.topbacktracks = NULL;
5356
/* Second and later alternatives restart from the saved STR_PTR. */
5357 if (*ccbegin == OP_ALT)
5358 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5359
5360 altbacktrack.cc = ccbegin;
5361 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5362 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5363 {
5364 common->quit_label = save_quit_label;
5365 common->accept_label = save_accept_label;
5366 common->quit = save_quit;
5367 common->accept = save_accept;
5368 return NULL;
5369 }
/* Accept jumps collected inside this alternative land here. */
5370 common->accept_label = LABEL();
5371 if (common->accept != NULL)
5372 set_jumps(common->accept, common->accept_label);
5373
5374 /* Reset stack. */
5375 if (framesize < 0)
5376 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5377 else {
5378 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5379 {
5380 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5381 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5382 }
5383 else
5384 {
5385 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5386 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5387 }
5388 }
5389
/* For the negative assertions a matching alternative means failure, so the
state is restored right here before jumping to `found`. */
5390 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5391 {
5392 /* We know that STR_PTR was stored on the top of the stack. */
5393 if (conditional)
5394 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5395 else if (bra == OP_BRAZERO)
5396 {
5397 if (framesize < 0)
5398 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5399 else
5400 {
5401 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5402 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5403 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5404 }
5405 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5406 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5407 }
5408 else if (framesize >= 0)
5409 {
5410 /* For OP_BRA and OP_BRAMINZERO. */
5411 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5412 }
5413 }
5414 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5415
/* Backtracking path of this alternative; its failures fall through to the
next alternative (or to the code after the loop). */
5416 compile_backtrackingpath(common, altbacktrack.top);
5417 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5418 {
5419 common->quit_label = save_quit_label;
5420 common->accept_label = save_accept_label;
5421 common->quit = save_quit;
5422 common->accept = save_accept;
5423 return NULL;
5424 }
5425 set_jumps(altbacktrack.topbacktracks, LABEL());
5426
5427 if (*cc != OP_ALT)
5428 break;
5429
5430 ccbegin = cc;
5431 cc += GET(cc, 1);
5432 }
5433 /* None of them matched. */
5434 if (common->quit != NULL)
5435 set_jumps(common->quit, LABEL());
5436
5437 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5438 {
5439 /* Assert is failed. */
5440 if (conditional || bra == OP_BRAZERO)
5441 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5442
5443 if (framesize < 0)
5444 {
5445 /* The topmost item should be 0. */
5446 if (bra == OP_BRAZERO)
5447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5448 else
5449 free_stack(common, 1);
5450 }
5451 else
5452 {
5453 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5454 /* The topmost item should be 0. */
5455 if (bra == OP_BRAZERO)
5456 {
5457 free_stack(common, framesize + 1);
5458 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5459 }
5460 else
5461 free_stack(common, framesize + 2);
5462 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5463 }
/* With OP_BRAZERO a failed assertion still continues on the matching path
(the jump is bound to matchingpath below); otherwise it goes to the
failure target. */
5464 jump = JUMP(SLJIT_JUMP);
5465 if (bra != OP_BRAZERO)
5466 add_jump(compiler, target, jump);
5467
5468 /* Assert is successful. */
5469 set_jumps(tmp, LABEL());
5470 if (framesize < 0)
5471 {
5472 /* We know that STR_PTR was stored on the top of the stack. */
5473 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5474 /* Keep the STR_PTR on the top of the stack. */
5475 if (bra == OP_BRAZERO)
5476 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5477 else if (bra == OP_BRAMINZERO)
5478 {
5479 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5480 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5481 }
5482 }
5483 else
5484 {
5485 if (bra == OP_BRA)
5486 {
5487 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5488 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5489 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5490 }
5491 else
5492 {
5493 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5494 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5495 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5497 }
5498 }
5499
5500 if (bra == OP_BRAZERO)
5501 {
5502 backtrack->matchingpath = LABEL();
5503 SET_LABEL(jump, backtrack->matchingpath);
5504 }
5505 else if (bra == OP_BRAMINZERO)
5506 {
5507 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5508 JUMPHERE(brajump);
5509 if (framesize >= 0)
5510 {
5511 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5512 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5513 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5514 }
5515 set_jumps(backtrack->common.topbacktracks, LABEL());
5516 }
5517 }
5518 else
5519 {
5520 /* AssertNot is successful. */
5521 if (framesize < 0)
5522 {
5523 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5524 if (bra != OP_BRA)
5525 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5526 else
5527 free_stack(common, 1);
5528 }
5529 else
5530 {
5531 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5532 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5533 /* The topmost item should be 0. */
5534 if (bra != OP_BRA)
5535 {
5536 free_stack(common, framesize + 1);
5537 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5538 }
5539 else
5540 free_stack(common, framesize + 2);
5541 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5542 }
5543
5544 if (bra == OP_BRAZERO)
5545 backtrack->matchingpath = LABEL();
5546 else if (bra == OP_BRAMINZERO)
5547 {
5548 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5549 JUMPHERE(brajump);
5550 }
5551
/* With a zero prefix the failure jumps of the negative assertion are
resolved here, and the list is emptied. */
5552 if (bra != OP_BRA)
5553 {
5554 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5555 set_jumps(backtrack->common.topbacktracks, LABEL());
5556 backtrack->common.topbacktracks = NULL;
5557 }
5558 }
5559
/* Restore the quit / accept state saved on entry. */
5560 common->quit_label = save_quit_label;
5561 common->accept_label = save_accept_label;
5562 common->quit = save_quit;
5563 common->accept = save_accept;
5564 return cc + 1 + LINK_SIZE;
5565 }
5566
5567 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5568 {
5569 int condition = FALSE;
5570 pcre_uchar *slotA = name_table;
5571 pcre_uchar *slotB;
5572 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5573 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5574 sljit_sw no_capture;
5575 int i;
5576
5577 locals += refno & 0xff;
5578 refno >>= 8;
5579 no_capture = locals[1];
5580
5581 for (i = 0; i < name_count; i++)
5582 {
5583 if (GET2(slotA, 0) == refno) break;
5584 slotA += name_entry_size;
5585 }
5586
5587 if (i < name_count)
5588 {
5589 /* Found a name for the number - there can be only one; duplicate names
5590 for different numbers are allowed, but not vice versa. First scan down
5591 for duplicates. */
5592
5593 slotB = slotA;
5594 while (slotB > name_table)
5595 {
5596 slotB -= name_entry_size;
5597 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5598 {
5599 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5600 if (condition) break;
5601 }
5602 else break;
5603 }
5604
5605 /* Scan up for duplicates */
5606 if (!condition)
5607 {
5608 slotB = slotA;
5609 for (i++; i < name_count; i++)
5610 {
5611 slotB += name_entry_size;
5612 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5613 {
5614 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5615 if (condition) break;
5616 }
5617 else break;
5618 }
5619 }
5620 }
5621 return condition;
5622 }
5623
5624 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5625 {
5626 int condition = FALSE;
5627 pcre_uchar *slotA = name_table;
5628 pcre_uchar *slotB;
5629 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5630 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5631 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5632 sljit_uw i;
5633
5634 for (i = 0; i < name_count; i++)
5635 {
5636 if (GET2(slotA, 0) == recno) break;
5637 slotA += name_entry_size;
5638 }
5639
5640 if (i < name_count)
5641 {
5642 /* Found a name for the number - there can be only one; duplicate
5643 names for different numbers are allowed, but not vice versa. First
5644 scan down for duplicates. */
5645
5646 slotB = slotA;
5647 while (slotB > name_table)
5648 {
5649 slotB -= name_entry_size;
5650 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5651 {
5652 condition = GET2(slotB, 0) == group_num;
5653 if (condition) break;
5654 }
5655 else break;
5656 }
5657
5658 /* Scan up for duplicates */
5659 if (!condition)
5660 {
5661 slotB = slotA;
5662 for (i++; i < name_count; i++)
5663 {
5664 slotB += name_entry_size;
5665 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5666 {
5667 condition = GET2(slotB, 0) == group_num;
5668 if (condition) break;
5669 }
5670 else break;
5671 }
5672 }
5673 }
5674 return condition;
5675 }
5676
5677 /*
5678 Handling bracketed expressions is probably the most complex part.
5679
5680 Stack layout naming characters:
5681 S - Push the current STR_PTR
5682 0 - Push a 0 (NULL)
5683 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5684 before the next alternative. Not pushed if there are no alternatives.
5685 M - Any values pushed by the current alternative. Can be empty, or anything.
5686 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5687 L - Push the previous local (pointed by localptr) to the stack
5688 () - optional values stored on the stack
5689 ()* - optional, can be stored multiple times
5690
5691 The following list shows the regular expression templates, their PCRE byte codes
5692 and stack layout supported by pcre-sljit.
5693
5694 (?:) OP_BRA | OP_KET A M
5695 () OP_CBRA | OP_KET C M
5696 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5697 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5698 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5699 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5700 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5701 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5702 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5703 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5704 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5705 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5706 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5707 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5708 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5709 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5710 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5711 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5712 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5713 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5714 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5715 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5716
5717
5718 Stack layout naming characters:
5719 A - Push the alternative index (starting from 0) on the stack.
5720 Not pushed if there are no alternatives.
5721 M - Any values pushed by the current alternative. Can be empty, or anything.
5722
5723 The next list shows the possible content of a bracket:
5724 (|) OP_*BRA | OP_ALT ... M A
5725 (?()|) OP_*COND | OP_ALT M A
5726 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5727 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5728 Or nothing, if trace is unnecessary
5729 */
5730
5731 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5732 {
5733 DEFINE_COMPILER;
5734 backtrack_common *backtrack;
5735 pcre_uchar opcode;
5736 int private_data_ptr = 0;
5737 int offset = 0;
5738 int stacksize;
5739 pcre_uchar *ccbegin;
5740 pcre_uchar *matchingpath;
5741 pcre_uchar bra = OP_BRA;
5742 pcre_uchar ket;
5743 assert_backtrack *assert;
5744 BOOL has_alternatives;
5745 struct sljit_jump *jump;
5746 struct sljit_jump *skip;
5747 struct sljit_label *rmaxlabel = NULL;
5748 struct sljit_jump *braminzerojump = NULL;
5749
5750 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5751
5752 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5753 {
5754 bra = *cc;
5755 cc++;
5756 opcode = *cc;
5757 }
5758
5759 opcode = *cc;
5760 ccbegin = cc;
5761 matchingpath = ccbegin + 1 + LINK_SIZE;
5762
5763 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5764 {
5765 /* Drop this bracket_backtrack. */
5766 parent->top = backtrack->prev;
5767 return bracketend(cc);
5768 }
5769
5770 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5771 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5772 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5773 cc += GET(cc, 1);
5774
5775 has_alternatives = *cc == OP_ALT;
5776 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5777 {
5778 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5779 if (*matchingpath == OP_NRREF)
5780 {
5781 stacksize = GET2(matchingpath, 1);
5782 if (common->currententry == NULL || stacksize == RREF_ANY)
5783 has_alternatives = FALSE;
5784 else if (common->currententry->start == 0)
5785 has_alternatives = stacksize != 0;
5786 else
5787 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5788 }
5789 }
5790
5791 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5792 opcode = OP_SCOND;
5793 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5794 opcode = OP_ONCE;
5795
5796 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5797 {
5798 /* Capturing brackets has a pre-allocated space. */
5799 offset = GET2(ccbegin, 1 + LINK_SIZE);
5800 if (common->optimized_cbracket[offset] == 0)
5801 {
5802 private_data_ptr = OVECTOR_PRIV(offset);
5803 offset <<= 1;
5804 }
5805 else
5806 {
5807 offset <<= 1;
5808 private_data_ptr = OVECTOR(offset);
5809 }
5810 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5811 matchingpath += IMM2_SIZE;
5812 }
5813 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5814 {
5815 /* Other brackets simply allocate the next entry. */
5816 private_data_ptr = PRIVATE_DATA(ccbegin);
5817 SLJIT_ASSERT(private_data_ptr != 0);
5818 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5819 if (opcode == OP_ONCE)
5820 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5821 }
5822
5823 /* Instructions before the first alternative. */
5824 stacksize = 0;
5825 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5826 stacksize++;
5827 if (bra == OP_BRAZERO)
5828 stacksize++;
5829
5830 if (stacksize > 0)
5831 allocate_stack(common, stacksize);
5832
5833 stacksize = 0;
5834 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5835 {
5836 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5837 stacksize++;
5838 }
5839
5840 if (bra == OP_BRAZERO)
5841 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5842
5843 if (bra == OP_BRAMINZERO)
5844 {
5845 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5846 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5847 if (ket != OP_KETRMIN)
5848 {
5849 free_stack(common, 1);
5850 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5851 }
5852 else
5853 {
5854 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5855 {
5856 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5857 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5858 /* Nothing stored during the first run. */
5859 skip = JUMP(SLJIT_JUMP);
5860 JUMPHERE(jump);
5861 /* Checking zero-length iteration. */
5862 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5863 {
5864 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5865 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5866 }
5867 else
5868 {
5869 /* Except when the whole stack frame must be saved. */
5870 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5871 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
5872 }
5873 JUMPHERE(skip);
5874 }
5875 else
5876 {
5877 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5878 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5879 JUMPHERE(jump);
5880 }
5881 }
5882 }
5883
5884 if (ket == OP_KETRMIN)
5885 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5886
5887 if (ket == OP_KETRMAX)
5888 {
5889 rmaxlabel = LABEL();
5890 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5891 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5892 }
5893
5894 /* Handling capturing brackets and alternatives. */
5895 if (opcode == OP_ONCE)
5896 {
5897 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5898 {
5899 /* Neither capturing brackets nor recursions are found in the block. */
5900 if (ket == OP_KETRMIN)
5901 {
5902 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5903 allocate_stack(common, 2);
5904 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5905 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5906 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5907 }
5908 else if (ket == OP_KETRMAX || has_alternatives)
5909 {
5910 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5911 allocate_stack(common, 1);
5912 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5913 }
5914 else
5915 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5916 }
5917 else
5918 {
5919 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5920 {
5921 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5922 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5923 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5924 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5925 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5926 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5927 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5928 }
5929 else
5930 {
5931 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5932 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5933 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5934 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5935 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5936 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5937 }
5938 }
5939 }
5940 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5941 {
5942 /* Saving the previous values. */
5943 if (common->optimized_cbracket[offset >> 1] != 0)
5944 {
5945 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5946 allocate_stack(common, 2);
5947 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5948 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
5949 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5950 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5951 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5952 }
5953 else
5954 {
5955 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5956 allocate_stack(common, 1);
5957 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5958 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5959 }
5960 }
5961 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5962 {
5963 /* Saving the previous value. */
5964 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5965 allocate_stack(common, 1);
5966 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5968 }
5969 else if (has_alternatives)
5970 {
5971 /* Pushing the starting string pointer. */
5972 allocate_stack(common, 1);
5973 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5974 }
5975
5976 /* Generating code for the first alternative. */
5977 if (opcode == OP_COND || opcode == OP_SCOND)
5978 {
5979 if (*matchingpath == OP_CREF)
5980 {
5981 SLJIT_ASSERT(has_alternatives);
5982 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5983 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5984 matchingpath += 1 + IMM2_SIZE;
5985 }
5986 else if (*matchingpath == OP_NCREF)
5987 {
5988 SLJIT_ASSERT(has_alternatives);
5989 stacksize = GET2(matchingpath, 1);
5990 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5991
5992 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5993 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5994 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5995 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
5996 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5997 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
5998 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5999 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6000 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6001
6002 JUMPHERE(jump);
6003 matchingpath += 1 + IMM2_SIZE;
6004 }
6005 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6006 {
6007 /* Never has other case. */
6008 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6009
6010 stacksize = GET2(matchingpath, 1);
6011 if (common->currententry == NULL)
6012 stacksize = 0;
6013 else if (stacksize == RREF_ANY)
6014 stacksize = 1;
6015 else if (common->currententry->start == 0)
6016 stacksize = stacksize == 0;
6017 else
6018 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6019
6020 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6021 {
6022 SLJIT_ASSERT(!has_alternatives);
6023 if (stacksize != 0)
6024 matchingpath += 1 + IMM2_SIZE;
6025 else
6026 {
6027 if (*cc == OP_ALT)
6028 {
6029 matchingpath = cc + 1 + LINK_SIZE;
6030 cc += GET(cc, 1);
6031 }
6032 else
6033 matchingpath = cc;
6034 }
6035 }
6036 else
6037 {
6038 SLJIT_ASSERT(has_alternatives);
6039
6040 stacksize = GET2(matchingpath, 1);
6041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6042 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6043 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6044 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6045 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6046 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6047 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6048 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6049 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6050 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6051 matchingpath += 1 + IMM2_SIZE;
6052 }
6053 }
6054 else
6055 {
6056 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6057 /* Similar code as PUSH_BACKTRACK macro. */
6058 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6059 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6060 return NULL;
6061 memset(assert, 0, sizeof(assert_backtrack));
6062 assert->common.cc = matchingpath;
6063 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6064 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6065 }
6066 }
6067
6068 compile_matchingpath(common, matchingpath, cc, backtrack);
6069 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6070 return NULL;
6071
6072 if (opcode == OP_ONCE)
6073 {
6074 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6075 {
6076 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6077 /* TMP2 which is set here used by OP_KETRMAX below. */
6078 if (ket == OP_KETRMAX)
6079 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6080 else if (ket == OP_KETRMIN)
6081 {
6082 /* Move the STR_PTR to the private_data_ptr. */
6083 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6084 }
6085 }
6086 else
6087 {
6088 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
6089 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw));
6090 if (ket == OP_KETRMAX)
6091 {
6092 /* TMP2 which is set here used by OP_KETRMAX below. */
6093 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6094 }
6095 }
6096 }
6097
6098 stacksize = 0;
6099 if (ket != OP_KET || bra != OP_BRA)
6100 stacksize++;
6101 if (offset != 0)
6102 {
6103 if (common->capture_last_ptr != 0)
6104 stacksize++;
6105 if (common->optimized_cbracket[offset >> 1] == 0)
6106 stacksize += 2;
6107 }
6108 if (has_alternatives && opcode != OP_ONCE)
6109 stacksize++;
6110
6111 if (stacksize > 0)
6112 allocate_stack(common, stacksize);
6113
6114 stacksize = 0;
6115 if (ket != OP_KET || bra != OP_BRA)
6116 {
6117 if (ket != OP_KET)
6118 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6119 else
6120 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6121 stacksize++;
6122 }
6123
6124 if (offset != 0)
6125 {
6126 if (common->capture_last_ptr != 0)
6127 {
6128 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6129 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6130 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0);
6131 stacksize++;
6132 }
6133 if (common->optimized_cbracket[offset >> 1] == 0)
6134 {
6135 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6136 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6137 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6138 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6139 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6140 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6141 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6142 stacksize += 2;
6143 }
6144 }
6145
6146 if (has_alternatives)
6147 {
6148 if (opcode != OP_ONCE)
6149 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6150 if (ket != OP_KETRMAX)
6151 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6152 }
6153
6154 /* Must be after the matchingpath label. */
6155 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6156 {
6157 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6158 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6159 }
6160
6161 if (ket == OP_KETRMAX)
6162 {
6163 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6164 {
6165 if (has_alternatives)
6166 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6167 /* Checking zero-length iteration. */
6168 if (opcode != OP_ONCE)
6169 {
6170 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6171 /* Drop STR_PTR for greedy plus quantifier. */
6172 if (bra != OP_BRAZERO)
6173 free_stack(common, 1);
6174 }
6175 else
6176 /* TMP2 must contain the starting STR_PTR. */
6177 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6178 }
6179 else
6180 JUMPTO(SLJIT_JUMP, rmaxlabel);
6181 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6182 }
6183
6184 if (bra == OP_BRAZERO)
6185 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6186
6187 if (bra == OP_BRAMINZERO)
6188 {
6189 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6190 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6191 if (braminzerojump != NULL)
6192 {
6193 JUMPHERE(braminzerojump);
6194 /* We need to release the end pointer to perform the
6195 backtrack for the zero-length iteration. When
6196 framesize is < 0, OP_ONCE will do the release itself. */
6197 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6198 {
6199 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6200 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6201 }
6202 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6203 free_stack(common, 1);
6204 }
6205 /* Continue to the normal backtrack. */
6206 }
6207
6208 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6209 decrease_call_count(common);
6210
6211 /* Skip the other alternatives. */
6212 while (*cc == OP_ALT)
6213 cc += GET(cc, 1);
6214 cc += 1 + LINK_SIZE;
6215 return cc;
6216 }
6217
6218 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6219 {
6220 DEFINE_COMPILER;
6221 backtrack_common *backtrack;
6222 pcre_uchar opcode;
6223 int private_data_ptr;
6224 int cbraprivptr = 0;
6225 int framesize;
6226 int stacksize;
6227 int offset = 0;
6228 BOOL zero = FALSE;
6229 pcre_uchar *ccbegin = NULL;
6230 int stack;
6231 struct sljit_label *loop = NULL;
6232 struct jump_list *emptymatch = NULL;
6233
6234 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6235 if (*cc == OP_BRAPOSZERO)
6236 {
6237 zero = TRUE;
6238 cc++;
6239 }
6240
6241 opcode = *cc;
6242 private_data_ptr = PRIVATE_DATA(cc);
6243 SLJIT_ASSERT(private_data_ptr != 0);
6244 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6245 switch(opcode)
6246 {
6247 case OP_BRAPOS:
6248 case OP_SBRAPOS:
6249 ccbegin = cc + 1 + LINK_SIZE;
6250 break;
6251
6252 case OP_CBRAPOS:
6253 case OP_SCBRAPOS:
6254 offset = GET2(cc, 1 + LINK_SIZE);
6255 /* This case cannot be optimized in the same was as
6256 normal capturing brackets. */
6257 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6258 cbraprivptr = OVECTOR_PRIV(offset);
6259 offset <<= 1;
6260 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6261 break;
6262
6263 default:
6264 SLJIT_ASSERT_STOP();
6265 break;
6266 }
6267
6268 framesize = get_framesize(common, cc, FALSE);
6269 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6270 if (framesize < 0)
6271 {
6272 if (offset != 0)
6273 {
6274 stacksize = 2;
6275 if (common->capture_last_ptr != 0)
6276 stacksize++;
6277 }
6278 else
6279 stacksize = 1;
6280
6281 if (!zero)
6282 stacksize++;
6283
6284 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6285 allocate_stack(common, stacksize);
6286 if (framesize == no_frame)
6287 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6288
6289 if (offset != 0)
6290 {
6291 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6292 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6293 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6294 if (common->capture_last_ptr != 0)
6295 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6296 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6297 if (common->capture_last_ptr != 0)
6298 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6299 }
6300 else
6301 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6302
6303 if (!zero)
6304 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6305 }
6306 else
6307 {
6308 stacksize = framesize + 1;
6309 if (!zero)
6310 stacksize++;
6311 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6312 stacksize++;
6313 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6314
6315 allocate_stack(common, stacksize);
6316 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6317 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6318 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6319
6320 stack = 0;
6321 if (!zero)
6322 {
6323 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6324 stack++;
6325 }
6326 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6327 {
6328 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6329 stack++;
6330 }
6331 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6332 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6333 }
6334
6335 if (offset != 0)
6336 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6337
6338 loop = LABEL();
6339 while (*cc != OP_KETRPOS)
6340 {
6341 backtrack->top = NULL;
6342 backtrack->topbacktracks = NULL;
6343 cc += GET(cc, 1);
6344
6345 compile_matchingpath(common, ccbegin, cc, backtrack);
6346 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6347 return NULL;
6348
6349 if (framesize < 0)
6350 {
6351 if (framesize == no_frame)
6352 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6353
6354 if (offset != 0)
6355 {
6356 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6357 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6358 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6359 if (common->capture_last_ptr != 0)
6360 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6361 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6362 }
6363 else
6364 {
6365 if (opcode == OP_SBRAPOS)
6366 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6367 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6368 }
6369
6370 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6371 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6372
6373 if (!zero)
6374 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6375 }
6376 else
6377 {
6378 if (offset != 0)
6379 {
6380 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6381 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6382 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6383 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6384 if (common->capture_last_ptr != 0)
6385 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6386 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6387 }
6388 else
6389 {
6390 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6391 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6392 if (opcode == OP_SBRAPOS)
6393 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6394 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6395 }
6396
6397 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6398 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6399
6400 if (!zero)
6401 {
6402 if (framesize < 0)
6403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6404 else
6405 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6406 }
6407 }
6408 JUMPTO(SLJIT_JUMP, loop);
6409 flush_stubs(common);
6410
6411 compile_backtrackingpath(common, backtrack->top);
6412 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6413 return NULL;
6414 set_jumps(backtrack->topbacktracks, LABEL());
6415
6416 if (framesize < 0)
6417 {
6418 if (offset != 0)
6419 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6420 else
6421 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6422 }
6423 else
6424 {
6425 if (offset != 0)
6426 {
6427 /* Last alternative. */
6428 if (*cc == OP_KETRPOS)
6429 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6430 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6431 }
6432 else
6433 {
6434 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6435 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6436 }
6437 }
6438
6439 if (*cc == OP_KETRPOS)
6440 break;
6441 ccbegin = cc + 1 + LINK_SIZE;
6442 }
6443
6444 backtrack->topbacktracks = NULL;
6445 if (!zero)
6446 {
6447 if (framesize < 0)
6448 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6449 else /* TMP2 is set to [private_data_ptr] above. */
6450 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
6451 }
6452
6453 /* None of them matched. */
6454 set_jumps(emptymatch, LABEL());
6455 decrease_call_count(common);
6456 return cc + 1 + LINK_SIZE;
6457 }
6458
6459 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6460 {
6461 int class_len;
6462
6463 *opcode = *cc;
6464 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6465 {
6466 cc++;
6467 *type = OP_CHAR;
6468 }
6469 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6470 {
6471 cc++;
6472 *type = OP_CHARI;
6473 *opcode -= OP_STARI - OP_STAR;
6474 }
6475 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6476 {
6477 cc++;
6478 *type = OP_NOT;
6479 *opcode -= OP_NOTSTAR - OP_STAR;
6480 }
6481 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6482 {
6483 cc++;
6484 *type = OP_NOTI;
6485 *opcode -= OP_NOTSTARI - OP_STAR;
6486 }
6487 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6488 {
6489 cc++;
6490 *opcode -= OP_TYPESTAR - OP_STAR;
6491 *type = 0;
6492 }
6493 else
6494 {
6495 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6496 *type = *opcode;
6497 cc++;
6498 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6499 *opcode = cc[class_len - 1];
6500 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6501 {
6502 *opcode -= OP_CRSTAR - OP_STAR;
6503 if (end != NULL)
6504 *end = cc + class_len;
6505 }
6506 else
6507 {
6508 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6509 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6510 *arg2 = GET2(cc, class_len);
6511
6512 if (*arg2 == 0)
6513 {
6514 SLJIT_ASSERT(*arg1 != 0);
6515 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6516 }
6517 if (*arg1 == *arg2)
6518 *opcode = OP_EXACT;
6519
6520 if (end != NULL)
6521 *end = cc + class_len + 2 * IMM2_SIZE;
6522 }
6523 return cc;
6524 }
6525
6526 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6527 {
6528 *arg1 = GET2(cc, 0);
6529 cc += IMM2_SIZE;
6530 }
6531
6532 if (*type == 0)
6533 {
6534 *type = *cc;
6535 if (end != NULL)
6536 *end = next_opcode(common, cc);
6537 cc++;
6538 return cc;
6539 }
6540
6541 if (end != NULL)
6542 {
6543 *end = cc + 1;
6544 #ifdef SUPPORT_UTF
6545 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6546 #endif
6547 }
6548 return cc;
6549 }
6550
6551 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6552 {
6553 DEFINE_COMPILER;
6554 backtrack_common *backtrack;
6555 pcre_uchar opcode;
6556 pcre_uchar type;
6557 int arg1 = -1, arg2 = -1;
6558 pcre_uchar* end;
6559 jump_list *nomatch = NULL;
6560 struct sljit_jump *jump = NULL;
6561 struct sljit_label *label;
6562 int private_data_ptr = PRIVATE_DATA(cc);
6563 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6564 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6565 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
6566 int tmp_base, tmp_offset;
6567
6568 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6569
6570 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6571
6572 switch(type)
6573 {
6574 case OP_NOT_DIGIT:
6575 case OP_DIGIT:
6576 case OP_NOT_WHITESPACE:
6577 case OP_WHITESPACE:
6578 case OP_NOT_WORDCHAR:
6579 case OP_WORDCHAR:
6580 case OP_ANY:
6581 case OP_ALLANY:
6582 case OP_ANYBYTE:
6583 case OP_ANYNL:
6584 case OP_NOT_HSPACE:
6585 case OP_HSPACE:
6586 case OP_NOT_VSPACE:
6587 case OP_VSPACE:
6588 case OP_CHAR:
6589 case OP_CHARI:
6590 case OP_NOT:
6591 case OP_NOTI:
6592 case OP_CLASS:
6593 case OP_NCLASS:
6594 tmp_base = TMP3;
6595 tmp_offset = 0;
6596 break;
6597
6598 default:
6599 SLJIT_ASSERT_STOP();
6600 /* Fall through. */
6601
6602 case OP_EXTUNI:
6603 case OP_XCLASS:
6604 case OP_NOTPROP:
6605 case OP_PROP:
6606 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6607 tmp_offset = POSSESSIVE0;
6608 break;
6609 }
6610
6611 switch(opcode)
6612 {
6613 case OP_STAR:
6614 case OP_PLUS:
6615 case OP_UPTO:
6616 case OP_CRRANGE:
6617 if (type == OP_ANYNL || type == OP_EXTUNI)
6618 {
6619 SLJIT_ASSERT(private_data_ptr == 0);
6620 if (opcode == OP_STAR || opcode == OP_UPTO)
6621 {
6622 allocate_stack(common, 2);
6623 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6624 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6625 }
6626 else
6627 {
6628 allocate_stack(common, 1);
6629 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6630 }
6631
6632 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6633 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6634
6635 label = LABEL();
6636 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6637 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6638 {
6639 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6640 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6641 if (opcode == OP_CRRANGE && arg2 > 0)
6642 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6643 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6644 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6645 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6646 }
6647
6648 /* We cannot use TMP3 because of this allocate_stack. */
6649 allocate_stack(common, 1);
6650 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6651 JUMPTO(SLJIT_JUMP, label);
6652 if (jump != NULL)
6653 JUMPHERE(jump);
6654 }
6655 else
6656 {
6657 if (opcode == OP_PLUS)
6658 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6659 if (private_data_ptr == 0)
6660 allocate_stack(common, 2);
6661 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6662 if (opcode <= OP_PLUS)
6663 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6664 else
6665 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6666 label = LABEL();
6667 compile_char1_matchingpath(common, type, cc, &nomatch);
6668 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6669 if (opcode <= OP_PLUS)
6670 JUMPTO(SLJIT_JUMP, label);
6671 else if (opcode == OP_CRRANGE && arg1 == 0)
6672 {
6673 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6674 JUMPTO(SLJIT_JUMP, label);
6675 }
6676 else
6677 {
6678 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6679 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6680 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6681 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6682 }
6683 set_jumps(nomatch, LABEL());
6684 if (opcode == OP_CRRANGE)
6685 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6686 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6687 }
6688 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6689 break;
6690
6691 case OP_MINSTAR:
6692 case OP_MINPLUS:
6693 if (opcode == OP_MINPLUS)
6694 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6695 if (private_data_ptr == 0)
6696 allocate_stack(common, 1);
6697 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6698 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6699 break;
6700
6701 case OP_MINUPTO:
6702 case OP_CRMINRANGE:
6703 if (private_data_ptr == 0)
6704 allocate_stack(common, 2);
6705 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6706 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6707 if (opcode == OP_CRMINRANGE)
6708 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6709 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6710 break;
6711
6712 case OP_QUERY:
6713 case OP_MINQUERY:
6714 if (private_data_ptr == 0)
6715 allocate_stack(common, 1);
6716 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6717 if (opcode == OP_QUERY)
6718 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6719 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6720 break;
6721
6722 case OP_EXACT:
6723 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6724 label = LABEL();
6725 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6726 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6727 JUMPTO(SLJIT_C_NOT_ZERO, label);
6728 break;
6729
6730 case OP_POSSTAR:
6731 case OP_POSPLUS:
6732 case OP_POSUPTO:
6733 if (opcode == OP_POSPLUS)
6734 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6735 if (opcode == OP_POSUPTO)
6736 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6737 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6738 label = LABEL();
6739 compile_char1_matchingpath(common, type, cc, &nomatch);
6740 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6741 if (opcode != OP_POSUPTO)
6742 JUMPTO(SLJIT_JUMP, label);
6743 else
6744 {
6745 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6746 JUMPTO(SLJIT_C_NOT_ZERO, label);