/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1367 - (show annotations)
Mon Oct 7 07:41:44 2013 UTC (6 years, 1 month ago) by zherczeg
File MIME type: text/plain
File size: 303105 byte(s)
Error occurred while calculating annotation data.
Add support for OP_DNCREF and OP_DNRREF in JIT, and landing Philip's PT_SPACE fix.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 that execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block handed to the compiled code: a jit_function takes a single
pointer to this structure. The members are grouped with all pointers first and
the remaining scalars after them. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
/* NOTE(review): str / begin / end presumably delimit the subject string and
the current start position - confirm against the code that fills this in. */
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
/* NOTE(review): the four byte-sized members below look like boolean flags
mirroring the corresponding match-time options - confirm at the call site. */
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Per-pattern record of generated code. It holds one code pointer and one
code size for each of the JIT_NUMBER_OF_COMPILE_MODES compile modes, together
with a callback, its user data pointer, and the top_bracket and limit_match
values. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 pcre_uint32 limit_match;
/* Parallel to executable_funcs: same number of entries. */
186 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187 } executable_functions;
188
/* Node of a singly linked list of sljit jumps. Many members of
compiler_common and of the backtrack structures are heads of such lists. */
189 typedef struct jump_list {
190 struct sljit_jump *jump;
191 struct jump_list *next;
192 } jump_list;
193
/* Node of a singly linked list of stubs. Each node pairs a jump (start) with
a label (quit). NOTE(review): presumably start enters out-of-line stub code and
quit is where that code resumes - confirm at the use sites. */
194 typedef struct stub_list {
195 struct sljit_jump *start;
196 struct sljit_label *quit;
197 struct stub_list *next;
198 } stub_list;
199
/* Negative sentinel values. NOTE(review): presumably stored in the framesize
members of the backtrack structures, whose comments say that a value less than
0 means no frame is needed - confirm at the use sites. */
200 enum frame_types {
201 no_frame = -1,
202 no_stack = -2
203 };
204
/* Tags for two kinds of control entries: marks and then-traps.
NOTE(review): presumably these tag the entries of the control verb chain
referenced by compiler_common.control_head_ptr - confirm at the use sites. */
205 enum control_types {
206 type_mark = 0,
207 type_then_trap = 1
208 };
209
/* Signature of a compiled matcher entry point: it receives a pointer to the
jit_arguments block and returns an int. SLJIT_CALL is not defined in this file
(presumably it comes from the included sljit sources). */
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type for the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the arguments for compile_backtrackingpath. Must be the first member
215 of its descendants. */
/* Every specialised *_backtrack structure in this file starts with a member
of this type named "common". */
216 typedef struct backtrack_common {
217 /* Concatenation stack. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
/* Backtrack record for assertions. NOTE(review): the association with the
OP_ASSERT* opcodes is inferred from the structure name - confirm in the
compile functions. */
227 typedef struct assert_backtrack {
/* Must stay first (see backtrack_common). */
228 backtrack_common common;
229 jump_list *condfailed;
230 /* Less than 0 if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
/* Backtrack record for bracket groups. The union "u" is interpreted according
to the kind of bracket, as described by the comments on its members. */
238 typedef struct bracket_backtrack {
/* Must stay first (see backtrack_common). */
239 backtrack_common common;
240 /* Where to continue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. Less than 0 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
/* Backtrack record for possessive brackets. NOTE(review): the association
with the OP_*POS bracket opcodes is inferred from the structure name. */
258 typedef struct bracketpos_backtrack {
/* Must stay first (see backtrack_common). */
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
/* Backtrack record carrying a single matching-path label.
NOTE(review): presumably used for OP_BRAMINZERO, judging by the name. */
268 typedef struct braminzero_backtrack {
/* Must stay first (see backtrack_common). */
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
/* Backtrack record for iterators: besides the common part it only stores the
label at which the next iteration starts. */
273 typedef struct iterator_backtrack {
/* Must stay first (see backtrack_common). */
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
/* Node of a singly linked list describing one recursion target: its entry
label, the jumps (calls) collected before the entry exists, and the starting
opcode. compiler_common.entries is the head of such a list. */
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls until the function is not created. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 sljit_sw start;
287 } recurse_entry;
288
/* Backtrack record for recursion. NOTE(review): inlined_pattern presumably
tells whether the recursed pattern was emitted in place rather than called -
confirm in the code that sets it. */
289 typedef struct recurse_backtrack {
/* Must stay first (see backtrack_common). */
290 backtrack_common common;
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
/* Extra opcode value defined by this file; it is given the value
OP_TABLE_LENGTH. NOTE(review): presumably chosen because it lies just past the
real opcode values - confirm in pcre_internal.h. */
294 #define OP_THEN_TRAP OP_TABLE_LENGTH
295
/* Backtrack record for a then-trap. compiler_common.then_trap points to the
one that is current while code is being generated. */
296 typedef struct then_trap_backtrack {
/* Must stay first (see backtrack_common). */
297 backtrack_common common;
298 /* If then_trap is not NULL, this structure contains the real
299 then_trap for the backtracking path. */
300 struct then_trap_backtrack *then_trap;
301 /* Points to the starting opcode. */
302 sljit_sw start;
303 /* Exit point for the then opcodes of this alternative. */
304 jump_list *quit;
305 /* Frame size of the current alternative. */
306 int framesize;
307 } then_trap_backtrack;
308
/* Sizes the digits[] member of compiler_common (2 + MAX_RANGE_SIZE entries). */
309 #define MAX_RANGE_SIZE 6
310
/* State shared by all code generation functions for one pattern. Nearly every
function in this file takes a pointer to it named "common"; several macros
(OVECTOR_START, OVECTOR, OVECTOR_PRIV, PRIVATE_DATA, DEFINE_COMPILER) read its
members through that name. The int members commented as "ptr" / "position" are
assigned from ovector_start in check_opcode_types, i.e. they hold offsets, not
C pointers. */
311 typedef struct compiler_common {
312 /* The sljit generic compiler. */
313 struct sljit_compiler *compiler;
314 /* First byte code. */
315 pcre_uchar *start;
316 /* Maps private data offset to each opcode. */
317 sljit_si *private_data_ptrs;
318 /* Tells whether the capturing bracket is optimized. */
319 pcre_uint8 *optimized_cbracket;
320 /* Tells whether the starting offset is a target of then. */
321 pcre_uint8 *then_offsets;
322 /* Current position where a THEN must jump. */
323 then_trap_backtrack *then_trap;
324 /* Starting offset of private data for capturing brackets. */
325 int cbra_ptr;
326 /* Output vector starting point. Must be divisible by 2. */
327 int ovector_start;
328 /* Last known position of the requested byte. */
329 int req_char_ptr;
330 /* Head of the last recursion. */
331 int recursive_head_ptr;
332 /* First inspected character for partial matching. */
333 int start_used_ptr;
334 /* Starting pointer for partial soft matches. */
335 int hit_start;
336 /* End pointer of the first line. */
337 int first_line_end;
338 /* Points to the marked string. */
339 int mark_ptr;
340 /* Recursive control verb management chain. */
341 int control_head_ptr;
342 /* Points to the last matched capture block index. */
343 int capture_last_ptr;
344 /* Points to the starting position of the current match. */
345 int start_ptr;
346
347 /* Flipped and lower case tables. */
348 const pcre_uint8 *fcc;
349 sljit_sw lcc;
350 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351 int mode;
352 /* \K is found in the pattern. */
353 BOOL has_set_som;
354 /* (*SKIP:arg) is found in the pattern. */
355 BOOL has_skip_arg;
356 /* (*THEN) is found in the pattern. */
357 BOOL has_then;
358 /* Needs to know the start position anytime. */
359 BOOL needs_start_ptr;
360 /* Currently in recurse or negative assert. */
361 BOOL local_exit;
362 /* Currently in a positive assert. */
363 BOOL positive_assert;
364 /* Newline control. */
365 int nltype;
366 int newline;
367 int bsr_nltype;
368 /* Dollar endonly. */
369 int endonly;
370 /* Tables. */
371 sljit_sw ctypes;
372 int digits[2 + MAX_RANGE_SIZE];
373 /* Named capturing brackets. */
374 pcre_uchar *name_table;
375 sljit_sw name_count;
376 sljit_sw name_entry_size;
377
378 /* Labels and jump lists. */
379 struct sljit_label *partialmatchlabel;
380 struct sljit_label *quit_label;
381 struct sljit_label *forced_quit_label;
382 struct sljit_label *accept_label;
383 stub_list *stubs;
384 recurse_entry *entries;
385 recurse_entry *currententry;
386 jump_list *partialmatch;
387 jump_list *quit;
388 jump_list *positive_assert_quit;
389 jump_list *forced_quit;
390 jump_list *accept;
391 jump_list *calllimit;
392 jump_list *stackalloc;
393 jump_list *revertframes;
394 jump_list *wordboundary;
395 jump_list *anynewline;
396 jump_list *hspace;
397 jump_list *vspace;
398 jump_list *casefulcmp;
399 jump_list *caselesscmp;
400 jump_list *reset_match;
401 BOOL jscript_compat;
/* The members below exist only in builds with the matching SUPPORT_ and
COMPILE_ macros defined. */
402 #ifdef SUPPORT_UTF
403 BOOL utf;
404 #ifdef SUPPORT_UCP
405 BOOL use_ucp;
406 #endif
407 #ifndef COMPILE_PCRE32
408 jump_list *utfreadchar;
409 #endif
410 #ifdef COMPILE_PCRE8
411 jump_list *utfreadtype8;
412 #endif
413 #endif /* SUPPORT_UTF */
414 #ifdef SUPPORT_UCP
415 jump_list *getucd;
416 #endif
417 } compiler_common;
418
419 /* For byte_sequence_compare. */
420
/* length and sourcereg are always present. The remaining members exist only
when SLJIT_UNALIGNED is defined and non-zero: ucharptr plus two unions (c and
oc) with the same layout, each overlaying an int, a 16 bit value and an array
of code units whose element type and count depend on the code unit width
(4 x 8 bit, 2 x 16 bit or 1 x 32 bit).
NOTE(review): c and oc presumably hold the characters to compare and their
other-case forms - confirm in byte_sequence_compare. */
421 typedef struct compare_context {
422 int length;
423 int sourcereg;
424 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
425 int ucharptr;
426 union {
427 sljit_si asint;
428 sljit_uh asushort;
429 #if defined COMPILE_PCRE8
430 sljit_ub asbyte;
431 sljit_ub asuchars[4];
432 #elif defined COMPILE_PCRE16
433 sljit_uh asuchars[2];
434 #elif defined COMPILE_PCRE32
435 sljit_ui asuchars[1];
436 #endif
437 } c;
438 union {
439 sljit_si asint;
440 sljit_uh asushort;
441 #if defined COMPILE_PCRE8
442 sljit_ub asbyte;
443 sljit_ub asuchars[4];
444 #elif defined COMPILE_PCRE16
445 sljit_uh asuchars[2];
446 #elif defined COMPILE_PCRE32
447 sljit_ui asuchars[1];
448 #endif
449 } oc;
450 #endif
451 } compare_context;
452
/* CMP is removed here so that this file can define its own CMP shortcut
further down. */
453 /* Undefine sljit macros. */
454 #undef CMP
455
/* STACK(i) is a negative byte offset: STACK(0) is -sizeof(sljit_sw),
STACK(1) is -2 * sizeof(sljit_sw), and so on. */
456 /* Used for accessing the elements of the stack. */
457 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
458
/* Symbolic names for the sljit registers used by the code generator. */
459 #define TMP1 SLJIT_SCRATCH_REG1
460 #define TMP2 SLJIT_SCRATCH_REG3
461 #define TMP3 SLJIT_TEMPORARY_EREG2
462 #define STR_PTR SLJIT_SAVED_REG1
463 #define STR_END SLJIT_SAVED_REG2
464 #define STACK_TOP SLJIT_SCRATCH_REG2
465 #define STACK_LIMIT SLJIT_SAVED_REG3
466 #define ARGUMENTS SLJIT_SAVED_EREG1
467 #define COUNT_MATCH SLJIT_SAVED_EREG2
468 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
469
/* The first five machine words of the local space have fixed roles; the
offsets below are in bytes. */
470 /* Local space layout. */
471 /* These two locals can be used by the current opcode. */
472 #define LOCALS0 (0 * sizeof(sljit_sw))
473 #define LOCALS1 (1 * sizeof(sljit_sw))
474 /* Two local variables for possessive quantifiers (char1 cannot use them). */
475 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
476 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
477 /* Max limit of recursions. */
478 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
479 /* The output vector is stored on the stack, and contains pointers
480 to characters. The vector data is divided into two groups: the first
481 group contains the start / end character pointers, and the second is
482 the start pointers when the end of the capturing group has not yet been reached. */
/* The four macros below require a variable named "common" (a compiler_common
pointer) in scope. PRIVATE_DATA indexes private_data_ptrs by the distance of
the opcode pointer from common->start. */
483 #define OVECTOR_START (common->ovector_start)
484 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
485 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
486 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
487
/* Select the sljit move operations that match the code unit width of this
build (8, 16 or 32 bit); any other configuration is a compile error. */
488 #if defined COMPILE_PCRE8
489 #define MOV_UCHAR SLJIT_MOV_UB
490 #define MOVU_UCHAR SLJIT_MOVU_UB
491 #elif defined COMPILE_PCRE16
492 #define MOV_UCHAR SLJIT_MOV_UH
493 #define MOVU_UCHAR SLJIT_MOVU_UH
494 #elif defined COMPILE_PCRE32
495 #define MOV_UCHAR SLJIT_MOV_UI
496 #define MOVU_UCHAR SLJIT_MOVU_UI
497 #else
498 #error Unsupported compiling mode
499 #endif
500
/* Thin wrappers around the sljit emitter functions. All of them except
SET_LABEL pass a variable named "compiler", so a function using them must
first declare it, normally with DEFINE_COMPILER (which in turn needs "common"
in scope). JUMPTO and CMPTO emit the jump and bind it to an existing label;
JUMPHERE binds an earlier jump to a label emitted at the current position. */
501 /* Shortcuts. */
502 #define DEFINE_COMPILER \
503 struct sljit_compiler *compiler = common->compiler
504 #define OP1(op, dst, dstw, src, srcw) \
505 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
506 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
507 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
508 #define LABEL() \
509 sljit_emit_label(compiler)
510 #define JUMP(type) \
511 sljit_emit_jump(compiler, (type))
512 #define JUMPTO(type, label) \
513 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
514 #define JUMPHERE(jump) \
515 sljit_set_label((jump), sljit_emit_label(compiler))
516 #define SET_LABEL(jump, label) \
517 sljit_set_label((jump), (label))
518 #define CMP(type, src1, src1w, src2, src2w) \
519 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
520 #define CMPTO(type, src1, src1w, src2, src2w, label) \
521 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
522 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
523 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
524 #define GET_LOCAL_BASE(dst, dstw, offset) \
525 sljit_get_local_base(compiler, (dst), (dstw), (offset))
526
527 static pcre_uchar* bracketend(pcre_uchar* cc)
528 {
529 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
530 do cc += GET(cc, 1); while (*cc == OP_ALT);
531 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
532 cc += 1 + LINK_SIZE;
533 return cc;
534 }
535
536 /* Functions that might need modification for all new supported opcodes:
537 next_opcode
538 check_opcode_types
539 set_private_data_ptrs
540 get_framesize
541 init_frame
542 get_private_data_copy_length
543 copy_private_data
544 compile_matchingpath
545 compile_backtrackingpath
546 */
547
/* Returns a pointer to the opcode that follows the one at cc.

common is consulted only for its utf flag, and only when SUPPORT_UTF is
defined. NULL is returned for OP_ANYBYTE in UTF mode and for any opcode that
is not listed in the switch (the latter also triggers SLJIT_ASSERT_STOP). */
548 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
549 {
550 SLJIT_UNUSED_ARG(common);
551 switch(*cc)
552 {
/* Opcodes whose full length is given by the OP_lengths table. */
553 case OP_SOD:
554 case OP_SOM:
555 case OP_SET_SOM:
556 case OP_NOT_WORD_BOUNDARY:
557 case OP_WORD_BOUNDARY:
558 case OP_NOT_DIGIT:
559 case OP_DIGIT:
560 case OP_NOT_WHITESPACE:
561 case OP_WHITESPACE:
562 case OP_NOT_WORDCHAR:
563 case OP_WORDCHAR:
564 case OP_ANY:
565 case OP_ALLANY:
566 case OP_NOTPROP:
567 case OP_PROP:
568 case OP_ANYNL:
569 case OP_NOT_HSPACE:
570 case OP_HSPACE:
571 case OP_NOT_VSPACE:
572 case OP_VSPACE:
573 case OP_EXTUNI:
574 case OP_EODN:
575 case OP_EOD:
576 case OP_CIRC:
577 case OP_CIRCM:
578 case OP_DOLL:
579 case OP_DOLLM:
580 case OP_CRSTAR:
581 case OP_CRMINSTAR:
582 case OP_CRPLUS:
583 case OP_CRMINPLUS:
584 case OP_CRQUERY:
585 case OP_CRMINQUERY:
586 case OP_CRRANGE:
587 case OP_CRMINRANGE:
588 case OP_CLASS:
589 case OP_NCLASS:
590 case OP_REF:
591 case OP_REFI:
592 case OP_RECURSE:
593 case OP_CALLOUT:
594 case OP_ALT:
595 case OP_KET:
596 case OP_KETRMAX:
597 case OP_KETRMIN:
598 case OP_KETRPOS:
599 case OP_REVERSE:
600 case OP_ASSERT:
601 case OP_ASSERT_NOT:
602 case OP_ASSERTBACK:
603 case OP_ASSERTBACK_NOT:
604 case OP_ONCE:
605 case OP_ONCE_NC:
606 case OP_BRA:
607 case OP_BRAPOS:
608 case OP_CBRA:
609 case OP_CBRAPOS:
610 case OP_COND:
611 case OP_SBRA:
612 case OP_SBRAPOS:
613 case OP_SCBRA:
614 case OP_SCBRAPOS:
615 case OP_SCOND:
616 case OP_CREF:
617 case OP_DNCREF:
618 case OP_RREF:
619 case OP_DNRREF:
620 case OP_DEF:
621 case OP_BRAZERO:
622 case OP_BRAMINZERO:
623 case OP_BRAPOSZERO:
624 case OP_PRUNE:
625 case OP_SKIP:
626 case OP_THEN:
627 case OP_COMMIT:
628 case OP_FAIL:
629 case OP_ACCEPT:
630 case OP_ASSERT_ACCEPT:
631 case OP_CLOSE:
632 case OP_SKIPZERO:
633 return cc + PRIV(OP_lengths)[*cc];
634
/* Opcodes that end with a character. The table length is applied first; in
UTF mode the extra length indicated by the code unit just before the new
position (cc[-1]) is then added. */
635 case OP_CHAR:
636 case OP_CHARI:
637 case OP_NOT:
638 case OP_NOTI:
639 case OP_STAR:
640 case OP_MINSTAR:
641 case OP_PLUS:
642 case OP_MINPLUS:
643 case OP_QUERY:
644 case OP_MINQUERY:
645 case OP_UPTO:
646 case OP_MINUPTO:
647 case OP_EXACT:
648 case OP_POSSTAR:
649 case OP_POSPLUS:
650 case OP_POSQUERY:
651 case OP_POSUPTO:
652 case OP_STARI:
653 case OP_MINSTARI:
654 case OP_PLUSI:
655 case OP_MINPLUSI:
656 case OP_QUERYI:
657 case OP_MINQUERYI:
658 case OP_UPTOI:
659 case OP_MINUPTOI:
660 case OP_EXACTI:
661 case OP_POSSTARI:
662 case OP_POSPLUSI:
663 case OP_POSQUERYI:
664 case OP_POSUPTOI:
665 case OP_NOTSTAR:
666 case OP_NOTMINSTAR:
667 case OP_NOTPLUS:
668 case OP_NOTMINPLUS:
669 case OP_NOTQUERY:
670 case OP_NOTMINQUERY:
671 case OP_NOTUPTO:
672 case OP_NOTMINUPTO:
673 case OP_NOTEXACT:
674 case OP_NOTPOSSTAR:
675 case OP_NOTPOSPLUS:
676 case OP_NOTPOSQUERY:
677 case OP_NOTPOSUPTO:
678 case OP_NOTSTARI:
679 case OP_NOTMINSTARI:
680 case OP_NOTPLUSI:
681 case OP_NOTMINPLUSI:
682 case OP_NOTQUERYI:
683 case OP_NOTMINQUERYI:
684 case OP_NOTUPTOI:
685 case OP_NOTMINUPTOI:
686 case OP_NOTEXACTI:
687 case OP_NOTPOSSTARI:
688 case OP_NOTPOSPLUSI:
689 case OP_NOTPOSQUERYI:
690 case OP_NOTPOSUPTOI:
691 cc += PRIV(OP_lengths)[*cc];
692 #ifdef SUPPORT_UTF
693 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
694 #endif
695 return cc;
696
/* Type repeats advance by one unit less than the table length.
NOTE(review): presumably this leaves the pointer on the trailing character
type opcode, which is then stepped over as an opcode of its own - confirm. */
697 /* Special cases. */
698 case OP_TYPESTAR:
699 case OP_TYPEMINSTAR:
700 case OP_TYPEPLUS:
701 case OP_TYPEMINPLUS:
702 case OP_TYPEQUERY:
703 case OP_TYPEMINQUERY:
704 case OP_TYPEUPTO:
705 case OP_TYPEMINUPTO:
706 case OP_TYPEEXACT:
707 case OP_TYPEPOSSTAR:
708 case OP_TYPEPOSPLUS:
709 case OP_TYPEPOSQUERY:
710 case OP_TYPEPOSUPTO:
711 return cc + PRIV(OP_lengths)[*cc] - 1;
712
/* Rejected (NULL) in UTF mode, otherwise a single code unit. */
713 case OP_ANYBYTE:
714 #ifdef SUPPORT_UTF
715 if (common->utf) return NULL;
716 #endif
717 return cc + 1;
718
/* The length of an extended class is stored in its own link field. */
719 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
720 case OP_XCLASS:
721 return cc + GET(cc, 1);
722 #endif
723
/* Verbs with an argument: the skip is 1 + 2 + cc[1] code units, where cc[1]
is read from the compiled pattern. */
724 case OP_MARK:
725 case OP_PRUNE_ARG:
726 case OP_SKIP_ARG:
727 case OP_THEN_ARG:
728 return cc + 1 + 2 + cc[1];
729
730 default:
731 /* All opcodes are supported now! */
732 SLJIT_ASSERT_STOP();
733 return NULL;
734 }
735 }
736
/* Walks the compiled pattern from cc up to (not including) ccend and records
in common which features the pattern uses.

Returns TRUE when the whole range was scanned. Returns FALSE when a condition
(OP_COND / OP_SCOND) is immediately followed by OP_CALLOUT, or when
next_opcode() returns NULL for an opcode.

Side effects on common: sets has_set_som, has_then, has_skip_arg,
needs_start_ptr and control_head_ptr; clears optimized_cbracket[] entries for
referenced groups; and, the first time each is needed, assigns
recursive_head_ptr, capture_last_ptr and mark_ptr the current ovector_start
and advances ovector_start by one sljit_sw. */
737 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
738 {
739 pcre_uchar *slot;
740 int i;
741
742 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
743 while (cc < ccend)
744 {
745 switch(*cc)
746 {
747 case OP_SET_SOM:
748 common->has_set_som = TRUE;
749 cc += 1;
750 break;
751
/* A group that is referenced loses its "optimized" marking. */
752 case OP_REF:
753 case OP_REFI:
754 common->optimized_cbracket[GET2(cc, 1)] = 0;
755 cc += 1 + IMM2_SIZE;
756 break;
757
758 case OP_CBRAPOS:
759 case OP_SCBRAPOS:
760 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
761 cc += 1 + LINK_SIZE + IMM2_SIZE;
762 break;
763
764 case OP_COND:
765 case OP_SCOND:
766 /* Only AUTO_CALLOUT can insert this opcode. We do
767 not intend to support this case. */
768 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
769 return FALSE;
770 cc += 1 + LINK_SIZE;
771 break;
772
773 case OP_CREF:
774 i = GET2(cc, 1);
775 common->optimized_cbracket[i] = 0;
776 cc += 1 + IMM2_SIZE;
777 break;
778
/* The first operand is an index into the name table and the second is an
entry count; the group number at the start of each of those entries is
unmarked. */
779 case OP_DNCREF:
780 i = GET2(cc, 1 + IMM2_SIZE);
781 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
782 while (i-- > 0)
783 {
784 common->optimized_cbracket[GET2(slot, 0)] = 0;
785 slot += common->name_entry_size;
786 }
787 cc += 1 + 2 * IMM2_SIZE;
788 break;
789
790 case OP_RECURSE:
791 /* Set its value only once. */
792 if (common->recursive_head_ptr == 0)
793 {
794 common->recursive_head_ptr = common->ovector_start;
795 common->ovector_start += sizeof(sljit_sw);
796 }
797 cc += 1 + LINK_SIZE;
798 break;
799
800 case OP_CALLOUT:
801 if (common->capture_last_ptr == 0)
802 {
803 common->capture_last_ptr = common->ovector_start;
804 common->ovector_start += sizeof(sljit_sw);
805 }
806 cc += 2 + 2 * LINK_SIZE;
807 break;
808
/* The next three labels cascade on purpose: THEN_ARG does everything
PRUNE_ARG does, and PRUNE_ARG does everything MARK does.
NOTE(review): control_head_ptr is only set to the non-zero value 1 here;
presumably a real offset is assigned elsewhere. */
809 case OP_THEN_ARG:
810 common->has_then = TRUE;
811 common->control_head_ptr = 1;
812 /* Fall through. */
813
814 case OP_PRUNE_ARG:
815 common->needs_start_ptr = TRUE;
816 /* Fall through. */
817
818 case OP_MARK:
819 if (common->mark_ptr == 0)
820 {
821 common->mark_ptr = common->ovector_start;
822 common->ovector_start += sizeof(sljit_sw);
823 }
824 cc += 1 + 2 + cc[1];
825 break;
826
827 case OP_THEN:
828 common->has_then = TRUE;
829 common->control_head_ptr = 1;
830 /* Fall through. */
831
832 case OP_PRUNE:
833 case OP_SKIP:
834 common->needs_start_ptr = TRUE;
835 cc += 1;
836 break;
837
838 case OP_SKIP_ARG:
839 common->control_head_ptr = 1;
840 common->has_skip_arg = TRUE;
841 cc += 1 + 2 + cc[1];
842 break;
843
/* Everything else is only stepped over; NULL means unsupported. */
844 default:
845 cc = next_opcode(common, cc);
846 if (cc == NULL)
847 return FALSE;
848 break;
849 }
850 }
851 return TRUE;
852 }
853
854 static int get_class_iterator_size(pcre_uchar *cc)
855 {
856 switch(*cc)
857 {
858 case OP_CRSTAR:
859 case OP_CRPLUS:
860 return 2;
861
862 case OP_CRMINSTAR:
863 case OP_CRMINPLUS:
864 case OP_CRQUERY:
865 case OP_CRMINQUERY:
866 return 1;
867
868 case OP_CRRANGE:
869 case OP_CRMINRANGE:
870 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
871 return 0;
872 return 2;
873
874 default:
875 return 0;
876 }
877 }
878
/* Checks whether the bracket group starting at begin is followed by byte
identical copies of itself, i.e. whether the pattern compiler expanded a
counted repeat into a sequence of groups.

On success the result is written into common->private_data_ptrs in the three
entries that follow the index of a closing OP_KET:
  [ket + 1]  distance to skip over the copies
  [ket + 2]  OP_EXACT, OP_UPTO or OP_MINUPTO
  [ket + 3]  repeat count
set_private_data_ptrs() reads [ket + 1] when it reaches that OP_KET.

Returns TRUE when a repeat was recorded (now or by an earlier call), FALSE
otherwise. Groups not closed by a plain OP_KET are never treated as repeats. */
879 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
880 {
881 pcre_uchar *end = bracketend(begin);
882 pcre_uchar *next;
883 pcre_uchar *next_end;
884 pcre_uchar *max_end;
885 pcre_uchar type;
/* Size of one copy of the group, in code units. */
886 sljit_sw length = end - begin;
887 int min, max, i;
888
889 /* Detect fixed iterations first. */
890 if (end[-(1 + LINK_SIZE)] != OP_KET)
891 return FALSE;
892
/* end - LINK_SIZE is the position right after the closing OP_KET opcode. */
893 /* Already detected repeat. */
894 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
895 return TRUE;
896
/* Count how many identical groups stand next to each other; min includes
the original group. */
897 next = end;
898 min = 1;
899 while (1)
900 {
901 if (*next != *begin)
902 break;
903 next_end = bracketend(next);
904 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
905 break;
906 next = next_end;
907 min++;
908 }
909
/* Exactly two copies are rejected outright, before looking for an optional
tail. */
910 if (min == 2)
911 return FALSE;
912
/* Look for an optional tail: one or more "zero-op, OP_BRA, copy" levels
followed by an innermost "zero-op, copy" and one closing OP_KET per level. */
913 max = 0;
914 max_end = next;
915 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
916 {
917 type = *next;
918 while (1)
919 {
920 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
921 break;
922 next_end = bracketend(next + 2 + LINK_SIZE);
923 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
924 break;
925 next = next_end;
926 max++;
927 }
928
929 if (next[0] == type && next[1] == *begin && max >= 1)
930 {
931 next_end = bracketend(next + 1);
932 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
933 {
/* Every OP_BRA level opened above must be closed by a plain OP_KET. */
934 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
935 if (*next_end != OP_KET)
936 break;
937
938 if (i == max)
939 {
/* The record is attached to the OP_KET of the last mandatory copy, which
becomes the first iteration of the bounded repeat. */
940 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
941 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
942 /* +2 the original and the last. */
943 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
944 if (min == 1)
945 return TRUE;
/* That copy no longer counts as a fixed iteration: step max_end back over
it using the link stored in its OP_KET. */
946 min--;
947 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
948 }
949 }
950 }
951 }
952
/* Three or more remaining fixed copies are recorded as an exact repeat on
the OP_KET of the first group. */
953 if (min >= 3)
954 {
955 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
956 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
957 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
958 return TRUE;
959 }
960
961 return FALSE;
962 }
963
/* Groups of case labels for switch statements over single character
iterator opcodes. In set_private_data_ptrs() the groups whose name ends in _1
reserve one private data word, and those ending in _2A or _2B reserve two.
The _2B groups contain the UPTO style opcodes, which are skipped there with an
additional IMM2_SIZE. The TYPE variants cover the OP_TYPE* repeats. Each macro
expands to labels only, so the statements for the group follow its use. */
964 #define CASE_ITERATOR_PRIVATE_DATA_1 \
965 case OP_MINSTAR: \
966 case OP_MINPLUS: \
967 case OP_QUERY: \
968 case OP_MINQUERY: \
969 case OP_MINSTARI: \
970 case OP_MINPLUSI: \
971 case OP_QUERYI: \
972 case OP_MINQUERYI: \
973 case OP_NOTMINSTAR: \
974 case OP_NOTMINPLUS: \
975 case OP_NOTQUERY: \
976 case OP_NOTMINQUERY: \
977 case OP_NOTMINSTARI: \
978 case OP_NOTMINPLUSI: \
979 case OP_NOTQUERYI: \
980 case OP_NOTMINQUERYI:
981
982 #define CASE_ITERATOR_PRIVATE_DATA_2A \
983 case OP_STAR: \
984 case OP_PLUS: \
985 case OP_STARI: \
986 case OP_PLUSI: \
987 case OP_NOTSTAR: \
988 case OP_NOTPLUS: \
989 case OP_NOTSTARI: \
990 case OP_NOTPLUSI:
991
992 #define CASE_ITERATOR_PRIVATE_DATA_2B \
993 case OP_UPTO: \
994 case OP_MINUPTO: \
995 case OP_UPTOI: \
996 case OP_MINUPTOI: \
997 case OP_NOTUPTO: \
998 case OP_NOTMINUPTO: \
999 case OP_NOTUPTOI: \
1000 case OP_NOTMINUPTOI:
1001
1002 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1003 case OP_TYPEMINSTAR: \
1004 case OP_TYPEMINPLUS: \
1005 case OP_TYPEQUERY: \
1006 case OP_TYPEMINQUERY:
1007
1008 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1009 case OP_TYPESTAR: \
1010 case OP_TYPEPLUS:
1011
1012 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1013 case OP_TYPEUPTO: \
1014 case OP_TYPEMINUPTO:
1015
1016 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1017 {
1018 pcre_uchar *cc = common->start;
1019 pcre_uchar *alternative;
1020 pcre_uchar *end = NULL;
1021 int private_data_ptr = *private_data_start;
1022 int space, size, bracketlen;
1023
1024 while (cc < ccend)
1025 {
1026 space = 0;
1027 size = 0;
1028 bracketlen = 0;
1029 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1030 return;
1031
1032 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1033 if (detect_repeat(common, cc))
1034 {
1035 /* These brackets are converted to repeats, so no global
1036 based single character repeat is allowed. */
1037 if (cc >= end)
1038 end = bracketend(cc);
1039 }
1040
1041 switch(*cc)
1042 {
1043 case OP_KET:
1044 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1045 {
1046 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1047 private_data_ptr += sizeof(sljit_sw);
1048 cc += common->private_data_ptrs[cc + 1 - common->start];
1049 }
1050 cc += 1 + LINK_SIZE;
1051 break;
1052
1053 case OP_ASSERT:
1054 case OP_ASSERT_NOT:
1055 case OP_ASSERTBACK:
1056 case OP_ASSERTBACK_NOT:
1057 case OP_ONCE:
1058 case OP_ONCE_NC:
1059 case OP_BRAPOS:
1060 case OP_SBRA:
1061 case OP_SBRAPOS:
1062 case OP_SCOND:
1063 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1064 private_data_ptr += sizeof(sljit_sw);
1065 bracketlen = 1 + LINK_SIZE;
1066 break;
1067
1068 case OP_CBRAPOS:
1069 case OP_SCBRAPOS:
1070 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1071 private_data_ptr += sizeof(sljit_sw);
1072 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1073 break;
1074
1075 case OP_COND:
1076 /* Might be a hidden SCOND. */
1077 alternative = cc + GET(cc, 1);
1078 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1079 {
1080 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1081 private_data_ptr += sizeof(sljit_sw);
1082 }
1083 bracketlen = 1 + LINK_SIZE;
1084 break;
1085
1086 case OP_BRA:
1087 bracketlen = 1 + LINK_SIZE;
1088 break;
1089
1090 case OP_CBRA:
1091 case OP_SCBRA:
1092 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1093 break;
1094
1095 CASE_ITERATOR_PRIVATE_DATA_1
1096 space = 1;
1097 size = -2;
1098 break;
1099
1100 CASE_ITERATOR_PRIVATE_DATA_2A
1101 space = 2;
1102 size = -2;
1103 break;
1104
1105 CASE_ITERATOR_PRIVATE_DATA_2B
1106 space = 2;
1107 size = -(2 + IMM2_SIZE);
1108 break;
1109
1110 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1111 space = 1;
1112 size = 1;
1113 break;
1114
1115 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1116 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1117 space = 2;
1118 size = 1;
1119 break;
1120
1121 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1122 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1123 space = 2;
1124 size = 1 + IMM2_SIZE;
1125 break;
1126
1127 case OP_CLASS:
1128 case OP_NCLASS:
1129 size += 1 + 32 / sizeof(pcre_uchar);
1130 space = get_class_iterator_size(cc + size);
1131 break;
1132
1133 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1134 case OP_XCLASS:
1135 size = GET(cc, 1);
1136 space = get_class_iterator_size(cc + size);
1137 break;
1138 #endif
1139
1140 default:
1141 cc = next_opcode(common, cc);
1142 SLJIT_ASSERT(cc != NULL);
1143 break;
1144 }
1145
1146 /* Character iterators, which are not inside a repeated bracket,
1147 gets a private slot instead of allocating it on the stack. */
1148 if (space > 0 && cc >= end)
1149 {
1150 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1151 private_data_ptr += sizeof(sljit_sw) * space;
1152 }
1153
1154 if (size != 0)
1155 {
1156 if (size < 0)
1157 {
1158 cc += -size;
1159 #ifdef SUPPORT_UTF
1160 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1161 #endif
1162 }
1163 else
1164 cc += size;
1165 }
1166
1167 if (bracketlen > 0)
1168 {
1169 if (cc >= end)
1170 {
1171 end = bracketend(cc);
1172 if (end[-1 - LINK_SIZE] == OP_KET)
1173 end = NULL;
1174 }
1175 cc += bracketlen;
1176 }
1177 }
1178 *private_data_start = private_data_ptr;
1179 }
1180
1181 /* Returns with a frame_types (always < 0) if no need for frame. */
1182 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1183 {
1184 int length = 0;
1185 int possessive = 0;
1186 BOOL stack_restore = FALSE;
1187 BOOL setsom_found = recursive;
1188 BOOL setmark_found = recursive;
1189 /* The last capture is a local variable even for recursions. */
1190 BOOL capture_last_found = FALSE;
1191
1192 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1193 SLJIT_ASSERT(common->control_head_ptr != 0);
1194 *needs_control_head = TRUE;
1195 #else
1196 *needs_control_head = FALSE;
1197 #endif
1198
1199 if (ccend == NULL)
1200 {
1201 ccend = bracketend(cc) - (1 + LINK_SIZE);
1202 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1203 {
1204 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1205 /* This is correct regardless of common->capture_last_ptr. */
1206 capture_last_found = TRUE;
1207 }
1208 cc = next_opcode(common, cc);
1209 }
1210
1211 SLJIT_ASSERT(cc != NULL);
1212 while (cc < ccend)
1213 switch(*cc)
1214 {
1215 case OP_SET_SOM:
1216 SLJIT_ASSERT(common->has_set_som);
1217 stack_restore = TRUE;
1218 if (!setsom_found)
1219 {
1220 length += 2;
1221 setsom_found = TRUE;
1222 }
1223 cc += 1;
1224 break;
1225
1226 case OP_MARK:
1227 case OP_PRUNE_ARG:
1228 case OP_THEN_ARG:
1229 SLJIT_ASSERT(common->mark_ptr != 0);
1230 stack_restore = TRUE;
1231 if (!setmark_found)
1232 {
1233 length += 2;
1234 setmark_found = TRUE;
1235 }
1236 if (common->control_head_ptr != 0)
1237 *needs_control_head = TRUE;
1238 cc += 1 + 2 + cc[1];
1239 break;
1240
1241 case OP_RECURSE:
1242 stack_restore = TRUE;
1243 if (common->has_set_som && !setsom_found)
1244 {
1245 length += 2;
1246 setsom_found = TRUE;
1247 }
1248 if (common->mark_ptr != 0 && !setmark_found)
1249 {
1250 length += 2;
1251 setmark_found = TRUE;
1252 }
1253 if (common->capture_last_ptr != 0 && !capture_last_found)
1254 {
1255 length += 2;
1256 capture_last_found = TRUE;
1257 }
1258 cc += 1 + LINK_SIZE;
1259 break;
1260
1261 case OP_CBRA:
1262 case OP_CBRAPOS:
1263 case OP_SCBRA:
1264 case OP_SCBRAPOS:
1265 stack_restore = TRUE;
1266 if (common->capture_last_ptr != 0 && !capture_last_found)
1267 {
1268 length += 2;
1269 capture_last_found = TRUE;
1270 }
1271 length += 3;
1272 cc += 1 + LINK_SIZE + IMM2_SIZE;
1273 break;
1274
1275 default:
1276 stack_restore = TRUE;
1277 /* Fall through. */
1278
1279 case OP_NOT_WORD_BOUNDARY:
1280 case OP_WORD_BOUNDARY:
1281 case OP_NOT_DIGIT:
1282 case OP_DIGIT:
1283 case OP_NOT_WHITESPACE:
1284 case OP_WHITESPACE:
1285 case OP_NOT_WORDCHAR:
1286 case OP_WORDCHAR:
1287 case OP_ANY:
1288 case OP_ALLANY:
1289 case OP_ANYBYTE:
1290 case OP_NOTPROP:
1291 case OP_PROP:
1292 case OP_ANYNL:
1293 case OP_NOT_HSPACE:
1294 case OP_HSPACE:
1295 case OP_NOT_VSPACE:
1296 case OP_VSPACE:
1297 case OP_EXTUNI:
1298 case OP_EODN:
1299 case OP_EOD:
1300 case OP_CIRC:
1301 case OP_CIRCM:
1302 case OP_DOLL:
1303 case OP_DOLLM:
1304 case OP_CHAR:
1305 case OP_CHARI:
1306 case OP_NOT:
1307 case OP_NOTI:
1308
1309 case OP_EXACT:
1310 case OP_POSSTAR:
1311 case OP_POSPLUS:
1312 case OP_POSQUERY:
1313 case OP_POSUPTO:
1314
1315 case OP_EXACTI:
1316 case OP_POSSTARI:
1317 case OP_POSPLUSI:
1318 case OP_POSQUERYI:
1319 case OP_POSUPTOI:
1320
1321 case OP_NOTEXACT:
1322 case OP_NOTPOSSTAR:
1323 case OP_NOTPOSPLUS:
1324 case OP_NOTPOSQUERY:
1325 case OP_NOTPOSUPTO:
1326
1327 case OP_NOTEXACTI:
1328 case OP_NOTPOSSTARI:
1329 case OP_NOTPOSPLUSI:
1330 case OP_NOTPOSQUERYI:
1331 case OP_NOTPOSUPTOI:
1332
1333 case OP_TYPEEXACT:
1334 case OP_TYPEPOSSTAR:
1335 case OP_TYPEPOSPLUS:
1336 case OP_TYPEPOSQUERY:
1337 case OP_TYPEPOSUPTO:
1338
1339 case OP_CLASS:
1340 case OP_NCLASS:
1341 case OP_XCLASS:
1342
1343 cc = next_opcode(common, cc);
1344 SLJIT_ASSERT(cc != NULL);
1345 break;
1346 }
1347
1348 /* Possessive quantifiers can use a special case. */
1349 if (SLJIT_UNLIKELY(possessive == length))
1350 return stack_restore ? no_frame : no_stack;
1351
1352 if (length > 0)
1353 return length + 1;
1354 return stack_restore ? no_frame : no_stack;
1355 }
1356
1357 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1358 {
1359 DEFINE_COMPILER;
1360 BOOL setsom_found = recursive;
1361 BOOL setmark_found = recursive;
1362 /* The last capture is a local variable even for recursions. */
1363 BOOL capture_last_found = FALSE;
1364 int offset;
1365
1366 /* >= 1 + shortest item size (2) */
1367 SLJIT_UNUSED_ARG(stacktop);
1368 SLJIT_ASSERT(stackpos >= stacktop + 2);
1369
1370 stackpos = STACK(stackpos);
1371 if (ccend == NULL)
1372 {
1373 ccend = bracketend(cc) - (1 + LINK_SIZE);
1374 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1375 cc = next_opcode(common, cc);
1376 }
1377
1378 SLJIT_ASSERT(cc != NULL);
1379 while (cc < ccend)
1380 switch(*cc)
1381 {
1382 case OP_SET_SOM:
1383 SLJIT_ASSERT(common->has_set_som);
1384 if (!setsom_found)
1385 {
1386 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1387 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1388 stackpos += (int)sizeof(sljit_sw);
1389 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1390 stackpos += (int)sizeof(sljit_sw);
1391 setsom_found = TRUE;
1392 }
1393 cc += 1;
1394 break;
1395
1396 case OP_MARK:
1397 case OP_PRUNE_ARG:
1398 case OP_THEN_ARG:
1399 SLJIT_ASSERT(common->mark_ptr != 0);
1400 if (!setmark_found)
1401 {
1402 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1404 stackpos += (int)sizeof(sljit_sw);
1405 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1406 stackpos += (int)sizeof(sljit_sw);
1407 setmark_found = TRUE;
1408 }
1409 cc += 1 + 2 + cc[1];
1410 break;
1411
1412 case OP_RECURSE:
1413 if (common->has_set_som && !setsom_found)
1414 {
1415 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1416 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1417 stackpos += (int)sizeof(sljit_sw);
1418 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1419 stackpos += (int)sizeof(sljit_sw);
1420 setsom_found = TRUE;
1421 }
1422 if (common->mark_ptr != 0 && !setmark_found)
1423 {
1424 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1425 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1426 stackpos += (int)sizeof(sljit_sw);
1427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1428 stackpos += (int)sizeof(sljit_sw);
1429 setmark_found = TRUE;
1430 }
1431 if (common->capture_last_ptr != 0 && !capture_last_found)
1432 {
1433 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1435 stackpos += (int)sizeof(sljit_sw);
1436 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1437 stackpos += (int)sizeof(sljit_sw);
1438 capture_last_found = TRUE;
1439 }
1440 cc += 1 + LINK_SIZE;
1441 break;
1442
1443 case OP_CBRA:
1444 case OP_CBRAPOS:
1445 case OP_SCBRA:
1446 case OP_SCBRAPOS:
1447 if (common->capture_last_ptr != 0 && !capture_last_found)
1448 {
1449 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1450 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1451 stackpos += (int)sizeof(sljit_sw);
1452 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1453 stackpos += (int)sizeof(sljit_sw);
1454 capture_last_found = TRUE;
1455 }
1456 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1458 stackpos += (int)sizeof(sljit_sw);
1459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1460 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1462 stackpos += (int)sizeof(sljit_sw);
1463 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1464 stackpos += (int)sizeof(sljit_sw);
1465
1466 cc += 1 + LINK_SIZE + IMM2_SIZE;
1467 break;
1468
1469 default:
1470 cc = next_opcode(common, cc);
1471 SLJIT_ASSERT(cc != NULL);
1472 break;
1473 }
1474
1475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1476 SLJIT_ASSERT(stackpos == STACK(stacktop));
1477 }
1478
1479 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1480 {
1481 int private_data_length = needs_control_head ? 3 : 2;
1482 int size;
1483 pcre_uchar *alternative;
1484 /* Calculate the sum of the private machine words. */
1485 while (cc < ccend)
1486 {
1487 size = 0;
1488 switch(*cc)
1489 {
1490 case OP_KET:
1491 if (PRIVATE_DATA(cc) != 0)
1492 private_data_length++;
1493 cc += 1 + LINK_SIZE;
1494 break;
1495
1496 case OP_ASSERT:
1497 case OP_ASSERT_NOT:
1498 case OP_ASSERTBACK:
1499 case OP_ASSERTBACK_NOT:
1500 case OP_ONCE:
1501 case OP_ONCE_NC:
1502 case OP_BRAPOS:
1503 case OP_SBRA:
1504 case OP_SBRAPOS:
1505 case OP_SCOND:
1506 private_data_length++;
1507 cc += 1 + LINK_SIZE;
1508 break;
1509
1510 case OP_CBRA:
1511 case OP_SCBRA:
1512 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1513 private_data_length++;
1514 cc += 1 + LINK_SIZE + IMM2_SIZE;
1515 break;
1516
1517 case OP_CBRAPOS:
1518 case OP_SCBRAPOS:
1519 private_data_length += 2;
1520 cc += 1 + LINK_SIZE + IMM2_SIZE;
1521 break;
1522
1523 case OP_COND:
1524 /* Might be a hidden SCOND. */
1525 alternative = cc + GET(cc, 1);
1526 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1527 private_data_length++;
1528 cc += 1 + LINK_SIZE;
1529 break;
1530
1531 CASE_ITERATOR_PRIVATE_DATA_1
1532 if (PRIVATE_DATA(cc))
1533 private_data_length++;
1534 cc += 2;
1535 #ifdef SUPPORT_UTF
1536 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1537 #endif
1538 break;
1539
1540 CASE_ITERATOR_PRIVATE_DATA_2A
1541 if (PRIVATE_DATA(cc))
1542 private_data_length += 2;
1543 cc += 2;
1544 #ifdef SUPPORT_UTF
1545 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1546 #endif
1547 break;
1548
1549 CASE_ITERATOR_PRIVATE_DATA_2B
1550 if (PRIVATE_DATA(cc))
1551 private_data_length += 2;
1552 cc += 2 + IMM2_SIZE;
1553 #ifdef SUPPORT_UTF
1554 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1555 #endif
1556 break;
1557
1558 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1559 if (PRIVATE_DATA(cc))
1560 private_data_length++;
1561 cc += 1;
1562 break;
1563
1564 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1565 if (PRIVATE_DATA(cc))
1566 private_data_length += 2;
1567 cc += 1;
1568 break;
1569
1570 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1571 if (PRIVATE_DATA(cc))
1572 private_data_length += 2;
1573 cc += 1 + IMM2_SIZE;
1574 break;
1575
1576 case OP_CLASS:
1577 case OP_NCLASS:
1578 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1579 case OP_XCLASS:
1580 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1581 #else
1582 size = 1 + 32 / (int)sizeof(pcre_uchar);
1583 #endif
1584 if (PRIVATE_DATA(cc))
1585 private_data_length += get_class_iterator_size(cc + size);
1586 cc += size;
1587 break;
1588
1589 default:
1590 cc = next_opcode(common, cc);
1591 SLJIT_ASSERT(cc != NULL);
1592 break;
1593 }
1594 }
1595 SLJIT_ASSERT(cc == ccend);
1596 return private_data_length;
1597 }
1598
1599 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1600 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1601 {
1602 DEFINE_COMPILER;
1603 int srcw[2];
1604 int count, size;
1605 BOOL tmp1next = TRUE;
1606 BOOL tmp1empty = TRUE;
1607 BOOL tmp2empty = TRUE;
1608 pcre_uchar *alternative;
1609 enum {
1610 start,
1611 loop,
1612 end
1613 } status;
1614
1615 status = save ? start : loop;
1616 stackptr = STACK(stackptr - 2);
1617 stacktop = STACK(stacktop - 1);
1618
1619 if (!save)
1620 {
1621 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1622 if (stackptr < stacktop)
1623 {
1624 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1625 stackptr += sizeof(sljit_sw);
1626 tmp1empty = FALSE;
1627 }
1628 if (stackptr < stacktop)
1629 {
1630 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1631 stackptr += sizeof(sljit_sw);
1632 tmp2empty = FALSE;
1633 }
1634 /* The tmp1next must be TRUE in either way. */
1635 }
1636
1637 do
1638 {
1639 count = 0;
1640 switch(status)
1641 {
1642 case start:
1643 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1644 count = 1;
1645 srcw[0] = common->recursive_head_ptr;
1646 if (needs_control_head)
1647 {
1648 SLJIT_ASSERT(common->control_head_ptr != 0);
1649 count = 2;
1650 srcw[1] = common->control_head_ptr;
1651 }
1652 status = loop;
1653 break;
1654
1655 case loop:
1656 if (cc >= ccend)
1657 {
1658 status = end;
1659 break;
1660 }
1661
1662 switch(*cc)
1663 {
1664 case OP_KET:
1665 if (PRIVATE_DATA(cc) != 0)
1666 {
1667 count = 1;
1668 srcw[0] = PRIVATE_DATA(cc);
1669 }
1670 cc += 1 + LINK_SIZE;
1671 break;
1672
1673 case OP_ASSERT:
1674 case OP_ASSERT_NOT:
1675 case OP_ASSERTBACK:
1676 case OP_ASSERTBACK_NOT:
1677 case OP_ONCE:
1678 case OP_ONCE_NC:
1679 case OP_BRAPOS:
1680 case OP_SBRA:
1681 case OP_SBRAPOS:
1682 case OP_SCOND:
1683 count = 1;
1684 srcw[0] = PRIVATE_DATA(cc);
1685 SLJIT_ASSERT(srcw[0] != 0);
1686 cc += 1 + LINK_SIZE;
1687 break;
1688
1689 case OP_CBRA:
1690 case OP_SCBRA:
1691 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1692 {
1693 count = 1;
1694 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1695 }
1696 cc += 1 + LINK_SIZE + IMM2_SIZE;
1697 break;
1698
1699 case OP_CBRAPOS:
1700 case OP_SCBRAPOS:
1701 count = 2;
1702 srcw[0] = PRIVATE_DATA(cc);
1703 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1704 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1705 cc += 1 + LINK_SIZE + IMM2_SIZE;
1706 break;
1707
1708 case OP_COND:
1709 /* Might be a hidden SCOND. */
1710 alternative = cc + GET(cc, 1);
1711 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1712 {
1713 count = 1;
1714 srcw[0] = PRIVATE_DATA(cc);
1715 SLJIT_ASSERT(srcw[0] != 0);
1716 }
1717 cc += 1 + LINK_SIZE;
1718 break;
1719
1720 CASE_ITERATOR_PRIVATE_DATA_1
1721 if (PRIVATE_DATA(cc))
1722 {
1723 count = 1;
1724 srcw[0] = PRIVATE_DATA(cc);
1725 }
1726 cc += 2;
1727 #ifdef SUPPORT_UTF
1728 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1729 #endif
1730 break;
1731
1732 CASE_ITERATOR_PRIVATE_DATA_2A
1733 if (PRIVATE_DATA(cc))
1734 {
1735 count = 2;
1736 srcw[0] = PRIVATE_DATA(cc);
1737 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1738 }
1739 cc += 2;
1740 #ifdef SUPPORT_UTF
1741 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1742 #endif
1743 break;
1744
1745 CASE_ITERATOR_PRIVATE_DATA_2B
1746 if (PRIVATE_DATA(cc))
1747 {
1748 count = 2;
1749 srcw[0] = PRIVATE_DATA(cc);
1750 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1751 }
1752 cc += 2 + IMM2_SIZE;
1753 #ifdef SUPPORT_UTF
1754 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1755 #endif
1756 break;
1757
1758 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1759 if (PRIVATE_DATA(cc))
1760 {
1761 count = 1;
1762 srcw[0] = PRIVATE_DATA(cc);
1763 }
1764 cc += 1;
1765 break;
1766
1767 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1768 if (PRIVATE_DATA(cc))
1769 {
1770 count = 2;
1771 srcw[0] = PRIVATE_DATA(cc);
1772 srcw[1] = srcw[0] + sizeof(sljit_sw);
1773 }
1774 cc += 1;
1775 break;
1776
1777 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1778 if (PRIVATE_DATA(cc))
1779 {
1780 count = 2;
1781 srcw[0] = PRIVATE_DATA(cc);
1782 srcw[1] = srcw[0] + sizeof(sljit_sw);
1783 }
1784 cc += 1 + IMM2_SIZE;
1785 break;
1786
1787 case OP_CLASS:
1788 case OP_NCLASS:
1789 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1790 case OP_XCLASS:
1791 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1792 #else
1793 size = 1 + 32 / (int)sizeof(pcre_uchar);
1794 #endif
1795 if (PRIVATE_DATA(cc))
1796 switch(get_class_iterator_size(cc + size))
1797 {
1798 case 1:
1799 count = 1;
1800 srcw[0] = PRIVATE_DATA(cc);
1801 break;
1802
1803 case 2:
1804 count = 2;
1805 srcw[0] = PRIVATE_DATA(cc);
1806 srcw[1] = srcw[0] + sizeof(sljit_sw);
1807 break;
1808
1809 default:
1810 SLJIT_ASSERT_STOP();
1811 break;
1812 }
1813 cc += size;
1814 break;
1815
1816 default:
1817 cc = next_opcode(common, cc);
1818 SLJIT_ASSERT(cc != NULL);
1819 break;
1820 }
1821 break;
1822
1823 case end:
1824 SLJIT_ASSERT_STOP();
1825 break;
1826 }
1827
1828 while (count > 0)
1829 {
1830 count--;
1831 if (save)
1832 {
1833 if (tmp1next)
1834 {
1835 if (!tmp1empty)
1836 {
1837 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1838 stackptr += sizeof(sljit_sw);
1839 }
1840 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1841 tmp1empty = FALSE;
1842 tmp1next = FALSE;
1843 }
1844 else
1845 {
1846 if (!tmp2empty)
1847 {
1848 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1849 stackptr += sizeof(sljit_sw);
1850 }
1851 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1852 tmp2empty = FALSE;
1853 tmp1next = TRUE;
1854 }
1855 }
1856 else
1857 {
1858 if (tmp1next)
1859 {
1860 SLJIT_ASSERT(!tmp1empty);
1861 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1862 tmp1empty = stackptr >= stacktop;
1863 if (!tmp1empty)
1864 {
1865 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1866 stackptr += sizeof(sljit_sw);
1867 }
1868 tmp1next = FALSE;
1869 }
1870 else
1871 {
1872 SLJIT_ASSERT(!tmp2empty);
1873 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1874 tmp2empty = stackptr >= stacktop;
1875 if (!tmp2empty)
1876 {
1877 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1878 stackptr += sizeof(sljit_sw);
1879 }
1880 tmp1next = TRUE;
1881 }
1882 }
1883 }
1884 }
1885 while (status != end);
1886
1887 if (save)
1888 {
1889 if (tmp1next)
1890 {
1891 if (!tmp1empty)
1892 {
1893 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1894 stackptr += sizeof(sljit_sw);
1895 }
1896 if (!tmp2empty)
1897 {
1898 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1899 stackptr += sizeof(sljit_sw);
1900 }
1901 }
1902 else
1903 {
1904 if (!tmp2empty)
1905 {
1906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1907 stackptr += sizeof(sljit_sw);
1908 }
1909 if (!tmp1empty)
1910 {
1911 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1912 stackptr += sizeof(sljit_sw);
1913 }
1914 }
1915 }
1916 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1917 }
1918
1919 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1920 {
1921 pcre_uchar *end = bracketend(cc);
1922 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1923
1924 /* Assert captures then. */
1925 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1926 current_offset = NULL;
1927 /* Conditional block does not. */
1928 if (*cc == OP_COND || *cc == OP_SCOND)
1929 has_alternatives = FALSE;
1930
1931 cc = next_opcode(common, cc);
1932 if (has_alternatives)
1933 current_offset = common->then_offsets + (cc - common->start);
1934
1935 while (cc < end)
1936 {
1937 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1938 cc = set_then_offsets(common, cc, current_offset);
1939 else
1940 {
1941 if (*cc == OP_ALT && has_alternatives)
1942 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1943 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1944 *current_offset = 1;
1945 cc = next_opcode(common, cc);
1946 }
1947 }
1948
1949 return end;
1950 }
1951
1952 #undef CASE_ITERATOR_PRIVATE_DATA_1
1953 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1954 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1955 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1956 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1957 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1958
1959 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1960 {
1961 return (value & (value - 1)) == 0;
1962 }
1963
1964 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1965 {
1966 while (list)
1967 {
1968 /* sljit_set_label is clever enough to do nothing
1969 if either the jump or the label is NULL. */
1970 SET_LABEL(list->jump, label);
1971 list = list->next;
1972 }
1973 }
1974
1975 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1976 {
1977 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1978 if (list_item)
1979 {
1980 list_item->next = *list;
1981 list_item->jump = jump;
1982 *list = list_item;
1983 }
1984 }
1985
1986 static void add_stub(compiler_common *common, struct sljit_jump *start)
1987 {
1988 DEFINE_COMPILER;
1989 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1990
1991 if (list_item)
1992 {
1993 list_item->start = start;
1994 list_item->quit = LABEL();
1995 list_item->next = common->stubs;
1996 common->stubs = list_item;
1997 }
1998 }
1999
2000 static void flush_stubs(compiler_common *common)
2001 {
2002 DEFINE_COMPILER;
2003 stub_list* list_item = common->stubs;
2004
2005 while (list_item)
2006 {
2007 JUMPHERE(list_item->start);
2008 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2009 JUMPTO(SLJIT_JUMP, list_item->quit);
2010 list_item = list_item->next;
2011 }
2012 common->stubs = NULL;
2013 }
2014
2015 static SLJIT_INLINE void count_match(compiler_common *common)
2016 {
2017 DEFINE_COMPILER;
2018
2019 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2020 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2021 }
2022
2023 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2024 {
2025 /* May destroy all locals and registers except TMP2. */
2026 DEFINE_COMPILER;
2027
2028 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2029 #ifdef DESTROY_REGISTERS
2030 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2031 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2032 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2033 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2034 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2035 #endif
2036 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2037 }
2038
2039 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2040 {
2041 DEFINE_COMPILER;
2042 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2043 }
2044
2045 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2046 {
2047 DEFINE_COMPILER;
2048 struct sljit_label *loop;
2049 int i;
2050
2051 /* At this point we can freely use all temporary registers. */
2052 SLJIT_ASSERT(length > 1);
2053 /* TMP1 returns with begin - 1. */
2054 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2055 if (length < 8)
2056 {
2057 for (i = 1; i < length; i++)
2058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2059 }
2060 else
2061 {
2062 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2063 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2064 loop = LABEL();
2065 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2066 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2067 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2068 }
2069 }
2070
2071 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2072 {
2073 DEFINE_COMPILER;
2074 struct sljit_label *loop;
2075 int i;
2076
2077 SLJIT_ASSERT(length > 1);
2078 /* OVECTOR(1) contains the "string begin - 1" constant. */
2079 if (length > 2)
2080 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2081 if (length < 8)
2082 {
2083 for (i = 2; i < length; i++)
2084 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2085 }
2086 else
2087 {
2088 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2089 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2090 loop = LABEL();
2091 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2092 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2093 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2094 }
2095
2096 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2097 if (common->mark_ptr != 0)
2098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2099 if (common->control_head_ptr != 0)
2100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2101 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2102 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2103 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2104 }
2105
2106 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2107 {
2108 while (current != NULL)
2109 {
2110 switch (current[-2])
2111 {
2112 case type_then_trap:
2113 break;
2114
2115 case type_mark:
2116 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2117 return current[-4];
2118 break;
2119
2120 default:
2121 SLJIT_ASSERT_STOP();
2122 break;
2123 }
2124 current = (sljit_sw*)current[-1];
2125 }
2126 return -1;
2127 }
2128
2129 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2130 {
2131 DEFINE_COMPILER;
2132 struct sljit_label *loop;
2133 struct sljit_jump *early_quit;
2134
2135 /* At this point we can freely use all registers. */
2136 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2137 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2138
2139 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2140 if (common->mark_ptr != 0)
2141 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2142 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2143 if (common->mark_ptr != 0)
2144 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2145 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2146 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2147 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2148 /* Unlikely, but possible */
2149 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2150 loop = LABEL();
2151 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2152 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2153 /* Copy the integer value to the output buffer */
2154 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2155 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2156 #endif
2157 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2158 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2159 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2160 JUMPHERE(early_quit);
2161
2162 /* Calculate the return value, which is the maximum ovector value. */
2163 if (topbracket > 1)
2164 {
2165 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2166 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2167
2168 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2169 loop = LABEL();
2170 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2171 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2172 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2173 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2174 }
2175 else
2176 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2177 }
2178
2179 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2180 {
2181 DEFINE_COMPILER;
2182 struct sljit_jump *jump;
2183
2184 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2185 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2186 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2187
2188 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2189 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2190 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2191 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2192
2193 /* Store match begin and end. */
2194 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2195 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2196
2197 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2198 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2199 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2200 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2201 #endif
2202 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2203 JUMPHERE(jump);
2204
2205 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2206 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2207 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2208 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2209 #endif
2210 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2211
2212 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2213 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2214 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2215 #endif
2216 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2217
2218 JUMPTO(SLJIT_JUMP, quit);
2219 }
2220
2221 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2222 {
2223 /* May destroy TMP1. */
2224 DEFINE_COMPILER;
2225 struct sljit_jump *jump;
2226
2227 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2228 {
2229 /* The value of -1 must be kept for start_used_ptr! */
2230 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2231 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2232 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2233 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2234 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2235 JUMPHERE(jump);
2236 }
2237 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2238 {
2239 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2240 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2241 JUMPHERE(jump);
2242 }
2243 }
2244
2245 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2246 {
2247 /* Detects if the character has an othercase. */
2248 unsigned int c;
2249
2250 #ifdef SUPPORT_UTF
2251 if (common->utf)
2252 {
2253 GETCHAR(c, cc);
2254 if (c > 127)
2255 {
2256 #ifdef SUPPORT_UCP
2257 return c != UCD_OTHERCASE(c);
2258 #else
2259 return FALSE;
2260 #endif
2261 }
2262 #ifndef COMPILE_PCRE8
2263 return common->fcc[c] != c;
2264 #endif
2265 }
2266 else
2267 #endif
2268 c = *cc;
2269 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2270 }
2271
2272 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2273 {
2274 /* Returns with the othercase. */
2275 #ifdef SUPPORT_UTF
2276 if (common->utf && c > 127)
2277 {
2278 #ifdef SUPPORT_UCP
2279 return UCD_OTHERCASE(c);
2280 #else
2281 return c;
2282 #endif
2283 }
2284 #endif
2285 return TABLE_GET(c, common->fcc, c);
2286 }
2287
2288 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2289 {
2290 /* Detects if the character and its othercase has only 1 bit difference. */
2291 unsigned int c, oc, bit;
2292 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2293 int n;
2294 #endif
2295
2296 #ifdef SUPPORT_UTF
2297 if (common->utf)
2298 {
2299 GETCHAR(c, cc);
2300 if (c <= 127)
2301 oc = common->fcc[c];
2302 else
2303 {
2304 #ifdef SUPPORT_UCP
2305 oc = UCD_OTHERCASE(c);
2306 #else
2307 oc = c;
2308 #endif
2309 }
2310 }
2311 else
2312 {
2313 c = *cc;
2314 oc = TABLE_GET(c, common->fcc, c);
2315 }
2316 #else
2317 c = *cc;
2318 oc = TABLE_GET(c, common->fcc, c);
2319 #endif
2320
2321 SLJIT_ASSERT(c != oc);
2322
2323 bit = c ^ oc;
2324 /* Optimized for English alphabet. */
2325 if (c <= 127 && bit == 0x20)
2326 return (0 << 8) | 0x20;
2327
2328 /* Since c != oc, they must have at least 1 bit difference. */
2329 if (!is_powerof2(bit))
2330 return 0;
2331
2332 #if defined COMPILE_PCRE8
2333
2334 #ifdef SUPPORT_UTF
2335 if (common->utf && c > 127)
2336 {
2337 n = GET_EXTRALEN(*cc);
2338 while ((bit & 0x3f) == 0)
2339 {
2340 n--;
2341 bit >>= 6;
2342 }
2343 return (n << 8) | bit;
2344 }
2345 #endif /* SUPPORT_UTF */
2346 return (0 << 8) | bit;
2347
2348 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2349
2350 #ifdef SUPPORT_UTF
2351 if (common->utf && c > 65535)
2352 {
2353 if (bit >= (1 << 10))
2354 bit >>= 10;
2355 else
2356 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2357 }
2358 #endif /* SUPPORT_UTF */
2359 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2360
2361 #endif /* COMPILE_PCRE[8|16|32] */
2362 }
2363
2364 static void check_partial(compiler_common *common, BOOL force)
2365 {
2366 /* Checks whether a partial matching is occurred. Does not modify registers. */
2367 DEFINE_COMPILER;
2368 struct sljit_jump *jump = NULL;
2369
2370 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2371
2372 if (common->mode == JIT_COMPILE)
2373 return;
2374
2375 if (!force)
2376 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2377 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2378 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2379
2380 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2381 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2382 else
2383 {
2384 if (common->partialmatchlabel != NULL)
2385 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2386 else
2387 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2388 }
2389
2390 if (jump != NULL)
2391 JUMPHERE(jump);
2392 }
2393
2394 static void check_str_end(compiler_common *common, jump_list **end_reached)
2395 {
2396 /* Does not affect registers. Usually used in a tight spot. */
2397 DEFINE_COMPILER;
2398 struct sljit_jump *jump;
2399
2400 if (common->mode == JIT_COMPILE)
2401 {
2402 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2403 return;
2404 }
2405
2406 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2407 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2408 {
2409 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2410 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2411 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2412 }
2413 else
2414 {
2415 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2416 if (common->partialmatchlabel != NULL)
2417 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2418 else
2419 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2420 }
2421 JUMPHERE(jump);
2422 }
2423
2424 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2425 {
2426 DEFINE_COMPILER;
2427 struct sljit_jump *jump;
2428
2429 if (common->mode == JIT_COMPILE)
2430 {
2431 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2432 return;
2433 }
2434
2435 /* Partial matching mode. */
2436 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2437 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2438 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2439 {
2440 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2441 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2442 }
2443 else
2444 {
2445 if (common->partialmatchlabel != NULL)
2446 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2447 else
2448 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2449 }
2450 JUMPHERE(jump);
2451 }
2452
2453 static void read_char(compiler_common *common)
2454 {
2455 /* Reads the character into TMP1, updates STR_PTR.
2456 Does not check STR_END. TMP2 Destroyed. */
2457 DEFINE_COMPILER;
2458 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2459 struct sljit_jump *jump;
2460 #endif
2461
2462 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2463 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2464 if (common->utf)
2465 {
2466 #if defined COMPILE_PCRE8
2467 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2468 #elif defined COMPILE_PCRE16
2469 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2470 #endif /* COMPILE_PCRE[8|16] */
2471 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2472 JUMPHERE(jump);
2473 }
2474 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2475 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2476 }
2477
2478 static void peek_char(compiler_common *common)
2479 {
2480 /* Reads the character into TMP1, keeps STR_PTR.
2481 Does not check STR_END. TMP2 Destroyed. */
2482 DEFINE_COMPILER;
2483 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2484 struct sljit_jump *jump;
2485 #endif
2486
2487 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2488 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2489 if (common->utf)
2490 {
2491 #if defined COMPILE_PCRE8
2492 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2493 #elif defined COMPILE_PCRE16
2494 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2495 #endif /* COMPILE_PCRE[8|16] */
2496 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2497 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2498 JUMPHERE(jump);
2499 }
2500 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2501 }
2502
2503 static void read_char8_type(compiler_common *common)
2504 {
2505 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2506 DEFINE_COMPILER;
2507 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2508 struct sljit_jump *jump;
2509 #endif
2510
2511 #ifdef SUPPORT_UTF
2512 if (common->utf)
2513 {
2514 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2515 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2516 #if defined COMPILE_PCRE8
2517 /* This can be an extra read in some situations, but hopefully
2518 it is needed in most cases. */
2519 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2520 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2521 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2522 JUMPHERE(jump);
2523 #elif defined COMPILE_PCRE16
2524 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2525 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2526 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2527 JUMPHERE(jump);
2528 /* Skip low surrogate if necessary. */
2529 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2530 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2531 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2532 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2533 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2534 #elif defined COMPILE_PCRE32
2535 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2536 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2537 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2538 JUMPHERE(jump);
2539 #endif /* COMPILE_PCRE[8|16|32] */
2540 return;
2541 }
2542 #endif /* SUPPORT_UTF */
2543 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2544 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2545 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2546 /* The ctypes array contains only 256 values. */
2547 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2548 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2549 #endif
2550 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2551 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2552 JUMPHERE(jump);
2553 #endif
2554 }
2555
2556 static void skip_char_back(compiler_common *common)
2557 {
2558 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2559 DEFINE_COMPILER;
2560 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2561 #if defined COMPILE_PCRE8
2562 struct sljit_label *label;
2563
2564 if (common->utf)
2565 {
2566 label = LABEL();
2567 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2568 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2569 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2570 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2571 return;
2572 }
2573 #elif defined COMPILE_PCRE16
2574 if (common->utf)
2575 {
2576 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2577 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2578 /* Skip low surrogate if necessary. */
2579 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2580 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2581 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2582 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2583 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2584 return;
2585 }
2586 #endif /* COMPILE_PCRE[8|16] */
2587 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2588 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2589 }
2590
2591 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2592 {
2593 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2594 DEFINE_COMPILER;
2595
2596 if (nltype == NLTYPE_ANY)
2597 {
2598 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2599 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2600 }
2601 else if (nltype == NLTYPE_ANYCRLF)
2602 {
2603 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2604 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2605 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2606 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2607 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2608 }
2609 else
2610 {
2611 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2612 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2613 }
2614 }
2615
2616 #ifdef SUPPORT_UTF
2617
2618 #if defined COMPILE_PCRE8
2619 static void do_utfreadchar(compiler_common *common)
2620 {
2621 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2622 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2623 DEFINE_COMPILER;
2624 struct sljit_jump *jump;
2625
2626 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2627 /* Searching for the first zero. */
2628 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2629 jump = JUMP(SLJIT_C_NOT_ZERO);
2630 /* Two byte sequence. */
2631 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2632 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2633 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2634 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2635 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2636 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2637 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2638 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2639 JUMPHERE(jump);
2640
2641 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2642 jump = JUMP(SLJIT_C_NOT_ZERO);
2643 /* Three byte sequence. */
2644 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2645 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2646 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2647 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2648 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2649 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2650 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2651 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2652 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2653 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2654 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2655 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2656 JUMPHERE(jump);
2657
2658 /* Four byte sequence. */
2659 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2660 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2661 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2662 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2663 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2664 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2665 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2666 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2667 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2668 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2669 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2670 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2671 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2672 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2673 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2674 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2675 }
2676
2677 static void do_utfreadtype8(compiler_common *common)
2678 {
2679 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2680 of the character (>= 0xc0). Return value in TMP1. */
2681 DEFINE_COMPILER;
2682 struct sljit_jump *jump;
2683 struct sljit_jump *compare;
2684
2685 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2686
2687 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2688 jump = JUMP(SLJIT_C_NOT_ZERO);
2689 /* Two byte sequence. */
2690 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2691 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2692 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2693 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2694 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2695 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2696 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2697 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2698 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2699
2700 JUMPHERE(compare);
2701 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2702 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2703 JUMPHERE(jump);
2704
2705 /* We only have types for characters less than 256. */
2706 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2707 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2708 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2709 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2710 }
2711
2712 #elif defined COMPILE_PCRE16
2713
2714 static void do_utfreadchar(compiler_common *common)
2715 {
2716 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2717 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2718 DEFINE_COMPILER;
2719 struct sljit_jump *jump;
2720
2721 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2722 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2723 /* Do nothing, only return. */
2724 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2725
2726 JUMPHERE(jump);
2727 /* Combine two 16 bit characters. */
2728 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2729 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2730 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2731 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2732 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2733 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2734 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2735 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2736 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2737 }
2738
2739 #endif /* COMPILE_PCRE[8|16] */
2740
2741 #endif /* SUPPORT_UTF */
2742
2743 #ifdef SUPPORT_UCP
2744
2745 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2746 #define UCD_BLOCK_MASK 127
2747 #define UCD_BLOCK_SHIFT 7
2748
2749 static void do_getucd(compiler_common *common)
2750 {
2751 /* Search the UCD record for the character comes in TMP1.
2752 Returns chartype in TMP1 and UCD offset in TMP2. */
2753 DEFINE_COMPILER;
2754
2755 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2756
2757 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2758 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2759 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2760 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2761 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2762 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2763 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2764 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2765 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2766 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2767 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2768 }
2769 #endif
2770
2771 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2772 {
2773 DEFINE_COMPILER;
2774 struct sljit_label *mainloop;
2775 struct sljit_label *newlinelabel = NULL;
2776 struct sljit_jump *start;
2777 struct sljit_jump *end = NULL;
2778 struct sljit_jump *nl = NULL;
2779 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2780 struct sljit_jump *singlechar;
2781 #endif
2782 jump_list *newline = NULL;
2783 BOOL newlinecheck = FALSE;
2784 BOOL readuchar = FALSE;
2785
2786 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2787 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2788 newlinecheck = TRUE;
2789
2790 if (firstline)
2791 {
2792 /* Search for the end of the first line. */
2793 SLJIT_ASSERT(common->first_line_end != 0);
2794 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2795
2796 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2797 {
2798 mainloop = LABEL();
2799 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2800 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2801 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2802 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2803 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2804 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2805 JUMPHERE(end);
2806 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2807 }
2808 else
2809 {
2810 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2811 mainloop = LABEL();
2812 /* Continual stores does not cause data dependency. */
2813 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2814 read_char(common);
2815 check_newlinechar(common, common->nltype, &newline, TRUE);
2816 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2817 JUMPHERE(end);
2818 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2819 set_jumps(newline, LABEL());
2820 }
2821
2822 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2823 }
2824
2825 start = JUMP(SLJIT_JUMP);
2826
2827 if (newlinecheck)
2828 {
2829 newlinelabel = LABEL();
2830 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2831 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2832 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2833 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2834 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2835 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2836 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2837 #endif
2838 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2839 nl = JUMP(SLJIT_JUMP);
2840 }
2841
2842 mainloop = LABEL();
2843
2844 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2845 #ifdef SUPPORT_UTF
2846 if (common->utf) readuchar = TRUE;
2847 #endif
2848 if (newlinecheck) readuchar = TRUE;
2849
2850 if (readuchar)
2851 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2852
2853 if (newlinecheck)
2854 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2855
2856 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2857 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2858 #if defined COMPILE_PCRE8
2859 if (common->utf)
2860 {
2861 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2862 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2863 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2864 JUMPHERE(singlechar);
2865 }
2866 #elif defined COMPILE_PCRE16
2867 if (common->utf)
2868 {
2869 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2870 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2871 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2872 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2873 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2874 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2875 JUMPHERE(singlechar);
2876 }
2877 #endif /* COMPILE_PCRE[8|16] */
2878 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2879 JUMPHERE(start);
2880
2881 if (newlinecheck)
2882 {
2883 JUMPHERE(end);
2884 JUMPHERE(nl);
2885 }
2886
2887 return mainloop;
2888 }
2889
2890 #define MAX_N_CHARS 3
2891
2892 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2893 {
2894 DEFINE_COMPILER;
2895 struct sljit_label *start;
2896 struct sljit_jump *quit;
2897 pcre_uint32 chars[MAX_N_CHARS * 2];
2898 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2899 int location = 0;
2900 pcre_int32 len, c, bit, caseless;
2901 int must_stop;
2902
2903 /* We do not support alternatives now. */
2904 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2905 return FALSE;
2906
2907 while (TRUE)
2908 {
2909 caseless = 0;
2910 must_stop = 1;
2911 switch(*cc)
2912 {
2913 case OP_CHAR:
2914 must_stop = 0;
2915 cc++;
2916 break;
2917
2918 case OP_CHARI:
2919 caseless = 1;
2920 must_stop = 0;
2921 cc++;
2922 break;
2923
2924 case OP_SOD:
2925 case OP_SOM:
2926 case OP_SET_SOM:
2927 case OP_NOT_WORD_BOUNDARY:
2928 case OP_WORD_BOUNDARY:
2929 case OP_EODN:
2930 case OP_EOD:
2931 case OP_CIRC:
2932 case OP_CIRCM:
2933 case OP_DOLL:
2934 case OP_DOLLM:
2935 /* Zero width assertions. */
2936 cc++;
2937 continue;
2938
2939 case OP_PLUS:
2940 case OP_MINPLUS:
2941 case OP_POSPLUS:
2942 cc++;
2943 break;
2944
2945 case OP_EXACT:
2946 cc += 1 + IMM2_SIZE;
2947 break;
2948
2949 case OP_PLUSI:
2950 case OP_MINPLUSI:
2951 case OP_POSPLUSI:
2952 caseless = 1;
2953 cc++;
2954 break;
2955
2956 case OP_EXACTI:
2957 caseless = 1;
2958 cc += 1 + IMM2_SIZE;
2959 break;
2960
2961 default:
2962 must_stop = 2;
2963 break;
2964 }
2965
2966 if (must_stop == 2)
2967 break;
2968
2969 len = 1;
2970 #ifdef SUPPORT_UTF
2971 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2972 #endif
2973
2974 if (caseless && char_has_othercase(common, cc))
2975 {
2976 caseless = char_get_othercase_bit(common, cc);
2977 if (caseless == 0)
2978 return FALSE;
2979 #ifdef COMPILE_PCRE8
2980 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2981 #else
2982 if ((caseless & 0x100) != 0)
2983 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2984 else
2985 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2986 #endif
2987 }
2988 else
2989 caseless = 0;
2990
2991 while (len > 0 && location < MAX_N_CHARS * 2)
2992 {
2993 c = *cc;
2994 bit = 0;
2995 if (len == (caseless & 0xff))
2996 {
2997 bit = caseless >> 8;
2998 c |= bit;
2999 }
3000
3001 chars[location] = c;
3002 chars[location + 1] = bit;
3003
3004 len--;
3005 location += 2;
3006 cc++;
3007 }
3008
3009 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3010 break;
3011 }
3012
3013 /* At least two characters are required. */
3014 if (location < 2 * 2)
3015 return FALSE;
3016
3017 if (firstline)
3018 {
3019 SLJIT_ASSERT(common->first_line_end != 0);
3020 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3021 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3022 }
3023 else
3024 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3025
3026 start = LABEL();
3027 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3028
3029 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3030 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3031 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3032 if (chars[1] != 0)
3033 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3034 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3035 if (location > 2 * 2)
3036 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3037 if (chars[3] != 0)
3038 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3039 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3040 if (location > 2 * 2)
3041 {
3042 if (chars[5] != 0)
3043 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3044 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3045 }
3046 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3047
3048 JUMPHERE(quit);
3049
3050 if (firstline)
3051 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3052 else
3053 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3054 return TRUE;
3055 }
3056
3057 #undef MAX_N_CHARS
3058
3059 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3060 {
3061 DEFINE_COMPILER;
3062 struct sljit_label *start;
3063 struct sljit_jump *quit;
3064 struct sljit_jump *found;
3065 pcre_uchar oc, bit;
3066
3067 if (firstline)
3068 {
3069 SLJIT_ASSERT(common->first_line_end != 0);
3070 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3071 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3072 }
3073
3074 start = LABEL();
3075 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3076 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3077
3078 oc = first_char;
3079 if (caseless)
3080 {
3081 oc = TABLE_GET(first_char, common->fcc, first_char);
3082 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3083 if (first_char > 127 && common->utf)
3084 oc = UCD_OTHERCASE(first_char);
3085 #endif
3086 }
3087 if (first_char == oc)
3088 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3089 else
3090 {
3091 bit = first_char ^ oc;
3092 if (is_powerof2(bit))
3093 {
3094 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3095 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3096 }
3097 else
3098 {
3099 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3100 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3101 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3102 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3103 found = JUMP(SLJIT_C_NOT_ZERO);
3104 }
3105 }
3106
3107 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3108 JUMPTO(SLJIT_JUMP, start);
3109 JUMPHERE(found);
3110 JUMPHERE(quit);
3111
3112 if (firstline)
3113 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3114 }
3115
/* Emits code that advances STR_PTR to a position immediately following a
newline, stopping early when STR_END is reached. Nothing is scanned when
STR_PTR is not past jit_arguments.str.

common    - compiler state; nltype and newline select the newline convention.
firstline - when TRUE, STR_END is replaced by the value stored at
            common->first_line_end while scanning. The real STR_END is parked
            in TMP3 and is restored from TMP3 on every exit path. */
3116 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3117 {
3118 DEFINE_COMPILER;
3119 struct sljit_label *loop;
3120 struct sljit_jump *lastchar;
3121 struct sljit_jump *firstchar;
3122 struct sljit_jump *quit;
3123 struct sljit_jump *foundcr = NULL;
3124 struct sljit_jump *notfoundnl;
3125 jump_list *newline = NULL;
3126
3127 if (firstline)
3128 {
3129 SLJIT_ASSERT(common->first_line_end != 0);
/* Park the real end of subject in TMP3 for the duration of the scan. */
3130 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3131 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3132 }
3133
/* Fixed two-character newline: compare the two code units before STR_PTR
against the high and low byte of common->newline. */
3134 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3135 {
3136 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3137 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3138 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3139 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3140 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3141
/* TMP2 = 1 if at least two code units lie between 'begin' and STR_PTR, else 0.
Scaled to bytes and subtracted, this steps STR_PTR back by one code unit so
that the first loop iteration inspects the two units before the original
position. */
3142 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3143 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3144 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3145 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3146 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3147 #endif
3148 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3149
3150 loop = LABEL();
3151 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3152 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3153 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3154 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3155 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3156 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3157
3158 JUMPHERE(quit);
3159 JUMPHERE(firstchar);
3160 JUMPHERE(lastchar);
3161
/* Restore the real STR_END from TMP3, where it was saved above. This path
only uses TMP1 and TMP2, so TMP3 is still intact. (Previously this read the
POSSESSIVE0 local slot, which this function never stores to.) */
3162 if (firstline)
3163 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3164 return;
3165 }
3166
/* General case: re-read the character before STR_PTR and test it with
check_newlinechar; keep reading forward while it is not a newline. */
3167 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3168 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3169 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3170 skip_char_back(common);
3171
3172 loop = LABEL();
3173 read_char(common);
3174 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* CR is handled separately for the ANY / ANYCRLF conventions so that a
following LF can be consumed as part of the same newline. */
3175 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3176 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3177 check_newlinechar(common, common->nltype, &newline, FALSE);
3178 set_jumps(newline, loop);
3179
3180 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3181 {
3182 quit = JUMP(SLJIT_JUMP);
3183 JUMPHERE(foundcr);
3184 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* TMP1 = 1 if the next code unit is LF, else 0; scaled to bytes and added to
STR_PTR so that CR LF is skipped as a single newline. */
3185 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3186 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3187 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3188 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3189 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3190 #endif
3191 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3192 JUMPHERE(notfoundnl);
3193 JUMPHERE(quit);
3194 }
3195 JUMPHERE(lastchar);
3196 JUMPHERE(firstchar);
3197
3198 if (firstline)
3199 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3200 }
3201
/* Forward declaration: check_class_ranges is defined further down but is
needed by fast_forward_start_bits. */
3202 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3203
/* Emits a scan loop that advances STR_PTR until the code unit at STR_PTR has
its bit set in the 32-byte bitmap located at address start_bits, or until
STR_END is reached.

common     - compiler state.
start_bits - address of the 32-byte bitmap, passed as an integer.
firstline  - when TRUE, STR_END is replaced by the value stored at
             common->first_line_end during the scan. The real value is kept
             in RETURN_ADDR here because TMP3 is used to hold the character
             in UTF mode. */
3204 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3205 {
3206 DEFINE_COMPILER;
3207 struct sljit_label *start;
3208 struct sljit_jump *quit;
3209 struct sljit_jump *found = NULL;
3210 jump_list *matches = NULL;
3211 pcre_uint8 inverted_start_bits[32];
3212 int i;
3213 #ifndef COMPILE_PCRE8
3214 struct sljit_jump *jump;
3215 #endif
3216
/* Build the complement of the bitmap; it is only consulted at compile time
by check_class_ranges below. */
3217 for (i = 0; i < 32; ++i)
3218 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3219
3220 if (firstline)
3221 {
3222 SLJIT_ASSERT(common->first_line_end != 0);
3223 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3224 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3225 }
3226
3227 start = LABEL();
3228 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3229 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3230 #ifdef SUPPORT_UTF
/* Keep a copy of the code unit: the tests below modify TMP1, and the UTF
length calculation further down needs the original value. */
3231 if (common->utf)
3232 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3233 #endif
3234
/* First try the compact range test on the inverted bitmap; the jumps it
collects in 'matches' are bound to the exit label at the end of this
function. If the bitmap cannot be expressed that way, fall back to an
explicit bit lookup in the original bitmap. */
3235 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3236 {
3237 #ifndef COMPILE_PCRE8
/* Clamp values of 255 and above to 255 so the table index stays in range. */
3238 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3239 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3240 JUMPHERE(jump);
3241 #endif
/* TMP2 = 1 << (c & 7); TMP1 = bitmap byte at index (c >> 3); exit the scan
when the selected bit is set. */
3242 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3243 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3244 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3245 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3246 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3247 found = JUMP(SLJIT_C_NOT_ZERO);
3248 }
3249
3250 #ifdef SUPPORT_UTF
3251 if (common->utf)
3252 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3253 #endif
/* No match at this position: step over the current character. */
3254 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3255 #ifdef SUPPORT_UTF
3256 #if defined COMPILE_PCRE8
3257 if (common->utf)
3258 {
/* UTF-8: for a lead byte of 0xc0 or above, add the extra length looked up in
utf8_table4 (indexed by lead byte minus 0xc0). */
3259 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3260 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3261 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3262 }
3263 #elif defined COMPILE_PCRE16
3264 if (common->utf)
3265 {
/* UTF-16: if (unit & 0xfc00) == 0xd800 the unit is a high surrogate, so skip
one more 16-bit unit (flag shifted left by 1 gives 2 bytes). */
3266 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3267 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3268 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3269 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3270 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3271 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3272 }
3273 #endif /* COMPILE_PCRE[8|16] */
3274 #endif /* SUPPORT_UTF */
3275 JUMPTO(SLJIT_JUMP, start);
/* Common exit: every way of leaving the loop lands here. */
3276 if (found != NULL)
3277 JUMPHERE(found);
3278 if (matches != NULL)
3279 set_jumps(matches, LABEL());
3280 JUMPHERE(quit);
3281
3282 if (firstline)
3283 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3284 }
3285
/* Emits a forward search for req_char (or its other case when caseless is
set) in the subject, caching the position where it was found in the local
slot common->req_char_ptr.

common        - compiler state.
req_char      - the code unit to look for.
caseless      - also accept the other-case form of req_char.
has_firstchar - when TRUE the search starts one code unit after STR_PTR,
                otherwise at STR_PTR.

Returns the jump taken when the end of the subject is reached without
finding the character; the caller is responsible for binding it. */
3286 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3287 {
3288 DEFINE_COMPILER;
3289 struct sljit_label *loop;
3290 struct sljit_jump *toolong;
3291 struct sljit_jump *alreadyfound;
3292 struct sljit_jump *found;
3293 struct sljit_jump *foundoc = NULL;
3294 struct sljit_jump *notfound;
3295 pcre_uint32 oc, bit;
3296
3297 SLJIT_ASSERT(common->req_char_ptr != 0);
/* Skip the search entirely when STR_PTR + REQ_BYTE_MAX is still below
STR_END (toolong), or when STR_PTR is still below the cached position
(alreadyfound). */
3298 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3299 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3300 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3301 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3302
/* TMP1 is the search cursor. */
3303 if (has_firstchar)
3304 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3305 else
3306 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3307
3308 loop = LABEL();
3309 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3310
3311 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Work out the other-case form at compile time. */
3312 oc = req_char;
3313 if (caseless)
3314 {
3315 oc = TABLE_GET(req_char, common->fcc, req_char);
3316 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3317 if (req_char > 127 && common->utf)
3318 oc = UCD_OTHERCASE(req_char);
3319 #endif
3320 }
3321 if (req_char == oc)
3322 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3323 else
3324 {
3325 bit = req_char ^ oc;
/* When the two cases differ in exactly one bit, force that bit on and do a
single comparison; otherwise compare against each case in turn. */
3326 if (is_powerof2(bit))
3327 {
3328 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3329 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3330 }
3331 else
3332 {
3333 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3334 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3335 }
3336 }
3337 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3338 JUMPTO(SLJIT_JUMP, loop);
3339
3340 JUMPHERE(found);
3341 if (foundoc)
3342 JUMPHERE(foundoc);
/* Remember where the character was found. */
3343 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3344 JUMPHERE(alreadyfound);
3345 JUMPHERE(toolong);
3346 return notfound;
3347 }
3348
/* Emits a fast-call helper (return address kept in RETURN_ADDR) that walks a
list of records starting at STACK_TOP and copies saved values back into the
local variable area. TMP1 is the cursor, TMP3 holds the base address of the
locals, TMP2 holds the first word of the current record:

  word > 0 : offset into the locals; the next two words are copied to
             locals[offset] and locals[offset + sizeof(sljit_sw)];
             the record is three words long.
  word < 0 : its negation is the offset; the next word is copied to
             locals[offset]; the record is two words long.
  word == 0: end of list, the helper returns. */
3349 static void do_revertframes(compiler_common *common)
3350 {
3351 DEFINE_COMPILER;
3352 struct sljit_jump *jump;
3353 struct sljit_label *mainloop;
3354
3355 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3356 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3357 GET_LOCAL_BASE(TMP3, 0, 0);
3358
3359 /* Drop frames until we reach STACK_TOP. */
3360 mainloop = LABEL();
3361 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Compare the record header with zero; the signed flags set here are used by
both conditional jumps below. */
3362 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3363 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3364
/* Positive header: restore two words. */
3365 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3366 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3367 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3368 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3369 JUMPTO(SLJIT_JUMP, mainloop);
3370
3371 JUMPHERE(jump);
3372 jump = JUMP(SLJIT_C_SIG_LESS);
3373 /* End of dropping frames. */
3374 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3375
/* Negative header: restore a single word. */
3376 JUMPHERE(jump);
3377 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3378 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3379 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3380 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3381 JUMPTO(SLJIT_JUMP, mainloop);
3382 }
3383
/* Emits a fast-call helper that tests whether STR_PTR lies on a word boundary.
The return address is kept in the LOCALS0 slot. The helper computes a 0/1
"is word character" value for the character before STR_PTR (stored in
LOCALS1; 0 when STR_PTR is at or before jit_arguments.begin) and for the
character at STR_PTR (in TMP2; 0 when the end-of-subject check jumps out),
then XORs the two with SLJIT_SET_E so the caller can branch on the flags. */
3384 static void check_wordboundary(compiler_common *common)
3385 {
3386 DEFINE_COMPILER;
3387 struct sljit_jump *skipread;
3388 jump_list *skipread_list = NULL;
3389 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3390 struct sljit_jump *jump;
3391 #endif
3392
/* The non-UCP paths below shift the ctype byte right by 4 to isolate the word
bit, so ctype_word must be 0x10. */
3393 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3394
3395 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3396 /* Get type of the previous char, and put it to LOCALS1. */
3397 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3398 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3399 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3400 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3401 skip_char_back(common);
3402 check_start_used_ptr(common);
3403 read_char(common);
3404
3405 /* Testing char type. */
3406 #ifdef SUPPORT_UCP
3407 if (common->use_ucp)
3408 {
/* TMP2 starts at 1 so that an underscore counts as a word character without
a property lookup. Otherwise the value left in TMP1 by the getucd call
(presumably the character type - confirm against getucd) is tested against
the ranges ucp_Ll..ucp_Lu and ucp_Nd..ucp_No using subtract-and-compare. */
3409 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3410 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3411 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3412 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3413 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3414 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3415 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3416 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3417 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3418 JUMPHERE(jump);
3419 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3420 }
3421 else
3422 #endif
3423 {
/* Table-driven test: characters above 255 skip the lookup and keep the zero
already stored in LOCALS1. */
3424 #ifndef COMPILE_PCRE8
3425 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3426 #elif defined SUPPORT_UTF
3427 /* Here LOCALS1 has already been zeroed. */
3428 jump = NULL;
3429 if (common->utf)
3430 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3431 #endif /* COMPILE_PCRE8 */
3432 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3433 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3434 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3435 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3436 #ifndef COMPILE_PCRE8
3437 JUMPHERE(jump);
3438 #elif defined SUPPORT_UTF
3439 if (jump != NULL)
3440 JUMPHERE(jump);
3441 #endif /* COMPILE_PCRE8 */
3442 }
3443 JUMPHERE(skipread);
3444
/* Now classify the character at STR_PTR into TMP2. TMP2 is preset to 0 for
the case where check_str_end jumps straight to the end. */
3445 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3446 check_str_end(common, &skipread_list);
3447 peek_char(common);
3448
3449 /* Testing char type. This is a code duplication. */
3450 #ifdef SUPPORT_UCP
3451 if (common->use_ucp)
3452 {
3453 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3454 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3455 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3456 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3457 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3458 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3459 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3460 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3461 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3462 JUMPHERE(jump);
3463 }
3464 else
3465 #endif
3466 {
3467 #ifndef COMPILE_PCRE8
3468 /* TMP2 may be destroyed by peek_char. */
3469 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3470 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3471 #elif defined SUPPORT_UTF
3472 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3473 jump = NULL;
3474 if (common->utf)
3475 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3476 #endif
3477 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3478 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3479 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3480 #ifndef COMPILE_PCRE8
3481 JUMPHERE(jump);
3482 #elif defined SUPPORT_UTF
3483 if (jump != NULL)
3484 JUMPHERE(jump);
3485 #endif /* COMPILE_PCRE8 */
3486 }
3487 set_jumps(skipread_list, LABEL());
3488
/* Flags reflect (next-is-word XOR previous-is-word); no register receives the
result. */
3489 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3490 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3491 }
3492
3493 /*
3494 range format:
3495
3496 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3497 ranges[1] = first bit (0 or 1)
3498 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3499 */
3500
/* Emits a compact test of the character in TMP1 against a set described in
the range format above, adding the "character rejected" jumps to backtracks.

common     - compiler state.
ranges     - the description; only lengths 1, 2 and some shapes of length 4
             are supported.
backtracks - list that receives the emitted jumps.
readch     - when TRUE, read_char() is emitted first to load TMP1.

Returns TRUE if code was emitted, FALSE if the description is invalid or has
no compact form (nothing is emitted in that case). Note that TMP1 is modified
by the length 2 and the second length 4 form. */
3501 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3502 {
3503 DEFINE_COMPILER;
3504 struct sljit_jump *jump;
3505
3506 if (ranges[0] < 0)
3507 return FALSE;
3508
3509 switch(ranges[0])
3510 {
/* One change point: a single threshold comparison. */
3511 case 1:
3512 if (readch)
3513 read_char(common);
3514 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3515 return TRUE;
3516
/* Two change points: one interval [ranges[2], ranges[3]), tested by
subtracting the lower bound and comparing against the width. */
3517 case 2:
3518 if (readch)
3519 read_char(common);
3520 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3521 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3522 return TRUE;
3523
3524 case 4:
/* Two isolated single characters: two equality tests. */
3525 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3526 {
3527 if (readch)
3528 read_char(common);
3529 if (ranges[1] != 0)
3530 {
3531 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3532 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3533 }
3534 else
3535 {
3536 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3537 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3538 JUMPHERE(jump);
3539 }
3540 return TRUE;
3541 }
/* Two intervals of equal width whose start positions differ by a power of
two: OR that difference into the character to fold the first interval onto
the second, then do a single interval test. */
3542 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3543 {
3544 if (readch)
3545 read_char(common);
3546 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3547 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3548 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3549 return TRUE;
3550 }
3551 return FALSE;
3552
3553 default:
3554 return FALSE;
3555 }
3556 }
3557
/* Converts one flag of the 256-entry ctypes table into the range format used
by check_ranges.

common - compiler state; common->ctypes is the address of the table.
flag   - the ctype bit to extract.
ranges - output array; must have room for 2 + MAX_RANGE_SIZE ints.

ranges[0] is set to -1 up front and is only overwritten with the real length
on success, so an early return (more than MAX_RANGE_SIZE change points) leaves
the description marked invalid. */
3558 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3559 {
3560 int i, bit, length;
3561 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3562
/* 'bit' is the masked value currently in effect: either 0 or flag. */
3563 bit = ctypes[0] & flag;
3564 ranges[0] = -1;
3565 ranges[1] = bit != 0 ? 1 : 0;
3566 length = 0;
3567
/* Record every index at which the masked value flips. */
3568 for (i = 1; i < 256; i++)
3569 if ((ctypes[i] & flag) != bit)
3570 {
3571 if (length >= MAX_RANGE_SIZE)
3572 return;
3573 ranges[2 + length] = i;
3574 length++;
3575 bit ^= flag;
3576 }
3577
/* If the flag is still set at index 255, close the last interval at 256. */
3578 if (bit != 0)
3579 {
3580 if (length >= MAX_RANGE_SIZE)
3581 return;
3582 ranges[2 + length] = 256;
3583 length++;
3584 }
3585 ranges[0] = length;
3586 }
3587
/* Converts a 256-bit class bitmap into the range format and hands it to
check_ranges (with readch FALSE, so the character must already be in TMP1).

common     - compiler state.
bits       - 32-byte bitmap; bit (i & 7) of bits[i >> 3] describes value i.
nclass     - together with the value of the last bit, decides whether a
             closing change point at 256 is appended (see below).
backtracks - list that receives the jumps emitted by check_ranges.

Returns FALSE without emitting anything if the bitmap has more than
MAX_RANGE_SIZE change points or check_ranges has no compact form for it. */
3588 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3589 {
3590 int ranges[2 + MAX_RANGE_SIZE];
3591 pcre_uint8 bit, cbit, all;
3592 int i, byte, length = 0;
3593
3594 bit = bits[0] & 0x1;
3595 ranges[1] = bit;
3596 /* Can be 0 or 255. */
3597 all = -bit;
3598
3599 for (i = 0; i < 256; )
3600 {
3601 byte = i >> 3;
/* On a byte boundary, a byte whose eight bits all equal the current bit
contains no change point and is skipped in one step. */
3602 if ((i & 0x7) == 0 && bits[byte] == all)
3603 i += 8;
3604 else
3605 {
3606 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3607 if (cbit != bit)
3608 {
3609 if (length >= MAX_RANGE_SIZE)
3610 return FALSE;
3611 ranges[2 + length] = i;
3612 length++;
3613 bit = cbit;
3614 all = -cbit;
3615 }
3616 i++;
3617 }
3618 }
3619
/* Append a final change point at 256 when the last bit is 0 and nclass is
set, or the last bit is 1 and nclass is clear. */
3620 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3621 {
3622 if (length >= MAX_RANGE_SIZE)
3623 return FALSE;
3624 ranges[2 + length] = 256;
3625 length++;
3626 }
3627 ranges[0] = length;
3628
3629 return check_ranges(common, ranges, backtracks, FALSE);
3630 }
3631
/* Emits a fast-call helper (return address in RETURN_ADDR) that tests the
character in TMP1 against 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in
16/32-bit builds or when common->utf is set in the 8-bit build. The 0/1
result is accumulated in TMP2 and the final OR sets the flags (SLJIT_SET_E). */
3632 static void check_anynewline(compiler_common *common)
3633 {
3634 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* TMP1 itself is also modified (biased by 0x0a, and OR-ed with 1 on the
wide-character path). */
3635 DEFINE_COMPILER;
3636
3637 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3638
/* Bias by 0x0a so that 0x0a..0x0d becomes the single range 0..3. */
3639 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3640 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3641 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3642 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3643 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3644 #ifdef COMPILE_PCRE8
3645 if (common->utf)
3646 {
3647 #endif
/* Fold in the 0x85 result, then set the low bit so that 0x2028 and 0x2029
(both biased by 0x0a) compare equal to the same constant. */
3648 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3649 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3650 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3651 #ifdef COMPILE_PCRE8
3652 }
3653 #endif
3654 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and leave the flags describing TMP2. */
3655 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3656 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3657 }
3658
/* Emits a fast-call helper (return address in RETURN_ADDR) that tests the
character in TMP1 against 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
0x2000-0x200A, 0x202f, 0x205f and 0x3000 in 16/32-bit builds or when
common->utf is set in the 8-bit build. The 0/1 result is accumulated in TMP2
and the final OR sets the flags (SLJIT_SET_E). */
3659 static void check_hspace(compiler_common *common)
3660 {
3661 /* Check whether TMP1 contains a horizontal whitespace character. TMP2 destroyed. */
3662 DEFINE_COMPILER;
3663
3664 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3665
/* Each step compares TMP1 with one constant and ORs the previous comparison
result into TMP2. */
3666 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3667 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3668 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3669 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3670 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3671 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3672 #ifdef COMPILE_PCRE8
3673 if (common->utf)
3674 {
3675 #endif
3676 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3677 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3678 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3679 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3680 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is biased by 0x2000 (so TMP1 is modified as well), which
turns 0x2000..0x200A into the range 0..0x0A. */
3681 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3682 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3683 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3684 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3685 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3686 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3687 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3688 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3689 #ifdef COMPILE_PCRE8
3690 }
3691 #endif
3692 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and leave the flags describing TMP2. */
3693 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3694
3695 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3696 }
3697
/* Emits a fast-call helper (return address in RETURN_ADDR) that tests the
character in TMP1 against 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in
16/32-bit builds or when common->utf is set in the 8-bit build. The 0/1
result is accumulated in TMP2 and the final OR sets the flags (SLJIT_SET_E).
The emitted tests cover the same code points as check_anynewline. */
3698 static void check_vspace(compiler_common *common)
3699 {
3700 /* Check whether TMP1 contains a vertical whitespace character. TMP2 destroyed. */
/* TMP1 itself is also modified (biased by 0x0a, and OR-ed with 1 on the
wide-character path). */
3701 DEFINE_COMPILER;
3702
3703 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3704
/* Bias by 0x0a so that 0x0a..0x0d becomes the single range 0..3. */
3705 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3706 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3707 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3708 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3709 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3710 #ifdef COMPILE_PCRE8
3711 if (common->utf)
3712 {
3713 #endif
/* NOTE(review): the SLJIT_SET_E on the next line looks redundant - the flags
are set again by the SUB two lines below, and check_anynewline omits it. */
3714 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3715 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3716 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3717 #ifdef COMPILE_PCRE8
3718 }
3719 #endif
3720 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and leave the flags describing TMP2. */
3721 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3722
3723 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3724 }
3725
/* The comparison helpers below borrow the STR_END and STACK_TOP registers as
scratch space for the two characters being compared; each helper saves and
restores them. */
3726 #define CHAR1 STR_END
3727 #define CHAR2 STACK_TOP
3728
/* Emits a fast-call helper (return address in RETURN_ADDR) that compares two
character sequences code unit by code unit.

On entry TMP1 addresses the first sequence and TMP2 holds the length in bytes;
STR_PTR is first moved back by TMP2 and then addresses the second sequence.
TMP2 is decremented after every equal pair and the loop ends when it reaches
zero; on a mismatch the loop is left before the decrement. STR_PTR is advanced
as characters are consumed. CHAR1 and CHAR2 are preserved via TMP3 and the
LOCALS0 slot. */
3729 static void do_casefulcmp(compiler_common *common)
3730 {
3731 DEFINE_COMPILER;
3732 struct sljit_jump *jump;
3733 struct sljit_label *label;
3734
3735 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3736 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3737 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3738 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved back one character because the loads in the loop
use an offset of one character (MOVU_UCHAR presumably also writes the updated
address back to the base register - confirm against the sljit MOVU ops). */
3739 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3740 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3741
3742 label = LABEL();
3743 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3744 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3745 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3746 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3747 JUMPTO(SLJIT_C_NOT_ZERO, label);
3748
/* Common exit for both match and mismatch: undo the one-character bias on
STR_PTR and restore the borrowed registers. */
3749 JUMPHERE(jump);
3750 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3751 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3752 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3753 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3754 }
3755
/* The caseless helper additionally borrows STACK_LIMIT to hold the address of
the case-folding table. */
3756 #define LCC_TABLE STACK_LIMIT
3757
/* Emits a fast-call helper (return address in RETURN_ADDR) with the same
register protocol as do_casefulcmp (TMP1 = first sequence, TMP2 = length in
bytes, STR_PTR moved back by TMP2 to address the second sequence), except that
each code unit is first mapped through the table at common->lcc before the
comparison. In non-8-bit builds, code units above 255 bypass the table and
are compared unchanged. LCC_TABLE, CHAR1 and CHAR2 are preserved via TMP3 and
the LOCALS0 / LOCALS1 slots. */
3758 static void do_caselesscmp(compiler_common *common)
3759 {
3760 DEFINE_COMPILER;
3761 struct sljit_jump *jump;
3762 struct sljit_label *label;
3763
3764 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3765 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3766
3767 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3768 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3769 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3770 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Bias both pointers back by one character; the loads below use an offset of
one character. */
3771 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3772 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3773
3774 label = LABEL();
3775 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3776 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3777 #ifndef COMPILE_PCRE8
3778 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3779 #endif
3780 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3781 #ifndef COMPILE_PCRE8
3782 JUMPHERE(jump);
3783 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3784 #endif
3785 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3786 #ifndef COMPILE_PCRE8
3787 JUMPHERE(jump);
3788 #endif
3789 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3790 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3791 JUMPTO(SLJIT_C_NOT_ZERO, label);
3792
/* Common exit for both match and mismatch: undo the bias and restore the
borrowed registers. */
3793 JUMPHERE(jump);
3794 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3795 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3796 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3797 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3798 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3799 }
3800
3801 #undef LCC_TABLE
3802 #undef CHAR1
3803 #undef CHAR2
3804
3805 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3806
/* Ordinary C helper that compares two UTF sequences caselessly using the
Unicode property tables.

src1, end1 - the first sequence, [src1, end1).
args       - the second sequence starts at args->uchar_ptr and must not run
             past args->end.

Returns:
  (pcre_uchar*)1 - the second sequence ended before the first was exhausted;
  NULL           - a character pair did not match;
  otherwise      - the position in the second sequence just after the
                   compared characters. */
3807 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3808 {
3809 /* This function would be ineffective to do in JIT level. */
3810 pcre_uint32 c1, c2;
3811 const pcre_uchar *src2 = args->uchar_ptr;
3812 const pcre_uchar *end2 = args->end;
3813 const ucd_record *ur;
3814 const pcre_uint32 *pp;
3815
3816 while (src1 < end1)
3817 {
3818 if (src2 >= end2)
3819 return (pcre_uchar*)1;
3820 GETCHARINC(c1, src1);
3821 GETCHARINC(c2, src2);
3822 ur = GET_UCD(c2);
/* Accept an exact match or a match with c2's other case (other_case is an
offset added to c2). Failing that, search c2's caseless set. */
3823 if (c1 != c2 && c1 != c2 + ur->other_case)
3824 {
3825 pp = PRIV(ucd_caseless_sets) + ur->caseset;
/* The scan stops with failure at the first entry greater than c1, so the set
is presumably sorted ascending and ends with a value larger than any
character - TODO confirm against the ucd_caseless_sets table. */
3826 for (;;)
3827 {
3828 if (c1 < *pp) return NULL;
3829 if (c1 == *pp++) break;
3830 }
3831 }
3832 }
3833 return src2;
3834 }
3835
3836 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3837
/* Emits the comparison of one literal character of the pattern against the
subject. The subject code units are addressed relative to STR_PTR at offset
-context->length, and context->length is reduced as code units are consumed.

common     - compiler state.
caseless   - compare without regard to case when the character has another
             case.
cc         - points to the literal character in the pattern (one code unit,
             or several in UTF mode).
context    - running state shared across consecutive calls: remaining length,
             the register that receives the next load (sourcereg, -1 before
             the first call) and, when unaligned reads are available, the
             code units collected so far for a combined wide comparison.
backtracks - list that receives the "mismatch" jumps.

Returns cc advanced past the character that was handled. */
3838 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3839 compare_context* context, jump_list **backtracks)
3840 {
3841 DEFINE_COMPILER;
3842 unsigned int othercasebit = 0;
3843 pcre_uchar *othercasechar = NULL;
3844 #ifdef SUPPORT_UTF
3845 int utflength;
3846 #endif
3847
3848 if (caseless && char_has_othercase(common, cc))
3849 {
/* char_get_othercase_bit packs two things into one value: the low bits hold
the differing bit, the upper bits the index of the code unit that carries it.
They are split below into othercasechar (pointer to that unit) and
othercasebit (mask to OR in). */
3850 othercasebit = char_get_othercase_bit(common, cc);
3851 SLJIT_ASSERT(othercasebit);
3852 /* Extracting bit difference info. */
3853 #if defined COMPILE_PCRE8
3854 othercasechar = cc + (othercasebit >> 8);
3855 othercasebit &= 0xff;
3856 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3857 /* Note that this code only handles characters in the BMP. If there
3858 ever are characters outside the BMP whose othercase differs in only one
3859 bit from itself (there currently are none), this code will need to be
3860 revised for COMPILE_PCRE32. */
3861 othercasechar = cc + (othercasebit >> 9);
/* Bit 0x100 selects the high byte of the code unit for the mask. */
3862 if ((othercasebit & 0x100) != 0)
3863 othercasebit = (othercasebit & 0xff) << 8;
3864 else
3865 othercasebit &= 0xff;
3866 #endif /* COMPILE_PCRE[8|16|32] */
3867 }
3868
/* First call for this context: preload the first chunk into TMP1 and direct
the next load to TMP2, so that loads and compares alternate between the two
registers. */
3869 if (context->sourcereg == -1)
3870 {
3871 #if defined COMPILE_PCRE8
3872 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3873 if (context->length >= 4)
3874 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3875 else if (context->length >= 2)
3876 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3877 else
3878 #endif
3879 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3880 #elif defined COMPILE_PCRE16
3881 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3882 if (context->length >= 4)
3883 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3884 else
3885 #endif
3886 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3887 #elif defined COMPILE_PCRE32
3888 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3889 #endif /* COMPILE_PCRE[8|16|32] */
3890 context->sourcereg = TMP2;
3891 }
3892
/* In UTF mode one character may span several code units; the loop below runs
once per code unit. Without SUPPORT_UTF the body runs exactly once. */
3893 #ifdef SUPPORT_UTF
3894 utflength = 1;
3895 if (common->utf && HAS_EXTRALEN(*cc))
3896 utflength += GET_EXTRALEN(*cc);
3897
3898 do
3899 {
3900 #endif
3901
3902 context->length -= IN_UCHARS(1);
3903 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3904
3905 /* Unaligned read is supported. */
/* Collect the expected code unit (with the case bit forced on) in context->c
and the matching OR-mask in context->oc. */
3906 if (othercasebit != 0 && othercasechar == cc)
3907 {
3908 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3909 context->oc.asuchars[context->ucharptr] = othercasebit;
3910 }
3911 else
3912 {
3913 context->c.asuchars[context->ucharptr] = *cc;
3914 context->oc.asuchars[context->ucharptr] = 0;
3915 }
3916 context->ucharptr++;
3917
/* Flush once a full chunk has been collected or the sequence ends. */
3918 #if defined COMPILE_PCRE8
3919 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3920 #else
3921 if (context->ucharptr >= 2 || context->length == 0)
3922 #endif
3923 {
/* Load the next chunk into the current register, then switch to the other
register, which holds the chunk loaded earlier, for the comparison. */
3924 if (context->length >= 4)
3925 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3926 else if (context->length >= 2)
3927 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3928 #if defined COMPILE_PCRE8
3929 else if (context->length >= 1)
3930 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3931 #endif /* COMPILE_PCRE8 */
3932 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3933
/* The case labels are chunk sizes expressed in code units (4 or 2 bytes
divided by the code unit size, or a single byte in 8-bit mode). */
3934 switch(context->ucharptr)
3935 {
3936 case 4 / sizeof(pcre_uchar):
3937 if (context->oc.asint != 0)
3938 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3939 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3940 break;
3941
3942 case 2 / sizeof(pcre_uchar):
3943 if (context->oc.asushort != 0)
3944 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3945 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3946 break;
3947
3948 #ifdef COMPILE_PCRE8
3949 case 1:
3950 if (context->oc.asbyte != 0)
3951 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3952 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3953 break;
3954 #endif
3955
3956 default:
3957 SLJIT_ASSERT_STOP();
3958 break;
3959 }
3960 context->ucharptr = 0;
3961 }
3962
3963 #else
3964
3965 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit at a time: load the next unit into the current register,
switch to the other register and compare the unit it already holds. */
3966 if (context->length >= 1)
3967 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3968
3969 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3970
3971 if (othercasebit != 0 && othercasechar == cc)
3972 {
3973 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3974 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3975 }
3976 else
3977 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3978
3979 #endif
3980
3981 cc++;
3982 #ifdef SUPPORT_UTF
3983 utflength--;
3984 }
3985 while (utflength > 0);
3986 #endif
3987
3988 return cc;
3989 }
3990
3991 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3992
/* Re-biases typereg so that the amount subtracted from it becomes 'value'
instead of the current typeoffset, emitting a single SUB or ADD only when the
two differ, and then records 'value' as the new typeoffset. Relies on
'typereg' and 'typeoffset' being in scope at the point of use. The expansion
is an if-statement followed by an assignment, so it must not be used as the
unbraced body of another if/else. */
3993 #define SET_TYPE_OFFSET(value) \
3994 if ((value) != typeoffset) \
3995 { \
3996 if ((value) > typeoffset) \
3997 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3998 else \
3999 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4000 } \
4001 typeoffset = (value);
4002
/* Same as SET_TYPE_OFFSET, but for the character held in TMP1 and the
'charoffset' variable. */
4003 #define SET_CHAR_OFFSET(value) \
4004 if ((value) != charoffset) \
4005 { \
4006 if ((value) > charoffset) \
4007 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
4008 else \
4009 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
4010 } \
4011 charoffset = (value);
4012
4013 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4014 {
4015 DEFINE_COMPILER;
4016 jump_list *found = NULL;
4017 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
4018 pcre_int32 c, charoffset;
4019 const pcre_uint32 *other_cases;
4020 struct sljit_jump *jump = NULL;
4021 pcre_uchar *ccbegin;
4022 int compares, invertcmp, numberofcmps;
4023 #ifdef SUPPORT_UCP
4024 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4025 BOOL charsaved = FALSE;
4026 int typereg = TMP1, scriptreg = TMP1;
4027 pcre_int32 typeoffset;
4028 #endif
4029
4030 /* Although SUPPORT_UTF must be defined, we are
4031 not necessary in utf mode even in 8 bit mode. */
4032 detect_partial_match(common, backtracks);
4033 read_char(common);
4034
4035 if ((*cc++ & XCL_MAP) != 0)
4036 {
4037 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4038 #ifndef COMPILE_PCRE8
4039 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4040 #elif defined SUPPORT_UTF
4041 if (common->utf)
4042 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4043 #endif
4044
4045 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
4046 {
4047 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4048 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4049 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4050 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4051 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4052 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4053 }
4054
4055 #ifndef COMPILE_PCRE8
4056 JUMPHERE(jump);
4057 #elif defined SUPPORT_UTF
4058 if (common->utf)
4059 JUMPHERE(jump);
4060 #endif
4061 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4062 #ifdef SUPPORT_UCP
4063 charsaved = TRUE;
4064 #endif
4065 cc += 32 / sizeof(pcre_uchar);
4066 }
4067
4068 /* Scanning the necessary info. */
4069 ccbegin = cc;
4070 compares = 0;
4071 while (*cc != XCL_END)
4072 {
4073 compares++;
4074 if (*cc == XCL_SINGLE)
4075 {
4076 cc += 2;
4077 #ifdef SUPPORT_UTF
4078 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4079 #endif
4080 #ifdef SUPPORT_UCP
4081 needschar = TRUE;
4082 #endif
4083 }
4084 else if (*cc == XCL_RANGE)
4085 {
4086 cc += 2;
4087 #ifdef SUPPORT_UTF
4088 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4089 #endif
4090 cc++;
4091 #ifdef SUPPORT_UTF
4092 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4093 #endif
4094 #ifdef SUPPORT_UCP
4095 needschar = TRUE;
4096 #endif
4097 }
4098 #ifdef SUPPORT_UCP
4099 else
4100 {
4101 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4102 cc++;
4103 switch(*cc)
4104 {
4105 case PT_ANY:
4106 break;
4107
4108 case PT_LAMP:
4109 case PT_GC:
4110 case PT_PC:
4111 case PT_ALNUM:
4112 needstype = TRUE;
4113 break;
4114
4115 case PT_SC:
4116 needsscript = TRUE;
4117 break;
4118
4119 case PT_SPACE:
4120 case PT_PXSPACE:
4121 case PT_WORD:
4122 needstype = TRUE;
4123 needschar = TRUE;
4124 break;
4125
4126 case PT_CLIST:
4127 case PT_UCNC:
4128 needschar = TRUE;
4129 break;
4130
4131 default:
4132 SLJIT_ASSERT_STOP();
4133 break;
4134 }
4135 cc += 2;
4136 }
4137 #endif
4138 }
4139
4140 #ifdef SUPPORT_UCP
4141 /* Simple register allocation. TMP1 is preferred if possible. */
4142 if (needstype || needsscript)
4143 {
4144 if (needschar && !charsaved)
4145 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4146 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4147 if (needschar)
4148 {
4149 if (needstype)
4150 {
4151 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4152 typereg = RETURN_ADDR;
4153 }
4154
4155 if (needsscript)
4156 scriptreg = TMP3;
4157 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4158 }
4159 else if (needstype && needsscript)
4160 scriptreg = TMP3;
4161 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4162
4163 if (needsscript)
4164 {
4165 if (scriptreg == TMP1)
4166 {
4167 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4168 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4169 }
4170 else
4171 {
4172 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4173 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4174 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4175 }
4176 }
4177 }
4178 #endif
4179
4180 /* Generating code. */
4181 cc = ccbegin;
4182 charoffset = 0;
4183 numberofcmps = 0;
4184 #ifdef SUPPORT_UCP
4185 typeoffset = 0;
4186 #endif
4187
4188 while (*cc != XCL_END)
4189 {
4190 compares--;
4191 invertcmp = (compares == 0 && list != backtracks);
4192 jump = NULL;
4193
4194 if (*cc == XCL_SINGLE)
4195 {
4196 cc ++;
4197 #ifdef SUPPORT_UTF
4198 if (common->utf)
4199 {
4200 GETCHARINC(c, cc);
4201 }
4202 else
4203 #endif
4204 c = *cc++;
4205
4206 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4207 {
4208 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4209 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4210 numberofcmps++;
4211 }
4212 else if (numberofcmps > 0)
4213 {
4214 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4215 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4216 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4217 numberofcmps = 0;
4218 }
4219 else
4220 {
4221 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4222 numberofcmps = 0;
4223 }
4224 }
4225 else if (*cc == XCL_RANGE)
4226 {
4227 cc ++;
4228 #ifdef SUPPORT_UTF
4229 if (common->utf)
4230 {
4231 GETCHARINC(c, cc);
4232 }
4233 else
4234 #endif
4235 c = *cc++;
4236 SET_CHAR_OFFSET(c);
4237 #ifdef SUPPORT_UTF
4238 if (common->utf)
4239 {
4240 GETCHARINC(c, cc);
4241 }
4242 else
4243 #endif
4244 c = *cc++;
4245 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4246 {
4247 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4248 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4249 numberofcmps++;
4250 }
4251 else if (numberofcmps > 0)
4252 {
4253 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4254 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4255 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4256 numberofcmps = 0;
4257 }
4258 else
4259 {
4260 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4261 numberofcmps = 0;
4262 }
4263 }
4264 #ifdef SUPPORT_UCP
4265 else
4266 {
4267 if (*cc == XCL_NOTPROP)
4268 invertcmp ^= 0x1;
4269 cc++;
4270 switch(*cc)
4271 {
4272 case PT_ANY:
4273 if (list != backtracks)
4274 {
4275 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4276 continue;
4277 }
4278 else if (cc[-1] == XCL_NOTPROP)
4279 continue;
4280 jump = JUMP(SLJIT_JUMP);
4281 break;
4282
4283 case PT_LAMP:
4284 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4285 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4286 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4287 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4288 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4289 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4290 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4291 break;
4292
4293 case PT_GC:
4294 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4295 SET_TYPE_OFFSET(c);
4296 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4297 break;
4298
4299 case PT_PC:
4300 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4301 break;
4302
4303 case PT_SC:
4304 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4305 break;
4306
4307 case PT_SPACE:
4308 case PT_PXSPACE:
4309 SET_CHAR_OFFSET(9);
4310 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4311 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4312
4313 SET_TYPE_OFFSET(ucp_Zl);
4314 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4315 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4316 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4317 break;
4318
4319 case PT_WORD:
4320 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4321 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4322 /* Fall through. */
4323
4324 case PT_ALNUM:
4325 SET_TYPE_OFFSET(ucp_Ll);
4326 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4327 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4328 SET_TYPE_OFFSET(ucp_Nd);
4329 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4330 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4331 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4332 break;
4333
4334 case PT_CLIST:
4335 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4336
4337 /* At least three characters are required.
4338 Otherwise this case would be handled by the normal code path. */
4339 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4340 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4341
4342 /* Optimizing character pairs, if their difference is power of 2. */
4343 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4344 {
4345 if (charoffset == 0)
4346 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4347 else
4348 {
4349 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4350 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4351 }
4352 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4353 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4354 other_cases += 2;
4355 }
4356 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4357 {
4358 if (charoffset == 0)
4359 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4360 else
4361 {
4362 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4363 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4364 }
4365 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4366 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4367
4368 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4369 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4370
4371 other_cases += 3;
4372 }
4373 else
4374 {
4375 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4376 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4377 }
4378
4379 while (*other_cases != NOTACHAR)
4380 {
4381 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4382 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4383 }
4384 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4385 break;
4386
4387 case PT_UCNC:
4388 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4389 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4390 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4391 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4392 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4393 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4394
4395 SET_CHAR_OFFSET(0xa0);
4396 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4397 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4398 SET_CHAR_OFFSET(0);
4399 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4400 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4401 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4402 break;
4403 }
4404 cc += 2;
4405 }
4406 #endif
4407
4408 if (jump != NULL)
4409 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4410 }
4411
4412 if (found != NULL)
4413 set_jumps(found, LABEL());
4414 }
4415
4416 #undef SET_TYPE_OFFSET
4417 #undef SET_CHAR_OFFSET
4418
4419 #endif
4420
4421 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4422 {
4423 DEFINE_COMPILER;
4424 int length;
4425 unsigned int c, oc, bit;
4426 compare_context context;
4427 struct sljit_jump *jump[4];
4428 jump_list *end_list;
4429 #ifdef SUPPORT_UTF
4430 struct sljit_label *label;
4431 #ifdef SUPPORT_UCP
4432 pcre_uchar propdata[5];
4433 #endif
4434 #endif
4435
4436 switch(type)
4437 {
4438 case OP_SOD:
4439 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4440 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4441 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4442 return cc;
4443
4444 case OP_SOM:
4445 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4446 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4447 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4448 return cc;
4449
4450 case OP_NOT_WORD_BOUNDARY:
4451 case OP_WORD_BOUNDARY:
4452 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4453 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4454 return cc;
4455
4456 case OP_NOT_DIGIT:
4457 case OP_DIGIT:
4458 /* Digits are usually 0-9, so it is worth to optimize them. */
4459 if (common->digits[0] == -2)
4460 get_ctype_ranges(common, ctype_digit, common->digits);
4461 detect_partial_match(common, backtracks);
4462 /* Flip the starting bit in the negative case. */
4463 if (type == OP_NOT_DIGIT)
4464 common->digits[1] ^= 1;
4465 if (!check_ranges(common, common->digits, backtracks, TRUE))
4466 {
4467 read_char8_type(common);
4468 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4469 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4470 }
4471 if (type == OP_NOT_DIGIT)
4472 common->digits[1] ^= 1;
4473 return cc;
4474
4475 case OP_NOT_WHITESPACE:
4476 case OP_WHITESPACE:
4477 detect_partial_match(common, backtracks);
4478 read_char8_type(common);
4479 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4480 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4481 return cc;
4482
4483 case OP_NOT_WORDCHAR:
4484 case OP_WORDCHAR:
4485 detect_partial_match(common, backtracks);
4486 read_char8_type(common);
4487 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4488 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4489 return cc;
4490
4491 case OP_ANY:
4492 detect_partial_match(common, backtracks);
4493 read_char(common);
4494 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4495 {
4496 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4497 end_list = NULL;
4498 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4499 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4500 else
4501 check_str_end(common, &end_list);
4502
4503 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4504 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4505 set_jumps(end_list, LABEL());
4506 JUMPHERE(jump[0]);
4507 }
4508 else
4509 check_newlinechar(common, common->nltype, backtracks, TRUE);
4510 return cc;
4511
4512 case OP_ALLANY:
4513 detect_partial_match(common, backtracks);
4514 #ifdef SUPPORT_UTF
4515 if (common->utf)
4516 {
4517 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4518 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4519 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4520 #if defined COMPILE_PCRE8
4521 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4522 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4523 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4524 #elif defined COMPILE_PCRE16
4525 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4526 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4527 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4528 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4529 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4530 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4531 #endif
4532 JUMPHERE(jump[0]);
4533 #endif /* COMPILE_PCRE[8|16] */
4534 return cc;
4535 }
4536 #endif
4537 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4538 return cc;
4539
4540 case OP_ANYBYTE:
4541 detect_partial_match(common, backtracks);
4542 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4543 return cc;
4544
4545 #ifdef SUPPORT_UTF
4546 #ifdef SUPPORT_UCP
4547 case OP_NOTPROP:
4548 case OP_PROP:
4549 propdata[0] = 0;
4550 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4551 propdata[2] = cc[0];
4552 propdata[3] = cc[1];
4553 propdata[4] = XCL_END;
4554 compile_xclass_matchingpath(common, propdata, backtracks);
4555 return cc + 2;
4556 #endif
4557 #endif
4558
4559 case OP_ANYNL:
4560 detect_partial_match(common, backtracks);
4561 read_char(common);
4562 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4563 /* We don't need to handle soft partial matching case. */
4564 end_list = NULL;
4565 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4566 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4567 else
4568 check_str_end(common, &end_list);
4569 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4570 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4571 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4572 jump[2] = JUMP(SLJIT_JUMP);
4573 JUMPHERE(jump[0]);
4574 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4575 set_jumps(end_list, LABEL());
4576 JUMPHERE(jump[1]);
4577 JUMPHERE(jump[2]);
4578 return cc;
4579
4580 case OP_NOT_HSPACE:
4581 case OP_HSPACE:
4582 detect_partial_match(common, backtracks);
4583 read_char(common);
4584 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4585 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4586 return cc;
4587
4588 case OP_NOT_VSPACE:
4589 case OP_VSPACE:
4590 detect_partial_match(common, backtracks);
4591 read_char(common);
4592 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4593 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4594 return cc;
4595
4596 #ifdef SUPPORT_UCP
4597 case OP_EXTUNI:
4598 detect_partial_match(common, backtracks);
4599 read_char(common);
4600 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4601 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4602 /* Optimize register allocation: use a real register. */
4603 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4604 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4605
4606 label = LABEL();
4607 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4608 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4609 read_char(common);
4610 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4611 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4612 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4613
4614 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4615 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4616 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4617 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4618 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4619 JUMPTO(SLJIT_C_NOT_ZERO, label);
4620
4621 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4622 JUMPHERE(jump[0]);
4623 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4624
4625 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4626 {
4627 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4628 /* Since we successfully read a char above, partial matching must occure. */
4629 check_partial(common, TRUE);
4630 JUMPHERE(jump[0]);
4631 }
4632 return cc;
4633 #endif
4634
4635 case OP_EODN:
4636 /* Requires rather complex checks. */
4637 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4638 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4639 {
4640 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4641 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4642 if (common->mode == JIT_COMPILE)
4643 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4644 else
4645 {
4646 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4647 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4648 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4649 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4650 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4651 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4652 check_partial(common, TRUE);
4653 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4654 JUMPHERE(jump[1]);
4655 }
4656 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4657 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4658 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4659 }
4660 else if (common->nltype == NLTYPE_FIXED)
4661 {
4662 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4663 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4664 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4665 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4666 }
4667 else
4668 {
4669 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4670 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4671 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4672 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4673 jump[2] = JUMP(SLJIT_C_GREATER);
4674 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4675 /* Equal. */
4676 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4677 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4678 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4679
4680 JUMPHERE(jump[1]);
4681 if (common->nltype == NLTYPE_ANYCRLF)
4682 {
4683 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4684 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4685 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4686 }
4687 else
4688 {
4689 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4690 read_char(common);
4691 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4692 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4693 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4694 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4695 }
4696 JUMPHERE(jump[2]);
4697 JUMPHERE(jump[3]);
4698 }
4699 JUMPHERE(jump[0]);
4700 check_partial(common, FALSE);
4701 return cc;
4702
4703 case OP_EOD:
4704 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4705 check_partial(common, FALSE);
4706 return cc;
4707
4708 case OP_CIRC:
4709 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4710 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4711 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4712 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4713 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4714 return cc;
4715
4716 case OP_CIRCM:
4717 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4718 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4719 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4720 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4721 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4722 jump[0] = JUMP(SLJIT_JUMP);
4723 JUMPHERE(jump[1]);
4724
4725 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4726 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4727 {
4728 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4729 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4730 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4731 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4732 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4733 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4734 }
4735 else
4736 {
4737 skip_char_back(common);
4738 read_char(common);
4739 check_newlinechar(common, common->nltype, backtracks, FALSE);
4740 }
4741 JUMPHERE(jump[0]);
4742 return cc;
4743
4744 case OP_DOLL:
4745 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4746 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4747 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4748
4749 if (!common->endonly)
4750 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4751 else
4752 {
4753 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4754 check_partial(common, FALSE);
4755 }
4756 return cc;
4757
4758 case OP_DOLLM:
4759 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4760 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4761 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4762 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4763 check_partial(common, FALSE);
4764 jump[0] = JUMP(SLJIT_JUMP);
4765 JUMPHERE(jump[1]);
4766
4767 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4768 {
4769 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4770 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4771 if (common->mode == JIT_COMPILE)
4772 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4773 else
4774 {
4775 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4776 /* STR_PTR = STR_END - IN_UCHARS(1) */
4777 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4778 check_partial(common, TRUE);
4779 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4780 JUMPHERE(jump[1]);
4781 }
4782
4783 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4784 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4785 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4786 }
4787 else
4788 {
4789 peek_char(common);
4790 check_newlinechar(common, common->nltype, backtracks, FALSE);
4791 }
4792 JUMPHERE(jump[0]);
4793 return cc;
4794
4795 case OP_CHAR:
4796 case OP_CHARI:
4797 length = 1;
4798 #ifdef SUPPORT_UTF
4799 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4800 #endif
4801 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4802 {
4803 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4804 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4805
4806 context.length = IN_UCHARS(length);
4807 context.sourcereg = -1;
4808 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4809 context.ucharptr = 0;
4810 #endif
4811 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4812 }
4813 detect_partial_match(common, backtracks);
4814 read_char(common);
4815 #ifdef SUPPORT_UTF
4816 if (common->utf)
4817 {
4818 GETCHAR(c, cc);
4819 }
4820 else
4821 #endif
4822 c = *cc;
4823 if (type == OP_CHAR || !char_has_othercase(common, cc))
4824 {
4825 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4826 return cc + length;
4827 }
4828 oc = char_othercase(common, c);
4829 bit = c ^ oc;
4830 if (is_powerof2(bit))
4831 {
4832 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4833 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4834 return cc + length;
4835 }
4836 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4837 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4838 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4839 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4840 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4841 return cc + length;
4842
4843 case OP_NOT:
4844 case OP_NOTI:
4845 detect_partial_match(common, backtracks);
4846 length = 1;
4847 #ifdef SUPPORT_UTF
4848 if (common->utf)
4849 {
4850 #ifdef COMPILE_PCRE8
4851 c = *cc;
4852 if (c < 128)
4853 {
4854 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4855 if (type == OP_NOT || !char_has_othercase(common, cc))
4856 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4857 else
4858 {
4859 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4860 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4861 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4862 }
4863 /* Skip the variable-length character. */
4864 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4865 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4866 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4867 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4868 JUMPHERE(jump[0]);
4869 return cc + 1;
4870 }
4871 else
4872 #endif /* COMPILE_PCRE8 */
4873 {
4874 GETCHARLEN(c, cc, length);
4875 read_char(common);
4876 }
4877 }
4878 else
4879 #endif /* SUPPORT_UTF */
4880 {
4881 read_char(common);
4882 c = *cc;
4883 }
4884
4885 if (type == OP_NOT || !char_has_othercase(common, cc))
4886 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4887 else
4888 {
4889 oc = char_othercase(common, c);
4890 bit = c ^ oc;
4891 if (is_powerof2(bit))
4892 {
4893 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4894 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4895 }
4896 else
4897 {
4898 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4899 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4900 }
4901 }
4902 return cc + length;
4903
4904 case OP_CLASS:
4905 case OP_NCLASS:
4906 detect_partial_match(common, backtracks);
4907 read_char(common);
4908 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4909 return cc + 32 / sizeof(pcre_uchar);
4910
4911 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4912 jump[0] = NULL;
4913 #ifdef COMPILE_PCRE8
4914 /* This check only affects 8 bit mode. In other modes, we
4915 always need to compare the value with 255. */
4916 if (common->utf)
4917 #endif /* COMPILE_PCRE8 */
4918 {
4919 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4920 if (type == OP_CLASS)
4921 {
4922 add_jump(compiler, backtracks, jump[0]);
4923 jump[0] = NULL;
4924 }
4925 }
4926 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4927 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4928 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4929 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4930 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4931 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4932 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4933 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4934 if (jump[0] != NULL)
4935 JUMPHERE(jump[0]);
4936 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4937 return cc + 32 / sizeof(pcre_uchar);
4938
4939 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4940 case OP_XCLASS:
4941 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4942 return cc + GET(cc, 0) - 1;
4943 #endif
4944
4945 case OP_REVERSE:
4946 length = GET(cc, 0);
4947 if (length == 0)
4948 return cc + LINK_SIZE;
4949 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4950 #ifdef SUPPORT_UTF
4951 if (common->utf)
4952 {
4953 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4954 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4955 label = LABEL();
4956 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4957 skip_char_back(common);
4958 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4959 JUMPTO(SLJIT_C_NOT_ZERO, label);
4960 }
4961 else
4962 #endif
4963 {
4964 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4965 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4966 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4967 }
4968 check_start_used_ptr(common);
4969 return cc + LINK_SIZE;
4970 }
4971 SLJIT_ASSERT_STOP();
4972 return cc;
4973 }
4974
4975 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4976 {
4977 /* This function consumes at least one input character. */
4978 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4979 DEFINE_COMPILER;
4980 pcre_uchar *ccbegin = cc;
4981 compare_context context;
4982 int size;
4983
4984 context.length = 0;
4985 do
4986 {
4987 if (cc >= ccend)
4988 break;
4989
4990 if (*cc == OP_CHAR)
4991 {
4992 size = 1;
4993 #ifdef SUPPORT_UTF
4994 if (common->utf && HAS_EXTRALEN(cc[1]))
4995 size += GET_EXTRALEN(cc[1]);
4996 #endif
4997 }
4998 else if (*cc == OP_CHARI)
4999 {
5000 size = 1;
5001 #ifdef SUPPORT_UTF
5002 if (common->utf)
5003 {
5004 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5005 size = 0;
5006 else if (HAS_EXTRALEN(cc[1]))
5007 size += GET_EXTRALEN(cc[1]);
5008 }
5009 else
5010 #endif
5011 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5012 size = 0;
5013 }
5014 else
5015 size = 0;
5016
5017 cc += 1 + size;
5018 context.length += IN_UCHARS(size);
5019 }
5020 while (size > 0 && context.length <= 128);
5021
5022 cc = ccbegin;
5023 if (context.length > 0)
5024 {
5025 /* We have a fixed-length byte sequence. */
5026 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5027 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5028
5029 context.sourcereg = -1;
5030 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5031 context.ucharptr = 0;
5032 #endif
5033 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5034 return cc;
5035 }
5036
5037 /* A non-fixed length character will be checked if length == 0. */
5038 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5039 }
5040
5041 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5042 {
5043 DEFINE_COMPILER;
5044 int offset = GET2(cc, 1) << 1;
5045
5046 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5047 if (!common->jscript_compat)
5048 {
5049 if (backtracks == NULL)
5050 {
5051 /* OVECTOR(1) contains the "string begin - 1" constant. */
5052 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5053 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5054 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5055 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5056 return JUMP(SLJIT_C_NOT_ZERO);
5057 }
5058 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5059 }
5060 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5061 }
5062
5063 /* Forward definitions. */
5064 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5065 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5066
/* Allocates a backtrack record of (size) bytes with sljit_alloc_memory(),
clears it, and pushes it onto the list headed by parent->top. The record is
left in the local variable "backtrack" with its cc field set to (ccstart).

The macro relies on three names of the calling function: "compiler",
"backtrack" and "parent". When the compiler reports an error after the
allocation, the calling function returns (error) immediately.

Note: (size) is evaluated twice, so it must not have side effects. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return (error); \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5079
/* Same as PUSH_BACKTRACK, for functions returning void: allocates a backtrack
record of (size) bytes with sljit_alloc_memory(), clears it, and pushes it
onto the list headed by parent->top. The calling function returns without a
value when the compiler reports an error after the allocation.

The macro relies on three names of the calling function: "compiler",
"backtrack" and "parent". (size) is evaluated twice, so it must not have side
effects. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the local "backtrack" pointer as a pointer to the given record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5094
5095 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5096 {
5097 DEFINE_COMPILER;
5098 int offset = GET2(cc, 1) << 1;
5099 struct sljit_jump *jump = NULL;
5100 struct sljit_jump *partial;
5101 struct sljit_jump *nopartial;
5102
5103 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5104 /* OVECTOR(1) contains the "string begin - 1" constant. */
5105 if (withchecks && !common->jscript_compat)
5106 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5107
5108 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5109 if (common->utf && *cc == OP_REFI)
5110 {
5111 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5112 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5113 if (withchecks)
5114 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5115
5116 /* Needed to save important temporary registers. */
5117 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5118 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5119 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5120 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5121 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5122 if (common->mode == JIT_COMPILE)
5123 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5124 else
5125 {
5126 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5127 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5128 check_partial(common, FALSE);
5129 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5130 JUMPHERE(nopartial);
5131 }
5132 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5133 }
5134 else
5135 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5136 {
5137 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5138 if (withchecks)
5139 jump = JUMP(SLJIT_C_ZERO);
5140
5141 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5142 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5143 if (common->mode == JIT_COMPILE)
5144 add_jump(compiler, backtracks, partial);
5145
5146 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5147 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5148
5149 if (common->mode != JIT_COMPILE)
5150 {
5151 nopartial = JUMP(SLJIT_JUMP);
5152 JUMPHERE(partial);
5153 /* TMP2 -= STR_END - STR_PTR */
5154 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5155 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5156 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5157 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5158 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5159 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5160 JUMPHERE(partial);
5161 check_partial(common, FALSE);
5162 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5163 JUMPHERE(nopartial);
5164 }
5165 }
5166
5167 if (jump != NULL)
5168 {
5169 if (emptyfail)
5170 add_jump(compiler, backtracks, jump);
5171 else
5172 JUMPHERE(jump);
5173 }
5174 return cc + 1 + IMM2_SIZE;
5175 }
5176
5177 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5178 {
5179 DEFINE_COMPILER;
5180 backtrack_common *backtrack;
5181 pcre_uchar type;
5182 struct sljit_label *label;
5183 struct sljit_jump *zerolength;
5184 struct sljit_jump *jump = NULL;
5185 pcre_uchar *ccbegin = cc;
5186 int min = 0, max = 0;
5187 BOOL minimize;
5188
5189 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5190
5191 type = cc[1 + IMM2_SIZE];
5192 minimize = (type & 0x1) != 0;
5193 switch(type)
5194 {
5195 case OP_CRSTAR:
5196 case OP_CRMINSTAR:
5197 min = 0;
5198 max = 0;
5199 cc += 1 + IMM2_SIZE + 1;
5200 break;
5201 case OP_CRPLUS:
5202 case OP_CRMINPLUS:
5203 min = 1;
5204 max = 0;
5205 cc += 1 + IMM2_SIZE + 1;
5206 break;
5207 case OP_CRQUERY:
5208 case OP_CRMINQUERY:
5209 min = 0;
5210 max = 1;
5211 cc += 1 + IMM2_SIZE + 1;
5212 break;
5213 case OP_CRRANGE:
5214 case OP_CRMINRANGE:
5215 min = GET2(cc, 1 + IMM2_SIZE + 1);
5216 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5217 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5218 break;
5219 default:
5220 SLJIT_ASSERT_STOP();
5221 break;
5222 }
5223
5224 if (!minimize)
5225 {
5226 if (min == 0)
5227 {
5228 allocate_stack(common, 2);
5229 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5231 /* Temporary release of STR_PTR. */
5232 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5233 zerolength = compile_ref_checks(common, ccbegin, NULL);
5234 /* Restore if not zero length. */
5235 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5236 }
5237 else
5238 {
5239 allocate_stack(common, 1);
5240 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5241 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5242 }
5243
5244 if (min > 1 || max > 1)
5245 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5246
5247 label = LABEL();
5248 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5249
5250 if (min > 1 || max > 1)
5251 {
5252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5253 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5254 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5255 if (min > 1)
5256 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5257 if (max > 1)
5258 {
5259 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5260 allocate_stack(common, 1);
5261 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5262 JUMPTO(SLJIT_JUMP, label);
5263 JUMPHERE(jump);
5264 }
5265 }
5266
5267 if (max == 0)
5268 {
5269 /* Includes min > 1 case as well. */
5270 allocate_stack(common, 1);
5271 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5272 JUMPTO(SLJIT_JUMP, label);
5273 }
5274
5275 JUMPHERE(zerolength);
5276 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5277
5278 count_match(common);
5279 return cc;
5280 }
5281
5282 allocate_stack(common, 2);
5283 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5284 if (type != OP_CRMINSTAR)
5285 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5286
5287 if (min == 0)
5288 {
5289 zerolength = compile_ref_checks(common, ccbegin, NULL);
5290 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5291 jump = JUMP(SLJIT_JUMP);
5292 }
5293 else
5294 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5295
5296 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5297 if (max > 0)
5298 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5299
5300 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5301 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5302
5303 if (min > 1)
5304 {
5305 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5306 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5307 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5308 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5309 }
5310 else if (max > 0)
5311 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5312
5313 if (jump != NULL)
5314 JUMPHERE(jump);
5315 JUMPHERE(zerolength);
5316
5317 count_match(common);
5318 return cc;
5319 }
5320
5321 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5322 {
5323 DEFINE_COMPILER;
5324 backtrack_common *backtrack;
5325 recurse_entry *entry = common->entries;
5326 recurse_entry *prev = NULL;
5327 sljit_sw start = GET(cc, 1);
5328 pcre_uchar *start_cc;
5329 BOOL needs_control_head;
5330
5331 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5332
5333 /* Inlining simple patterns. */
5334 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5335 {
5336 start_cc = common->start + start;
5337 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5338 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5339 return cc + 1 + LINK_SIZE;
5340 }
5341
5342 while (entry != NULL)
5343 {
5344 if (entry->start == start)
5345 break;
5346 prev = entry;
5347 entry = entry->next;
5348 }
5349
5350 if (entry == NULL)
5351 {
5352 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5353 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5354 return NULL;
5355 entry->next = NULL;
5356 entry->entry = NULL;
5357 entry->calls = NULL;
5358 entry->start = start;
5359
5360 if (prev != NULL)
5361 prev->next = entry;
5362 else
5363 common->entries = entry;
5364 }
5365
5366 if (common->has_set_som && common->mark_ptr != 0)
5367 {
5368 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5369 allocate_stack(common, 2);
5370 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5371 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5372 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5373 }
5374 else if (common->has_set_som || common->mark_ptr != 0)
5375 {
5376 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5377 allocate_stack(common, 1);
5378 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5379 }
5380
5381 if (entry->entry == NULL)
5382 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5383 else
5384 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5385 /* Leave if the match is failed. */
5386 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5387 return cc + 1 + LINK_SIZE;
5388 }
5389
5390 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5391 {
5392 const pcre_uchar *begin = arguments->begin;
5393 int *offset_vector = arguments->offsets;
5394 int offset_count = arguments->offset_count;
5395 int i;
5396
5397 if (PUBL(callout) == NULL)
5398 return 0;
5399
5400 callout_block->version = 2;
5401 callout_block->callout_data = arguments->callout_data;
5402
5403 /* Offsets in subject. */
5404 callout_block->subject_length = arguments->end - arguments->begin;
5405 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5406 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5407 #if defined COMPILE_PCRE8
5408 callout_block->subject = (PCRE_SPTR)begin;
5409 #elif defined COMPILE_PCRE16
5410 callout_block->subject = (PCRE_SPTR16)begin;
5411 #elif defined COMPILE_PCRE32
5412 callout_block->subject = (PCRE_SPTR32)begin;
5413 #endif
5414
5415 /* Convert and copy the JIT offset vector to the offset_vector array. */
5416 callout_block->capture_top = 0;
5417 callout_block->offset_vector = offset_vector;
5418 for (i = 2; i < offset_count; i += 2)
5419 {
5420 offset_vector[i] = jit_ovector[i] - begin;
5421 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5422 if (jit_ovector[i] >= begin)
5423 callout_block->capture_top = i;
5424 }
5425
5426 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5427 if (offset_count > 0)
5428 offset_vector[0] = -1;
5429 if (offset_count > 1)
5430 offset_vector[1] = -1;
5431 return (*PUBL(callout))(callout_block);
5432 }
5433
/* Size of the callout block, rounded up to a multiple of 8 bytes. */
#define CALLOUT_ARG_SIZE (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Offset of a callout block field, relative to the address which is
CALLOUT_ARG_SIZE bytes above the start of the block (always negative). */
#define CALLOUT_ARG_OFFSET(arg) (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5440
5441 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5442 {
5443 DEFINE_COMPILER;
5444 backtrack_common *backtrack;
5445
5446 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5447
5448 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5449
5450 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5451 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5452 SLJIT_ASSERT(common->capture_last_ptr != 0);
5453 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5454 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5455
5456 /* These pointer sized fields temporarly stores internal variables. */
5457 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5458 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5459 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5460
5461 if (common->mark_ptr != 0)
5462 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5463 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5464 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5465 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5466
5467 /* Needed to save important temporary registers. */
5468 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5469 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5470 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5471 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5472 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5473 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5474 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5475
5476 /* Check return value. */
5477 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5478 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5479 if (common->forced_quit_label == NULL)
5480 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5481 else
5482 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5483 return cc + 2 + 2 * LINK_SIZE;
5484 }
5485
5486 #undef CALLOUT_ARG_SIZE
5487 #undef CALLOUT_ARG_OFFSET
5488
5489 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5490 {
5491 DEFINE_COMPILER;
5492 int framesize;
5493 int extrasize;
5494 BOOL needs_control_head;
5495 int private_data_ptr;
5496 backtrack_common altbacktrack;
5497 pcre_uchar *ccbegin;
5498 pcre_uchar opcode;
5499 pcre_uchar bra = OP_BRA;
5500 jump_list *tmp = NULL;
5501 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5502 jump_list **found;
5503 /* Saving previous accept variables. */
5504 BOOL save_local_exit = common->local_exit;
5505 BOOL save_positive_assert = common->positive_assert;
5506 then_trap_backtrack *save_then_trap = common->then_trap;
5507 struct sljit_label *save_quit_label = common->quit_label;
5508 struct sljit_label *save_accept_label = common->accept_label;
5509 jump_list *save_quit = common->quit;
5510 jump_list *save_positive_assert_quit = common->positive_assert_quit;
5511 jump_list *save_accept = common->accept;
5512 struct sljit_jump *jump;
5513 struct sljit_jump *brajump = NULL;
5514
5515 /* Assert captures then. */
5516 common->then_trap = NULL;
5517
5518 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5519 {
5520 SLJIT_ASSERT(!conditional);
5521 bra = *cc;
5522 cc++;
5523 }
5524 private_data_ptr = PRIVATE_DATA(cc);
5525 SLJIT_ASSERT(private_data_ptr != 0);
5526 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5527 backtrack->framesize = framesize;
5528 backtrack->private_data_ptr = private_data_ptr;
5529 opcode = *cc;
5530 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
5531 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5532 ccbegin = cc;
5533 cc += GET(cc, 1);
5534
5535 if (bra == OP_BRAMINZERO)
5536 {
5537 /* This is a braminzero backtrack path. */
5538 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5539 free_stack(common, 1);
5540 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5541 }
5542
5543 if (framesize < 0)
5544 {
5545 extrasize = needs_control_head ? 2 : 1;
5546 if (framesize == no_frame)
5547 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5548 allocate_stack(common, extrasize);
5549 if (needs_control_head)
5550 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5551 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5552 if (needs_control_head)
5553 {
5554 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5555 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5556 }
5557 }
5558 else
5559 {
5560 extrasize = needs_control_head ? 3 : 2;
5561 allocate_stack(common, framesize + extrasize);
5562 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5563 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5564 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5565 if (needs_control_head)
5566 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5567 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5568 if (needs_control_head)
5569 {
5570 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5571 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5572 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5573 }
5574 else
5575 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5576 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
5577 }
5578
5579 memset(&altbacktrack, 0, sizeof(backtrack_common));
5580 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5581 {
5582 /* Negative assert is stronger than positive assert. */
5583 common->local_exit = TRUE;
5584 common->quit_label = NULL;
5585 common->quit = NULL;
5586 common->positive_assert = FALSE;
5587 }
5588 else
5589 common->positive_assert = TRUE;
5590 common->positive_assert_quit = NULL;
5591
5592 while (1)
5593 {
5594 common->accept_label = NULL;
5595 common->accept = NULL;
5596 altbacktrack.top = NULL;
5597 altbacktrack.topbacktracks = NULL;
5598
5599 if (*ccbegin == OP_ALT)
5600 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5601
5602 altbacktrack.cc = ccbegin;
5603 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5604 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5605 {
5606 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5607 {
5608 common->local_exit = save_local_exit;
5609 common->quit_label = save_quit_label;
5610 common->quit = save_quit;
5611 }
5612 common->positive_assert = save_positive_assert;
5613 common->then_trap = save_then_trap;
5614 common->accept_label = save_accept_label;
5615 common->positive_assert_quit = save_positive_assert_quit;
5616 common->accept = save_accept;
5617 return NULL;
5618 }
5619 common->accept_label = LABEL();
5620 if (common->accept != NULL)
5621 set_jumps(common->accept, common->accept_label);
5622
5623 /* Reset stack. */
5624 if (framesize < 0)
5625 {
5626 if (framesize == no_frame)
5627 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5628 else
5629 free_stack(common, extrasize);
5630 if (needs_control_head)
5631 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5632 }
5633 else
5634 {
5635 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5636 {
5637 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5638 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5639 if (needs_control_head)
5640 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5641 }
5642 else
5643 {
5644 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5645 if (needs_control_head)
5646 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5647 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5648 }
5649 }
5650
5651 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5652 {
5653 /* We know that STR_PTR was stored on the top of the stack. */
5654 if (conditional)
5655 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5656 else if (bra == OP_BRAZERO)
5657 {
5658 if (framesize < 0)
5659 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5660 else
5661 {
5662 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5663 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5664 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5665 }
5666 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5667 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5668 }
5669 else if (framesize >= 0)
5670 {
5671 /* For OP_BRA and OP_BRAMINZERO. */
5672 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5673 }
5674 }
5675 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5676
5677 compile_backtrackingpath(common, altbacktrack.top);
5678 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5679 {
5680 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5681 {
5682 common->local_exit = save_local_exit;
5683 common->quit_label = save_quit_label;
5684 common->quit = save_quit;
5685 }
5686 common->positive_assert = save_positive_assert;
5687 common->then_trap = save_then_trap;
5688 common->accept_label = save_accept_label;
5689 common->positive_assert_quit = save_positive_assert_quit;
5690 common->accept = save_accept;
5691 return NULL;
5692 }
5693 set_jumps(altbacktrack.topbacktracks, LABEL());
5694
5695 if (*cc != OP_ALT)
5696 break;
5697
5698 ccbegin = cc;
5699 cc += GET(cc, 1);
5700 }
5701
5702 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5703 {
5704 SLJIT_ASSERT(common->positive_assert_quit == NULL);
5705 /* Makes the check less complicated below. */
5706 common->positive_assert_quit = common->quit;
5707 }
5708
5709 /* None of them matched. */
5710 if (common->positive_assert_quit != NULL)
5711 {
5712 jump = JUMP(SLJIT_JUMP);
5713 set_jumps(common->positive_assert_quit, LABEL());
5714 SLJIT_ASSERT(framesize != no_stack);
5715 if (framesize < 0)
5716 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5717 else
5718 {
5719 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5720 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5721 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5722 }
5723 JUMPHERE(jump);
5724 }
5725
5726 if (needs_control_head)
5727 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5728
5729 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5730 {
5731 /* Assert is failed. */
5732 if (conditional || bra == OP_BRAZERO)
5733 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5734
5735 if (framesize < 0)
5736 {
5737 /* The topmost item should be 0. */
5738 if (bra == OP_BRAZERO)
5739 {
5740 if (extrasize == 2)
5741 free_stack(common, 1);
5742 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5743 }
5744 else
5745 free_stack(common, extrasize);
5746 }
5747 else
5748 {
5749 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5750 /* The topmost item should be 0. */
5751 if (bra == OP_BRAZERO)
5752 {
5753 free_stack(common, framesize + extrasize - 1);
5754 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5755 }
5756 else
5757 free_stack(common, framesize + extrasize);
5758 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5759 }
5760 jump = JUMP(SLJIT_JUMP);
5761 if (bra != OP_BRAZERO)
5762 add_jump(compiler, target, jump);
5763
5764 /* Assert is successful. */
5765 set_jumps(tmp, LABEL());
5766 if (framesize < 0)
5767 {
5768 /* We know that STR_PTR was stored on the top of the stack. */
5769 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5770 /* Keep the STR_PTR on the top of the stack. */
5771 if (bra == OP_BRAZERO)
5772 {
5773 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5774 if (extrasize == 2)
5775 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5776 }
5777 else if (bra == OP_BRAMINZERO)
5778 {
5779 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5780 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5781 }
5782 }
5783 else
5784 {
5785 if (bra == OP_BRA)
5786 {
5787 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5788 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5789 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5790 }
5791 else
5792 {
5793 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5794 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5795 if (extrasize == 2)
5796 {
5797 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5798 if (bra == OP_BRAMINZERO)
5799 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5800 }
5801 else
5802 {
5803 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5804 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5805 }
5806 }
5807 }
5808
5809 if (bra == OP_BRAZERO)
5810 {
5811 backtrack->matchingpath = LABEL();
5812 SET_LABEL(jump, backtrack->matchingpath);
5813 }
5814 else if (bra == OP_BRAMINZERO)
5815 {
5816 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5817 JUMPHERE(brajump);
5818 if (framesize >= 0)
5819 {
5820 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5821 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5822 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5823 }
5824 set_jumps(backtrack->common.topbacktracks, LABEL());
5825 }
5826 }
5827 else
5828 {
5829 /* AssertNot is successful. */
5830 if (framesize < 0)
5831 {
5832 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5833 if (bra != OP_BRA)
5834 {
5835 if (extrasize == 2)
5836 free_stack(common, 1);
5837 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5838 }
5839 else
5840 free_stack(common, extrasize);
5841 }
5842 else
5843 {
5844 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5845 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5846 /* The topmost item should be 0. */
5847 if (bra != OP_BRA)
5848 {
5849 free_stack(common, framesize + extrasize - 1);
5850 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5851 }
5852 else
5853 free_stack(common, framesize + extrasize);
5854 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5855 }
5856
5857 if (bra == OP_BRAZERO)
5858 backtrack->matchingpath = LABEL();
5859 else if (bra == OP_BRAMINZERO)
5860 {
5861 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5862 JUMPHERE(brajump);
5863 }
5864
5865 if (bra != OP_BRA)
5866 {
5867 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5868 set_jumps(backtrack->common.topbacktracks, LABEL());
5869 backtrack->common.topbacktracks = NULL;
5870 }
5871 }
5872
5873 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5874 {
5875 common->local_exit = save_local_exit;
5876 common->quit_label = save_quit_label;
5877 common->quit = save_quit;
5878 }
5879 common->positive_assert = save_positive_assert;
5880 common->then_trap = save_then_trap;
5881 common->accept_label = save_accept_label;
5882 common->positive_assert_quit = save_positive_assert_quit;
5883 common->accept = save_accept;
5884 return cc + 1 + LINK_SIZE;
5885 }
5886
5887 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
5888 {
5889 DEFINE_COMPILER;
5890 int stacksize;
5891
5892 if (framesize < 0)
5893 {
5894 if (framesize == no_frame)
5895 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5896 else
5897 {
5898 stacksize = needs_control_head ? 1 : 0;
5899 if (ket != OP_KET || has_alternatives)
5900 stacksize++;
5901 free_stack(common, stacksize);
5902 }
5903
5904 if (needs_control_head)
5905 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
5906
5907 /* TMP2 which is set here used by OP_KETRMAX below. */
5908 if (ket == OP_KETRMAX)
5909 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5910 else if (ket == OP_KETRMIN)
5911 {
5912 /* Move the STR_PTR to the private_data_ptr. */
5913 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5914 }
5915 }
5916 else
5917 {
5918 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
5919 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
5920 if (needs_control_head)
5921 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
5922
5923 if (ket == OP_KETRMAX)
5924 {
5925 /* TMP2 which is set here used by OP_KETRMAX below. */
5926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5927 }
5928 }
5929 if (needs_control_head)
5930 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
5931 }
5932
5933 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
5934 {
5935 DEFINE_COMPILER;
5936
5937 if (common->capture_last_ptr != 0)
5938 {
5939 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5940 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
5941 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
5942 stacksize++;
5943 }
5944 if (common->optimized_cbracket[offset >> 1] == 0)
5945 {
5946 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5947 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5948 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
5949 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5950 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
5951 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5952 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5953 stacksize += 2;
5954 }
5955 return stacksize;
5956 }
5957
5958 /*
5959 Handling bracketed expressions is probably the most complex part.
5960
5961 Stack layout naming characters:
5962 S - Push the current STR_PTR
5963 0 - Push a 0 (NULL)
5964 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5965 before the next alternative. Not pushed if there are no alternatives.
5966 M - Any values pushed by the current alternative. Can be empty, or anything.
5967 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5968 L - Push the previous local (pointed by localptr) to the stack
5969 () - optional values stored on the stack
5970 ()* - optional, can be stored multiple times
5971
5972 The following list shows the regular expression templates, their PCRE byte codes
5973 and stack layout supported by pcre-sljit.
5974
5975 (?:) OP_BRA | OP_KET A M
5976 () OP_CBRA | OP_KET C M
5977 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5978 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5979 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5980 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5981 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5982 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5983 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5984 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5985 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5986 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5987 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5988 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5989 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5990 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5991 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5992 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5993 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5994 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5995 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5996 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5997
5998
5999 Stack layout naming characters:
6000 A - Push the alternative index (starting from 0) on the stack.
6001 Not pushed if there are no alternatives.
6002 M - Any values pushed by the current alternative. Can be empty, or anything.
6003
6004 The next list shows the possible content of a bracket:
6005 (|) OP_*BRA | OP_ALT ... M A
6006 (?()|) OP_*COND | OP_ALT M A
6007 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6008 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6009 Or nothing, if trace is unnecessary
6010 */
6011
/* Generates the matching path of a bracketed group.

  common   compiler state
  cc       points to the opening opcode of the group, or to an OP_BRAZERO /
           OP_BRAMINZERO prefix in front of it
  parent   backtrack_common of the enclosing construct; it is cast to
           braminzero_backtrack when the prefix is OP_BRAMINZERO
           (NOTE(review): it is presumably also used by the PUSH_BACKTRACK
           macro, which is defined elsewhere - confirm)

The function determines the closing opcode (OP_KET, OP_KETRMAX, OP_KETRMIN, or
a counted repeat described by the private data of the OP_KET), emits the
stores needed before the first alternative, compiles the first alternative
with compile_matchingpath(), and emits the stores, loop jumps and zero-length
checks that follow the group.

Returns the code pointer after the group (advanced by repeat_length), or NULL
when sljit_get_compiler_error() reports an error. */
6012 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6013 {
6014 DEFINE_COMPILER;
6015 backtrack_common *backtrack;
6016 pcre_uchar opcode;
6017 int private_data_ptr = 0;
6018 int offset = 0;
6019 int i, stacksize;
6020 int repeat_ptr = 0, repeat_length = 0;
6021 int repeat_type = 0, repeat_count = 0;
6022 pcre_uchar *ccbegin;
6023 pcre_uchar *matchingpath;
6024 pcre_uchar *slot;
6025 pcre_uchar bra = OP_BRA;
6026 pcre_uchar ket;
6027 assert_backtrack *assert;
6028 BOOL has_alternatives;
6029 BOOL needs_control_head = FALSE;
6030 struct sljit_jump *jump;
6031 struct sljit_jump *skip;
6032 struct sljit_label *rmax_label = NULL;
6033 struct sljit_jump *braminzero = NULL;
6034
6035 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6036
/* An optional OP_BRAZERO / OP_BRAMINZERO prefix is remembered in bra and skipped. */
6037 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6038 {
6039 bra = *cc;
6040 cc++;
/* NOTE(review): this store is redundant, opcode is assigned again right after this block. */
6041 opcode = *cc;
6042 }
6043
6044 opcode = *cc;
6045 ccbegin = cc;
6046 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6047 ket = *matchingpath;
/* An OP_KET with non-zero private data carries repeat information (pointer,
length, type and count). OP_UPTO and OP_MINUPTO repeats are then treated like
OP_KETRMAX and OP_KETRMIN. */
6048 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6049 {
6050 repeat_ptr = PRIVATE_DATA(matchingpath);
6051 repeat_length = PRIVATE_DATA(matchingpath + 1);
6052 repeat_type = PRIVATE_DATA(matchingpath + 2);
6053 repeat_count = PRIVATE_DATA(matchingpath + 3);
6054 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6055 if (repeat_type == OP_UPTO)
6056 ket = OP_KETRMAX;
6057 if (repeat_type == OP_MINUPTO)
6058 ket = OP_KETRMIN;
6059 }
6060
/* A condition starting with OP_DEF emits no code: the whole group is skipped. */
6061 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6062 {
6063 /* Drop this bracket_backtrack. */
6064 parent->top = backtrack->prev;
6065 return matchingpath + 1 + LINK_SIZE + repeat_length;
6066 }
6067
6068 matchingpath = ccbegin + 1 + LINK_SIZE;
6069 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6070 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Advance cc by the link stored after the opening opcode; the code below
checks whether it now points to an OP_ALT. */
6071 cc += GET(cc, 1);
6072
6073 has_alternatives = *cc == OP_ALT;
6074 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6075 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
6076
/* A repeated OP_COND is handled as OP_SCOND, and OP_ONCE_NC is handled as OP_ONCE. */
6077 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6078 opcode = OP_SCOND;
6079 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6080 opcode = OP_ONCE;
6081
6082 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6083 {
6084 /* Capturing brackets has a pre-allocated space. */
6085 offset = GET2(ccbegin, 1 + LINK_SIZE);
6086 if (common->optimized_cbracket[offset] == 0)
6087 {
6088 private_data_ptr = OVECTOR_PRIV(offset);
6089 offset <<= 1;
6090 }
6091 else
6092 {
6093 offset <<= 1;
6094 private_data_ptr = OVECTOR(offset);
6095 }
6096 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6097 matchingpath += IMM2_SIZE;
6098 }
6099 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6100 {
6101 /* Other brackets simply allocate the next entry. */
6102 private_data_ptr = PRIVATE_DATA(ccbegin);
6103 SLJIT_ASSERT(private_data_ptr != 0);
6104 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6105 if (opcode == OP_ONCE)
6106 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6107 }
6108
6109 /* Instructions before the first alternative. */
/* First the required slots are counted and allocated, then they are filled:
a 0 for repeated groups and the current STR_PTR for OP_BRAZERO. */
6110 stacksize = 0;
6111 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6112 stacksize++;
6113 if (bra == OP_BRAZERO)
6114 stacksize++;
6115
6116 if (stacksize > 0)
6117 allocate_stack(common, stacksize);
6118
6119 stacksize = 0;
6120 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6121 {
6122 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6123 stacksize++;
6124 }
6125
6126 if (bra == OP_BRAZERO)
6127 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6128
6129 if (bra == OP_BRAMINZERO)
6130 {
6131 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6132 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6133 if (ket != OP_KETRMIN)
6134 {
6135 free_stack(common, 1);
6136 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6137 }
6138 else
6139 {
6140 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6141 {
6142 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6143 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6144 /* Nothing stored during the first run. */
6145 skip = JUMP(SLJIT_JUMP);
6146 JUMPHERE(jump);
6147 /* Checking zero-length iteration. */
6148 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6149 {
6150 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6151 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6152 }
6153 else
6154 {
6155 /* Except when the whole stack frame must be saved. */
6156 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6157 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6158 }
6159 JUMPHERE(skip);
6160 }
6161 else
6162 {
6163 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6164 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6165 JUMPHERE(jump);
6166 }
6167 }
6168 }
6169
/* Counted repeats: store the initial count in the counter at repeat_ptr.
For OP_EXACT the loop label is placed right after the initialization. */
6170 if (repeat_type != 0)
6171 {
6172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6173 if (repeat_type == OP_EXACT)
6174 rmax_label = LABEL();
6175 }
6176
6177 if (ket == OP_KETRMIN)
6178 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6179
6180 if (ket == OP_KETRMAX)
6181 {
6182 rmax_label = LABEL();
6183 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6184 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
6185 }
6186
6187 /* Handling capturing brackets and alternatives. */
6188 if (opcode == OP_ONCE)
6189 {
6190 stacksize = 0;
6191 if (needs_control_head)
6192 {
6193 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6194 stacksize++;
6195 }
6196
6197 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6198 {
6199 /* Neither capturing brackets nor recursions are found in the block. */
6200 if (ket == OP_KETRMIN)
6201 {
6202 stacksize += 2;
6203 if (!needs_control_head)
6204 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6205 }
6206 else
6207 {
6208 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6210 if (ket == OP_KETRMAX || has_alternatives)
6211 stacksize++;
6212 }
6213
6214 if (stacksize > 0)
6215 allocate_stack(common, stacksize);
6216
6217 stacksize = 0;
6218 if (needs_control_head)
6219 {
6220 stacksize++;
6221 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6222 }
6223
6224 if (ket == OP_KETRMIN)
6225 {
6226 if (needs_control_head)
6227 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6229 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6230 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6231 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6232 }
6233 else if (ket == OP_KETRMAX || has_alternatives)
6234 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6235 }
6236 else
6237 {
/* A frame is required: the control head (if needed), the optional STR_PTR,
the previous private data value and the frame itself are stored. */
6238 if (ket != OP_KET || has_alternatives)
6239 stacksize++;
6240
6241 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6242 allocate_stack(common, stacksize);
6243
6244 if (needs_control_head)
6245 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6246
6247 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6248 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6249
6250 stacksize = needs_control_head ? 1 : 0;
6251 if (ket != OP_KET || has_alternatives)
6252 {
6253 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6254 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6255 stacksize++;
6256 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6257 }
6258 else
6259 {
6260 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6261 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6262 }
6263 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6264 }
6265 }
6266 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6267 {
6268 /* Saving the previous values. */
6269 if (common->optimized_cbracket[offset >> 1] != 0)
6270 {
6271 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6272 allocate_stack(common, 2);
6273 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6274 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6275 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6276 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6277 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6278 }
6279 else
6280 {
6281 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6282 allocate_stack(common, 1);
6283 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6284 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6285 }
6286 }
6287 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6288 {
6289 /* Saving the previous value. */
6290 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6291 allocate_stack(common, 1);
6292 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6293 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6294 }
6295 else if (has_alternatives)
6296 {
6297 /* Pushing the starting string pointer. */
6298 allocate_stack(common, 1);
6299 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6300 }
6301
6302 /* Generating code for the first alternative. */
6303 if (opcode == OP_COND || opcode == OP_SCOND)
6304 {
/* OP_CREF: jump to the condfailed list when the referenced OVECTOR entry
equals OVECTOR(1). */
6305 if (*matchingpath == OP_CREF)
6306 {
6307 SLJIT_ASSERT(has_alternatives);
6308 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6309 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6310 matchingpath += 1 + IMM2_SIZE;
6311 }
/* OP_DNCREF: the differences between OVECTOR(1) and the OVECTOR entries of
the i name table slots are OR-ed together; the jump to condfailed is taken
when the result is zero, i.e. every entry equals OVECTOR(1). STR_PTR is used
as a scratch register and is preserved in TMP3. */
6312 else if (*matchingpath == OP_DNCREF)
6313 {
6314 SLJIT_ASSERT(has_alternatives);
6315
6316 i = GET2(matchingpath, 1 + IMM2_SIZE);
6317 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
6318 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6319 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6320 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
6321 slot += common->name_entry_size;
6322 i--;
6323 while (i-- > 0)
6324 {
6325 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
6326 OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
6327 slot += common->name_entry_size;
6328 }
6329 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6330 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO));
6331 matchingpath += 1 + 2 * IMM2_SIZE;
6332 }
/* OP_RREF / OP_DNRREF: the condition is evaluated while compiling, using
common->currententry. No code is emitted for the test; stacksize is reused
as the result (non-zero selects the first branch). */
6333 else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF)
6334 {
6335 /* Never has other case. */
6336 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6337 SLJIT_ASSERT(!has_alternatives);
6338
6339 if (*matchingpath == OP_RREF)
6340 {
6341 stacksize = GET2(matchingpath, 1);
6342 if (common->currententry == NULL)
6343 stacksize = 0;
6344 else if (stacksize == RREF_ANY)
6345 stacksize = 1;
6346 else if (common->currententry->start == 0)
6347 stacksize = stacksize == 0;
6348 else
6349 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6350
6351 if (stacksize != 0)
6352 matchingpath += 1 + IMM2_SIZE;
6353 }
6354 else
6355 {
6356 if (common->currententry == NULL || common->currententry->start == 0)
6357 stacksize = 0;
6358 else
6359 {
/* Search the name table slots for the group number read at currententry->start;
stacksize stays non-zero only when one of them matches. */
6360 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
6361 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
6362 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6363 while (stacksize > 0)
6364 {
6365 if (GET2(slot, 0) == i)
6366 break;
6367 slot += common->name_entry_size;
6368 stacksize--;
6369 }
6370 }
6371
6372 if (stacksize != 0)
6373 matchingpath += 1 + 2 * IMM2_SIZE;
6374 }
6375
6376 /* The stacksize == 0 is a common "else" case. */
6377 if (stacksize == 0)
6378 {
6379 if (*cc == OP_ALT)
6380 {
6381 matchingpath = cc + 1 + LINK_SIZE;
6382 cc += GET(cc, 1);
6383 }
6384 else
6385 matchingpath = cc;
6386 }
6387 }
6388 else
6389 {
/* The condition is an assertion: it is compiled by compile_assert_matchingpath()
with a separately allocated assert_backtrack. */
6390 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6391 /* Similar code as PUSH_BACKTRACK macro. */
6392 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6393 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6394 return NULL;
6395 memset(assert, 0, sizeof(assert_backtrack));
6396 assert->common.cc = matchingpath;
6397 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6398 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6399 }
6400 }
6401
6402 compile_matchingpath(common, matchingpath, cc, backtrack);
6403 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6404 return NULL;
6405
6406 if (opcode == OP_ONCE)
6407 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6408
/* Stores after the first alternative. The slots are counted first and
allocated in one step; the second pass below fills them in the same order. */
6409 stacksize = 0;
6410 if (repeat_type == OP_MINUPTO)
6411 {
6412 /* We need to preserve the counter. TMP2 will be used below. */
6413 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6414 stacksize++;
6415 }
6416 if (ket != OP_KET || bra != OP_BRA)
6417 stacksize++;
6418 if (offset != 0)
6419 {
6420 if (common->capture_last_ptr != 0)
6421 stacksize++;
6422 if (common->optimized_cbracket[offset >> 1] == 0)
6423 stacksize += 2;
6424 }
6425 if (has_alternatives && opcode != OP_ONCE)
6426 stacksize++;
6427
6428 if (stacksize > 0)
6429 allocate_stack(common, stacksize);
6430
6431 stacksize = 0;
6432 if (repeat_type == OP_MINUPTO)
6433 {
6434 /* TMP2 was set above. */
6435 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
6436 stacksize++;
6437 }
6438
6439 if (ket != OP_KET || bra != OP_BRA)
6440 {
6441 if (ket != OP_KET)
6442 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6443 else
6444 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6445 stacksize++;
6446 }
6447
6448 if (offset != 0)
6449 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6450
6451 if (has_alternatives)
6452 {
/* The 0 stored here is the index of the first alternative (see "A" in the
second layout table above this function). */
6453 if (opcode != OP_ONCE)
6454 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6455 if (ket != OP_KETRMAX)
6456 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6457 }
6458
6459 /* Must be after the matchingpath label. */
6460 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6461 {
6462 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6463 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6464 }
6465
6466 if (ket == OP_KETRMAX)
6467 {
6468 if (repeat_type != 0)
6469 {
6470 if (has_alternatives)
6471 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
/* Decrease the repeat counter and loop while it is non-zero. */
6472 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6473 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6474 /* Drop STR_PTR for greedy plus quantifier. */
6475 if (opcode != OP_ONCE)
6476 free_stack(common, 1);
6477 }
6478 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
6479 {
6480 if (has_alternatives)
6481 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6482 /* Checking zero-length iteration. */
6483 if (opcode != OP_ONCE)
6484 {
6485 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label);
6486 /* Drop STR_PTR for greedy plus quantifier. */
6487 if (bra != OP_BRAZERO)
6488 free_stack(common, 1);
6489 }
6490 else
6491 /* TMP2 must contain the starting STR_PTR. */
6492 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
6493 }
6494 else
6495 JUMPTO(SLJIT_JUMP, rmax_label);
6496 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6497 }
6498
6499 if (repeat_type == OP_EXACT)
6500 {
/* Exact repeat: decrease the counter and jump back to rmax_label while it is non-zero. */
6501 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6502 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6503 }
6504 else if (repeat_type == OP_UPTO)
6505 {
6506 /* We need to preserve the counter. */
6507 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6508 allocate_stack(common, 1);
6509 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6510 }
6511
6512 if (bra == OP_BRAZERO)
6513 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6514
6515 if (bra == OP_BRAMINZERO)
6516 {
6517 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6518 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6519 if (braminzero != NULL)
6520 {
6521 JUMPHERE(braminzero);
6522 /* We need to release the end pointer to perform the
6523 backtrack for the zero-length iteration. When
6524 framesize is < 0, OP_ONCE will do the release itself. */
6525 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6526 {
6527 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6528 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6529 }
6530 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6531 free_stack(common, 1);
6532 }
6533 /* Continue to the normal backtrack. */
6534 }
6535
6536 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6537 count_match(common);
6538
6539 /* Skip the other alternatives. */
6540 while (*cc == OP_ALT)
6541 cc += GET(cc, 1);
6542 cc += 1 + LINK_SIZE;
6543
6544 /* Temporarily encoding the needs_control_head in framesize. */
/* Bit 0 holds needs_control_head, the remaining bits hold the original framesize. */
6545 if (opcode == OP_ONCE)
6546 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6547 return cc + repeat_length;
6548 }
6549
6550 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6551 {
6552 DEFINE_COMPILER;
6553 backtrack_common *backtrack;
6554 pcre_uchar opcode;
6555 int private_data_ptr;
6556 int cbraprivptr = 0;
6557 BOOL needs_control_head;
6558 int framesize;
6559 int stacksize;
6560 int offset = 0;
6561 BOOL zero = FALSE;
6562 pcre_uchar *ccbegin = NULL;
6563 int stack; /* Also contains the offset of control head. */
6564 struct sljit_label *loop = NULL;
6565 struct jump_list *emptymatch = NULL;
6566
6567 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6568 if (*cc == OP_BRAPOSZERO)
6569 {
6570 zero = TRUE;
6571 cc++;
6572 }
6573
6574 opcode = *cc;
6575 private_data_ptr = PRIVATE_DATA(cc);
6576 SLJIT_ASSERT(private_data_ptr != 0);
6577 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6578 switch(opcode)
6579 {
6580 case OP_BRAPOS:
6581 case OP_SBRAPOS:
6582 ccbegin = cc + 1 + LINK_SIZE;
6583 break;
6584
6585 case OP_CBRAPOS:
6586 case OP_SCBRAPOS:
6587 offset = GET2(cc, 1 + LINK_SIZE);
6588 /* This case cannot be optimized in the same was as
6589 normal capturing brackets. */
6590 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6591 cbraprivptr = OVECTOR_PRIV(offset);
6592 offset <<= 1;
6593 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6594 break;
6595
6596 default:
6597 SLJIT_ASSERT_STOP();
6598 break;
6599 }
6600
6601 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6602 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6603 if (framesize < 0)
6604 {
6605 if (offset != 0)
6606 {
6607 stacksize = 2;
6608 if (common->capture_last_ptr != 0)
6609 stacksize++;
6610 }
6611 else
6612 stacksize = 1;
6613
6614 if (needs_control_head)
6615 stacksize++;
6616 if (!zero)
6617 stacksize++;
6618
6619 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6620 allocate_stack(common, stacksize);
6621 if (framesize == no_frame)
6622 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6623
6624 stack = 0;
6625 if (offset != 0)
6626 {
6627 stack = 2;
6628 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6629 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6630 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6631 if (common->capture_last_ptr != 0)
6632 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6633 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6634 if (needs_control_head)
6635 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6636 if (common->capture_last_ptr != 0)
6637 {
6638 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6639 stack = 3;
6640 }
6641 }
6642 else
6643 {
6644 if (needs_control_head)
6645 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6646 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6647 stack = 1;
6648 }
6649
6650 if (needs_control_head)
6651 stack++;
6652 if (!zero)
6653 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
6654 if (needs_control_head)
6655 {
6656 stack--;
6657 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6658 }
6659 }
6660 else
6661 {
6662 stacksize = framesize + 1;
6663 if (!zero)
6664 stacksize++;
6665 if (needs_control_head)
6666 stacksize++;
6667 if (offset == 0)
6668 stacksize++;
6669 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6670
6671 allocate_stack(common, stacksize);
6672 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6673 if (needs_control_head)
6674 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6675 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6676
6677 stack = 0;
6678 if (!zero)
6679 {
6680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6681 stack = 1;
6682 }
6683 if (needs_control_head)
6684 {
6685 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6686 stack++;
6687 }
6688 if (offset == 0)
6689 {
6690 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6691 stack++;
6692 }
6693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6694 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
6695 stack -= 1 + (offset == 0);
6696 }
6697
6698 if (offset != 0)
6699 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6700
6701 loop = LABEL();
6702 while (*cc != OP_KETRPOS)
6703 {
6704 backtrack->top = NULL;
6705 backtrack->topbacktracks = NULL;
6706 cc += GET(cc, 1);
6707
6708 compile_matchingpath(common, ccbegin, cc, backtrack);
6709 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6710 return NULL;
6711
6712 if (framesize < 0)
6713 {
6714 if (framesize == no_frame)
6715 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6716
6717 if (offset != 0)
6718 {
6719 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6720 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6721 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6722 if (common->capture_last_ptr != 0)
6723 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6725 }
6726 else
6727 {
6728 if (opcode == OP_SBRAPOS)
6729 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6730 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6731 }
6732
6733 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6734 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6735
6736 if (!zero)
6737 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6738 }