/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1377 - (show annotations)
Sun Oct 13 20:44:30 2013 UTC (6 years ago) by zherczeg
File MIME type: text/plain
File size: 307056 byte(s)
Matching 0x85 and 0x180e to \s in JIT when UCP is used.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for the stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check whether the allocation could destroy temporaries. Only defined when SLJIT_DEBUG is non-zero. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary of the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block handed by pointer to a compiled matcher (it is the sole
parameter of the jit_function type). The pointer members are declared
ahead of the integer and byte sized members. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
/* NOTE(review): the four byte members presumably mirror the PCRE_NOTBOL,
PCRE_NOTEOL, PCRE_NOTEMPTY and PCRE_NOTEMPTY_ATSTART exec options - confirm
against the code that fills this structure. */
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Holds one executable function pointer and one code size for each of the
JIT_NUMBER_OF_COMPILE_MODES compile modes, together with a callback, its
user data, and the top_bracket and limit_match values. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 pcre_uint32 limit_match;
186 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187 } executable_functions;
188
/* Node of a singly linked list of sljit jumps. */
189 typedef struct jump_list {
190 struct sljit_jump *jump;
191 struct jump_list *next;
192 } jump_list;
193
/* Node of a singly linked list of stubs; each stub records a start jump
and a quit label. */
194 typedef struct stub_list {
195 struct sljit_jump *start;
196 struct sljit_label *quit;
197 struct stub_list *next;
198 } stub_list;
199
/* Negative sentinel values. NOTE(review): presumably used in place of a
frame size (the framesize members below are documented as negative when
no frame is needed) - confirm against get_framesize. */
200 enum frame_types {
201 no_frame = -1,
202 no_stack = -2
203 };
204
/* Type tags for a mark and for a then-trap. NOTE(review): presumably stored
in the control verb chain (see control_head_ptr) - confirm at the users. */
205 enum control_types {
206 type_mark = 0,
207 type_then_trap = 1
208 };
209
/* Pointer type of a function that takes a jit_arguments block and returns
an int, declared with the SLJIT_CALL calling convention. */
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type for the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the arguments for compile_backtrackingpath. Must be the first member
215 of its descendants (the *_backtrack structures embed it as 'common'). */
216 typedef struct backtrack_common {
217 /* Concatenation stack. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
/* Backtrack data for asserts; extends backtrack_common. */
227 typedef struct assert_backtrack {
228 backtrack_common common;
229 jump_list *condfailed;
230 /* Less than 0 if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
/* Backtrack data for brackets; extends backtrack_common. The member of the
union 'u' that is valid depends on the bracket opcode, as noted per member. */
238 typedef struct bracket_backtrack {
239 backtrack_common common;
240 /* Where to continue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. Less than 0 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
/* Backtrack data extending backtrack_common. NOTE(review): judging by the
name this belongs to the possessive (*POS) brackets - confirm at the users. */
258 typedef struct bracketpos_backtrack {
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
/* Backtrack data extending backtrack_common with a single matching path
label. NOTE(review): presumably used for OP_BRAMINZERO - confirm. */
268 typedef struct braminzero_backtrack {
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
/* Backtrack data for iterators; extends backtrack_common. */
273 typedef struct iterator_backtrack {
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
/* Node of a singly linked list describing one recursion target. */
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls until the function is created. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 sljit_sw start;
287 } recurse_entry;
288
/* Backtrack data extending backtrack_common with an inlined_pattern flag.
NOTE(review): presumably set when the recursed pattern is compiled in place
rather than called - confirm at the OP_RECURSE code. */
289 typedef struct recurse_backtrack {
290 backtrack_common common;
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
/* Opcode value used for then-trap entries; it is defined as OP_TABLE_LENGTH.
NOTE(review): presumably OP_TABLE_LENGTH lies one past the last real opcode,
so the value cannot clash with one - confirm in pcre_internal.h. */
294 #define OP_THEN_TRAP OP_TABLE_LENGTH
295
/* Backtrack data for a then-trap; extends backtrack_common. */
296 typedef struct then_trap_backtrack {
297 backtrack_common common;
298 /* If then_trap is not NULL, this structure contains the real
299 then_trap for the backtracking path. */
300 struct then_trap_backtrack *then_trap;
301 /* Points to the starting opcode. */
302 sljit_sw start;
303 /* Exit point for the then opcodes of this alternative. */
304 jump_list *quit;
305 /* Frame size of the current alternative. */
306 int framesize;
307 } then_trap_backtrack;
308
/* Sizes the digits[] member of compiler_common (2 + MAX_RANGE_SIZE entries). */
309 #define MAX_RANGE_SIZE 6
310
/* State of one compilation. The routines of this file receive it through a
parameter named 'common'; several macros (OVECTOR, PRIVATE_DATA,
DEFINE_COMPILER) rely on that name. The utf, use_ucp, utfreadchar,
utfreadtype8 and getucd members exist only in the matching SUPPORT_UTF /
SUPPORT_UCP / COMPILE_PCRE* configurations. */
311 typedef struct compiler_common {
312 /* The sljit generic compiler. */
313 struct sljit_compiler *compiler;
314 /* First byte code. */
315 pcre_uchar *start;
316 /* Maps private data offset to each opcode. */
317 sljit_si *private_data_ptrs;
318 /* Tells whether the capturing bracket is optimized. */
319 pcre_uint8 *optimized_cbracket;
320 /* Tells whether the starting offset is a target of then. */
321 pcre_uint8 *then_offsets;
322 /* Current position where a THEN must jump. */
323 then_trap_backtrack *then_trap;
324 /* Starting offset of private data for capturing brackets. */
325 int cbra_ptr;
326 /* Output vector starting point. Must be divisible by 2. */
327 int ovector_start;
328 /* Last known position of the requested byte. */
329 int req_char_ptr;
330 /* Head of the last recursion. */
331 int recursive_head_ptr;
332 /* First inspected character for partial matching. */
333 int start_used_ptr;
334 /* Starting pointer for partial soft matches. */
335 int hit_start;
336 /* End pointer of the first line. */
337 int first_line_end;
338 /* Points to the marked string. */
339 int mark_ptr;
340 /* Recursive control verb management chain. */
341 int control_head_ptr;
342 /* Points to the last matched capture block index. */
343 int capture_last_ptr;
344 /* Points to the starting position of the current match. */
345 int start_ptr;
346
347 /* Flipped and lower case tables. */
348 const pcre_uint8 *fcc;
349 sljit_sw lcc;
350 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351 int mode;
352 /* \K is found in the pattern. */
353 BOOL has_set_som;
354 /* (*SKIP:arg) is found in the pattern. */
355 BOOL has_skip_arg;
356 /* (*THEN) is found in the pattern. */
357 BOOL has_then;
358 /* Needs to know the start position anytime. */
359 BOOL needs_start_ptr;
360 /* Currently in recurse or negative assert. */
361 BOOL local_exit;
362 /* Currently in a positive assert. */
363 BOOL positive_assert;
364 /* Newline control. */
365 int nltype;
366 int newline;
367 int bsr_nltype;
368 /* Dollar endonly. */
369 int endonly;
370 /* Tables. */
371 sljit_sw ctypes;
372 int digits[2 + MAX_RANGE_SIZE];
373 /* Named capturing brackets. */
374 pcre_uchar *name_table;
375 sljit_sw name_count;
376 sljit_sw name_entry_size;
377
378 /* Labels and jump lists. */
379 struct sljit_label *partialmatchlabel;
380 struct sljit_label *quit_label;
381 struct sljit_label *forced_quit_label;
382 struct sljit_label *accept_label;
383 stub_list *stubs;
384 recurse_entry *entries;
385 recurse_entry *currententry;
386 jump_list *partialmatch;
387 jump_list *quit;
388 jump_list *positive_assert_quit;
389 jump_list *forced_quit;
390 jump_list *accept;
391 jump_list *calllimit;
392 jump_list *stackalloc;
393 jump_list *revertframes;
394 jump_list *wordboundary;
395 jump_list *anynewline;
396 jump_list *hspace;
397 jump_list *vspace;
398 jump_list *casefulcmp;
399 jump_list *caselesscmp;
400 jump_list *reset_match;
401 BOOL jscript_compat;
402 #ifdef SUPPORT_UTF
403 BOOL utf;
404 #ifdef SUPPORT_UCP
405 BOOL use_ucp;
406 #endif
407 #ifndef COMPILE_PCRE32
408 jump_list *utfreadchar;
409 #endif
410 #ifdef COMPILE_PCRE8
411 jump_list *utfreadtype8;
412 #endif
413 #endif /* SUPPORT_UTF */
414 #ifdef SUPPORT_UCP
415 jump_list *getucd;
416 #endif
417 } compiler_common;
418
419 /* For byte_sequence_compare. The ucharptr, c and oc members exist only when
SLJIT_UNALIGNED is enabled. The c and oc unions have the same layout: an int,
an unsigned short and an array of code units (4, 2 or 1 entries for the
8, 16 and 32 bit libraries) sharing storage. */
420
421 typedef struct compare_context {
422 int length;
423 int sourcereg;
424 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
425 int ucharptr;
426 union {
427 sljit_si asint;
428 sljit_uh asushort;
429 #if defined COMPILE_PCRE8
430 sljit_ub asbyte;
431 sljit_ub asuchars[4];
432 #elif defined COMPILE_PCRE16
433 sljit_uh asuchars[2];
434 #elif defined COMPILE_PCRE32
435 sljit_ui asuchars[1];
436 #endif
437 } c;
438 union {
439 sljit_si asint;
440 sljit_uh asushort;
441 #if defined COMPILE_PCRE8
442 sljit_ub asbyte;
443 sljit_ub asuchars[4];
444 #elif defined COMPILE_PCRE16
445 sljit_uh asuchars[2];
446 #elif defined COMPILE_PCRE32
447 sljit_ui asuchars[1];
448 #endif
449 } oc;
450 #endif
451 } compare_context;
452
453 /* Undefine sljit macros. CMP is defined again further down as a wrapper around sljit_emit_cmp. */
454 #undef CMP
455
456 /* Used for accessing the elements of the stack. The result is a negative
byte offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw). */
457 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
458
/* Symbolic names for the sljit registers used by this file. */
459 #define TMP1 SLJIT_SCRATCH_REG1
460 #define TMP2 SLJIT_SCRATCH_REG3
461 #define TMP3 SLJIT_TEMPORARY_EREG2
462 #define STR_PTR SLJIT_SAVED_REG1
463 #define STR_END SLJIT_SAVED_REG2
464 #define STACK_TOP SLJIT_SCRATCH_REG2
465 #define STACK_LIMIT SLJIT_SAVED_REG3
466 #define ARGUMENTS SLJIT_SAVED_EREG1
467 #define COUNT_MATCH SLJIT_SAVED_EREG2
468 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
469
470 /* Local space layout. All values are byte offsets in units of sizeof(sljit_sw). */
471 /* These two locals can be used by the current opcode. */
472 #define LOCALS0 (0 * sizeof(sljit_sw))
473 #define LOCALS1 (1 * sizeof(sljit_sw))
474 /* Two local variables for possessive quantifiers (char1 cannot use them). */
475 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
476 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
477 /* Max limit of recursions. */
478 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
479 /* The output vector is stored on the stack, and contains pointers
480 to characters. The vector data is divided into two groups: the first
481 group contains the start / end character pointers, and the second is
482 the start pointers when the end of the capturing group has not yet been reached.
The four macros below expand to expressions that use a variable named
'common' (a compiler_common pointer), which must be in scope at the use site. */
483 #define OVECTOR_START (common->ovector_start)
484 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
485 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
486 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
487
/* Select the sljit move opcodes for one code unit: the _UB variants for the
8 bit library, _UH for the 16 bit library and _UI for the 32 bit library.
Compilation stops with an error when none of the three modes is defined. */
488 #if defined COMPILE_PCRE8
489 #define MOV_UCHAR SLJIT_MOV_UB
490 #define MOVU_UCHAR SLJIT_MOVU_UB
491 #elif defined COMPILE_PCRE16
492 #define MOV_UCHAR SLJIT_MOV_UH
493 #define MOVU_UCHAR SLJIT_MOVU_UH
494 #elif defined COMPILE_PCRE32
495 #define MOV_UCHAR SLJIT_MOV_UI
496 #define MOVU_UCHAR SLJIT_MOVU_UI
497 #else
498 #error Unsupported compiling mode
499 #endif
500
501 /* Shortcuts. DEFINE_COMPILER declares a local variable 'compiler' taken from
common->compiler; every other macro here forwards to an sljit function and
passes that 'compiler' variable as the first argument, so it must be in scope. */
502 #define DEFINE_COMPILER \
503 struct sljit_compiler *compiler = common->compiler
504 #define OP1(op, dst, dstw, src, srcw) \
505 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
506 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
507 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
508 #define LABEL() \
509 sljit_emit_label(compiler)
510 #define JUMP(type) \
511 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
512 #define JUMPTO(type, label) \
513 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds 'jump' to it. */
514 #define JUMPHERE(jump) \
515 sljit_set_label((jump), sljit_emit_label(compiler))
516 #define SET_LABEL(jump, label) \
517 sljit_set_label((jump), (label))
518 #define CMP(type, src1, src1w, src2, src2w) \
519 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
520 #define CMPTO(type, src1, src1w, src2, src2w, label) \
521 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
522 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
523 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
524 #define GET_LOCAL_BASE(dst, dstw, offset) \
525 sljit_get_local_base(compiler, (dst), (dstw), (offset))
526
527 static pcre_uchar* bracketend(pcre_uchar* cc)
528 {
529 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
530 do cc += GET(cc, 1); while (*cc == OP_ALT);
531 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
532 cc += 1 + LINK_SIZE;
533 return cc;
534 }
535
536 /* Functions which might need modification for every newly supported opcode:
537 next_opcode
538 check_opcode_types
539 set_private_data_ptrs
540 get_framesize
541 init_frame
542 get_private_data_copy_length
543 copy_private_data
544 compile_matchingpath
545 compile_backtrackingpath
546 */
547
548 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
549 {
550 SLJIT_UNUSED_ARG(common);
551 switch(*cc)
552 {
553 case OP_SOD:
554 case OP_SOM:
555 case OP_SET_SOM:
556 case OP_NOT_WORD_BOUNDARY:
557 case OP_WORD_BOUNDARY:
558 case OP_NOT_DIGIT:
559 case OP_DIGIT:
560 case OP_NOT_WHITESPACE:
561 case OP_WHITESPACE:
562 case OP_NOT_WORDCHAR:
563 case OP_WORDCHAR:
564 case OP_ANY:
565 case OP_ALLANY:
566 case OP_NOTPROP:
567 case OP_PROP:
568 case OP_ANYNL:
569 case OP_NOT_HSPACE:
570 case OP_HSPACE:
571 case OP_NOT_VSPACE:
572 case OP_VSPACE:
573 case OP_EXTUNI:
574 case OP_EODN:
575 case OP_EOD:
576 case OP_CIRC:
577 case OP_CIRCM:
578 case OP_DOLL:
579 case OP_DOLLM:
580 case OP_CRSTAR:
581 case OP_CRMINSTAR:
582 case OP_CRPLUS:
583 case OP_CRMINPLUS:
584 case OP_CRQUERY:
585 case OP_CRMINQUERY:
586 case OP_CRRANGE:
587 case OP_CRMINRANGE:
588 case OP_CLASS:
589 case OP_NCLASS:
590 case OP_REF:
591 case OP_REFI:
592 case OP_DNREF:
593 case OP_DNREFI:
594 case OP_RECURSE:
595 case OP_CALLOUT:
596 case OP_ALT:
597 case OP_KET:
598 case OP_KETRMAX:
599 case OP_KETRMIN:
600 case OP_KETRPOS:
601 case OP_REVERSE:
602 case OP_ASSERT:
603 case OP_ASSERT_NOT:
604 case OP_ASSERTBACK:
605 case OP_ASSERTBACK_NOT:
606 case OP_ONCE:
607 case OP_ONCE_NC:
608 case OP_BRA:
609 case OP_BRAPOS:
610 case OP_CBRA:
611 case OP_CBRAPOS:
612 case OP_COND:
613 case OP_SBRA:
614 case OP_SBRAPOS:
615 case OP_SCBRA:
616 case OP_SCBRAPOS:
617 case OP_SCOND:
618 case OP_CREF:
619 case OP_DNCREF:
620 case OP_RREF:
621 case OP_DNRREF:
622 case OP_DEF:
623 case OP_BRAZERO:
624 case OP_BRAMINZERO:
625 case OP_BRAPOSZERO:
626 case OP_PRUNE:
627 case OP_SKIP:
628 case OP_THEN:
629 case OP_COMMIT:
630 case OP_FAIL:
631 case OP_ACCEPT:
632 case OP_ASSERT_ACCEPT:
633 case OP_CLOSE:
634 case OP_SKIPZERO:
635 return cc + PRIV(OP_lengths)[*cc];
636
637 case OP_CHAR:
638 case OP_CHARI:
639 case OP_NOT:
640 case OP_NOTI:
641 case OP_STAR:
642 case OP_MINSTAR:
643 case OP_PLUS:
644 case OP_MINPLUS:
645 case OP_QUERY:
646 case OP_MINQUERY:
647 case OP_UPTO:
648 case OP_MINUPTO:
649 case OP_EXACT:
650 case OP_POSSTAR:
651 case OP_POSPLUS:
652 case OP_POSQUERY:
653 case OP_POSUPTO:
654 case OP_STARI:
655 case OP_MINSTARI:
656 case OP_PLUSI:
657 case OP_MINPLUSI:
658 case OP_QUERYI:
659 case OP_MINQUERYI:
660 case OP_UPTOI:
661 case OP_MINUPTOI:
662 case OP_EXACTI:
663 case OP_POSSTARI:
664 case OP_POSPLUSI:
665 case OP_POSQUERYI:
666 case OP_POSUPTOI:
667 case OP_NOTSTAR:
668 case OP_NOTMINSTAR:
669 case OP_NOTPLUS:
670 case OP_NOTMINPLUS:
671 case OP_NOTQUERY:
672 case OP_NOTMINQUERY:
673 case OP_NOTUPTO:
674 case OP_NOTMINUPTO:
675 case OP_NOTEXACT:
676 case OP_NOTPOSSTAR:
677 case OP_NOTPOSPLUS:
678 case OP_NOTPOSQUERY:
679 case OP_NOTPOSUPTO:
680 case OP_NOTSTARI:
681 case OP_NOTMINSTARI:
682 case OP_NOTPLUSI:
683 case OP_NOTMINPLUSI:
684 case OP_NOTQUERYI:
685 case OP_NOTMINQUERYI:
686 case OP_NOTUPTOI:
687 case OP_NOTMINUPTOI:
688 case OP_NOTEXACTI:
689 case OP_NOTPOSSTARI:
690 case OP_NOTPOSPLUSI:
691 case OP_NOTPOSQUERYI:
692 case OP_NOTPOSUPTOI:
693 cc += PRIV(OP_lengths)[*cc];
694 #ifdef SUPPORT_UTF
695 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
696 #endif
697 return cc;
698
699 /* Special cases. */
700 case OP_TYPESTAR:
701 case OP_TYPEMINSTAR:
702 case OP_TYPEPLUS:
703 case OP_TYPEMINPLUS:
704 case OP_TYPEQUERY:
705 case OP_TYPEMINQUERY:
706 case OP_TYPEUPTO:
707 case OP_TYPEMINUPTO:
708 case OP_TYPEEXACT:
709 case OP_TYPEPOSSTAR:
710 case OP_TYPEPOSPLUS:
711 case OP_TYPEPOSQUERY:
712 case OP_TYPEPOSUPTO:
713 return cc + PRIV(OP_lengths)[*cc] - 1;
714
715 case OP_ANYBYTE:
716 #ifdef SUPPORT_UTF
717 if (common->utf) return NULL;
718 #endif
719 return cc + 1;
720
721 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
722 case OP_XCLASS:
723 return cc + GET(cc, 1);
724 #endif
725
726 case OP_MARK:
727 case OP_PRUNE_ARG:
728 case OP_SKIP_ARG:
729 case OP_THEN_ARG:
730 return cc + 1 + 2 + cc[1];
731
732 default:
733 /* All opcodes are supported now! */
734 SLJIT_ASSERT_STOP();
735 return NULL;
736 }
737 }
738
739 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
740 {
741 int count;
742 pcre_uchar *slot;
743
744 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
745 while (cc < ccend)
746 {
747 switch(*cc)
748 {
749 case OP_SET_SOM:
750 common->has_set_som = TRUE;
751 cc += 1;
752 break;
753
754 case OP_REF:
755 case OP_REFI:
756 common->optimized_cbracket[GET2(cc, 1)] = 0;
757 cc += 1 + IMM2_SIZE;
758 break;
759
760 case OP_CBRAPOS:
761 case OP_SCBRAPOS:
762 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
763 cc += 1 + LINK_SIZE + IMM2_SIZE;
764 break;
765
766 case OP_COND:
767 case OP_SCOND:
768 /* Only AUTO_CALLOUT can insert this opcode. We do
769 not intend to support this case. */
770 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
771 return FALSE;
772 cc += 1 + LINK_SIZE;
773 break;
774
775 case OP_CREF:
776 common->optimized_cbracket[GET2(cc, 1)] = 0;
777 cc += 1 + IMM2_SIZE;
778 break;
779
780 case OP_DNREF:
781 case OP_DNREFI:
782 case OP_DNCREF:
783 count = GET2(cc, 1 + IMM2_SIZE);
784 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
785 while (count-- > 0)
786 {
787 common->optimized_cbracket[GET2(slot, 0)] = 0;
788 slot += common->name_entry_size;
789 }
790 cc += 1 + 2 * IMM2_SIZE;
791 break;
792
793 case OP_RECURSE:
794 /* Set its value only once. */
795 if (common->recursive_head_ptr == 0)
796 {
797 common->recursive_head_ptr = common->ovector_start;
798 common->ovector_start += sizeof(sljit_sw);
799 }
800 cc += 1 + LINK_SIZE;
801 break;
802
803 case OP_CALLOUT:
804 if (common->capture_last_ptr == 0)
805 {
806 common->capture_last_ptr = common->ovector_start;
807 common->ovector_start += sizeof(sljit_sw);
808 }
809 cc += 2 + 2 * LINK_SIZE;
810 break;
811
812 case OP_THEN_ARG:
813 common->has_then = TRUE;
814 common->control_head_ptr = 1;
815 /* Fall through. */
816
817 case OP_PRUNE_ARG:
818 common->needs_start_ptr = TRUE;
819 /* Fall through. */
820
821 case OP_MARK:
822 if (common->mark_ptr == 0)
823 {
824 common->mark_ptr = common->ovector_start;
825 common->ovector_start += sizeof(sljit_sw);
826 }
827 cc += 1 + 2 + cc[1];
828 break;
829
830 case OP_THEN:
831 common->has_then = TRUE;
832 common->control_head_ptr = 1;
833 /* Fall through. */
834
835 case OP_PRUNE:
836 case OP_SKIP:
837 common->needs_start_ptr = TRUE;
838 cc += 1;
839 break;
840
841 case OP_SKIP_ARG:
842 common->control_head_ptr = 1;
843 common->has_skip_arg = TRUE;
844 cc += 1 + 2 + cc[1];
845 break;
846
847 default:
848 cc = next_opcode(common, cc);
849 if (cc == NULL)
850 return FALSE;
851 break;
852 }
853 }
854 return TRUE;
855 }
856
857 static int get_class_iterator_size(pcre_uchar *cc)
858 {
859 switch(*cc)
860 {
861 case OP_CRSTAR:
862 case OP_CRPLUS:
863 return 2;
864
865 case OP_CRMINSTAR:
866 case OP_CRMINPLUS:
867 case OP_CRQUERY:
868 case OP_CRMINQUERY:
869 return 1;
870
871 case OP_CRRANGE:
872 case OP_CRMINRANGE:
873 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
874 return 0;
875 return 2;
876
877 default:
878 return 0;
879 }
880 }
881
/* Checks whether the bracket that starts at 'begin' is followed by
byte-identical copies of itself, optionally followed by a nest of
OP_BRAZERO / OP_BRAMINZERO prefixed copies. When such a sequence is found,
three consecutive private_data_ptrs entries, starting one code unit after
the closing OP_KET of a copy, receive: a distance, an iterator opcode
(OP_EXACT, OP_UPTO or OP_MINUPTO) and a repeat count.

Returns TRUE when an OP_EXACT record is written for 'begin', when an
OP_UPTO / OP_MINUPTO record is written and no identical copy precedes the
nest (min == 1), or when the first entry for 'begin' is already non-zero.
Returns FALSE otherwise. */
882 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
883 {
884 pcre_uchar *end = bracketend(begin);
885 pcre_uchar *next;
886 pcre_uchar *next_end;
887 pcre_uchar *max_end;
888 pcre_uchar type;
889 sljit_sw length = end - begin;
890 int min, max, i;
891
892 /* Detect fixed iterations first. */
/* Only brackets closed by a plain OP_KET are considered. */
893 if (end[-(1 + LINK_SIZE)] != OP_KET)
894 return FALSE;
895
896 /* Already detected repeat. */
897 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
898 return TRUE;
899
/* Count the brackets that have the same length and the same content as
the first one; min includes the first bracket itself. */
900 next = end;
901 min = 1;
902 while (1)
903 {
904 if (*next != *begin)
905 break;
906 next_end = bracketend(next);
907 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
908 break;
909 next = next_end;
910 min++;
911 }
912
/* Exactly two identical brackets are not handled as a repeat. */
913 if (min == 2)
914 return FALSE;
915
916 max = 0;
917 max_end = next;
918 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
919 {
920 type = *next;
/* Each level of the nest has the form: type, OP_BRA, copy of the bracket. */
921 while (1)
922 {
923 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
924 break;
925 next_end = bracketend(next + 2 + LINK_SIZE);
926 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
927 break;
928 next = next_end;
929 max++;
930 }
931
/* The innermost level has no OP_BRA wrapper: type, copy of the bracket. */
932 if (next[0] == type && next[1] == *begin && max >= 1)
933 {
934 next_end = bracketend(next + 1);
935 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
936 {
/* One OP_KET must follow for each OP_BRA level that was opened. */
937 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
938 if (*next_end != OP_KET)
939 break;
940
941 if (i == max)
942 {
943 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
944 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
945 /* +2 the original and the last. */
946 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
947 if (min == 1)
948 return TRUE;
/* The last identical copy now belongs to the bounded repeat: drop it from
the count and move max_end back by 1 + LINK_SIZE plus the value stored in
the link field in front of max_end. */
949 min--;
950 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
951 }
952 }
953 }
954 }
955
/* Record an exact repeat only when at least three copies remain. */
956 if (min >= 3)
957 {
958 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
959 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
960 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
961 return TRUE;
962 }
963
964 return FALSE;
965 }
966
/* Groups of case labels for the single character iterators, meant to be
placed inside a switch. In set_private_data_ptrs the _1 groups get one
private data word and the _2A / _2B groups get two; the 2B (UPTO) groups
differ from 2A by an additional IMM2_SIZE in the opcode length. */
967 #define CASE_ITERATOR_PRIVATE_DATA_1 \
968 case OP_MINSTAR: \
969 case OP_MINPLUS: \
970 case OP_QUERY: \
971 case OP_MINQUERY: \
972 case OP_MINSTARI: \
973 case OP_MINPLUSI: \
974 case OP_QUERYI: \
975 case OP_MINQUERYI: \
976 case OP_NOTMINSTAR: \
977 case OP_NOTMINPLUS: \
978 case OP_NOTQUERY: \
979 case OP_NOTMINQUERY: \
980 case OP_NOTMINSTARI: \
981 case OP_NOTMINPLUSI: \
982 case OP_NOTQUERYI: \
983 case OP_NOTMINQUERYI:
984
985 #define CASE_ITERATOR_PRIVATE_DATA_2A \
986 case OP_STAR: \
987 case OP_PLUS: \
988 case OP_STARI: \
989 case OP_PLUSI: \
990 case OP_NOTSTAR: \
991 case OP_NOTPLUS: \
992 case OP_NOTSTARI: \
993 case OP_NOTPLUSI:
994
995 #define CASE_ITERATOR_PRIVATE_DATA_2B \
996 case OP_UPTO: \
997 case OP_MINUPTO: \
998 case OP_UPTOI: \
999 case OP_MINUPTOI: \
1000 case OP_NOTUPTO: \
1001 case OP_NOTMINUPTO: \
1002 case OP_NOTUPTOI: \
1003 case OP_NOTMINUPTOI:
1004
/* The same three groups for the character type (OP_TYPE*) iterators. */
1005 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1006 case OP_TYPEMINSTAR: \
1007 case OP_TYPEMINPLUS: \
1008 case OP_TYPEQUERY: \
1009 case OP_TYPEMINQUERY:
1010
1011 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1012 case OP_TYPESTAR: \
1013 case OP_TYPEPLUS:
1014
1015 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1016 case OP_TYPEUPTO: \
1017 case OP_TYPEMINUPTO:
1018
1019 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1020 {
1021 pcre_uchar *cc = common->start;
1022 pcre_uchar *alternative;
1023 pcre_uchar *end = NULL;
1024 int private_data_ptr = *private_data_start;
1025 int space, size, bracketlen;
1026
1027 while (cc < ccend)
1028 {
1029 space = 0;
1030 size = 0;
1031 bracketlen = 0;
1032 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1033 return;
1034
1035 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1036 if (detect_repeat(common, cc))
1037 {
1038 /* These brackets are converted to repeats, so no global
1039 based single character repeat is allowed. */
1040 if (cc >= end)
1041 end = bracketend(cc);
1042 }
1043
1044 switch(*cc)
1045 {
1046 case OP_KET:
1047 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1048 {
1049 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1050 private_data_ptr += sizeof(sljit_sw);
1051 cc += common->private_data_ptrs[cc + 1 - common->start];
1052 }
1053 cc += 1 + LINK_SIZE;
1054 break;
1055
1056 case OP_ASSERT:
1057 case OP_ASSERT_NOT:
1058 case OP_ASSERTBACK:
1059 case OP_ASSERTBACK_NOT:
1060 case OP_ONCE:
1061 case OP_ONCE_NC:
1062 case OP_BRAPOS:
1063 case OP_SBRA:
1064 case OP_SBRAPOS:
1065 case OP_SCOND:
1066 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1067 private_data_ptr += sizeof(sljit_sw);
1068 bracketlen = 1 + LINK_SIZE;
1069 break;
1070
1071 case OP_CBRAPOS:
1072 case OP_SCBRAPOS:
1073 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1074 private_data_ptr += sizeof(sljit_sw);
1075 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1076 break;
1077
1078 case OP_COND:
1079 /* Might be a hidden SCOND. */
1080 alternative = cc + GET(cc, 1);
1081 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1082 {
1083 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1084 private_data_ptr += sizeof(sljit_sw);
1085 }
1086 bracketlen = 1 + LINK_SIZE;
1087 break;
1088
1089 case OP_BRA:
1090 bracketlen = 1 + LINK_SIZE;
1091 break;
1092
1093 case OP_CBRA:
1094 case OP_SCBRA:
1095 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1096 break;
1097
1098 CASE_ITERATOR_PRIVATE_DATA_1
1099 space = 1;
1100 size = -2;
1101 break;
1102
1103 CASE_ITERATOR_PRIVATE_DATA_2A
1104 space = 2;
1105 size = -2;
1106 break;
1107
1108 CASE_ITERATOR_PRIVATE_DATA_2B
1109 space = 2;
1110 size = -(2 + IMM2_SIZE);
1111 break;
1112
1113 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1114 space = 1;
1115 size = 1;
1116 break;
1117
1118 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1119 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1120 space = 2;
1121 size = 1;
1122 break;
1123
1124 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1125 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1126 space = 2;
1127 size = 1 + IMM2_SIZE;
1128 break;
1129
1130 case OP_CLASS:
1131 case OP_NCLASS:
1132 size += 1 + 32 / sizeof(pcre_uchar);
1133 space = get_class_iterator_size(cc + size);
1134 break;
1135
1136 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1137 case OP_XCLASS:
1138 size = GET(cc, 1);
1139 space = get_class_iterator_size(cc + size);
1140 break;
1141 #endif
1142
1143 default:
1144 cc = next_opcode(common, cc);
1145 SLJIT_ASSERT(cc != NULL);
1146 break;
1147 }
1148
1149 /* Character iterators, which are not inside a repeated bracket,
1150 gets a private slot instead of allocating it on the stack. */
1151 if (space > 0 && cc >= end)
1152 {
1153 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1154 private_data_ptr += sizeof(sljit_sw) * space;
1155 }
1156
1157 if (size != 0)
1158 {
1159 if (size < 0)
1160 {
1161 cc += -size;
1162 #ifdef SUPPORT_UTF
1163 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1164 #endif
1165 }
1166 else
1167 cc += size;
1168 }
1169
1170 if (bracketlen > 0)
1171 {
1172 if (cc >= end)
1173 {
1174 end = bracketend(cc);
1175 if (end[-1 - LINK_SIZE] == OP_KET)
1176 end = NULL;
1177 }
1178 cc += bracketlen;
1179 }
1180 }
1181 *private_data_start = private_data_ptr;
1182 }
1183
/* Computes the number of stack words needed by the frame of the byte code
range [cc, ccend).

Arguments:
  common              compiler state
  cc                  start of the range; when ccend is NULL this is the
                      opening bracket whose body is scanned
  ccend               end of the range, or NULL to derive it from
                      bracketend(cc)
  recursive           TRUE disables the separate accounting of OP_SET_SOM
                      and mark entries (their "found" flags start as TRUE)
  needs_control_head  out: set to TRUE when a mark-like opcode is found and
                      common->control_head_ptr is in use (or always, when
                      DEBUG_FORCE_CONTROL_HEAD is enabled)

Returns with a frame_types (always < 0) if no need for frame: no_frame when
some opcode in the range requires the stack to be restored, no_stack
otherwise. When a frame is needed, the result is the number of counted words
plus one. The per-opcode counts (2 words for start-of-match, mark and last
capture entries, 3 words per capturing bracket) correspond to what
init_frame stores, and the extra word to its terminating zero. */
static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
{
int length = 0;
/* Length contributed by a leading (S)CBRAPOS itself, see below. */
int possessive = 0;
BOOL stack_restore = FALSE;
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
*needs_control_head = TRUE;
#else
*needs_control_head = FALSE;
#endif

if (ccend == NULL)
  {
  /* Scan the body of the bracket at cc, stopping before its closing ket. */
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
    {
    possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
    /* This is correct regardless of common->capture_last_ptr. */
    capture_last_found = TRUE;
    }
  cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    stack_restore = TRUE;
    /* Counted only once per frame. */
    if (!setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    stack_restore = TRUE;
    /* Counted only once per frame. */
    if (!setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    /* Skip the opcode, the length code unit and the argument, whose
    length is read from cc[1]. */
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* A recursion may change any of the three values, so each one which
    is in use and not counted yet is accounted for. */
    stack_restore = TRUE;
    if (common->has_set_som && !setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    stack_restore = TRUE;
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    /* Three words for every capturing bracket. */
    length += 3;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    /* Any opcode which is not listed explicitly forces a stack restore. */
    default:
    stack_restore = TRUE;
    /* Fall through. */

    /* The opcodes listed below are skipped without setting stack_restore. */
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:

    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:

    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:

    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:

    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:

    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:

    case OP_CLASS:
    case OP_NCLASS:
    case OP_XCLASS:

    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Possessive quantifiers can use a special case. */
/* Nothing was added on top of the words of the leading (S)CBRAPOS (this
also covers the case when both values are zero). */
if (SLJIT_UNLIKELY(possessive == length))
  return stack_restore ? no_frame : no_stack;

/* One more word than the counted entries. */
if (length > 0)
  return length + 1;
return stack_restore ? no_frame : no_stack;
}
1359
/* Emits the machine code which fills a frame on the stack for the byte code
range [cc, ccend). The opcodes are visited in the same way as in
get_framesize, and an entry is stored for every item counted there.

Arguments:
  common     compiler state
  cc         start of the range, or the opening bracket when ccend is NULL
  ccend      end of the range, or NULL to derive it from bracketend(cc)
  stackpos   stack index of the first frame word (converted by STACK())
  stacktop   stack index where the frame must end; only used by assertions
  recursive  TRUE suppresses the separate OP_SET_SOM and mark entries

Layout of the emitted entries:
  - start of match, mark, last capture: a negated local offset followed by
    the current value of that local (2 words);
  - capturing bracket: OVECTOR(offset), followed by the current values of
    OVECTOR(offset) and OVECTOR(offset + 1) (3 words);
  - a single zero word terminates the frame. */
static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
{
DEFINE_COMPILER;
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;
int offset;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

stackpos = STACK(stackpos);
if (ccend == NULL)
  {
  ccend = bracketend(cc) - (1 + LINK_SIZE);
  /* A non-recursive (S)CBRAPOS is not skipped: it is processed by the
  capturing bracket case of the loop below. */
  if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
    cc = next_opcode(common, cc);
  }

SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!setsom_found)
      {
      /* Entry: -OVECTOR(0), value of OVECTOR(0). */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!setmark_found)
      {
      /* Entry: -mark_ptr, value of the mark local. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* Save every value which is in use and has no entry yet. */
    if (common->has_set_som && !setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      /* Entry: -capture_last_ptr, value of the last capture local. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    /* Entry: OVECTOR(offset), then both ovector values of the bracket.
    The bracket number is doubled to get the ovector index. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
    stackpos += (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos += (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos += (int)sizeof(sljit_sw);

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Terminating zero word; the frame must end exactly at stacktop. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
1481
/* Returns the number of machine words which are copied by
copy_private_data for the byte code range [cc, ccend).

Arguments:
  common              compiler state
  cc                  start of the range
  ccend               end of the range; the scan must stop exactly here
  needs_control_head  TRUE adds one more word to the base size

The opcode cases and the per-opcode word counts mirror the "count" values
used by copy_private_data, so the two functions have to be kept in sync. */
static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
{
/* Base size, which does not depend on the scanned opcodes. */
int private_data_length = needs_control_head ? 3 : 2;
int size;
pcre_uchar *alternative;
/* Calculate the sum of the private machine words. */
while (cc < ccend)
  {
  size = 0;
  switch(*cc)
    {
    case OP_KET:
    /* Only a ket which has a private data slot is counted. */
    if (PRIVATE_DATA(cc) != 0)
      private_data_length++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    private_data_length++;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    /* Counted only when optimized_cbracket[] is zero for this bracket. */
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      private_data_length++;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    private_data_length += 2;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      private_data_length++;
    cc += 1 + LINK_SIZE;
    break;

    /* Character iterators: counted only when they own a private slot. In
    UTF mode the extra code units of the character are skipped as well. */
    CASE_ITERATOR_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc))
      private_data_length++;
    cc += 2;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc))
      private_data_length += 2;
    cc += 2;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc))
      private_data_length += 2;
    cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
    break;

    /* Type iterators: only the iterator opcode (and its count, if any) is
    skipped here; the following type opcode is handled by a later
    iteration of the loop. */
    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    if (PRIVATE_DATA(cc))
      private_data_length++;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (PRIVATE_DATA(cc))
      private_data_length += 2;
    cc += 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (PRIVATE_DATA(cc))
      private_data_length += 2;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
    size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
    /* The word count depends on the iterator which follows the class. */
    if (PRIVATE_DATA(cc))
      private_data_length += get_class_iterator_size(cc + size);
    cc += size;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }
  }
SLJIT_ASSERT(cc == ccend);
return private_data_length;
}
1601
/* Emits the machine code which copies the private data of the byte code
range [cc, ccend) between the local variable area and the stack.

Arguments:
  common              compiler state
  cc                  start of the range
  ccend               end of the range; the scan must stop exactly here
  save                TRUE: locals are stored onto the stack
                      FALSE: locals are reloaded from the stack
  stackptr            stack index where the copy starts (adjusted below)
  stacktop            stack index where the copy must end (adjusted below)
  needs_control_head  TRUE when the control head local is part of the data

The local offsets to copy are collected into srcw[] opcode by opcode, with
the same cases as in get_private_data_copy_length. The values travel
through TMP1 and TMP2 alternately: when saving, a register is flushed to the
stack just before it is reloaded with the next local; when restoring, a
register is written to its local and then refilled from the stack. */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* Local offsets collected for the current opcode (at most two). */
int srcw[2];
int count, size;
/* Which register is used by the next transfer, and whether the
registers currently hold a value which is waiting to be written. */
BOOL tmp1next = TRUE;
BOOL tmp1empty = TRUE;
BOOL tmp2empty = TRUE;
pcre_uchar *alternative;
enum {
  start,
  loop,
  end
} status;

/* The "start" step is only executed when saving. */
status = save ? start : loop;
stackptr = STACK(stackptr - 2);
stacktop = STACK(stacktop - 1);

if (!save)
  {
  /* The words stored by the "start" step are skipped, they are not
  written back to the locals here. */
  stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
  /* Preload up to two values from the stack. */
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp1empty = FALSE;
    }
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp2empty = FALSE;
    }
  /* The tmp1next must be TRUE in either way. */
  }

do
  {
  count = 0;
  switch(status)
    {
    case start:
    /* The recursive head, and the control head if needed, come first. */
    SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
    count = 1;
    srcw[0] = common->recursive_head_ptr;
    if (needs_control_head)
      {
      SLJIT_ASSERT(common->control_head_ptr != 0);
      count = 2;
      srcw[1] = common->control_head_ptr;
      }
    status = loop;
    break;

    case loop:
    if (cc >= ccend)
      {
      status = end;
      break;
      }

    switch(*cc)
      {
      case OP_KET:
      if (PRIVATE_DATA(cc) != 0)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 1 + LINK_SIZE;
      break;

      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(srcw[0] != 0);
      cc += 1 + LINK_SIZE;
      break;

      case OP_CBRA:
      case OP_SCBRA:
      /* Copied only when optimized_cbracket[] is zero for this bracket. */
      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
        {
        count = 1;
        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
        }
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_COND:
      /* Might be a hidden SCOND. */
      alternative = cc + GET(cc, 1);
      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(srcw[0] != 0);
        }
      cc += 1 + LINK_SIZE;
      break;

      /* Character iterators: copied only when they own a private slot. In
      UTF mode the extra code units of the character are skipped too. */
      CASE_ITERATOR_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        /* Two consecutive words. */
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1 + IMM2_SIZE;
      break;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
      case OP_XCLASS:
      size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
      size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
      /* The word count depends on the iterator following the class. */
      if (PRIVATE_DATA(cc))
        switch(get_class_iterator_size(cc + size))
          {
          case 1:
          count = 1;
          srcw[0] = PRIVATE_DATA(cc);
          break;

          case 2:
          count = 2;
          srcw[0] = PRIVATE_DATA(cc);
          srcw[1] = srcw[0] + sizeof(sljit_sw);
          break;

          default:
          SLJIT_ASSERT_STOP();
          break;
          }
      cc += size;
      break;

      default:
      cc = next_opcode(common, cc);
      SLJIT_ASSERT(cc != NULL);
      break;
      }
    break;

    case end:
    SLJIT_ASSERT_STOP();
    break;
    }

  /* Transfer the collected words. Note that they are processed from the
  last one to the first one (srcw[1] before srcw[0]). */
  while (count > 0)
    {
    count--;
    if (save)
      {
      if (tmp1next)
        {
        /* Flush the previous content of TMP1, then load the next local. */
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
        tmp1empty = FALSE;
        tmp1next = FALSE;
        }
      else
        {
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
        tmp2empty = FALSE;
        tmp1next = TRUE;
        }
      }
    else
      {
      if (tmp1next)
        {
        /* Write TMP1 to its local, then refill it while data remains. */
        SLJIT_ASSERT(!tmp1empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
        tmp1empty = stackptr >= stacktop;
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = FALSE;
        }
      else
        {
        SLJIT_ASSERT(!tmp2empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
        tmp2empty = stackptr >= stacktop;
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = TRUE;
        }
      }
    }
  }
while (status != end);

if (save)
  {
  /* Flush the values still held in the registers. The register which
  would be used next holds the older value, so it is stored first. */
  if (tmp1next)
    {
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  else
    {
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  }
/* The whole range was scanned and exactly the reserved area was used. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
1921
1922 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1923 {
1924 pcre_uchar *end = bracketend(cc);
1925 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1926
1927 /* Assert captures then. */
1928 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1929 current_offset = NULL;
1930 /* Conditional block does not. */
1931 if (*cc == OP_COND || *cc == OP_SCOND)
1932 has_alternatives = FALSE;
1933
1934 cc = next_opcode(common, cc);
1935 if (has_alternatives)
1936 current_offset = common->then_offsets + (cc - common->start);
1937
1938 while (cc < end)
1939 {
1940 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1941 cc = set_then_offsets(common, cc, current_offset);
1942 else
1943 {
1944 if (*cc == OP_ALT && has_alternatives)
1945 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1946 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1947 *current_offset = 1;
1948 cc = next_opcode(common, cc);
1949 }
1950 }
1951
1952 return end;
1953 }
1954
1955 #undef CASE_ITERATOR_PRIVATE_DATA_1
1956 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1957 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1958 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1959 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1960 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1961
1962 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1963 {
1964 return (value & (value - 1)) == 0;
1965 }
1966
1967 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1968 {
1969 while (list)
1970 {
1971 /* sljit_set_label is clever enough to do nothing
1972 if either the jump or the label is NULL. */
1973 SET_LABEL(list->jump, label);
1974 list = list->next;
1975 }
1976 }
1977
1978 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1979 {
1980 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1981 if (list_item)
1982 {
1983 list_item->next = *list;
1984 list_item->jump = jump;
1985 *list = list_item;
1986 }
1987 }
1988
1989 static void add_stub(compiler_common *common, struct sljit_jump *start)
1990 {
1991 DEFINE_COMPILER;
1992 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1993
1994 if (list_item)
1995 {
1996 list_item->start = start;
1997 list_item->quit = LABEL();
1998 list_item->next = common->stubs;
1999 common->stubs = list_item;
2000 }
2001 }
2002
2003 static void flush_stubs(compiler_common *common)
2004 {
2005 DEFINE_COMPILER;
2006 stub_list* list_item = common->stubs;
2007
2008 while (list_item)
2009 {
2010 JUMPHERE(list_item->start);
2011 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2012 JUMPTO(SLJIT_JUMP, list_item->quit);
2013 list_item = list_item->next;
2014 }
2015 common->stubs = NULL;
2016 }
2017
/* Emits the code which decreases COUNT_MATCH by one, and adds a jump to
common->calllimit which is taken when the counter reaches zero. */
static SLJIT_INLINE void count_match(compiler_common *common)
{
DEFINE_COMPILER;

/* The flags set by this subtraction are consumed by the jump below. */
OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
}
2025
/* Emits the code which reserves "size" machine words on the stack by
advancing STACK_TOP. When the new STACK_TOP is greater than STACK_LIMIT the
generated code jumps to a stub registered with add_stub. */
static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
{
/* May destroy all locals and registers except TMP2. */
DEFINE_COMPILER;

OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
#ifdef DESTROY_REGISTERS
/* Debugging aid: overwrite everything which may be destroyed with a
recognizable constant. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
#endif
/* Limit check; the stub continues right after this comparison. */
add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
}
2041
/* Emits the code which releases "size" machine words by moving STACK_TOP
back. No limit check is generated. */
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
DEFINE_COMPILER;
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
}
2047
/* Emits the code which sets OVECTOR(1) .. OVECTOR(length - 1) to the
"string begin minus one character" value. OVECTOR(0) is not written.
Short vectors are set by a sequence of stores, longer ones by a loop. */
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

/* At this point we can freely use all temporary registers. */
SLJIT_ASSERT(length > 1);
/* TMP1 returns with begin - 1. */
/* NOTE(review): the code below uses SLJIT_SCRATCH_REG1; presumably TMP1 is
mapped to that register - confirm against the register definitions. */
OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
if (length < 8)
  {
  /* Unrolled stores. */
  for (i = 1; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
  }
else
  {
  /* Generated loop of length - 1 iterations: SLJIT_SCRATCH_REG2 is the
  write pointer (presumably advanced by the MOVU store before each write -
  confirm with the SLJIT documentation), SLJIT_SCRATCH_REG3 the counter. */
  GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_C_NOT_ZERO, loop);
  }
}
2073
/* Emits the code which resets the matching state: OVECTOR(2) ..
OVECTOR(length - 1) are overwritten with the value of OVECTOR(1), the mark
and control head locals (when in use) are cleared, STACK_TOP is reloaded
from the base of the stack referenced by the arguments, and TMP1 is loaded
from the common->start_ptr local. */
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
DEFINE_COMPILER;
struct sljit_label *loop;
int i;

SLJIT_ASSERT(length > 1);
/* OVECTOR(1) contains the "string begin - 1" constant. */
/* The constant is only needed when there is something to overwrite. */
if (length > 2)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
if (length < 8)
  {
  /* Unrolled stores. */
  for (i = 2; i < length; i++)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
  }
else
  {
  /* Generated loop of length - 2 iterations. STACK_TOP is borrowed as the
  loop counter, it is reloaded below. */
  GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
  OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_C_NOT_ZERO, loop);
  }

/* STACK_TOP = arguments->stack->base, computed in three steps with the
unrelated resets interleaved. */
OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
if (common->control_head_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
}
2108
2109 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2110 {
2111 while (current != NULL)
2112 {
2113 switch (current[-2])
2114 {
2115 case type_then_trap:
2116 break;
2117
2118 case type_mark:
2119 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2120 return current[-4];
2121 break;
2122
2123 default:
2124 SLJIT_ASSERT_STOP();
2125 break;
2126 }
2127 current = (sljit_sw*)current[-1];
2128 }
2129 return -1;
2130 }
2131
/* Emits the code which runs after a successful match. It stores the mark
(when in use) into the arguments, copies offset_count values of the internal
offset vector into the offsets array of the arguments as integer offsets
relative to "begin", and sets the return register.

Arguments:
  common      compiler state
  topbracket  upper bound used when searching for the last set bracket */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *early_quit;

/* At this point we can freely use all registers. */
/* Keep the old content of OVECTOR(1) in SLJIT_SAVED_REG3 (it is compared
against the ovector entries at the end), then store STR_PTR there. */
OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);

OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
/* SLJIT_SCRATCH_REG2: number of values to copy. */
OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* SLJIT_SCRATCH_REG3: destination pointer, starting one int before the
offsets array because the MOVU store below uses an offset of sizeof(int).
SLJIT_SCRATCH_REG1: begin. SLJIT_SAVED_REG1: source pointer. */
OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
/* Unlikely, but possible */
early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
loop = LABEL();
/* Difference between the stored pointer and begin. */
OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
/* Copy the integer value to the output buffer */
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Shift right by UCHAR_SHIFT (presumably converts the difference to
character units - confirm with the macro definition). */
OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_C_NOT_ZERO, loop);
JUMPHERE(early_quit);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Walk backwards over the ovector in steps of two words, decreasing the
  counter, while the entries are equal to the saved OVECTOR(1) value. */
  GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);

  /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
  CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
2181
/* Emits the code which returns PCRE_ERROR_PARTIAL and fills up to three
entries of the offsets array of the arguments, all relative to "begin":
  offsets[0]  the start_used_ptr local (hard partial mode) or the hit_start
              local (otherwise)
  offsets[1]  STR_END
  offsets[2]  the start_ptr local (hard partial mode) or the local after
              hit_start (otherwise); only when real_offset_count >= 3
Nothing is stored when real_offset_count is less than 2. The generated code
always ends with a jump to "quit". */
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
DEFINE_COMPILER;
struct sljit_jump *jump;

/* STR_END is referred to as SLJIT_SAVED_REG2 below, and that register
is overwritten after its last use as STR_END. */
SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));

OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* No room for even a single pair: return immediately. */
CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);

/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));

/* offsets[2] is stored only if there are at least three entries. */
jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
JUMPHERE(jump);

/* Load the start value first, then compute and store offsets[1]. */
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);

/* offsets[0]. */
OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);

JUMPTO(SLJIT_JUMP, quit);
}
2223
2224 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2225 {
2226 /* May destroy TMP1. */
2227 DEFINE_COMPILER;
2228 struct sljit_jump *jump;
2229
2230 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2231 {
2232 /* The value of -1 must be kept for start_used_ptr! */
2233 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2234 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2235 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2236 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2237 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2238 JUMPHERE(jump);
2239 }
2240 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2241 {
2242 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2244 JUMPHERE(jump);
2245 }
2246 }
2247
2248 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2249 {
2250 /* Detects if the character has an othercase. */
2251 unsigned int c;
2252
2253 #ifdef SUPPORT_UTF
2254 if (common->utf)
2255 {
2256 GETCHAR(c, cc);
2257 if (c > 127)
2258 {
2259 #ifdef SUPPORT_UCP
2260 return c != UCD_OTHERCASE(c);
2261 #else
2262 return FALSE;
2263 #endif
2264 }
2265 #ifndef COMPILE_PCRE8
2266 return common->fcc[c] != c;
2267 #endif
2268 }
2269 else
2270 #endif
2271 c = *cc;
2272 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2273 }
2274
2275 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2276 {
2277 /* Returns with the othercase. */
2278 #ifdef SUPPORT_UTF
2279 if (common->utf && c > 127)
2280 {
2281 #ifdef SUPPORT_UCP
2282 return UCD_OTHERCASE(c);
2283 #else
2284 return c;
2285 #endif
2286 }
2287 #endif
2288 return TABLE_GET(c, common->fcc, c);
2289 }
2290
2291 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2292 {
2293 /* Detects if the character and its othercase has only 1 bit difference. */
2294 unsigned int c, oc, bit;
2295 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2296 int n;
2297 #endif
2298
2299 #ifdef SUPPORT_UTF
2300 if (common->utf)
2301 {
2302 GETCHAR(c, cc);
2303 if (c <= 127)
2304 oc = common->fcc[c];
2305 else
2306 {
2307 #ifdef SUPPORT_UCP
2308 oc = UCD_OTHERCASE(c);
2309 #else
2310 oc = c;
2311 #endif
2312 }
2313 }
2314 else
2315 {
2316 c = *cc;
2317 oc = TABLE_GET(c, common->fcc, c);
2318 }
2319 #else
2320 c = *cc;
2321 oc = TABLE_GET(c, common->fcc, c);
2322 #endif
2323
2324 SLJIT_ASSERT(c != oc);
2325
2326 bit = c ^ oc;
2327 /* Optimized for English alphabet. */
2328 if (c <= 127 && bit == 0x20)
2329 return (0 << 8) | 0x20;
2330
2331 /* Since c != oc, they must have at least 1 bit difference. */
2332 if (!is_powerof2(bit))
2333 return 0;
2334
2335 #if defined COMPILE_PCRE8
2336
2337 #ifdef SUPPORT_UTF
2338 if (common->utf && c > 127)
2339 {
2340 n = GET_EXTRALEN(*cc);
2341 while ((bit & 0x3f) == 0)
2342 {
2343 n--;
2344 bit >>= 6;
2345 }
2346 return (n << 8) | bit;
2347 }
2348 #endif /* SUPPORT_UTF */
2349 return (0 << 8) | bit;
2350
2351 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2352
2353 #ifdef SUPPORT_UTF
2354 if (common->utf && c > 65535)
2355 {
2356 if (bit >= (1 << 10))
2357 bit >>= 10;
2358 else
2359 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2360 }
2361 #endif /* SUPPORT_UTF */
2362 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2363
2364 #endif /* COMPILE_PCRE[8|16|32] */
2365 }
2366
2367 static void check_partial(compiler_common *common, BOOL force)
2368 {
2369 /* Checks whether a partial matching is occurred. Does not modify registers. */
2370 DEFINE_COMPILER;
2371 struct sljit_jump *jump = NULL;
2372
2373 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2374
2375 if (common->mode == JIT_COMPILE)
2376 return;
2377
2378 if (!force)
2379 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2380 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2381 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2382
2383 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2384 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2385 else
2386 {
2387 if (common->partialmatchlabel != NULL)
2388 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2389 else
2390 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2391 }
2392
2393 if (jump != NULL)
2394 JUMPHERE(jump);
2395 }
2396
2397 static void check_str_end(compiler_common *common, jump_list **end_reached)
2398 {
2399 /* Does not affect registers. Usually used in a tight spot. */
2400 DEFINE_COMPILER;
2401 struct sljit_jump *jump;
2402
2403 if (common->mode == JIT_COMPILE)
2404 {
2405 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2406 return;
2407 }
2408
2409 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2410 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2411 {
2412 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2413 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2414 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2415 }
2416 else
2417 {
2418 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2419 if (common->partialmatchlabel != NULL)
2420 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2421 else
2422 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2423 }
2424 JUMPHERE(jump);
2425 }
2426
2427 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2428 {
2429 DEFINE_COMPILER;
2430 struct sljit_jump *jump;
2431
2432 if (common->mode == JIT_COMPILE)
2433 {
2434 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2435 return;
2436 }
2437
2438 /* Partial matching mode. */
2439 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2440 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2441 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2442 {
2443 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2444 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2445 }
2446 else
2447 {
2448 if (common->partialmatchlabel != NULL)
2449 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2450 else
2451 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2452 }
2453 JUMPHERE(jump);
2454 }
2455
2456 static void read_char(compiler_common *common)
2457 {
2458 /* Reads the character into TMP1, updates STR_PTR.
2459 Does not check STR_END. TMP2 Destroyed. */
2460 DEFINE_COMPILER;
2461 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2462 struct sljit_jump *jump;
2463 #endif
2464
2465 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2466 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2467 if (common->utf)
2468 {
2469 #if defined COMPILE_PCRE8
2470 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2471 #elif defined COMPILE_PCRE16
2472 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2473 #endif /* COMPILE_PCRE[8|16] */
2474 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2475 JUMPHERE(jump);
2476 }
2477 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2478 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2479 }
2480
2481 static void peek_char(compiler_common *common)
2482 {
2483 /* Reads the character into TMP1, keeps STR_PTR.
2484 Does not check STR_END. TMP2 Destroyed. */
2485 DEFINE_COMPILER;
2486 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2487 struct sljit_jump *jump;
2488 #endif
2489
2490 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2491 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2492 if (common->utf)
2493 {
2494 #if defined COMPILE_PCRE8
2495 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2496 #elif defined COMPILE_PCRE16
2497 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2498 #endif /* COMPILE_PCRE[8|16] */
2499 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2500 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2501 JUMPHERE(jump);
2502 }
2503 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2504 }
2505
2506 static void read_char8_type(compiler_common *common)
2507 {
2508 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2509 DEFINE_COMPILER;
2510 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2511 struct sljit_jump *jump;
2512 #endif
2513
2514 #ifdef SUPPORT_UTF
2515 if (common->utf)
2516 {
2517 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2518 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2519 #if defined COMPILE_PCRE8
2520 /* This can be an extra read in some situations, but hopefully
2521 it is needed in most cases. */
2522 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2523 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2524 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2525 JUMPHERE(jump);
2526 #elif defined COMPILE_PCRE16
2527 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2528 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2529 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2530 JUMPHERE(jump);
2531 /* Skip low surrogate if necessary. */
2532 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2533 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2534 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2535 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2536 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2537 #elif defined COMPILE_PCRE32
2538 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2539 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2540 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2541 JUMPHERE(jump);
2542 #endif /* COMPILE_PCRE[8|16|32] */
2543 return;
2544 }
2545 #endif /* SUPPORT_UTF */
2546 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2547 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2548 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2549 /* The ctypes array contains only 256 values. */
2550 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2551 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2552 #endif
2553 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2554 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2555 JUMPHERE(jump);
2556 #endif
2557 }
2558
2559 static void skip_char_back(compiler_common *common)
2560 {
2561 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2562 DEFINE_COMPILER;
2563 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2564 #if defined COMPILE_PCRE8
2565 struct sljit_label *label;
2566
2567 if (common->utf)
2568 {
2569 label = LABEL();
2570 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2571 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2572 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2573 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2574 return;
2575 }
2576 #elif defined COMPILE_PCRE16
2577 if (common->utf)
2578 {
2579 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2580 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2581 /* Skip low surrogate if necessary. */
2582 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2583 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2584 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2585 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2586 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2587 return;
2588 }
2589 #endif /* COMPILE_PCRE[8|16] */
2590 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2591 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2592 }
2593
2594 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2595 {
2596 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2597 DEFINE_COMPILER;
2598
2599 if (nltype == NLTYPE_ANY)
2600 {
2601 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2602 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2603 }
2604 else if (nltype == NLTYPE_ANYCRLF)
2605 {
2606 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2607 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2608 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2609 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2610 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2611 }
2612 else
2613 {
2614 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2615 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2616 }
2617 }
2618
2619 #ifdef SUPPORT_UTF
2620
2621 #if defined COMPILE_PCRE8
2622 static void do_utfreadchar(compiler_common *common)
2623 {
2624 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2625 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2626 DEFINE_COMPILER;
2627 struct sljit_jump *jump;
2628
2629 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2630 /* Searching for the first zero. */
2631 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2632 jump = JUMP(SLJIT_C_NOT_ZERO);
2633 /* Two byte sequence. */
2634 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2635 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2636 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2637 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2638 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2639 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2640 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2641 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2642 JUMPHERE(jump);
2643
2644 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2645 jump = JUMP(SLJIT_C_NOT_ZERO);
2646 /* Three byte sequence. */
2647 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2648 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2649 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2650 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2651 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2652 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2653 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2654 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2655 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2656 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2657 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2658 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2659 JUMPHERE(jump);
2660
2661 /* Four byte sequence. */
2662 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2663 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2664 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2665 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2666 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2667 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2668 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2669 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2670 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2671 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2672 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2673 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2674 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2675 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2676 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2677 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2678 }
2679
2680 static void do_utfreadtype8(compiler_common *common)
2681 {
2682 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2683 of the character (>= 0xc0). Return value in TMP1. */
2684 DEFINE_COMPILER;
2685 struct sljit_jump *jump;
2686 struct sljit_jump *compare;
2687
2688 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2689
2690 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2691 jump = JUMP(SLJIT_C_NOT_ZERO);
2692 /* Two byte sequence. */
2693 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2694 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2695 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2696 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2697 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2698 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2699 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2700 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2701 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2702
2703 JUMPHERE(compare);
2704 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2705 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2706 JUMPHERE(jump);
2707
2708 /* We only have types for characters less than 256. */
2709 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2710 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2711 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2712 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2713 }
2714
2715 #elif defined COMPILE_PCRE16
2716
2717 static void do_utfreadchar(compiler_common *common)
2718 {
2719 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2720 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2721 DEFINE_COMPILER;
2722 struct sljit_jump *jump;
2723
2724 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2725 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2726 /* Do nothing, only return. */
2727 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2728
2729 JUMPHERE(jump);
2730 /* Combine two 16 bit characters. */
2731 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2732 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2733 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2734 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2735 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2736 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2737 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2738 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2739 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2740 }
2741
2742 #endif /* COMPILE_PCRE[8|16] */
2743
2744 #endif /* SUPPORT_UTF */
2745
2746 #ifdef SUPPORT_UCP
2747
2748 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2749 #define UCD_BLOCK_MASK 127
2750 #define UCD_BLOCK_SHIFT 7
2751
2752 static void do_getucd(compiler_common *common)
2753 {
2754 /* Search the UCD record for the character comes in TMP1.
2755 Returns chartype in TMP1 and UCD offset in TMP2. */
2756 DEFINE_COMPILER;
2757
2758 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2759
2760 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2761 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2762 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2763 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2764 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2765 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2766 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2767 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2768 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2769 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2770 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2771 }
2772 #endif
2773
2774 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2775 {
/* Emits the code which moves STR_PTR to the next starting position and
returns the label ('mainloop') placed at the top of that code. The advancing
code is preceded by an unconditional jump ('start') which lands behind it, so
execution that falls into the emitted sequence does not advance STR_PTR; only
a jump to the returned label does.
When firstline is TRUE, code which locates the end of the first line and
stores it in the first_line_end local is emitted first (TMP3 keeps STR_PTR
meanwhile).
When both hascrorlf and firstline are FALSE and the newline type is ANY,
ANYCRLF, or common->newline is above 255, extra code is emitted so that a
two unit newline is stepped over as a whole.
TMP1 and TMP2 may be overwritten by the emitted code. */
2776 DEFINE_COMPILER;
2777 struct sljit_label *mainloop;
2778 struct sljit_label *newlinelabel = NULL;
2779 struct sljit_jump *start;
2780 struct sljit_jump *end = NULL;
2781 struct sljit_jump *nl = NULL;
2782 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2783 struct sljit_jump *singlechar;
2784 #endif
2785 jump_list *newline = NULL;
2786 BOOL newlinecheck = FALSE;
2787 BOOL readuchar = FALSE;
2788
/* Decide whether the two unit newline handling is needed at all. */
2789 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2790 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2791 newlinecheck = TRUE;
2792
2793 if (firstline)
2794 {
2795 /* Search for the end of the first line. */
2796 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is used for the scan; TMP3 remembers the real position. */
2797 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2798
2799 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2800 {
/* Fixed two unit newline: advance one unit at a time until the previous
unit equals the high byte and the current unit equals the low byte of
common->newline, or until STR_END is reached. first_line_end becomes
STR_PTR minus one unit. */
2801 mainloop = LABEL();
2802 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2803 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2804 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2805 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2806 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2807 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2808 JUMPHERE(end);
2809 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2810 }
2811 else
2812 {
/* Other newline types: first_line_end is written before every character is
read, so when a newline is found it still holds the position of the newline
character (the 'newline' jumps land behind the final store). If STR_END is
reached instead, first_line_end is set to STR_PTR. */
2813 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2814 mainloop = LABEL();
2815 /* Continual stores does not cause data dependency. */
2816 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2817 read_char(common);
2818 check_newlinechar(common, common->nltype, &newline, TRUE);
2819 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2820 JUMPHERE(end);
2821 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2822 set_jumps(newline, LABEL());
2823 }
2824
/* Back to the position where the scan started. */
2825 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2826 }
2827
/* Jump over the advancing code; it is bound at the very end of this function. */
2828 start = JUMP(SLJIT_JUMP);
2829
2830 if (newlinecheck)
2831 {
/* Reached from the main loop when the current unit equals the high byte of
common->newline: step over that unit and, unless STR_END has been reached,
step over the next unit as well if it equals the low byte. The comparison
result (0 or 1) is scaled to a unit size and added to STR_PTR. */
2832 newlinelabel = LABEL();
2833 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2834 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2835 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2836 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2837 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2838 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2839 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2840 #endif
2841 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2842 nl = JUMP(SLJIT_JUMP);
2843 }
2844
/* This is the label handed back to the caller. */
2845 mainloop = LABEL();
2846
2847 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is loaded into TMP1 only if something below needs it:
the UTF length computation or the newline test. */
2848 #ifdef SUPPORT_UTF
2849 if (common->utf) readuchar = TRUE;
2850 #endif
2851 if (newlinecheck) readuchar = TRUE;
2852
2853 if (readuchar)
2854 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2855
2856 if (newlinecheck)
2857 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2858
2859 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2860 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2861 #if defined COMPILE_PCRE8
2862 if (common->utf)
2863 {
/* UTF-8: for a lead byte of 0xc0 or above, add the utf8_table4 entry which
belongs to that lead byte. */
2864 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2865 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2866 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2867 JUMPHERE(singlechar);
2868 }
2869 #elif defined COMPILE_PCRE16
2870 if (common->utf)
2871 {
/* UTF-16: for a unit of 0xd800 or above, (unit & 0xfc00) == 0xd800 gives 1,
which is shifted into a two byte step over the second unit of the pair. */
2872 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2873 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2874 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2875 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2876 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2877 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2878 JUMPHERE(singlechar);
2879 }
2880 #endif /* COMPILE_PCRE[8|16] */
2881 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Everything that bypassed or left the advancing code continues here. */
2882 JUMPHERE(start);
2883
2884 if (newlinecheck)
2885 {
2886 JUMPHERE(end);
2887 JUMPHERE(nl);
2888 }
2889
2890 return mainloop;
2891 }
2892
/* Maximum number of code units collected by fast_forward_first_n_chars. */
2893 #define MAX_N_CHARS 3
2894
2895 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2896 {
/* Tries to emit a fast forward search for the first fixed code units of the
pattern. The byte code is scanned from common->start + 1 + LINK_SIZE:
OP_CHAR / OP_CHARI items are collected, zero width assertions are skipped,
and the character of a PLUS / EXACT style item is collected as the last one.
At most MAX_N_CHARS units are kept in chars[] as (value, case bit) pairs,
where the case bit is already ORed into the value.
Returns FALSE without emitting code when the pattern has alternatives, when a
caseless character and its other case differ in more than one bit, or when
fewer than two units were collected. Otherwise a loop is emitted which
advances STR_PTR until the collected units match or the (temporarily
lowered) STR_END is reached, and TRUE is returned.
The emitted code overwrites TMP1 and TMP2; TMP3 keeps the original STR_END
when firstline is TRUE. */
2897 DEFINE_COMPILER;
2898 struct sljit_label *start;
2899 struct sljit_jump *quit;
2900 pcre_uint32 chars[MAX_N_CHARS * 2];
2901 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2902 int location = 0;
2903 pcre_int32 len, c, bit, caseless;
2904 int must_stop;
2905
2906 /* We do not support alternatives now. */
2907 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2908 return FALSE;
2909
2910 while (TRUE)
2911 {
/* must_stop: 0 = continue with the next item after this character,
1 = collect this character and stop, 2 = stop without collecting. */
2912 caseless = 0;
2913 must_stop = 1;
2914 switch(*cc)
2915 {
2916 case OP_CHAR:
2917 must_stop = 0;
2918 cc++;
2919 break;
2920
2921 case OP_CHARI:
2922 caseless = 1;
2923 must_stop = 0;
2924 cc++;
2925 break;
2926
2927 case OP_SOD:
2928 case OP_SOM:
2929 case OP_SET_SOM:
2930 case OP_NOT_WORD_BOUNDARY:
2931 case OP_WORD_BOUNDARY:
2932 case OP_EODN:
2933 case OP_EOD:
2934 case OP_CIRC:
2935 case OP_CIRCM:
2936 case OP_DOLL:
2937 case OP_DOLLM:
2938 /* Zero width assertions. */
2939 cc++;
2940 continue;
2941
2942 case OP_PLUS:
2943 case OP_MINPLUS:
2944 case OP_POSPLUS:
2945 cc++;
2946 break;
2947
2948 case OP_EXACT:
2949 cc += 1 + IMM2_SIZE;
2950 break;
2951
2952 case OP_PLUSI:
2953 case OP_MINPLUSI:
2954 case OP_POSPLUSI:
2955 caseless = 1;
2956 cc++;
2957 break;
2958
2959 case OP_EXACTI:
2960 caseless = 1;
2961 cc += 1 + IMM2_SIZE;
2962 break;
2963
2964 default:
2965 must_stop = 2;
2966 break;
2967 }
2968
2969 if (must_stop == 2)
2970 break;
2971
/* cc now points at the character; len is its length in code units. */
2972 len = 1;
2973 #ifdef SUPPORT_UTF
2974 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2975 #endif
2976
2977 if (caseless && char_has_othercase(common, cc))
2978 {
2979 caseless = char_get_othercase_bit(common, cc);
2980 if (caseless == 0)
2981 return FALSE;
/* Repack the result for the copy loop below: the low byte becomes the value
'len' has when the code unit carrying the case bit is reached, the bits
above it hold the bit mask for that unit. In the 16/32 bit libraries bit 8
of the result tells that the mask belongs to the upper byte of the unit. */
2982 #ifdef COMPILE_PCRE8
2983 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2984 #else
2985 if ((caseless & 0x100) != 0)
2986 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2987 else
2988 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2989 #endif
2990 }
2991 else
2992 caseless = 0;
2993
/* Copy the code units of this character while there is room in chars[]. */
2994 while (len > 0 && location < MAX_N_CHARS * 2)
2995 {
2996 c = *cc;
2997 bit = 0;
2998 if (len == (caseless & 0xff))
2999 {
3000 bit = caseless >> 8;
3001 c |= bit;
3002 }
3003
3004 chars[location] = c;
3005 chars[location + 1] = bit;
3006
3007 len--;
3008 location += 2;
3009 cc++;
3010 }
3011
3012 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3013 break;
3014 }
3015
3016 /* At least two characters are required. */
3017 if (location < 2 * 2)
3018 return FALSE;
3019
/* location >> 1 is the number of collected units. STR_END is lowered by one
unit less than that, so the loop below never reads behind the subject (or
behind the first line when firstline is set). */
3020 if (firstline)
3021 {
3022 SLJIT_ASSERT(common->first_line_end != 0);
3023 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3024 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3025 }
3026 else
3027 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3028
/* Search loop. The first two units are loaded, then STR_PTR is advanced by
one unit, so every mismatch can jump straight back to 'start' to try the
next position. */
3029 start = LABEL();
3030 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3031
3032 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3033 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3034 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3035 if (chars[1] != 0)
3036 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3037 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* With three collected units TMP1 is free again and is reloaded with the
third unit (STR_PTR has already moved by one). */
3038 if (location > 2 * 2)
3039 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3040 if (chars[3] != 0)
3041 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3042 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3043 if (location > 2 * 2)
3044 {
3045 if (chars[5] != 0)
3046 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3047 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3048 }
/* Everything matched: step back to the first unit of the match. */
3049 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3050
3051 JUMPHERE(quit);
3052
/* Give STR_END its original value back. */
3053 if (firstline)
3054 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3055 else
3056 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3057 return TRUE;
3058 }
3059
3060 #undef MAX_N_CHARS
3061
3062 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3063 {
3064 DEFINE_COMPILER;
3065 struct sljit_label *start;
3066 struct sljit_jump *quit;
3067 struct sljit_jump *found;
3068 pcre_uchar oc, bit;
3069
3070 if (firstline)
3071 {
3072 SLJIT_ASSERT(common->first_line_end != 0);
3073 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3074 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3075 }
3076
3077 start = LABEL();
3078 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3079 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3080
3081 oc = first_char;
3082 if (caseless)
3083 {
3084 oc = TABLE_GET(first_char, common->fcc, first_char);
3085 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3086 if (first_char > 127 && common->utf)
3087 oc = UCD_OTHERCASE(first_char);
3088 #endif
3089 }
3090 if (first_char == oc)
3091 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3092 else
3093 {
3094 bit = first_char ^ oc;
3095 if (is_powerof2(bit))
3096 {
3097 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3098 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3099 }
3100 else
3101 {
3102 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3103 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3104 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3105 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3106 found = JUMP(SLJIT_C_NOT_ZERO);
3107 }
3108 }
3109
3110 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3111 JUMPTO(SLJIT_JUMP, start);
3112 JUMPHERE(found);
3113 JUMPHERE(quit);
3114
3115 if (firstline)
3116 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3117 }
3118
3119 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3120 {
3121 DEFINE_COMPILER;
3122 struct sljit_label *loop;
3123 struct sljit_jump *lastchar;
3124 struct sljit_jump *firstchar;
3125 struct sljit_jump *quit;
3126 struct sljit_jump *foundcr = NULL;
3127 struct sljit_jump *notfoundnl;
3128 jump_list *newline = NULL;
3129
3130 if (firstline)
3131 {
3132 SLJIT_ASSERT(common->first_line_end != 0);
3133 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3134 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3135 }
3136
3137 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3138 {
3139 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3140 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3141 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3142 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3143 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3144
3145 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3146 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3147 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3148 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3149 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3150 #endif
3151 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3152
3153 loop = LABEL();
3154 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3155 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3156 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3157 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3158 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3159 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3160
3161 JUMPHERE(quit);
3162 JUMPHERE(firstchar);
3163 JUMPHERE(lastchar);
3164
3165 if (firstline)
3166 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3167 return;
3168 }
3169
3170 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3171 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3172 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3173 skip_char_back(common);
3174
3175 loop = LABEL();
3176 read_char(common);
3177 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3178 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3179 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3180 check_newlinechar(common, common->nltype, &newline, FALSE);
3181 set_jumps(newline, loop);
3182
3183 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3184 {
3185 quit = JUMP(SLJIT_JUMP);
3186 JUMPHERE(foundcr);
3187 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3188 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3189 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3190 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3191 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3192 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3193 #endif
3194 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3195 JUMPHERE(notfoundnl);
3196 JUMPHERE(quit);
3197 }
3198 JUMPHERE(lastchar);
3199 JUMPHERE(firstchar);
3200
3201 if (firstline)
3202 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3203 }
3204
3205 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3206
3207 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3208 {
3209 DEFINE_COMPILER;
3210 struct sljit_label *start;
3211 struct sljit_jump *quit;
3212 struct sljit_jump *found = NULL;
3213 jump_list *matches = NULL;
3214 pcre_uint8 inverted_start_bits[32];
3215 int i;
3216 #ifndef COMPILE_PCRE8
3217 struct sljit_jump *jump;
3218 #endif
3219
3220 for (i = 0; i < 32; ++i)
3221 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3222
3223 if (firstline)
3224 {
3225 SLJIT_ASSERT(common->first_line_end != 0);
3226 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3227 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3228 }
3229
3230 start = LABEL();
3231 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3232 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3233 #ifdef SUPPORT_UTF
3234 if (common->utf)
3235 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3236 #endif
3237
3238 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3239 {
3240 #ifndef COMPILE_PCRE8
3241 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3242 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3243 JUMPHERE(jump);
3244 #endif
3245 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3246 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3247 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3248 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3249 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3250 found = JUMP(SLJIT_C_NOT_ZERO);
3251 }
3252
3253 #ifdef SUPPORT_UTF
3254 if (common->utf)
3255 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3256 #endif
3257 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3258 #ifdef SUPPORT_UTF
3259 #if defined COMPILE_PCRE8
3260 if (common->utf)
3261 {
3262 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3263 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3264 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3265 }
3266 #elif defined COMPILE_PCRE16
3267 if (common->utf)
3268 {
3269 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3270 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3271 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3272 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3273 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3274 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3275 }
3276 #endif /* COMPILE_PCRE[8|16] */
3277 #endif /* SUPPORT_UTF */
3278 JUMPTO(SLJIT_JUMP, start);
3279 if (found != NULL)
3280 JUMPHERE(found);
3281 if (matches != NULL)
3282 set_jumps(matches, LABEL());
3283 JUMPHERE(quit);
3284
3285 if (firstline)
3286 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3287 }
3288
3289 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3290 {
3291 DEFINE_COMPILER;
3292 struct sljit_label *loop;
3293 struct sljit_jump *toolong;
3294 struct sljit_jump *alreadyfound;
3295 struct sljit_jump *found;
3296 struct sljit_jump *foundoc = NULL;
3297 struct sljit_jump *notfound;
3298 pcre_uint32 oc, bit;
3299
3300 SLJIT_ASSERT(common->req_char_ptr != 0);
3301 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3302 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3303 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3304 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3305
3306 if (has_firstchar)
3307 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3308 else
3309 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3310
3311 loop = LABEL();
3312 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3313
3314 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3315 oc = req_char;
3316 if (caseless)
3317 {
3318 oc = TABLE_GET(req_char, common->fcc, req_char);
3319 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3320 if (req_char > 127 && common->utf)
3321 oc = UCD_OTHERCASE(req_char);
3322 #endif
3323 }
3324 if (req_char == oc)
3325 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3326 else
3327 {
3328 bit = req_char ^ oc;
3329 if (is_powerof2(bit))
3330 {
3331 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3332 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3333 }
3334 else
3335 {
3336 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3337 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3338 }
3339 }
3340 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3341 JUMPTO(SLJIT_JUMP, loop);
3342
3343 JUMPHERE(found);
3344 if (foundoc)
3345 JUMPHERE(foundoc);
3346 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3347 JUMPHERE(alreadyfound);
3348 JUMPHERE(toolong);
3349 return notfound;
3350 }
3351
3352 static void do_revertframes(compiler_common *common)
3353 {
3354 DEFINE_COMPILER;
3355 struct sljit_jump *jump;
3356 struct sljit_label *mainloop;
3357
3358 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3359 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3360 GET_LOCAL_BASE(TMP3, 0, 0);
3361
3362 /* Drop frames until we reach STACK_TOP. */
3363 mainloop = LABEL();
3364 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3365 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3366 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3367
3368 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3369 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3370 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3371 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3372 JUMPTO(SLJIT_JUMP, mainloop);
3373
3374 JUMPHERE(jump);
3375 jump = JUMP(SLJIT_C_SIG_LESS);
3376 /* End of dropping frames. */
3377 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3378
3379 JUMPHERE(jump);
3380 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3381 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3382 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3383 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3384 JUMPTO(SLJIT_JUMP, mainloop);
3385 }
3386
3387 static void check_wordboundary(compiler_common *common)
3388 {
3389 DEFINE_COMPILER;
3390 struct sljit_jump *skipread;
3391 jump_list *skipread_list = NULL;
3392 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3393 struct sljit_jump *jump;
3394 #endif
3395
3396 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3397
3398 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3399 /* Get type of the previous char, and put it to LOCALS1. */
3400 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3401 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3402 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3403 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3404 skip_char_back(common);
3405 check_start_used_ptr(common);
3406 read_char(common);
3407
3408 /* Testing char type. */
3409 #ifdef SUPPORT_UCP
3410 if (common->use_ucp)
3411 {
3412 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3413 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3414 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3415 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3416 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3417 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3418 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3419 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3420 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3421 JUMPHERE(jump);
3422 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3423 }
3424 else
3425 #endif
3426 {
3427 #ifndef COMPILE_PCRE8
3428 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3429 #elif defined SUPPORT_UTF
3430 /* Here LOCALS1 has already been zeroed. */
3431 jump = NULL;
3432 if (common->utf)
3433 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3434 #endif /* COMPILE_PCRE8 */
3435 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3436 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3437 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3438 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3439 #ifndef COMPILE_PCRE8
3440 JUMPHERE(jump);
3441 #elif defined SUPPORT_UTF
3442 if (jump != NULL)
3443 JUMPHERE(jump);
3444 #endif /* COMPILE_PCRE8 */
3445 }
3446 JUMPHERE(skipread);
3447
3448 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3449 check_str_end(common, &skipread_list);
3450 peek_char(common);
3451
3452 /* Testing char type. This is a code duplication. */
3453 #ifdef SUPPORT_UCP
3454 if (common->use_ucp)
3455 {
3456 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3457 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3458 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3459 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3460 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3461 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3462 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3463 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3464 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3465 JUMPHERE(jump);
3466 }
3467 else
3468 #endif
3469 {
3470 #ifndef COMPILE_PCRE8
3471 /* TMP2 may be destroyed by peek_char. */
3472 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3473 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3474 #elif defined SUPPORT_UTF
3475 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3476 jump = NULL;
3477 if (common->utf)
3478 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3479 #endif
3480 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3481 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3482 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3483 #ifndef COMPILE_PCRE8
3484 JUMPHERE(jump);
3485 #elif defined SUPPORT_UTF
3486 if (jump != NULL)
3487 JUMPHERE(jump);
3488 #endif /* COMPILE_PCRE8 */
3489 }
3490 set_jumps(skipread_list, LABEL());
3491
3492 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3493 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3494 }
3495
3496 /*
3497 range format:
3498
3499 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3500 ranges[1] = first bit (0 or 1)
3501 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3502 */
3503
3504 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3505 {
3506 DEFINE_COMPILER;
3507 struct sljit_jump *jump;
3508
3509 if (ranges[0] < 0)
3510 return FALSE;
3511
3512 switch(ranges[0])
3513 {
3514 case 1:
3515 if (readch)
3516 read_char(common);
3517 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3518 return TRUE;
3519
3520 case 2:
3521 if (readch)
3522 read_char(common);
3523 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3524 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3525 return TRUE;
3526
3527 case 4:
3528 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3529 {
3530 if (readch)
3531 read_char(common);
3532 if (ranges[1] != 0)
3533 {
3534 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3535 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3536 }
3537 else
3538 {
3539 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3540 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3541 JUMPHERE(jump);
3542 }
3543 return TRUE;
3544 }
3545 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3546 {
3547 if (readch)
3548 read_char(common);
3549 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3550 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3551 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3552 return TRUE;
3553 }
3554 return FALSE;
3555
3556 default:
3557 return FALSE;
3558 }
3559 }
3560
3561 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3562 {
3563 int i, bit, length;
3564 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3565
3566 bit = ctypes[0] & flag;
3567 ranges[0] = -1;
3568 ranges[1] = bit != 0 ? 1 : 0;
3569 length = 0;
3570
3571 for (i = 1; i < 256; i++)
3572 if ((ctypes[i] & flag) != bit)
3573 {
3574 if (length >= MAX_RANGE_SIZE)
3575 return;
3576 ranges[2 + length] = i;
3577 length++;
3578 bit ^= flag;
3579 }
3580
3581 if (bit != 0)
3582 {
3583 if (length >= MAX_RANGE_SIZE)
3584 return;
3585 ranges[2 + length] = 256;
3586 length++;
3587 }
3588 ranges[0] = length;
3589 }
3590
3591 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3592 {
3593 int ranges[2 + MAX_RANGE_SIZE];
3594 pcre_uint8 bit, cbit, all;
3595 int i, byte, length = 0;
3596
3597 bit = bits[0] & 0x1;
3598 ranges[1] = bit;
3599 /* Can be 0 or 255. */
3600 all = -bit;
3601
3602 for (i = 0; i < 256; )
3603 {
3604 byte = i >> 3;
3605 if ((i & 0x7) == 0 && bits[byte] == all)
3606 i += 8;
3607 else
3608 {
3609 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3610 if (cbit != bit)
3611 {
3612 if (length >= MAX_RANGE_SIZE)
3613 return FALSE;
3614 ranges[2 + length] = i;
3615 length++;
3616 bit = cbit;
3617 all = -cbit;
3618 }
3619 i++;
3620 }
3621 }
3622
3623 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3624 {
3625 if (length >= MAX_RANGE_SIZE)
3626 return FALSE;
3627 ranges[2 + length] = 256;
3628 length++;
3629 }
3630 ranges[0] = length;
3631
3632 return check_ranges(common, ranges, backtracks, FALSE);
3633 }
3634
3635 static void check_anynewline(compiler_common *common)
3636 {
3637 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3638 DEFINE_COMPILER;
3639
3640 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3641
3642 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3643 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3644 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3645 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3646 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3647 #ifdef COMPILE_PCRE8
3648 if (common->utf)
3649 {
3650 #endif
3651 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3652 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3653 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3654 #ifdef COMPILE_PCRE8
3655 }
3656 #endif
3657 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3658 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3659 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3660 }
3661
3662 static void check_hspace(compiler_common *common)
3663 {
3664 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3665 DEFINE_COMPILER;
3666
3667 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3668
3669 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3670 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3671 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3672 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3673 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3674 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3675 #ifdef COMPILE_PCRE8
3676 if (common->utf)
3677 {
3678 #endif
3679 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3680 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3681 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3682 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3683 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3684 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3685 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3686 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3687 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3688 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3689 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3690 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3691 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3692 #ifdef COMPILE_PCRE8
3693 }
3694 #endif
3695 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3696 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3697
3698 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3699 }
3700
3701 static void check_vspace(compiler_common *common)
3702 {
3703 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3704 DEFINE_COMPILER;
3705
3706 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3707
3708 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3709 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3710 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3711 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3712 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3713 #ifdef COMPILE_PCRE8
3714 if (common->utf)
3715 {
3716 #endif
3717 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3718 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3719 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3720 #ifdef COMPILE_PCRE8
3721 }
3722 #endif
3723 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3724 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3725
3726 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3727 }
3728
3729 #define CHAR1 STR_END
3730 #define CHAR2 STACK_TOP
3731
3732 static void do_casefulcmp(compiler_common *common)
3733 {
3734 DEFINE_COMPILER;
3735 struct sljit_jump *jump;
3736 struct sljit_label *label;
3737
3738 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3739 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3740 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3741 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3742 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3743 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3744
3745 label = LABEL();
3746 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3747 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3748 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3749 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3750 JUMPTO(SLJIT_C_NOT_ZERO, label);
3751
3752 JUMPHERE(jump);
3753 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3754 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3755 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3756 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3757 }
3758
3759 #define LCC_TABLE STACK_LIMIT
3760
3761 static void do_caselesscmp(compiler_common *common)
3762 {
3763 DEFINE_COMPILER;
3764 struct sljit_jump *jump;
3765 struct sljit_label *label;
3766
3767 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3768 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3769
3770 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3772 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3773 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3774 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3775 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3776
3777 label = LABEL();
3778 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3779 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3780 #ifndef COMPILE_PCRE8
3781 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3782 #endif
3783 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3784 #ifndef COMPILE_PCRE8
3785 JUMPHERE(jump);
3786 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3787 #endif
3788 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3789 #ifndef COMPILE_PCRE8
3790 JUMPHERE(jump);
3791 #endif
3792 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3793 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3794 JUMPTO(SLJIT_C_NOT_ZERO, label);
3795
3796 JUMPHERE(jump);
3797 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3798 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3799 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3800 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3801 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3802 }
3803
3804 #undef LCC_TABLE
3805 #undef CHAR1
3806 #undef CHAR2
3807
3808 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3809
3810 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3811 {
3812 /* This function would be ineffective to do in JIT level. */
3813 pcre_uint32 c1, c2;
3814 const pcre_uchar *src2 = args->uchar_ptr;
3815 const pcre_uchar *end2 = args->end;
3816 const ucd_record *ur;
3817 const pcre_uint32 *pp;
3818
3819 while (src1 < end1)
3820 {
3821 if (src2 >= end2)
3822 return (pcre_uchar*)1;
3823 GETCHARINC(c1, src1);
3824 GETCHARINC(c2, src2);
3825 ur = GET_UCD(c2);
3826 if (c1 != c2 && c1 != c2 + ur->other_case)
3827 {
3828 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3829 for (;;)
3830 {
3831 if (c1 < *pp) return NULL;
3832 if (c1 == *pp++) break;
3833 }
3834 }
3835 }
3836 return src2;
3837 }
3838
3839 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3840
3841 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3842 compare_context* context, jump_list **backtracks)
3843 {
3844 DEFINE_COMPILER;
3845 unsigned int othercasebit = 0;
3846 pcre_uchar *othercasechar = NULL;
3847 #ifdef SUPPORT_UTF
3848 int utflength;
3849 #endif
3850
3851 if (caseless && char_has_othercase(common, cc))
3852 {
3853 othercasebit = char_get_othercase_bit(common, cc);
3854 SLJIT_ASSERT(othercasebit);
3855 /* Extracting bit difference info. */
3856 #if defined COMPILE_PCRE8
3857 othercasechar = cc + (othercasebit >> 8);
3858 othercasebit &= 0xff;
3859 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3860 /* Note that this code only handles characters in the BMP. If there
3861 ever are characters outside the BMP whose othercase differs in only one
3862 bit from itself (there currently are none), this code will need to be
3863 revised for COMPILE_PCRE32. */
3864 othercasechar = cc + (othercasebit >> 9);
3865 if ((othercasebit & 0x100) != 0)
3866 othercasebit = (othercasebit & 0xff) << 8;
3867 else
3868 othercasebit &= 0xff;
3869 #endif /* COMPILE_PCRE[8|16|32] */
3870 }
3871
3872 if (context->sourcereg == -1)
3873 {
3874 #if defined COMPILE_PCRE8
3875 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3876 if (context->length >= 4)
3877 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3878 else if (context->length >= 2)
3879 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3880 else
3881 #endif
3882 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3883 #elif defined COMPILE_PCRE16
3884 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3885 if (context->length >= 4)
3886 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3887 else
3888 #endif
3889 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3890 #elif defined COMPILE_PCRE32
3891 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3892 #endif /* COMPILE_PCRE[8|16|32] */
3893 context->sourcereg = TMP2;
3894 }
3895
3896 #ifdef SUPPORT_UTF
3897 utflength = 1;
3898 if (common->utf && HAS_EXTRALEN(*cc))
3899 utflength += GET_EXTRALEN(*cc);
3900
3901 do
3902 {
3903 #endif
3904
3905 context->length -= IN_UCHARS(1);
3906 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3907
3908 /* Unaligned read is supported. */
3909 if (othercasebit != 0 && othercasechar == cc)
3910 {
3911 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3912 context->oc.asuchars[context->ucharptr] = othercasebit;
3913 }
3914 else
3915 {
3916 context->c.asuchars[context->ucharptr] = *cc;
3917 context->oc.asuchars[context->ucharptr] = 0;
3918 }
3919 context->ucharptr++;
3920
3921 #if defined COMPILE_PCRE8
3922 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3923 #else
3924 if (context->ucharptr >= 2 || context->length == 0)
3925 #endif
3926 {
3927 if (context->length >= 4)
3928 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3929 else if (context->length >= 2)
3930 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3931 #if defined COMPILE_PCRE8
3932 else if (context->length >= 1)
3933 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3934 #endif /* COMPILE_PCRE8 */
3935 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3936
3937 switch(context->ucharptr)
3938 {
3939 case 4 / sizeof(pcre_uchar):
3940 if (context->oc.asint != 0)
3941 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3942 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3943 break;
3944
3945 case 2 / sizeof(pcre_uchar):
3946 if (context->oc.asushort != 0)
3947 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3948 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3949 break;
3950
3951 #ifdef COMPILE_PCRE8
3952 case 1:
3953 if (context->oc.asbyte != 0)
3954 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3955 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3956 break;
3957 #endif
3958
3959 default:
3960 SLJIT_ASSERT_STOP();
3961 break;
3962 }
3963 context->ucharptr = 0;
3964 }
3965
3966 #else
3967
3968 /* Unaligned read is unsupported or in 32 bit mode. */
3969 if (context->length >= 1)
3970 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3971
3972 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3973
3974 if (othercasebit != 0 && othercasechar == cc)
3975 {
3976 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3977 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3978 }
3979 else
3980 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3981
3982 #endif
3983
3984 cc++;
3985 #ifdef SUPPORT_UTF
3986 utflength--;
3987 }
3988 while (utflength > 0);
3989 #endif
3990
3991 return cc;
3992 }
3993
3994 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3995
/* SET_TYPE_OFFSET(value): when value differs from the current typeoffset,
emits a subtraction from (or an addition to) typereg by the difference, then
records value as the new typeoffset. "typereg" and "typeoffset" must be
variables in scope at the point of use.
NOTE(review): the expansion is two statements (the if block and the
assignment), so the macro must not be used as the unbraced body of an
if / else / loop. */
3996 #define SET_TYPE_OFFSET(value) \
3997 if ((value) != typeoffset) \
3998 { \
3999 if ((value) > typeoffset) \
4000 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4001 else \
4002 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4003 } \
4004 typeoffset = (value);
4005
/* SET_CHAR_OFFSET(value): same as SET_TYPE_OFFSET, but for the character kept
in TMP1 and the "charoffset" variable in scope at the point of use. The same
two-statement caveat applies. */
4006 #define SET_CHAR_OFFSET(value) \
4007 if ((value) != charoffset) \
4008 { \
4009 if ((value) > charoffset) \
4010 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
4011 else \
4012 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
4013 } \
4014 charoffset = (value);
4015
4016 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4017 {
4018 DEFINE_COMPILER;
4019 jump_list *found = NULL;
4020 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
4021 pcre_int32 c, charoffset;
4022 const pcre_uint32 *other_cases;
4023 struct sljit_jump *jump = NULL;
4024 pcre_uchar *ccbegin;
4025 int compares, invertcmp, numberofcmps;
4026 #ifdef SUPPORT_UCP
4027 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4028 BOOL charsaved = FALSE;
4029 int typereg = TMP1, scriptreg = TMP1;
4030 pcre_int32 typeoffset;
4031 #endif
4032
4033 /* Although SUPPORT_UTF must be defined, we are
4034 not necessary in utf mode even in 8 bit mode. */
4035 detect_partial_match(common, backtracks);
4036 read_char(common);
4037
4038 if ((*cc++ & XCL_MAP) != 0)
4039 {
4040 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4041 #ifndef COMPILE_PCRE8
4042 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4043 #elif defined SUPPORT_UTF
4044 if (common->utf)
4045 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4046 #endif
4047
4048 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
4049 {
4050 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4051 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4052 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4053 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4054 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4055 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4056 }
4057
4058 #ifndef COMPILE_PCRE8
4059 JUMPHERE(jump);
4060 #elif defined SUPPORT_UTF
4061 if (common->utf)
4062 JUMPHERE(jump);
4063 #endif
4064 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4065 #ifdef SUPPORT_UCP
4066 charsaved = TRUE;
4067 #endif
4068 cc += 32 / sizeof(pcre_uchar);
4069 }
4070
4071 /* Scanning the necessary info. */
4072 ccbegin = cc;
4073 compares = 0;
4074 while (*cc != XCL_END)
4075 {
4076 compares++;
4077 if (*cc == XCL_SINGLE)
4078 {
4079 cc += 2;
4080 #ifdef SUPPORT_UTF
4081 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4082 #endif
4083 #ifdef SUPPORT_UCP
4084 needschar = TRUE;
4085 #endif
4086 }
4087 else if (*cc == XCL_RANGE)
4088 {
4089 cc += 2;
4090 #ifdef SUPPORT_UTF
4091 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4092 #endif
4093 cc++;
4094 #ifdef SUPPORT_UTF
4095 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4096 #endif
4097 #ifdef SUPPORT_UCP
4098 needschar = TRUE;
4099 #endif
4100 }
4101 #ifdef SUPPORT_UCP
4102 else
4103 {
4104 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4105 cc++;
4106 switch(*cc)
4107 {
4108 case PT_ANY:
4109 break;
4110
4111 case PT_LAMP:
4112 case PT_GC:
4113 case PT_PC:
4114 case PT_ALNUM:
4115 needstype = TRUE;
4116 break;
4117
4118 case PT_SC:
4119 needsscript = TRUE;
4120 break;
4121
4122 case PT_SPACE:
4123 case PT_PXSPACE:
4124 case PT_WORD:
4125 needstype = TRUE;
4126 needschar = TRUE;
4127 break;
4128
4129 case PT_CLIST:
4130 case PT_UCNC:
4131 needschar = TRUE;
4132 break;
4133
4134 default:
4135 SLJIT_ASSERT_STOP();
4136 break;
4137 }
4138 cc += 2;
4139 }
4140 #endif
4141 }
4142
4143 #ifdef SUPPORT_UCP
4144 /* Simple register allocation. TMP1 is preferred if possible. */
4145 if (needstype || needsscript)
4146 {
4147 if (needschar && !charsaved)
4148 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4149 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4150 if (needschar)
4151 {
4152 if (needstype)
4153 {
4154 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4155 typereg = RETURN_ADDR;
4156 }
4157
4158 if (needsscript)
4159 scriptreg = TMP3;
4160 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4161 }
4162 else if (needstype && needsscript)
4163 scriptreg = TMP3;
4164 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4165
4166 if (needsscript)
4167 {
4168 if (scriptreg == TMP1)
4169 {
4170 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4171 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4172 }
4173 else
4174 {
4175 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4176 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4177 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4178 }
4179 }
4180 }
4181 #endif
4182
4183 /* Generating code. */
4184 cc = ccbegin;
4185 charoffset = 0;
4186 numberofcmps = 0;
4187 #ifdef SUPPORT_UCP
4188 typeoffset = 0;
4189 #endif
4190
4191 while (*cc != XCL_END)
4192 {
4193 compares--;
4194 invertcmp = (compares == 0 && list != backtracks);
4195 jump = NULL;
4196
4197 if (*cc == XCL_SINGLE)
4198 {
4199 cc ++;
4200 #ifdef SUPPORT_UTF
4201 if (common->utf)
4202 {
4203 GETCHARINC(c, cc);
4204 }
4205 else
4206 #endif
4207 c = *cc++;
4208
4209 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4210 {
4211 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4212 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4213 numberofcmps++;
4214 }
4215 else if (numberofcmps > 0)
4216 {
4217 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4218 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4219 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4220 numberofcmps = 0;
4221 }
4222 else
4223 {
4224 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4225 numberofcmps = 0;
4226 }
4227 }
4228 else if (*cc == XCL_RANGE)
4229 {
4230 cc ++;
4231 #ifdef SUPPORT_UTF
4232 if (common->utf)
4233 {
4234 GETCHARINC(c, cc);
4235 }
4236 else
4237 #endif
4238 c = *cc++;
4239 SET_CHAR_OFFSET(c);
4240 #ifdef SUPPORT_UTF
4241 if (common->utf)
4242 {
4243 GETCHARINC(c, cc);
4244 }
4245 else
4246 #endif
4247 c = *cc++;
4248 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4249 {
4250 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4251 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4252 numberofcmps++;
4253 }
4254 else if (numberofcmps > 0)
4255 {
4256 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4257 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4258 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4259 numberofcmps = 0;
4260 }
4261 else
4262 {
4263 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4264 numberofcmps = 0;
4265 }
4266 }
4267 #ifdef SUPPORT_UCP
4268 else
4269 {
4270 if (*cc == XCL_NOTPROP)
4271 invertcmp ^= 0x1;
4272 cc++;
4273 switch(*cc)
4274 {
4275 case PT_ANY:
4276 if (list != backtracks)
4277 {
4278 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4279 continue;
4280 }
4281 else if (cc[-1] == XCL_NOTPROP)
4282 continue;
4283 jump = JUMP(SLJIT_JUMP);
4284 break;
4285
4286 case PT_LAMP:
4287 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4288 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4289 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4290 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4291 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4292 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4293 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4294 break;
4295
4296 case PT_GC:
4297 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4298 SET_TYPE_OFFSET(c);
4299 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4300 break;
4301
4302 case PT_PC:
4303 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4304 break;
4305
4306 case PT_SC:
4307 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4308 break;
4309
4310 case PT_SPACE:
4311 case PT_PXSPACE:
4312 SET_CHAR_OFFSET(9);
4313 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4314 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4315
4316 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4317 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4318
4319 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4320 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4321
4322 SET_TYPE_OFFSET(ucp_Zl);
4323 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4324 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4325 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4326 break;
4327
4328 case PT_WORD:
4329 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4330 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4331 /* Fall through. */
4332
4333 case PT_ALNUM:
4334 SET_TYPE_OFFSET(ucp_Ll);
4335 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4336 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4337 SET_TYPE_OFFSET(ucp_Nd);
4338 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4339 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4340 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4341 break;
4342
4343 case PT_CLIST:
4344 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4345
4346 /* At least three characters are required.
4347 Otherwise this case would be handled by the normal code path. */
4348 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4349 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4350
4351 /* Optimizing character pairs, if their difference is power of 2. */
4352 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4353 {
4354 if (charoffset == 0)
4355 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4356 else
4357 {
4358 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4359 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4360 }
4361 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4362 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4363 other_cases += 2;
4364 }
4365 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4366 {
4367 if (charoffset == 0)
4368 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4369 else
4370 {
4371 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4372 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4373 }
4374 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4375 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4376
4377 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4378 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4379
4380 other_cases += 3;
4381 }
4382 else
4383 {
4384 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4385 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4386 }
4387
4388 while (*other_cases != NOTACHAR)
4389 {
4390 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4391 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4392 }
4393 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4394 break;
4395
4396 case PT_UCNC:
4397 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4398 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4399 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4400 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4401 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4402 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4403
4404 SET_CHAR_OFFSET(0xa0);
4405 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4406 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4407 SET_CHAR_OFFSET(0);
4408 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4409 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4410 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4411 break;
4412 }
4413 cc += 2;
4414 }
4415 #endif
4416
4417 if (jump != NULL)
4418 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4419 }
4420
4421 if (found != NULL)
4422 set_jumps(found, LABEL());
4423 }
4424
4425 #undef SET_TYPE_OFFSET
4426 #undef SET_CHAR_OFFSET
4427
4428 #endif
4429
/* Emits the matching-path code for one opcode that tests a position or a
   single character (anchors, character types, literal characters, classes,
   OP_REVERSE).

   type        the opcode to generate code for
   cc          points at the operands that follow the opcode in the pattern
   backtracks  list that receives the jumps taken when the test fails

   Returns the pattern position after the operands consumed here; for opcodes
   that have no operands this is cc unchanged. */
4430 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4431 {
4432 DEFINE_COMPILER;
4433 int length;
4434 unsigned int c, oc, bit;
4435 compare_context context;
4436 struct sljit_jump *jump[4];
4437 jump_list *end_list;
4438 #ifdef SUPPORT_UTF
4439 struct sljit_label *label;
4440 #ifdef SUPPORT_UCP
4441 pcre_uchar propdata[5];
4442 #endif
4443 #endif
4444
4445 switch(type)
4446 {
4447 case OP_SOD:
4448 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4449 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4450 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4451 return cc;
4452
4453 case OP_SOM:
4454 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4455 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4456 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4457 return cc;
4458
4459 case OP_NOT_WORD_BOUNDARY:
4460 case OP_WORD_BOUNDARY:
4461 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4462 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4463 return cc;
4464
4465 case OP_NOT_DIGIT:
4466 case OP_DIGIT:
4467 /* Digits are usually 0-9, so it is worth to optimize them. */
4468 if (common->digits[0] == -2)
4469 get_ctype_ranges(common, ctype_digit, common->digits);
4470 detect_partial_match(common, backtracks);
4471 /* Flip the starting bit in the negative case. */
4472 if (type == OP_NOT_DIGIT)
4473 common->digits[1] ^= 1;
4474 if (!check_ranges(common, common->digits, backtracks, TRUE))
4475 {
4476 read_char8_type(common);
4477 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4478 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4479 }
/* Undo the flip above, so that common->digits is left as it was. */
4480 if (type == OP_NOT_DIGIT)
4481 common->digits[1] ^= 1;
4482 return cc;
4483
4484 case OP_NOT_WHITESPACE:
4485 case OP_WHITESPACE:
4486 detect_partial_match(common, backtracks);
4487 read_char8_type(common);
4488 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4489 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4490 return cc;
4491
4492 case OP_NOT_WORDCHAR:
4493 case OP_WORDCHAR:
4494 detect_partial_match(common, backtracks);
4495 read_char8_type(common);
4496 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4497 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4498 return cc;
4499
4500 case OP_ANY:
4501 detect_partial_match(common, backtracks);
4502 read_char(common);
4503 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4504 {
/* Two-unit fixed newline: fail only when the character just read and the
   next one are the high and low parts of common->newline. */
4505 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4506 end_list = NULL;
4507 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4508 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4509 else
4510 check_str_end(common, &end_list);
4511
4512 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4513 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4514 set_jumps(end_list, LABEL());
4515 JUMPHERE(jump[0]);
4516 }
4517 else
4518 check_newlinechar(common, common->nltype, backtracks, TRUE);
4519 return cc;
4520
4521 case OP_ALLANY:
4522 detect_partial_match(common, backtracks);
4523 #ifdef SUPPORT_UTF
4524 if (common->utf)
4525 {
/* Step over one whole character without decoding its value. */
4526 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4527 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4528 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4529 #if defined COMPILE_PCRE8
4530 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4531 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4532 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4533 #elif defined COMPILE_PCRE16
4534 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4535 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4536 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4537 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4538 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4539 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4540 #endif
4541 JUMPHERE(jump[0]);
4542 #endif /* COMPILE_PCRE[8|16] */
4543 return cc;
4544 }
4545 #endif
4546 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4547 return cc;
4548
4549 case OP_ANYBYTE:
4550 detect_partial_match(common, backtracks);
4551 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4552 return cc;
4553
4554 #ifdef SUPPORT_UTF
4555 #ifdef SUPPORT_UCP
4556 case OP_NOTPROP:
4557 case OP_PROP:
/* Build a one-item extended class (flags, item, type, value, end) and
   reuse the extended class code generator for it. */
4558 propdata[0] = 0;
4559 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4560 propdata[2] = cc[0];
4561 propdata[3] = cc[1];
4562 propdata[4] = XCL_END;
4563 compile_xclass_matchingpath(common, propdata, backtracks);
4564 return cc + 2;
4565 #endif
4566 #endif
4567
4568 case OP_ANYNL:
4569 detect_partial_match(common, backtracks);
4570 read_char(common);
4571 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4572 /* We don't need to handle soft partial matching case. */
4573 end_list = NULL;
4574 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4575 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4576 else
4577 check_str_end(common, &end_list);
4578 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4579 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4580 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4581 jump[2] = JUMP(SLJIT_JUMP);
4582 JUMPHERE(jump[0]);
4583 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4584 set_jumps(end_list, LABEL());
4585 JUMPHERE(jump[1]);
4586 JUMPHERE(jump[2]);
4587 return cc;
4588
4589 case OP_NOT_HSPACE:
4590 case OP_HSPACE:
4591 detect_partial_match(common, backtracks);
4592 read_char(common);
4593 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4594 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4595 return cc;
4596
4597 case OP_NOT_VSPACE:
4598 case OP_VSPACE:
4599 detect_partial_match(common, backtracks);
4600 read_char(common);
4601 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4602 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4603 return cc;
4604
4605 #ifdef SUPPORT_UCP
4606 case OP_EXTUNI:
4607 detect_partial_match(common, backtracks);
4608 read_char(common);
4609 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4610 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4611 /* Optimize register allocation: use a real register. */
4612 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4613 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4614
/* Loop: STACK_TOP holds the gbprop value of the previous character and TMP3
   the position before the character that is being examined. */
4615 label = LABEL();
4616 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4617 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4618 read_char(common);
4619 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4620 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4621 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4622
4623 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4624 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4625 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4626 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4627 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4628 JUMPTO(SLJIT_C_NOT_ZERO, label);
4629
/* The last character read is not part of the sequence: step back to it. */
4630 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4631 JUMPHERE(jump[0]);
4632 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4633
4634 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4635 {
4636 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4637 /* Since we successfully read a char above, partial matching must occur. */
4638 check_partial(common, TRUE);
4639 JUMPHERE(jump[0]);
4640 }
4641 return cc;
4642 #endif
4643
4644 case OP_EODN:
4645 /* Requires rather complex checks. */
4646 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4647 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4648 {
4649 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4650 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4651 if (common->mode == JIT_COMPILE)
4652 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4653 else
4654 {
4655 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4656 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4657 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4658 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4659 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4660 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4661 check_partial(common, TRUE);
4662 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4663 JUMPHERE(jump[1]);
4664 }
4665 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4666 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4667 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4668 }
4669 else if (common->nltype == NLTYPE_FIXED)
4670 {
4671 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4672 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4673 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4674 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4675 }
4676 else
4677 {
4678 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4679 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4680 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4681 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4682 jump[2] = JUMP(SLJIT_C_GREATER);
4683 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4684 /* Equal. */
4685 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4686 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4687 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4688
4689 JUMPHERE(jump[1]);
4690 if (common->nltype == NLTYPE_ANYCRLF)
4691 {
4692 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4693 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4694 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4695 }
4696 else
4697 {
/* STR_PTR is saved in LOCALS1 and restored after the newline test. */
4698 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4699 read_char(common);
4700 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4701 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4702 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4703 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4704 }
4705 JUMPHERE(jump[2]);
4706 JUMPHERE(jump[3]);
4707 }
4708 JUMPHERE(jump[0]);
4709 check_partial(common, FALSE);
4710 return cc;
4711
4712 case OP_EOD:
4713 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4714 check_partial(common, FALSE);
4715 return cc;
4716
4717 case OP_CIRC:
4718 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4719 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4720 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4721 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4722 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4723 return cc;
4724
4725 case OP_CIRCM:
4726 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4727 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4728 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4729 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4730 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4731 jump[0] = JUMP(SLJIT_JUMP);
4732 JUMPHERE(jump[1]);
4733
/* Past the subject start: succeed only if a newline precedes STR_PTR. */
4734 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4735 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4736 {
4737 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4738 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4739 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4740 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4741 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4742 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4743 }
4744 else
4745 {
4746 skip_char_back(common);
4747 read_char(common);
4748 check_newlinechar(common, common->nltype, backtracks, FALSE);
4749 }
4750 JUMPHERE(jump[0]);
4751 return cc;
4752
4753 case OP_DOLL:
4754 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4755 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4756 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4757
/* When common->endonly is not set, the OP_EODN code is reused; otherwise
   the same test as for OP_EOD is emitted. */
4758 if (!common->endonly)
4759 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4760 else
4761 {
4762 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4763 check_partial(common, FALSE);
4764 }
4765 return cc;
4766
4767 case OP_DOLLM:
4768 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4769 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4770 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4771 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4772 check_partial(common, FALSE);
4773 jump[0] = JUMP(SLJIT_JUMP);
4774 JUMPHERE(jump[1]);
4775
4776 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4777 {
4778 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4779 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4780 if (common->mode == JIT_COMPILE)
4781 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4782 else
4783 {
4784 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4785 /* STR_PTR = STR_END - IN_UCHARS(1) */
4786 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4787 check_partial(common, TRUE);
4788 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4789 JUMPHERE(jump[1]);
4790 }
4791
4792 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4793 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4794 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4795 }
4796 else
4797 {
4798 peek_char(common);
4799 check_newlinechar(common, common->nltype, backtracks, FALSE);
4800 }
4801 JUMPHERE(jump[0]);
4802 return cc;
4803
4804 case OP_CHAR:
4805 case OP_CHARI:
4806 length = 1;
4807 #ifdef SUPPORT_UTF
4808 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4809 #endif
/* Outside partial matching modes, characters that byte_sequence_compare can
   handle are compared in place after a single length check. */
4810 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4811 {
4812 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4813 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4814
4815 context.length = IN_UCHARS(length);
4816 context.sourcereg = -1;
4817 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4818 context.ucharptr = 0;
4819 #endif
4820 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4821 }
4822 detect_partial_match(common, backtracks);
4823 read_char(common);
4824 #ifdef SUPPORT_UTF
4825 if (common->utf)
4826 {
4827 GETCHAR(c, cc);
4828 }
4829 else
4830 #endif
4831 c = *cc;
4832 if (type == OP_CHAR || !char_has_othercase(common, cc))
4833 {
4834 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4835 return cc + length;
4836 }
4837 oc = char_othercase(common, c);
4838 bit = c ^ oc;
/* If the two cases differ in a single bit, one OR and one compare suffice. */
4839 if (is_powerof2(bit))
4840 {
4841 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4842 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4843 return cc + length;
4844 }
4845 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4846 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4847 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4848 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4849 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4850 return cc + length;
4851
4852 case OP_NOT:
4853 case OP_NOTI:
4854 detect_partial_match(common, backtracks);
4855 length = 1;
4856 #ifdef SUPPORT_UTF
4857 if (common->utf)
4858 {
4859 #ifdef COMPILE_PCRE8
4860 c = *cc;
4861 if (c < 128)
4862 {
4863 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4864 if (type == OP_NOT || !char_has_othercase(common, cc))
4865 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4866 else
4867 {
4868 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4869 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4870 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4871 }
4872 /* Skip the variable-length character. */
4873 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4874 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4875 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4876 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4877 JUMPHERE(jump[0]);
4878 return cc + 1;
4879 }
4880 else
4881 #endif /* COMPILE_PCRE8 */
4882 {
4883 GETCHARLEN(c, cc, length);
4884 read_char(common);
4885 }
4886 }
4887 else
4888 #endif /* SUPPORT_UTF */
4889 {
4890 read_char(common);
4891 c = *cc;
4892 }
4893
4894 if (type == OP_NOT || !char_has_othercase(common, cc))
4895 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4896 else
4897 {
4898 oc = char_othercase(common, c);
4899 bit = c ^ oc;
4900 if (is_powerof2(bit))
4901 {
4902 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4903 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4904 }
4905 else
4906 {
4907 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4908 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4909 }
4910 }
4911 return cc + length;
4912
4913 case OP_CLASS:
4914 case OP_NCLASS:
4915 detect_partial_match(common, backtracks);
4916 read_char(common);
4917 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4918 return cc + 32 / sizeof(pcre_uchar);
4919
4920 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4921 jump[0] = NULL;
4922 #ifdef COMPILE_PCRE8
4923 /* This check only affects 8 bit mode. In other modes, we
4924 always need to compare the value with 255. */
4925 if (common->utf)
4926 #endif /* COMPILE_PCRE8 */
4927 {
/* A character above 255 fails for OP_CLASS; for OP_NCLASS it skips the
   bitmap test below and is accepted. */
4928 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4929 if (type == OP_CLASS)
4930 {
4931 add_jump(compiler, backtracks, jump[0]);
4932 jump[0] = NULL;
4933 }
4934 }
4935 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Bitmap lookup: byte index is (char >> 3), bit is (char & 7). */
4936 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4937 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4938 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4939 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4940 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4941 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4942 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4943 if (jump[0] != NULL)
4944 JUMPHERE(jump[0]);
4945 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4946 return cc + 32 / sizeof(pcre_uchar);
4947
4948 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4949 case OP_XCLASS:
4950 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4951 return cc + GET(cc, 0) - 1;
4952 #endif
4953
4954 case OP_REVERSE:
4955 length = GET(cc, 0);
4956 if (length == 0)
4957 return cc + LINK_SIZE;
4958 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4959 #ifdef SUPPORT_UTF
4960 if (common->utf)
4961 {
/* In UTF mode characters have variable length, so step back one
   character at a time, failing if the subject start is reached. */
4962 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4963 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4964 label = LABEL();
4965 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4966 skip_char_back(common);
4967 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4968 JUMPTO(SLJIT_C_NOT_ZERO, label);
4969 }
4970 else
4971 #endif
4972 {
4973 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4974 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4975 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4976 }
4977 check_start_used_ptr(common);
4978 return cc + LINK_SIZE;
4979 }
/* Every opcode handled by this function returns from inside the switch. */
4980 SLJIT_ASSERT_STOP();
4981 return cc;
4982 }
4983
4984 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4985 {
4986 /* This function consumes at least one input character. */
4987 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4988 DEFINE_COMPILER;
4989 pcre_uchar *ccbegin = cc;
4990 compare_context context;
4991 int size;
4992
4993 context.length = 0;
4994 do
4995 {
4996 if (cc >= ccend)
4997 break;
4998
4999 if (*cc == OP_CHAR)
5000 {
5001 size = 1;
5002 #ifdef SUPPORT_UTF
5003 if (common->utf && HAS_EXTRALEN(cc[1]))
5004 size += GET_EXTRALEN(cc[1]);
5005 #endif
5006 }
5007 else if (*cc == OP_CHARI)
5008 {
5009 size = 1;
5010 #ifdef SUPPORT_UTF
5011 if (common->utf)
5012 {
5013 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5014 size = 0;
5015 else if (HAS_EXTRALEN(cc[1]))
5016 size += GET_EXTRALEN(cc[1]);
5017 }
5018 else
5019 #endif
5020 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5021 size = 0;
5022 }
5023 else
5024 size = 0;
5025
5026 cc += 1 + size;
5027 context.length += IN_UCHARS(size);
5028 }
5029 while (size > 0 && context.length <= 128);
5030
5031 cc = ccbegin;
5032 if (context.length > 0)
5033 {
5034 /* We have a fixed-length byte sequence. */
5035 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5036 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5037
5038 context.sourcereg = -1;
5039 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5040 context.ucharptr = 0;
5041 #endif
5042 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5043 return cc;
5044 }
5045
5046 /* A non-fixed length character will be checked if length == 0. */
5047 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5048 }
5049
5050 /* Forward definitions. */
5051 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5052 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5053
/* Allocates a zero-filled backtrack record of 'size' bytes, stores 'ccstart'
in it and pushes it in front of parent->top. The variables 'compiler',
'parent' and 'backtrack' must be in scope where the macro is used. If the
compiler is in an error state after the allocation, the enclosing function
returns 'error'. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
5066
/* Same as PUSH_BACKTRACK, for functions returning void: if the compiler is in
an error state after the allocation, the enclosing function simply returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the local 'backtrack' pointer as a pointer to the given record type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5081
5082 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5083 {
5084 /* The OVECTOR offset goes to TMP2. */
5085 DEFINE_COMPILER;
5086 int count = GET2(cc, 1 + IMM2_SIZE);
5087 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5088 unsigned int offset;
5089 jump_list *found = NULL;
5090
5091 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5092
5093 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5094
5095 count--;
5096 while (count-- > 0)
5097 {
5098 offset = GET2(slot, 0) << 1;
5099 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5100 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5101 slot += common->name_entry_size;
5102 }
5103
5104 offset = GET2(slot, 0) << 1;
5105 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5106 if (backtracks != NULL && !common->jscript_compat)
5107 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5108
5109 set_jumps(found, LABEL());
5110 }
5111
5112 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5113 {
5114 DEFINE_COMPILER;
5115 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5116 int offset = 0;
5117 struct sljit_jump *jump = NULL;
5118 struct sljit_jump *partial;
5119 struct sljit_jump *nopartial;
5120
5121 if (ref)
5122 {
5123 offset = GET2(cc, 1) << 1;
5124 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5125 /* OVECTOR(1) contains the "string begin - 1" constant. */
5126 if (withchecks && !common->jscript_compat)
5127 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5128 }
5129 else
5130 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5131
5132 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5133 if (common->utf && *cc == OP_REFI)
5134 {
5135 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5136 if (ref)
5137 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5138 else
5139 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5140
5141 if (withchecks)
5142 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5143
5144 /* Needed to save important temporary registers. */
5145 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5146 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5148 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5149 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5150 if (common->mode == JIT_COMPILE)
5151 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5152 else
5153 {
5154 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5155 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5156 check_partial(common, FALSE);
5157 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5158 JUMPHERE(nopartial);
5159 }
5160 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5161 }
5162 else
5163 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5164 {
5165 if (ref)
5166 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5167 else
5168 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5169
5170 if (withchecks)
5171 jump = JUMP(SLJIT_C_ZERO);
5172
5173 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5174 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5175 if (common->mode == JIT_COMPILE)
5176 add_jump(compiler, backtracks, partial);
5177
5178 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5179 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5180
5181 if (common->mode != JIT_COMPILE)
5182 {
5183 nopartial = JUMP(SLJIT_JUMP);
5184 JUMPHERE(partial);
5185 /* TMP2 -= STR_END - STR_PTR */
5186 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5187 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5188 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5189 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5190 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5191 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5192 JUMPHERE(partial);
5193 check_partial(common, FALSE);
5194 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5195 JUMPHERE(nopartial);
5196 }
5197 }
5198
5199 if (jump != NULL)
5200 {
5201 if (emptyfail)
5202 add_jump(compiler, backtracks, jump);
5203 else
5204 JUMPHERE(jump);
5205 }
5206 }
5207
/* Compiles a back reference that is followed by one of the OP_CR* repeat
opcodes. cc points to the reference opcode; for the non OP_REF / OP_REFI
(duplicate name) forms the group is selected at run time with
compile_dnref_search().

A backtrack record of type iterator_backtrack is pushed onto parent. The
return value is the position after the repeat opcode and its arguments, or
NULL when PUSH_BACKTRACK fails.

The repeat is described by min and max; max == 0 is the value used by
OP_CRSTAR / OP_CRPLUS, which have no upper limit. */
5208 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5209 {
5210 DEFINE_COMPILER;
5211 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5212 backtrack_common *backtrack;
5213 pcre_uchar type;
5214 int offset = 0;
5215 struct sljit_label *label;
5216 struct sljit_jump *zerolength;
5217 struct sljit_jump *jump = NULL;
5218 pcre_uchar *ccbegin = cc;
5219 int min = 0, max = 0;
5220 BOOL minimize;
5221
5222 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5223
/* The duplicate name forms carry one more IMM2_SIZE argument; skipping it
here lets the repeat opcode be read at the same relative position. */
5224 if (ref)
5225 offset = GET2(cc, 1) << 1;
5226 else
5227 cc += IMM2_SIZE;
5228 type = cc[1 + IMM2_SIZE];
5229
/* An odd repeat opcode is treated as the minimizing variant; this relies on
the opcode numbering checked by the compile time assert below. */
5230 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5231 minimize = (type & 0x1) != 0;
5232 switch(type)
5233 {
5234 case OP_CRSTAR:
5235 case OP_CRMINSTAR:
5236 min = 0;
5237 max = 0;
5238 cc += 1 + IMM2_SIZE + 1;
5239 break;
5240 case OP_CRPLUS:
5241 case OP_CRMINPLUS:
5242 min = 1;
5243 max = 0;
5244 cc += 1 + IMM2_SIZE + 1;
5245 break;
5246 case OP_CRQUERY:
5247 case OP_CRMINQUERY:
5248 min = 0;
5249 max = 1;
5250 cc += 1 + IMM2_SIZE + 1;
5251 break;
5252 case OP_CRRANGE:
5253 case OP_CRMINRANGE:
5254 min = GET2(cc, 1 + IMM2_SIZE + 1);
5255 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5256 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5257 break;
5258 default:
5259 SLJIT_ASSERT_STOP();
5260 break;
5261 }
5262
/* Maximizing repeat. The repeat counter (only needed when min > 1 or
max > 1) is kept in POSSESSIVE0, and for the duplicate name forms the address
found by compile_dnref_search() is kept in POSSESSIVE1. */
5263 if (!minimize)
5264 {
5265 if (min == 0)
5266 {
5267 allocate_stack(common, 2);
5268 if (ref)
5269 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5270 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5271 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5272 /* Temporary release of STR_PTR. */
5273 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5274 /* Handles both invalid and empty cases. Since the minimum repeat,
5275 is zero the invalid case is basically the same as an empty case. */
5276 if (ref)
5277 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5278 else
5279 {
5280 compile_dnref_search(common, ccbegin, NULL);
5281 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5282 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5283 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5284 }
5285 /* Restore if not zero length. */
5286 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5287 }
5288 else
5289 {
/* At least one repeat is required: a reference whose start offset equals
OVECTOR(1) fails here (for the duplicate name forms the check is emitted by
compile_dnref_search()). */
5290 allocate_stack(common, 1);
5291 if (ref)
5292 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5293 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5294 if (ref)
5295 {
5296 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5297 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5298 }
5299 else
5300 {
5301 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5302 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5303 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5304 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5305 }
5306 }
5307
/* Reset the repeat counter. */
5308 if (min > 1 || max > 1)
5309 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5310
/* Start of the repeat loop: one match of the referenced string. The checks
for the unset and empty cases were emitted above, hence withchecks is
FALSE. */
5311 label = LABEL();
5312 if (!ref)
5313 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5314 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5315
5316 if (min > 1 || max > 1)
5317 {
5318 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5319 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5320 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: repeat without saving STR_PTR on the stack. */
5321 if (min > 1)
5322 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5323 if (max > 1)
5324 {
/* Below the maximum: push the current STR_PTR and try one more repeat. */
5325 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5326 allocate_stack(common, 1);
5327 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5328 JUMPTO(SLJIT_JUMP, label);
5329 JUMPHERE(jump);
5330 }
5331 }
5332
5333 if (max == 0)
5334 {
5335 /* Includes min > 1 case as well. */
5336 allocate_stack(common, 1);
5337 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5338 JUMPTO(SLJIT_JUMP, label);
5339 }
5340
5341 JUMPHERE(zerolength);
5342 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5343
5344 count_match(common);
5345 return cc;
5346 }
5347
/* Minimizing repeat. Stack layout used below: STACK(0) is written with 0 and
later with STR_PTR, STACK(1) is the repeat counter (not initialized for
OP_CRMINSTAR), and STACK(2) holds the address found by
compile_dnref_search() for the duplicate name forms. */
5348 allocate_stack(common, ref ? 2 : 3);
5349 if (ref)
5350 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5351 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5352 if (type != OP_CRMINSTAR)
5353 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5354
5355 if (min == 0)
5356 {
5357 /* Handles both invalid and empty cases. Since the minimum repeat,
5358 is zero the invalid case is basically the same as an empty case. */
5359 if (ref)
5360 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5361 else
5362 {
5363 compile_dnref_search(common, ccbegin, NULL);
5364 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5365 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5366 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5367 }
5368 /* Length is non-zero, we can match real repeats. */
5369 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* Zero repeats are tried first: jump over the code of the repeat. */
5370 jump = JUMP(SLJIT_JUMP);
5371 }
5372 else
5373 {
5374 if (ref)
5375 {
5376 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5377 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5378 }
5379 else
5380 {
5381 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5382 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5383 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5384 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5385 }
5386 }
5387
/* The code of one repeat starts at the label recorded in the backtrack
record. It fails once the counter in STACK(1) has reached max. */
5388 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5389 if (max > 0)
5390 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5391
5392 if (!ref)
5393 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5394 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5395 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5396
/* Update the counter; while it is below min, the next repeat is matched
immediately. */
5397 if (min > 1)
5398 {
5399 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5400 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5402 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5403 }
5404 else if (max > 0)
5405 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5406
5407 if (jump != NULL)
5408 JUMPHERE(jump);
5409 JUMPHERE(zerolength);
5410
5411 count_match(common);
5412 return cc;
5413 }
5414
5415 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5416 {
5417 DEFINE_COMPILER;
5418 backtrack_common *backtrack;
5419 recurse_entry *entry = common->entries;
5420 recurse_entry *prev = NULL;
5421 sljit_sw start = GET(cc, 1);
5422 pcre_uchar *start_cc;
5423 BOOL needs_control_head;
5424
5425 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5426
5427 /* Inlining simple patterns. */
5428 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5429 {
5430 start_cc = common->start + start;
5431 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5432 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5433 return cc + 1 + LINK_SIZE;
5434 }
5435
5436 while (entry != NULL)
5437 {
5438 if (entry->start == start)
5439 break;
5440 prev = entry;
5441 entry = entry->next;
5442 }
5443
5444 if (entry == NULL)
5445 {
5446 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5447 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5448 return NULL;
5449 entry->next = NULL;
5450 entry->entry = NULL;
5451 entry->calls = NULL;
5452 entry->start = start;
5453
5454 if (prev != NULL)
5455 prev->next = entry;
5456 else
5457 common->entries = entry;
5458 }
5459
5460 if (common->has_set_som && common->mark_ptr != 0)
5461 {
5462 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5463 allocate_stack(common, 2);
5464 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5465 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5467 }
5468 else if (common->has_set_som || common->mark_ptr != 0)
5469 {
5470 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5471 allocate_stack(common, 1);
5472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5473 }
5474
5475 if (entry->entry == NULL)
5476 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5477 else
5478 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5479 /* Leave if the match is failed. */
5480 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5481 return cc + 1 + LINK_SIZE;
5482 }
5483
5484 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5485 {
5486 const pcre_uchar *begin = arguments->begin;
5487 int *offset_vector = arguments->offsets;
5488 int offset_count = arguments->offset_count;
5489 int i;
5490
5491 if (PUBL(callout) == NULL)
5492 return 0;
5493
5494 callout_block->version = 2;
5495 callout_block->callout_data = arguments->callout_data;
5496
5497 /* Offsets in subject. */
5498 callout_block->subject_length = arguments->end - arguments->begin;
5499 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5500 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5501 #if defined COMPILE_PCRE8
5502 callout_block->subject = (PCRE_SPTR)begin;
5503 #elif defined COMPILE_PCRE16
5504 callout_block->subject = (PCRE_SPTR16)begin;
5505 #elif defined COMPILE_PCRE32
5506 callout_block->subject = (PCRE_SPTR32)begin;
5507 #endif
5508
5509 /* Convert and copy the JIT offset vector to the offset_vector array. */
5510 callout_block->capture_top = 0;
5511 callout_block->offset_vector = offset_vector;
5512 for (i = 2; i < offset_count; i += 2)
5513 {
5514 offset_vector[i] = jit_ovector[i] - begin;
5515 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5516 if (jit_ovector[i] >= begin)
5517 callout_block->capture_top = i;
5518 }
5519
5520 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5521 if (offset_count > 0)
5522 offset_vector[0] = -1;
5523 if (offset_count > 1)
5524 offset_vector[1] = -1;
5525 return (*PUBL(callout))(callout_block);
5526 }
5527
5528 /* Aligning to 8 byte. */
5529 #define CALLOUT_ARG_SIZE \
5530 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5531
5532 #define CALLOUT_ARG_OFFSET(arg) \
5533 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5534
5535 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5536 {
5537 DEFINE_COMPILER;
5538 backtrack_common *backtrack;
5539
5540 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5541
5542 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5543
5544 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5545 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5546 SLJIT_ASSERT(common->capture_last_ptr != 0);
5547 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5548 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5549
5550 /* These pointer sized fields temporarly stores internal variables. */
5551 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5552 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5553 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5554
5555 if (common->mark_ptr != 0)
5556 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5557 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5558 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5559 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5560
5561 /* Needed to save important temporary registers. */
5562 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5563 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5564 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5565 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5566 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5567 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5568 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5569
5570 /* Check return value. */
5571 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5572 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5573 if (common->forced_quit_label == NULL)
5574 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5575 else
5576 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5577 return cc + 2 + 2 * LINK_SIZE;
5578 }
5579
5580 #undef CALLOUT_ARG_SIZE
5581 #undef CALLOUT_ARG_OFFSET
5582
5583 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5584 {
5585 DEFINE_COMPILER;
5586 int framesize;
5587 int extrasize;
5588 BOOL needs_control_head;
5589 int private_data_ptr;
5590 backtrack_common altbacktrack;
5591 pcre_uchar *ccbegin;
5592 pcre_uchar opcode;
5593 pcre_uchar bra = OP_BRA;
5594 jump_list *tmp = NULL;
5595 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5596 jump_list **found;
5597 /* Saving previous accept variables. */
5598 BOOL save_local_exit = common->local_exit;
5599 BOOL save_positive_assert = common->positive_assert;
5600 then_trap_backtrack *save_then_trap = common->then_trap;
5601 struct sljit_label *save_quit_label = common->quit_label;
5602 struct sljit_label *save_accept_label = common->accept_label;
5603 jump_list *save_quit = common->quit;
5604 jump_list *save_positive_assert_quit = common->positive_assert_quit;
5605 jump_list *save_accept = common->accept;
5606 struct sljit_jump *jump;
5607 struct sljit_jump *brajump = NULL;
5608
5609 /* Assert captures then. */
5610 common->then_trap = NULL;
5611
5612 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5613 {
5614 SLJIT_ASSERT(!conditional);
5615 bra = *cc;
5616 cc++;
5617 }
5618 private_data_ptr = PRIVATE_DATA(cc);
5619 SLJIT_ASSERT(private_data_ptr != 0);
5620 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5621 backtrack->framesize = framesize;
5622 backtrack->private_data_ptr = private_data_ptr;
5623 opcode = *cc;
5624 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
5625 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5626 ccbegin = cc;
5627 cc += GET(cc, 1);
5628
5629 if (bra == OP_BRAMINZERO)
5630 {
5631 /* This is a braminzero backtrack path. */
5632 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5633 free_stack(common, 1);
5634 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5635 }
5636
5637 if (framesize < 0)
5638 {
5639 extrasize = needs_control_head ? 2 : 1;
5640 if (framesize == no_frame)
5641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5642 allocate_stack(common, extrasize);
5643 if (needs_control_head)
5644 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5645 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5646 if (needs_control_head)
5647 {
5648 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5649 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5650 }
5651 }
5652 else
5653 {
5654 extrasize = needs_control_head ? 3 : 2;
5655 allocate_stack(common, framesize + extrasize);
5656 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5657 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5658 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5659 if (needs_control_head)
5660 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5661 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5662 if (needs_control_head)
5663 {
5664 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5665 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5666 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5667 }
5668 else
5669 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5670 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
5671 }
5672
5673 memset(&altbacktrack, 0, sizeof(backtrack_common));
5674 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5675 {
5676 /* Negative assert is stronger than positive assert. */
5677 common->local_exit = TRUE;
5678 common->quit_label = NULL;
5679 common->quit = NULL;
5680 common->positive_assert = FALSE;
5681 }
5682 else
5683 common->positive_assert = TRUE;
5684 common->positive_assert_quit = NULL;
5685
5686 while (1)
5687 {
5688 common->accept_label = NULL;
5689 common->accept = NULL;
5690 altbacktrack.top = NULL;
5691 altbacktrack.topbacktracks = NULL;
5692
5693 if (*ccbegin == OP_ALT)
5694 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5695
5696 altbacktrack.cc = ccbegin;
5697 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5698 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5699 {
5700 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5701 {
5702 common->local_exit = save_local_exit;
5703 common->quit_label = save_quit_label;
5704 common->quit = save_quit;
5705 }
5706 common->positive_assert = save_positive_assert;
5707 common->then_trap = save_then_trap;
5708 common->accept_label = save_accept_label;
5709 common->positive_assert_quit = save_positive_assert_quit;
5710 common->accept = save_accept;
5711 return NULL;
5712 }
5713 common->accept_label = LABEL();
5714 if (common->accept != NULL)
5715 set_jumps(common->accept, common->accept_label);
5716
5717 /* Reset stack. */
5718 if (framesize < 0)
5719 {
5720 if (framesize == no_frame)
5721 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5722 else
5723 free_stack(common, extrasize);
5724 if (needs_control_head)
5725 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5726 }
5727 else
5728 {
5729 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5730 {
5731 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5732 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5733 if (needs_control_head)
5734 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5735 }
5736 else
5737 {
5738 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5739 if (needs_control_head)
5740 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5741 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5742 }
5743 }
5744
5745 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5746 {
5747 /* We know that STR_PTR was stored on the top of the stack. */
5748 if (conditional)
5749 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5750 else if (bra == OP_BRAZERO)
5751 {
5752 if (framesize < 0)
5753 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5754 else
5755 {
5756 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5757 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5758 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5759 }
5760 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5761 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5762 }
5763 else if (framesize >= 0)
5764 {
5765 /* For OP_BRA and OP_BRAMINZERO. */
5766 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5767 }
5768 }
5769 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5770
5771 compile_backtrackingpath(common, altbacktrack.top);
5772 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5773 {
5774 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5775 {
5776 common->local_exit = save_local_exit;
5777 common->quit_label = save_quit_label;
5778 common->quit = save_quit;
5779 }
5780 common->positive_assert = save_positive_assert;
5781 common->then_trap = save_then_trap;
5782 common->accept_label = save_accept_label;
5783 common->positive_assert_quit = save_positive_assert_quit;
5784 common->accept = save_accept;
5785 return NULL;
5786 }
5787 set_jumps(altbacktrack.topbacktracks, LABEL());
5788
5789 if (*cc != OP_ALT)
5790 break;
5791
5792 ccbegin = cc;
5793 cc += GET(cc, 1);
5794 }
5795
5796 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5797 {
5798 SLJIT_ASSERT(common->positive_assert_quit == NULL);
5799 /* Makes the check less complicated below. */
5800 common->positive_assert_quit = common->quit;
5801 }
5802
5803 /* None of them matched. */
5804 if (common->positive_assert_quit != NULL)
5805 {
5806 jump = JUMP(SLJIT_JUMP);
5807 set_jumps(common->positive_assert_quit, LABEL());
5808 SLJIT_ASSERT(framesize != no_stack);
5809 if (framesize < 0)
5810 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5811 else
5812 {
5813 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5814 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5815 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5816 }
5817 JUMPHERE(jump);
5818 }
5819
5820 if (needs_control_head)
5821 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5822
5823 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5824 {
5825 /* Assert is failed. */
5826 if (conditional || bra == OP_BRAZERO)
5827 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5828
5829 if (framesize < 0)
5830 {
5831 /* The topmost item should be 0. */
5832 if (bra == OP_BRAZERO)
5833 {
5834 if (extrasize == 2)
5835 free_stack(common, 1);
5836 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5837 }
5838 else
5839 free_stack(common, extrasize);
5840 }
5841 else
5842 {
5843 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5844 /* The topmost item should be 0. */
5845 if (bra == OP_BRAZERO)
5846 {
5847 free_stack(common, framesize + extrasize - 1);
5848 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5849 }
5850 else
5851 free_stack(common, framesize + extrasize);
5852 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5853 }
5854 jump = JUMP(SLJIT_JUMP);
5855 if (bra != OP_BRAZERO)
5856 add_jump(compiler, target, jump);
5857
5858 /* Assert is successful. */
5859 set_jumps(tmp, LABEL());
5860 if (framesize < 0)
5861 {
5862 /* We know that STR_PTR was stored on the top of the stack. */
5863 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5864 /* Keep the STR_PTR on the top of the stack. */
5865 if (bra == OP_BRAZERO)
5866 {
5867 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5868 if (extrasize == 2)
5869 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5870 }
5871 else if (bra == OP_BRAMINZERO)
5872 {
5873 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5874 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5875 }
5876 }
5877 else
5878 {
5879 if (bra == OP_BRA)
5880 {
5881 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5882 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5883 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5884 }
5885 else
5886 {
5887 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5888 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5889 if (extrasize == 2)
5890 {
5891 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5892 if (bra == OP_BRAMINZERO)
5893 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5894 }
5895 else
5896 {
5897 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5898 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5899 }
5900 }
5901 }
5902
5903 if (bra == OP_BRAZERO)
5904 {
5905 backtrack->matchingpath = LABEL();
5906 SET_LABEL(jump, backtrack->matchingpath);
5907 }
5908 else if (bra == OP_BRAMINZERO)
5909 {
5910 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5911 JUMPHERE(brajump);
5912 if (framesize >= 0)
5913 {
5914 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5915 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5916 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5917 }
5918 set_jumps(backtrack->common.topbacktracks, LABEL());
5919 }
5920 }
5921 else
5922 {
5923 /* AssertNot is successful. */
5924 if (framesize < 0)
5925 {
5926 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5927 if (bra != OP_BRA)
5928 {
5929 if (extrasize == 2)
5930 free_stack(common, 1);
5931 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5932 }
5933 else
5934 free_stack(common, extrasize);
5935 }
5936 else
5937 {
5938 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5939 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5940 /* The topmost item should be 0. */
5941 if (bra != OP_BRA)
5942 {
5943 free_stack(common, framesize + extrasize - 1);
5944 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5945 }
5946 else
5947 free_stack(common, framesize + extrasize);
5948 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5949 }
5950
5951 if (bra == OP_BRAZERO)
5952 backtrack->matchingpath = LABEL();
5953 else if (bra == OP_BRAMINZERO)
5954 {
5955 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5956 JUMPHERE(brajump);
5957 }
5958
5959 if (bra != OP_BRA)
5960 {
5961 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5962 set_jumps(backtrack->common.topbacktracks, LABEL());
5963 backtrack->common.topbacktracks = NULL;
5964 }
5965 }
5966
5967 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5968 {
5969 common->local_exit = save_local_exit;
5970 common->quit_label = save_quit_label;
5971 common->quit = save_quit;
5972 }
5973 common->positive_assert = save_positive_assert;
5974 common->then_trap = save_then_trap;
5975 common->accept_label = save_accept_label;
5976 common->positive_assert_quit = save_positive_assert_quit;
5977 common->accept = save_accept;
5978 return cc + 1 + LINK_SIZE;
5979 }
5980
/* Emits the code executed after the body of an OP_ONCE group has matched.
It repositions STACK_TOP according to framesize, reloads the value that is
written back to common->control_head_ptr when needs_control_head is set, and
for OP_KETRMAX leaves the word at the top of the stack in TMP2 for the caller.

  common              compiler state (supplies the sljit compiler and control_head_ptr)
  ket                 closing opcode of the group (OP_KET, OP_KETRMAX or OP_KETRMIN)
  framesize           frame size of the group; negative values mean no frame was built
  private_data_ptr    offset of the group's private data slot in the locals area
  has_alternatives    TRUE when the group has more than one alternative
  needs_control_head  TRUE when a control head value was saved for this group */
5981 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
5982 {
5983 DEFINE_COMPILER;
5984 int stacksize;
5985
5986 if (framesize < 0)
5987 {
/* no_frame: STACK_TOP is simply reloaded from the private data slot.
Otherwise the slots pushed on entry are released explicitly. */
5988 if (framesize == no_frame)
5989 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5990 else
5991 {
/* One slot for the saved control head, one more when the group repeats
or has alternatives. */
5992 stacksize = needs_control_head ? 1 : 0;
5993 if (ket != OP_KET || has_alternatives)
5994 stacksize++;
5995 free_stack(common, stacksize);
5996 }
5997
/* Fetch the saved control head into TMP1. Its offset from STACK_TOP is
sizeof(sljit_sw) when the extra slot is present and 0 otherwise. */
5998 if (needs_control_head)
5999 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6000
6001 /* TMP2, which is set here, is used by the OP_KETRMAX handling in the caller. */
6002 if (ket == OP_KETRMAX)
6003 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6004 else if (ket == OP_KETRMIN)
6005 {
6006 /* Move the STR_PTR to the private_data_ptr. */
6007 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6008 }
6009 }
6010 else
6011 {
/* A frame was built: STACK_TOP is recomputed from the private data slot,
skipping the frame plus one or two extra words (two when the group repeats
or has alternatives). */
6012 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6013 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6014 if (needs_control_head)
6015 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6016
6017 if (ket == OP_KETRMAX)
6018 {
6019 /* TMP2, which is set here, is used by the OP_KETRMAX handling in the caller. */
6020 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6021 }
6022 }
/* Write the value loaded into TMP1 above back to the control head slot. */
6023 if (needs_control_head)
6024 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6025 }
6026
/* Emits code that saves the previous capture state of a capturing bracket on
the stack and records the new values.

  common            compiler state (capture_last_ptr, optimized_cbracket)
  stacksize         index of the first stack slot this function may write
  offset            OVECTOR index of the bracket (bracket number * 2; the
                    bracket number is recovered here as offset >> 1)
  private_data_ptr  offset of the bracket's private data slot in the locals area

Returns stacksize advanced by the number of slots written (0 to 3). */
6027 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6028 {
6029 DEFINE_COMPILER;
6030
/* When the last-capture slot is in use: push its old value and replace it
with this bracket's number. */
6031 if (common->capture_last_ptr != 0)
6032 {
6033 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6034 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6035 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6036 stacksize++;
6037 }
/* Non-optimized bracket: push the old OVECTOR pair, then set the start from
the private data slot and the end to the current STR_PTR. */
6038 if (common->optimized_cbracket[offset >> 1] == 0)
6039 {
6040 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6041 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6042 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6043 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6044 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6047 stacksize += 2;
6048 }
6049 return stacksize;
6050 }
6051
6052 /*
6053 Handling bracketed expressions is probably the most complex part.
6054
6055 Stack layout naming characters:
6056 S - Push the current STR_PTR
6057 0 - Push a 0 (NULL)
6058 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6059 before the next alternative. Not pushed if there are no alternatives.
6060 M - Any values pushed by the current alternative. Can be empty, or anything.
6061 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6062 L - Push the previous local (pointed by localptr) to the stack
6063 () - optional values stored on the stack
6064 ()* - optional, can be stored multiple times
6065
6066 The following list shows the regular expression templates, their PCRE byte codes
6067 and stack layout supported by pcre-sljit.
6068
6069 (?:) OP_BRA | OP_KET A M
6070 () OP_CBRA | OP_KET C M
6071 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6072 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6073 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6074 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6075 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6076 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6077 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6078 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6079 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6080 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6081 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6082 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6083 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6084 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6085 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6086 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6087 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6088 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6089 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6090 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6091
6092
6093 Stack layout naming characters:
6094 A - Push the alternative index (starting from 0) on the stack.
6095 Not pushed if there are no alternatives.
6096 M - Any values pushed by the current alternative. Can be empty, or anything.
6097
6098 The next list shows the possible content of a bracket:
6099 (|) OP_*BRA | OP_ALT ... M A
6100 (?()|) OP_*COND | OP_ALT M A
6101 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6102 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6103 Or nothing, if trace is unnecessary
6104 */
6105
/* Generates the matching path of a bracketed group. The opcodes tested here
are OP_CBRA / OP_SCBRA, OP_SBRA, OP_COND / OP_SCOND and OP_ONCE / OP_ONCE_NC
(any other bracket opcode takes the plain path), optionally preceded by
OP_BRAZERO or OP_BRAMINZERO. A single alternative is compiled here (normally
the first one); the remaining OP_ALT branches are skipped over at the end.

  common  compiler state
  cc      byte code position of the group (or of its OP_BRAZERO / OP_BRAMINZERO prefix)
  parent  backtrack descriptor of the enclosing construct

Returns the byte code position after the group and its repeat data, or NULL
when the sljit compiler reports an error. */
6106 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6107 {
6108 DEFINE_COMPILER;
6109 backtrack_common *backtrack;
6110 pcre_uchar opcode;
6111 int private_data_ptr = 0;
6112 int offset = 0;
6113 int i, stacksize;
6114 int repeat_ptr = 0, repeat_length = 0;
6115 int repeat_type = 0, repeat_count = 0;
6116 pcre_uchar *ccbegin;
6117 pcre_uchar *matchingpath;
6118 pcre_uchar *slot;
/* bra: the OP_BRAZERO / OP_BRAMINZERO prefix if one is present, OP_BRA otherwise. */
6119 pcre_uchar bra = OP_BRA;
6120 pcre_uchar ket;
6121 assert_backtrack *assert;
6122 BOOL has_alternatives;
6123 BOOL needs_control_head = FALSE;
6124 struct sljit_jump *jump;
6125 struct sljit_jump *skip;
6126 struct sljit_label *rmax_label = NULL;
6127 struct sljit_jump *braminzero = NULL;
6128
/* NOTE(review): `backtrack` is never assigned explicitly in this function, so
PUSH_BACKTRACK (a macro defined elsewhere) presumably allocates the
bracket_backtrack and sets it - confirm against the macro definition. */
6129 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6130
6131 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6132 {
6133 bra = *cc;
6134 cc++;
/* Redundant: opcode is assigned again right after this block. */
6135 opcode = *cc;
6136 }
6137
6138 opcode = *cc;
6139 ccbegin = cc;
/* matchingpath temporarily addresses the closing ket opcode of the group. */
6140 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6141 ket = *matchingpath;
/* A plain OP_KET with non-zero private data carries a counted repeat. The
OP_UPTO and OP_MINUPTO variants are compiled like OP_KETRMAX / OP_KETRMIN loops. */
6142 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6143 {
6144 repeat_ptr = PRIVATE_DATA(matchingpath);
6145 repeat_length = PRIVATE_DATA(matchingpath + 1);
6146 repeat_type = PRIVATE_DATA(matchingpath + 2);
6147 repeat_count = PRIVATE_DATA(matchingpath + 3);
6148 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6149 if (repeat_type == OP_UPTO)
6150 ket = OP_KETRMAX;
6151 if (repeat_type == OP_MINUPTO)
6152 ket = OP_KETRMIN;
6153 }
6154
/* A conditional group whose condition is OP_DEF emits no code at all. */
6155 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6156 {
6157 /* Drop this bracket_backtrack. */
6158 parent->top = backtrack->prev;
6159 return matchingpath + 1 + LINK_SIZE + repeat_length;
6160 }
6161
/* From here matchingpath is the first byte code inside the bracket, and cc is
advanced to the end of the first alternative (an OP_ALT or the ket). */
6162 matchingpath = ccbegin + 1 + LINK_SIZE;
6163 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6164 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6165 cc += GET(cc, 1);
6166
/* A conditional group always counts as having alternatives, except for
OP_RREF / OP_DNRREF conditions, which are decided at compile time below. */
6167 has_alternatives = *cc == OP_ALT;
6168 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6169 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
6170
/* Normalise opcodes: a repeated OP_COND is handled as OP_SCOND, and
OP_ONCE_NC as OP_ONCE. */
6171 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6172 opcode = OP_SCOND;
6173 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6174 opcode = OP_ONCE;
6175
6176 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6177 {
6178 /* Capturing brackets have a pre-allocated space. After this block, offset
is the OVECTOR index (bracket number * 2). private_data_ptr is
OVECTOR_PRIV(n) for normal brackets and the OVECTOR start slot itself
for optimized ones. */
6179 offset = GET2(ccbegin, 1 + LINK_SIZE);
6180 if (common->optimized_cbracket[offset] == 0)
6181 {
6182 private_data_ptr = OVECTOR_PRIV(offset);
6183 offset <<= 1;
6184 }
6185 else
6186 {
6187 offset <<= 1;
6188 private_data_ptr = OVECTOR(offset);
6189 }
6190 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6191 matchingpath += IMM2_SIZE;
6192 }
6193 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6194 {
6195 /* Other brackets simply allocate the next entry. */
6196 private_data_ptr = PRIVATE_DATA(ccbegin);
6197 SLJIT_ASSERT(private_data_ptr != 0);
6198 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
/* get_framesize also receives &needs_control_head and may set it. */
6199 if (opcode == OP_ONCE)
6200 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6201 }
6202
6203 /* Instructions before the first alternative. */
/* First count the leading slots (a 0 marker for repeats, the current STR_PTR
for OP_BRAZERO), allocate them in one step, then fill them in. */
6204 stacksize = 0;
6205 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6206 stacksize++;
6207 if (bra == OP_BRAZERO)
6208 stacksize++;
6209
6210 if (stacksize > 0)
6211 allocate_stack(common, stacksize);
6212
6213 stacksize = 0;
6214 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6215 {
6216 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6217 stacksize++;
6218 }
6219
6220 if (bra == OP_BRAZERO)
6221 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6222
6223 if (bra == OP_BRAMINZERO)
6224 {
6225 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6226 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6227 if (ket != OP_KETRMIN)
6228 {
/* Non-repeating case: a 0 popped from the stack takes the braminzero jump,
which is resolved near the end of this function. */
6229 free_stack(common, 1);
6230 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6231 }
6232 else
6233 {
6234 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6235 {
6236 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6237 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6238 /* Nothing stored during the first run. */
6239 skip = JUMP(SLJIT_JUMP);
6240 JUMPHERE(jump);
6241 /* Checking zero-length iteration. */
6242 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6243 {
6244 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6245 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6246 }
6247 else
6248 {
6249 /* Except when the whole stack frame must be saved. */
6250 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6251 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6252 }
6253 JUMPHERE(skip);
6254 }
6255 else
6256 {
6257 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6258 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6259 JUMPHERE(jump);
6260 }
6261 }
6262 }
6263
/* Counted repeat: initialise the counter slot. For OP_EXACT the loop head
label is taken here. */
6264 if (repeat_type != 0)
6265 {
6266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6267 if (repeat_type == OP_EXACT)
6268 rmax_label = LABEL();
6269 }
6270
6271 if (ket == OP_KETRMIN)
6272 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6273
6274 if (ket == OP_KETRMAX)
6275 {
/* Loop head of the greedy repeat; the loop-back jumps are emitted after the
alternative has been compiled. */
6276 rmax_label = LABEL();
6277 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6278 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
6279 }
6280
6281 /* Handling capturing brackets and alternatives. */
6282 if (opcode == OP_ONCE)
6283 {
6284 stacksize = 0;
/* TMP2 carries the current control head until it is stored on the stack. */
6285 if (needs_control_head)
6286 {
6287 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6288 stacksize++;
6289 }
6290
6291 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6292 {
6293 /* Neither capturing brackets nor recursions are found in the block. */
6294 if (ket == OP_KETRMIN)
6295 {
6296 stacksize += 2;
6297 if (!needs_control_head)
6298 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6299 }
6300 else
6301 {
6302 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6303 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6304 if (ket == OP_KETRMAX || has_alternatives)
6305 stacksize++;
6306 }
6307
6308 if (stacksize > 0)
6309 allocate_stack(common, stacksize);
6310
6311 stacksize = 0;
6312 if (needs_control_head)
6313 {
6314 stacksize++;
6315 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6316 }
6317
6318 if (ket == OP_KETRMIN)
6319 {
/* TMP2 is reloaded with the private data value only after the control head
it held has been stored above. */
6320 if (needs_control_head)
6321 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6322 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6323 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6324 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6325 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6326 }
6327 else if (ket == OP_KETRMAX || has_alternatives)
6328 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6329 }
6330 else
6331 {
/* A frame is required. Here stacksize already counts the control head slot
(if any); add the optional STR_PTR slot, the frame itself and one more
word for the previous private data value. */
6332 if (ket != OP_KET || has_alternatives)
6333 stacksize++;
6334
6335 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6336 allocate_stack(common, stacksize);
6337
6338 if (needs_control_head)
6339 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6340
/* TMP1 = previous private data value. TMP2 = STACK_TOP minus the size just
allocated; it becomes the new private data value. */
6341 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6342 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6343
6344 stacksize = needs_control_head ? 1 : 0;
6345 if (ket != OP_KET || has_alternatives)
6346 {
6347 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6348 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6349 stacksize++;
6350 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6351 }
6352 else
6353 {
6354 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6355 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6356 }
6357 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6358 }
6359 }
6360 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6361 {
6362 /* Saving the previous values. */
6363 if (common->optimized_cbracket[offset >> 1] != 0)
6364 {
/* Optimized bracket: both OVECTOR words are pushed and the start word is
set to the current STR_PTR. */
6365 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6366 allocate_stack(common, 2);
6367 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6368 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6369 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6370 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6371 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6372 }
6373 else
6374 {
/* Normal bracket: only the private data word is pushed and replaced by the
current STR_PTR. */
6375 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6376 allocate_stack(common, 1);
6377 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6378 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6379 }
6380 }
6381 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6382 {
6383 /* Saving the previous value. */
6384 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6385 allocate_stack(common, 1);
6386 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6387 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6388 }
6389 else if (has_alternatives)
6390 {
6391 /* Pushing the starting string pointer. */
6392 allocate_stack(common, 1);
6393 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6394 }
6395
6396 /* Generating code for the first alternative. */
6397 if (opcode == OP_COND || opcode == OP_SCOND)
6398 {
6399 if (*matchingpath == OP_CREF)
6400 {
/* The condition fails when the tested OVECTOR word equals OVECTOR(1). */
6401 SLJIT_ASSERT(has_alternatives);
6402 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6403 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6404 matchingpath += 1 + IMM2_SIZE;
6405 }
6406 else if (*matchingpath == OP_DNCREF)
6407 {
6408 SLJIT_ASSERT(has_alternatives);
6409
/* i name table entries are tested, starting at the given entry. The
differences (OVECTOR word - OVECTOR(1)) are OR-ed together in TMP2, and the
condition fails when the result is zero. STR_PTR is used as scratch and is
preserved in TMP3. */
6410 i = GET2(matchingpath, 1 + IMM2_SIZE);
6411 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
6412 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6413 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6414 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
6415 slot += common->name_entry_size;
6416 i--;
6417 while (i-- > 0)
6418 {
6419 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
6420 OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
6421 slot += common->name_entry_size;
6422 }
6423 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6424 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO));
6425 matchingpath += 1 + 2 * IMM2_SIZE;
6426 }
6427 else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF)
6428 {
6429 /* Never has other case. */
6430 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6431 SLJIT_ASSERT(!has_alternatives);
6432
/* These conditions are evaluated now, while compiling, from
common->currententry. stacksize is reused as the result: non-zero means
the condition holds. No run-time test is emitted. */
6433 if (*matchingpath == OP_RREF)
6434 {
6435 stacksize = GET2(matchingpath, 1);
6436 if (common->currententry == NULL)
6437 stacksize = 0;
6438 else if (stacksize == RREF_ANY)
6439 stacksize = 1;
6440 else if (common->currententry->start == 0)
6441 stacksize = stacksize == 0;
6442 else
6443 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6444
6445 if (stacksize != 0)
6446 matchingpath += 1 + IMM2_SIZE;
6447 }
6448 else
6449 {
6450 if (common->currententry == NULL || common->currententry->start == 0)
6451 stacksize = 0;
6452 else
6453 {
/* Search the name table entries for the number read from the current entry.
stacksize counts down and reaches 0 when none of them matches. */
6454 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
6455 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
6456 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6457 while (stacksize > 0)
6458 {
6459 if ((int)GET2(slot, 0) == i)
6460 break;
6461 slot += common->name_entry_size;
6462 stacksize--;
6463 }
6464 }
6465
6466 if (stacksize != 0)
6467 matchingpath += 1 + 2 * IMM2_SIZE;
6468 }
6469
6470 /* The stacksize == 0 is a common "else" case. */
6471 if (stacksize == 0)
6472 {
/* Compile the alternative after the OP_ALT if there is one; otherwise
matchingpath == cc, i.e. an empty range. */
6473 if (*cc == OP_ALT)
6474 {
6475 matchingpath = cc + 1 + LINK_SIZE;
6476 cc += GET(cc, 1);
6477 }
6478 else
6479 matchingpath = cc;
6480 }
6481 }
6482 else
6483 {
/* The condition is an assertion: compile it with its own assert_backtrack. */
6484 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6485 /* Similar code as PUSH_BACKTRACK macro. */
6486 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6487 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6488 return NULL;
6489 memset(assert, 0, sizeof(assert_backtrack));
6490 assert->common.cc = matchingpath;
6491 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6492 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6493 }
6494 }
6495
/* Compile the byte code range [matchingpath, cc) of the selected alternative. */
6496 compile_matchingpath(common, matchingpath, cc, backtrack);
6497 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6498 return NULL;
6499
6500 if (opcode == OP_ONCE)
6501 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6502
/* Code after the alternative has matched. First pass: count the slots that
are needed, so that they can be allocated in one step. */
6503 stacksize = 0;
6504 if (repeat_type == OP_MINUPTO)
6505 {
6506 /* We need to preserve the counter. TMP2 will be used below. */
6507 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6508 stacksize++;
6509 }
6510 if (ket != OP_KET || bra != OP_BRA)
6511 stacksize++;
6512 if (offset != 0)
6513 {
6514 if (common->capture_last_ptr != 0)
6515 stacksize++;
6516 if (common->optimized_cbracket[offset >> 1] == 0)
6517 stacksize += 2;
6518 }
6519 if (has_alternatives && opcode != OP_ONCE)
6520 stacksize++;
6521
6522 if (stacksize > 0)
6523 allocate_stack(common, stacksize);
6524
/* Second pass: fill the slots in the same order as they were counted. */
6525 stacksize = 0;
6526 if (repeat_type == OP_MINUPTO)
6527 {
6528 /* TMP2 was set above. */
6529 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
6530 stacksize++;
6531 }
6532
6533 if (ket != OP_KET || bra != OP_BRA)
6534 {
/* Repeating groups store the current STR_PTR; a non-repeating group with a
zero prefix stores a 0 instead. */
6535 if (ket != OP_KET)
6536 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6537 else
6538 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6539 stacksize++;
6540 }
6541
6542 if (offset != 0)
6543 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6544
6545 if (has_alternatives)
6546 {
/* The slot written here is initialised to 0. NOTE(review): per the layout
table above this function it presumably holds the alternative index -
confirm against the backtracking path. */
6547 if (opcode != OP_ONCE)
6548 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6549 if (ket != OP_KETRMAX)
6550 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6551 }
6552
6553 /* Must be after the matchingpath label. */
6554 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6555 {
6556 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6557 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6558 }
6559
6560 if (ket == OP_KETRMAX)
6561 {
/* Greedy loop: jump back to rmax_label, either while the counter is
non-zero, while the iteration was not zero-length, or unconditionally. */
6562 if (repeat_type != 0)
6563 {
6564 if (has_alternatives)
6565 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6566 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6567 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6568 /* Drop STR_PTR for greedy plus quantifier. */
6569 if (opcode != OP_ONCE)
6570 free_stack(common, 1);
6571 }
6572 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
6573 {
6574 if (has_alternatives)
6575 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6576 /* Checking zero-length iteration. */
6577 if (opcode != OP_ONCE)
6578 {
6579 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label);
6580 /* Drop STR_PTR for greedy plus quantifier. */
6581 if (bra != OP_BRAZERO)
6582 free_stack(common, 1);
6583 }
6584 else
6585 /* TMP2 must contain the starting STR_PTR. */
6586 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
6587 }
6588 else
6589 JUMPTO(SLJIT_JUMP, rmax_label);
6590 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6591 }
6592
6593 if (repeat_type == OP_EXACT)
6594 {
/* Exact repeat: decrement the counter and loop until it reaches zero. */
6595 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6596 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6597 }
6598 else if (repeat_type == OP_UPTO)
6599 {
6600 /* We need to preserve the counter. */
6601 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6602 allocate_stack(common, 1);
6603 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6604 }
6605
6606 if (bra == OP_BRAZERO)
6607 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6608
6609 if (bra == OP_BRAMINZERO)
6610 {
6611 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
/* NOTE(review): parent is cast to braminzero_backtrack here, so callers
presumably always pass one when the group has an OP_BRAMINZERO prefix -
confirm at the call sites. */
6612 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6613 if (braminzero != NULL)
6614 {
6615 JUMPHERE(braminzero);
6616 /* We need to release the end pointer to perform the
6617 backtrack for the zero-length iteration. When
6618 framesize is < 0, OP_ONCE will do the release itself. */
6619 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6620 {
6621 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6622 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6623 }
6624 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6625 free_stack(common, 1);
6626 }
6627 /* Continue to the normal backtrack. */
6628 }
6629
6630 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6631 count_match(common);
6632
6633 /* Skip the other alternatives. */
6634 while (*cc == OP_ALT)
6635 cc += GET(cc, 1);
6636 cc += 1 + LINK_SIZE;
6637
6638 /* Temporarily encoding the needs_control_head in framesize: the framesize
is shifted left by one and the flag is stored in the lowest bit. */
6639 if (opcode == OP_ONCE)
6640 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6641 return cc + repeat_length;
6642 }
6643
6644 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6645 {
6646 DEFINE_COMPILER;
6647 backtrack_common *backtrack;
6648 pcre_uchar opcode;
6649 int private_data_ptr;
6650 int cbraprivptr = 0;
6651 BOOL needs_control_head;
6652 int framesize;
6653 int stacksize;
6654 int offset = 0;
6655 BOOL zero = FALSE;
6656 pcre_uchar *ccbegin = NULL;
6657 int stack; /* Also contains the offset of control head. */
6658 struct sljit_label *loop = NULL;
6659 struct jump_list *emptymatch = NULL;
6660
6661 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6662 if (*cc == OP_BRAPOSZERO)
6663 {
6664 zero = TRUE;
6665 cc++;
6666 }
6667
6668 opcode = *cc;
6669 private_data_ptr = PRIVATE_DATA(cc);
6670 SLJIT_ASSERT(private_data_ptr != 0);
6671 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6672 switch(opcode)
6673 {
6674 case OP_BRAPOS:
6675 case OP_SBRAPOS:
6676 ccbegin = cc + 1 + LINK_SIZE;
6677 break;
6678
6679 case OP_CBRAPOS:
6680 case OP_SCBRAPOS:
6681 offset = GET2(cc, 1 + LINK_SIZE);
6682 /* This case cannot be optimized in the same was as
6683 normal capturing brackets. */
6684 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6685 cbraprivptr = OVECTOR_PRIV(offset);
6686 offset <<= 1;
6687 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6688 break;
6689
6690 default:
6691 SLJIT_ASSERT_STOP();
6692 break;
6693 }
6694
6695 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6696 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6697 if (framesize < 0)
6698 {
6699 if (offset != 0)
6700 {
6701 stacksize = 2;
6702 if (common->capture_last_ptr != 0)
6703 stacksize++;
6704 }
6705 else
6706 stacksize = 1;
6707
6708 if (needs_control_head)
6709 stacksize++;
6710 if (!zero)
6711 stacksize++;
6712
6713 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6714 allocate_stack(common, stacksize);
6715 if (framesize == no_frame)
6716 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6717
6718 stack = 0;
6719 if (offset != 0)
6720 {
6721 stack = 2;
6722 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6723 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6725 if (common->capture_last_ptr != 0)
6726 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6727 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6728 if (needs_control_head)
6729 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6730 if (common->capture_last_ptr != 0)
6731 {
6732 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6733 stack = 3;
6734 }
6735 }
6736 else