/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1290 - (show annotations)
Sat Mar 16 18:45:51 2013 UTC (6 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 300538 byte(s)
Recursive control verb priority is removed from JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary of the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 int real_offset_count;
172 int offset_count;
173 int call_limit;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
186 } executable_functions;
187
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;   /* The jump stored in this node. */
  struct jump_list *next;    /* Following node, if any. */
} jump_list;
192
/* Node of a singly linked list of stubs; each node pairs a start jump
   with a quit label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
198
/* Negative sentinels used in place of a frame size (get_framesize
   returns one of them when no frame is needed). */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
203
/* Type tags for control entries. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
208
209 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
210
211 /* The following structure is the key data type for the recursive
212 code generator. It is allocated by compile_matchingpath, and contains
213 the aguments for compile_backtrackingpath. Must be the first member
214 of its descendants. */
215 typedef struct backtrack_common {
216 /* Concatenation stack. */
217 struct backtrack_common *prev;
218 jump_list *nextbacktracks;
219 /* Internal stack (for component operators). */
220 struct backtrack_common *top;
221 jump_list *topbacktracks;
222 /* Opcode pointer. */
223 pcre_uchar *cc;
224 } backtrack_common;
225
226 typedef struct assert_backtrack {
227 backtrack_common common;
228 jump_list *condfailed;
229 /* Less than 0 if a frame is not needed. */
230 int framesize;
231 /* Points to our private memory word on the stack. */
232 int private_data_ptr;
233 /* For iterators. */
234 struct sljit_label *matchingpath;
235 } assert_backtrack;
236
237 typedef struct bracket_backtrack {
238 backtrack_common common;
239 /* Where to coninue if an alternative is successfully matched. */
240 struct sljit_label *alternative_matchingpath;
241 /* For rmin and rmax iterators. */
242 struct sljit_label *recursive_matchingpath;
243 /* For greedy ? operator. */
244 struct sljit_label *zero_matchingpath;
245 /* Contains the branches of a failed condition. */
246 union {
247 /* Both for OP_COND, OP_SCOND. */
248 jump_list *condfailed;
249 assert_backtrack *assert;
250 /* For OP_ONCE. Less than 0 if not needed. */
251 int framesize;
252 } u;
253 /* Points to our private memory word on the stack. */
254 int private_data_ptr;
255 } bracket_backtrack;
256
257 typedef struct bracketpos_backtrack {
258 backtrack_common common;
259 /* Points to our private memory word on the stack. */
260 int private_data_ptr;
261 /* Reverting stack is needed. */
262 int framesize;
263 /* Allocated stack size. */
264 int stacksize;
265 } bracketpos_backtrack;
266
267 typedef struct braminzero_backtrack {
268 backtrack_common common;
269 struct sljit_label *matchingpath;
270 } braminzero_backtrack;
271
272 typedef struct iterator_backtrack {
273 backtrack_common common;
274 /* Next iteration. */
275 struct sljit_label *matchingpath;
276 } iterator_backtrack;
277
278 typedef struct recurse_entry {
279 struct recurse_entry *next;
280 /* Contains the function entry. */
281 struct sljit_label *entry;
282 /* Collects the calls until the function is not created. */
283 jump_list *calls;
284 /* Points to the starting opcode. */
285 sljit_sw start;
286 } recurse_entry;
287
288 typedef struct recurse_backtrack {
289 backtrack_common common;
290 BOOL inlined_pattern;
291 } recurse_backtrack;
292
293 #define OP_THEN_TRAP OP_TABLE_LENGTH
294
295 typedef struct then_trap_backtrack {
296 backtrack_common common;
297 /* If then_trap is not NULL, this structure contains the real
298 then_trap for the backtracking path. */
299 struct then_trap_backtrack *then_trap;
300 /* Points to the starting opcode. */
301 sljit_sw start;
302 /* Exit point for the then opcodes of this alternative. */
303 jump_list *quit;
304 /* Frame size of the current alternative. */
305 int framesize;
306 } then_trap_backtrack;
307
308 #define MAX_RANGE_SIZE 6
309
310 typedef struct compiler_common {
311 /* The sljit ceneric compiler. */
312 struct sljit_compiler *compiler;
313 /* First byte code. */
314 pcre_uchar *start;
315 /* Maps private data offset to each opcode. */
316 int *private_data_ptrs;
317 /* Tells whether the capturing bracket is optimized. */
318 pcre_uint8 *optimized_cbracket;
319 /* Tells whether the starting offset is a target of then. */
320 pcre_uint8 *then_offsets;
321 /* Current position where a THEN must jump. */
322 then_trap_backtrack *then_trap;
323 /* Starting offset of private data for capturing brackets. */
324 int cbra_ptr;
325 /* Output vector starting point. Must be divisible by 2. */
326 int ovector_start;
327 /* Last known position of the requested byte. */
328 int req_char_ptr;
329 /* Head of the last recursion. */
330 int recursive_head_ptr;
331 /* First inspected character for partial matching. */
332 int start_used_ptr;
333 /* Starting pointer for partial soft matches. */
334 int hit_start;
335 /* End pointer of the first line. */
336 int first_line_end;
337 /* Points to the marked string. */
338 int mark_ptr;
339 /* Recursive control verb management chain. */
340 int control_head_ptr;
341 /* Points to the last matched capture block index. */
342 int capture_last_ptr;
343 /* Points to the starting position of the current match. */
344 int start_ptr;
345
346 /* Flipped and lower case tables. */
347 const pcre_uint8 *fcc;
348 sljit_sw lcc;
349 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
350 int mode;
351 /* \K is found in the pattern. */
352 BOOL has_set_som;
353 /* (*SKIP:arg) is found in the pattern. */
354 BOOL has_skip_arg;
355 /* (*THEN) is found in the pattern. */
356 BOOL has_then;
357 /* Needs to know the start position anytime. */
358 BOOL needs_start_ptr;
359 /* Currently in recurse or assert. */
360 BOOL local_exit;
361 /* Newline control. */
362 int nltype;
363 int newline;
364 int bsr_nltype;
365 /* Dollar endonly. */
366 int endonly;
367 /* Tables. */
368 sljit_sw ctypes;
369 int digits[2 + MAX_RANGE_SIZE];
370 /* Named capturing brackets. */
371 sljit_uw name_table;
372 sljit_sw name_count;
373 sljit_sw name_entry_size;
374
375 /* Labels and jump lists. */
376 struct sljit_label *partialmatchlabel;
377 struct sljit_label *quit_label;
378 struct sljit_label *forced_quit_label;
379 struct sljit_label *accept_label;
380 stub_list *stubs;
381 recurse_entry *entries;
382 recurse_entry *currententry;
383 jump_list *partialmatch;
384 jump_list *quit;
385 jump_list *forced_quit;
386 jump_list *accept;
387 jump_list *calllimit;
388 jump_list *stackalloc;
389 jump_list *revertframes;
390 jump_list *wordboundary;
391 jump_list *anynewline;
392 jump_list *hspace;
393 jump_list *vspace;
394 jump_list *casefulcmp;
395 jump_list *caselesscmp;
396 jump_list *reset_match;
397 BOOL jscript_compat;
398 #ifdef SUPPORT_UTF
399 BOOL utf;
400 #ifdef SUPPORT_UCP
401 BOOL use_ucp;
402 #endif
403 #ifndef COMPILE_PCRE32
404 jump_list *utfreadchar;
405 #endif
406 #ifdef COMPILE_PCRE8
407 jump_list *utfreadtype8;
408 #endif
409 #endif /* SUPPORT_UTF */
410 #ifdef SUPPORT_UCP
411 jump_list *getucd;
412 #endif
413 } compiler_common;
414
415 /* For byte_sequence_compare. */
416
/* Comparison state. The members after sourcereg exist only when
   SLJIT_UNALIGNED is enabled; `c` and `oc` are two unions with the same
   layout, whose asuchars array is sized by the compiled character width. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
448
449 /* Undefine sljit macros. */
450 #undef CMP
451
452 /* Used for accessing the elements of the stack. */
453 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
454
455 #define TMP1 SLJIT_SCRATCH_REG1
456 #define TMP2 SLJIT_SCRATCH_REG3
457 #define TMP3 SLJIT_TEMPORARY_EREG2
458 #define STR_PTR SLJIT_SAVED_REG1
459 #define STR_END SLJIT_SAVED_REG2
460 #define STACK_TOP SLJIT_SCRATCH_REG2
461 #define STACK_LIMIT SLJIT_SAVED_REG3
462 #define ARGUMENTS SLJIT_SAVED_EREG1
463 #define CALL_COUNT SLJIT_SAVED_EREG2
464 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
465
466 /* Local space layout. */
467 /* These two locals can be used by the current opcode. */
468 #define LOCALS0 (0 * sizeof(sljit_sw))
469 #define LOCALS1 (1 * sizeof(sljit_sw))
470 /* Two local variables for possessive quantifiers (char1 cannot use them). */
471 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
472 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
473 /* Max limit of recursions. */
474 #define CALL_LIMIT (4 * sizeof(sljit_sw))
475 /* The output vector is stored on the stack, and contains pointers
476 to characters. The vector data is divided into two groups: the first
477 group contains the start / end character pointers, and the second is
478 the start pointers when the end of the capturing group has not yet been reached. */
479 #define OVECTOR_START (common->ovector_start)
480 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
481 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))
482 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
483
484 #if defined COMPILE_PCRE8
485 #define MOV_UCHAR SLJIT_MOV_UB
486 #define MOVU_UCHAR SLJIT_MOVU_UB
487 #elif defined COMPILE_PCRE16
488 #define MOV_UCHAR SLJIT_MOV_UH
489 #define MOVU_UCHAR SLJIT_MOVU_UH
490 #elif defined COMPILE_PCRE32
491 #define MOV_UCHAR SLJIT_MOV_UI
492 #define MOVU_UCHAR SLJIT_MOVU_UI
493 #else
494 #error Unsupported compiling mode
495 #endif
496
497 /* Shortcuts. */
498 #define DEFINE_COMPILER \
499 struct sljit_compiler *compiler = common->compiler
500 #define OP1(op, dst, dstw, src, srcw) \
501 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
502 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
503 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
504 #define LABEL() \
505 sljit_emit_label(compiler)
506 #define JUMP(type) \
507 sljit_emit_jump(compiler, (type))
508 #define JUMPTO(type, label) \
509 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
510 #define JUMPHERE(jump) \
511 sljit_set_label((jump), sljit_emit_label(compiler))
512 #define SET_LABEL(jump, label) \
513 sljit_set_label((jump), (label))
514 #define CMP(type, src1, src1w, src2, src2w) \
515 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
516 #define CMPTO(type, src1, src1w, src2, src2w, label) \
517 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
518 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
519 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
520 #define GET_LOCAL_BASE(dst, dstw, offset) \
521 sljit_get_local_base(compiler, (dst), (dstw), (offset))
522
523 static pcre_uchar* bracketend(pcre_uchar* cc)
524 {
525 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
526 do cc += GET(cc, 1); while (*cc == OP_ALT);
527 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
528 cc += 1 + LINK_SIZE;
529 return cc;
530 }
531
532 /* Functions which might need modification for every newly supported opcode:
533 next_opcode
534 get_private_data_length
535 set_private_data_ptrs
536 get_framesize
537 init_frame
538 get_private_data_copy_length
539 copy_private_data
540 compile_matchingpath
541 compile_backtrackingpath
542 */
543
544 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
545 {
546 SLJIT_UNUSED_ARG(common);
547 switch(*cc)
548 {
549 case OP_SOD:
550 case OP_SOM:
551 case OP_SET_SOM:
552 case OP_NOT_WORD_BOUNDARY:
553 case OP_WORD_BOUNDARY:
554 case OP_NOT_DIGIT:
555 case OP_DIGIT:
556 case OP_NOT_WHITESPACE:
557 case OP_WHITESPACE:
558 case OP_NOT_WORDCHAR:
559 case OP_WORDCHAR:
560 case OP_ANY:
561 case OP_ALLANY:
562 case OP_NOTPROP:
563 case OP_PROP:
564 case OP_ANYNL:
565 case OP_NOT_HSPACE:
566 case OP_HSPACE:
567 case OP_NOT_VSPACE:
568 case OP_VSPACE:
569 case OP_EXTUNI:
570 case OP_EODN:
571 case OP_EOD:
572 case OP_CIRC:
573 case OP_CIRCM:
574 case OP_DOLL:
575 case OP_DOLLM:
576 case OP_CRSTAR:
577 case OP_CRMINSTAR:
578 case OP_CRPLUS:
579 case OP_CRMINPLUS:
580 case OP_CRQUERY:
581 case OP_CRMINQUERY:
582 case OP_CRRANGE:
583 case OP_CRMINRANGE:
584 case OP_CLASS:
585 case OP_NCLASS:
586 case OP_REF:
587 case OP_REFI:
588 case OP_RECURSE:
589 case OP_CALLOUT:
590 case OP_ALT:
591 case OP_KET:
592 case OP_KETRMAX:
593 case OP_KETRMIN:
594 case OP_KETRPOS:
595 case OP_REVERSE:
596 case OP_ASSERT:
597 case OP_ASSERT_NOT:
598 case OP_ASSERTBACK:
599 case OP_ASSERTBACK_NOT:
600 case OP_ONCE:
601 case OP_ONCE_NC:
602 case OP_BRA:
603 case OP_BRAPOS:
604 case OP_CBRA:
605 case OP_CBRAPOS:
606 case OP_COND:
607 case OP_SBRA:
608 case OP_SBRAPOS:
609 case OP_SCBRA:
610 case OP_SCBRAPOS:
611 case OP_SCOND:
612 case OP_CREF:
613 case OP_NCREF:
614 case OP_RREF:
615 case OP_NRREF:
616 case OP_DEF:
617 case OP_BRAZERO:
618 case OP_BRAMINZERO:
619 case OP_BRAPOSZERO:
620 case OP_PRUNE:
621 case OP_SKIP:
622 case OP_THEN:
623 case OP_COMMIT:
624 case OP_FAIL:
625 case OP_ACCEPT:
626 case OP_ASSERT_ACCEPT:
627 case OP_CLOSE:
628 case OP_SKIPZERO:
629 return cc + PRIV(OP_lengths)[*cc];
630
631 case OP_CHAR:
632 case OP_CHARI:
633 case OP_NOT:
634 case OP_NOTI:
635 case OP_STAR:
636 case OP_MINSTAR:
637 case OP_PLUS:
638 case OP_MINPLUS:
639 case OP_QUERY:
640 case OP_MINQUERY:
641 case OP_UPTO:
642 case OP_MINUPTO:
643 case OP_EXACT:
644 case OP_POSSTAR:
645 case OP_POSPLUS:
646 case OP_POSQUERY:
647 case OP_POSUPTO:
648 case OP_STARI:
649 case OP_MINSTARI:
650 case OP_PLUSI:
651 case OP_MINPLUSI:
652 case OP_QUERYI:
653 case OP_MINQUERYI:
654 case OP_UPTOI:
655 case OP_MINUPTOI:
656 case OP_EXACTI:
657 case OP_POSSTARI:
658 case OP_POSPLUSI:
659 case OP_POSQUERYI:
660 case OP_POSUPTOI:
661 case OP_NOTSTAR:
662 case OP_NOTMINSTAR:
663 case OP_NOTPLUS:
664 case OP_NOTMINPLUS:
665 case OP_NOTQUERY:
666 case OP_NOTMINQUERY:
667 case OP_NOTUPTO:
668 case OP_NOTMINUPTO:
669 case OP_NOTEXACT:
670 case OP_NOTPOSSTAR:
671 case OP_NOTPOSPLUS:
672 case OP_NOTPOSQUERY:
673 case OP_NOTPOSUPTO:
674 case OP_NOTSTARI:
675 case OP_NOTMINSTARI:
676 case OP_NOTPLUSI:
677 case OP_NOTMINPLUSI:
678 case OP_NOTQUERYI:
679 case OP_NOTMINQUERYI:
680 case OP_NOTUPTOI:
681 case OP_NOTMINUPTOI:
682 case OP_NOTEXACTI:
683 case OP_NOTPOSSTARI:
684 case OP_NOTPOSPLUSI:
685 case OP_NOTPOSQUERYI:
686 case OP_NOTPOSUPTOI:
687 cc += PRIV(OP_lengths)[*cc];
688 #ifdef SUPPORT_UTF
689 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
690 #endif
691 return cc;
692
693 /* Special cases. */
694 case OP_TYPESTAR:
695 case OP_TYPEMINSTAR:
696 case OP_TYPEPLUS:
697 case OP_TYPEMINPLUS:
698 case OP_TYPEQUERY:
699 case OP_TYPEMINQUERY:
700 case OP_TYPEUPTO:
701 case OP_TYPEMINUPTO:
702 case OP_TYPEEXACT:
703 case OP_TYPEPOSSTAR:
704 case OP_TYPEPOSPLUS:
705 case OP_TYPEPOSQUERY:
706 case OP_TYPEPOSUPTO:
707 return cc + PRIV(OP_lengths)[*cc] - 1;
708
709 case OP_ANYBYTE:
710 #ifdef SUPPORT_UTF
711 if (common->utf) return NULL;
712 #endif
713 return cc + 1;
714
715 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
716 case OP_XCLASS:
717 return cc + GET(cc, 1);
718 #endif
719
720 case OP_MARK:
721 case OP_PRUNE_ARG:
722 case OP_SKIP_ARG:
723 case OP_THEN_ARG:
724 return cc + 1 + 2 + cc[1];
725
726 default:
727 /* All opcodes are supported now! */
728 SLJIT_ASSERT_STOP();
729 return NULL;
730 }
731 }
732
733 #define CASE_ITERATOR_PRIVATE_DATA_1 \
734 case OP_MINSTAR: \
735 case OP_MINPLUS: \
736 case OP_QUERY: \
737 case OP_MINQUERY: \
738 case OP_MINSTARI: \
739 case OP_MINPLUSI: \
740 case OP_QUERYI: \
741 case OP_MINQUERYI: \
742 case OP_NOTMINSTAR: \
743 case OP_NOTMINPLUS: \
744 case OP_NOTQUERY: \
745 case OP_NOTMINQUERY: \
746 case OP_NOTMINSTARI: \
747 case OP_NOTMINPLUSI: \
748 case OP_NOTQUERYI: \
749 case OP_NOTMINQUERYI:
750
751 #define CASE_ITERATOR_PRIVATE_DATA_2A \
752 case OP_STAR: \
753 case OP_PLUS: \
754 case OP_STARI: \
755 case OP_PLUSI: \
756 case OP_NOTSTAR: \
757 case OP_NOTPLUS: \
758 case OP_NOTSTARI: \
759 case OP_NOTPLUSI:
760
761 #define CASE_ITERATOR_PRIVATE_DATA_2B \
762 case OP_UPTO: \
763 case OP_MINUPTO: \
764 case OP_UPTOI: \
765 case OP_MINUPTOI: \
766 case OP_NOTUPTO: \
767 case OP_NOTMINUPTO: \
768 case OP_NOTUPTOI: \
769 case OP_NOTMINUPTOI:
770
771 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
772 case OP_TYPEMINSTAR: \
773 case OP_TYPEMINPLUS: \
774 case OP_TYPEQUERY: \
775 case OP_TYPEMINQUERY:
776
777 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
778 case OP_TYPESTAR: \
779 case OP_TYPEPLUS:
780
781 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
782 case OP_TYPEUPTO: \
783 case OP_TYPEMINUPTO:
784
785 static int get_class_iterator_size(pcre_uchar *cc)
786 {
787 switch(*cc)
788 {
789 case OP_CRSTAR:
790 case OP_CRPLUS:
791 return 2;
792
793 case OP_CRMINSTAR:
794 case OP_CRMINPLUS:
795 case OP_CRQUERY:
796 case OP_CRMINQUERY:
797 return 1;
798
799 case OP_CRRANGE:
800 case OP_CRMINRANGE:
801 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
802 return 0;
803 return 2;
804
805 default:
806 return 0;
807 }
808 }
809
810 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
811 {
812 int private_data_length = 0;
813 pcre_uchar *alternative;
814 pcre_uchar *name;
815 pcre_uchar *end = NULL;
816 int space, size, i;
817 pcre_uint32 bracketlen;
818
819 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
820 while (cc < ccend)
821 {
822 space = 0;
823 size = 0;
824 bracketlen = 0;
825 switch(*cc)
826 {
827 case OP_SET_SOM:
828 common->has_set_som = TRUE;
829 cc += 1;
830 break;
831
832 case OP_REF:
833 case OP_REFI:
834 common->optimized_cbracket[GET2(cc, 1)] = 0;
835 cc += 1 + IMM2_SIZE;
836 break;
837
838 case OP_ASSERT:
839 case OP_ASSERT_NOT:
840 case OP_ASSERTBACK:
841 case OP_ASSERTBACK_NOT:
842 case OP_ONCE:
843 case OP_ONCE_NC:
844 case OP_BRAPOS:
845 case OP_SBRA:
846 case OP_SBRAPOS:
847 private_data_length += sizeof(sljit_sw);
848 bracketlen = 1 + LINK_SIZE;
849 break;
850
851 case OP_CBRAPOS:
852 case OP_SCBRAPOS:
853 private_data_length += sizeof(sljit_sw);
854 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
855 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
856 break;
857
858 case OP_COND:
859 case OP_SCOND:
860 /* Only AUTO_CALLOUT can insert this opcode. We do
861 not intend to support this case. */
862 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
863 return -1;
864
865 if (*cc == OP_COND)
866 {
867 /* Might be a hidden SCOND. */
868 alternative = cc + GET(cc, 1);
869 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
870 private_data_length += sizeof(sljit_sw);
871 }
872 else
873 private_data_length += sizeof(sljit_sw);
874 bracketlen = 1 + LINK_SIZE;
875 break;
876
877 case OP_CREF:
878 i = GET2(cc, 1);
879 common->optimized_cbracket[i] = 0;
880 cc += 1 + IMM2_SIZE;
881 break;
882
883 case OP_NCREF:
884 bracketlen = GET2(cc, 1);
885 name = (pcre_uchar *)common->name_table;
886 alternative = name;
887 for (i = 0; i < common->name_count; i++)
888 {
889 if (GET2(name, 0) == bracketlen) break;
890 name += common->name_entry_size;
891 }
892 SLJIT_ASSERT(i != common->name_count);
893
894 for (i = 0; i < common->name_count; i++)
895 {
896 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
897 common->optimized_cbracket[GET2(alternative, 0)] = 0;
898 alternative += common->name_entry_size;
899 }
900 bracketlen = 0;
901 cc += 1 + IMM2_SIZE;
902 break;
903
904 case OP_BRA:
905 bracketlen = 1 + LINK_SIZE;
906 break;
907
908 case OP_CBRA:
909 case OP_SCBRA:
910 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
911 break;
912
913 CASE_ITERATOR_PRIVATE_DATA_1
914 space = 1;
915 size = -2;
916 break;
917
918 CASE_ITERATOR_PRIVATE_DATA_2A
919 space = 2;
920 size = -2;
921 break;
922
923 CASE_ITERATOR_PRIVATE_DATA_2B
924 space = 2;
925 size = -(2 + IMM2_SIZE);
926 break;
927
928 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
929 space = 1;
930 size = 1;
931 break;
932
933 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
934 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
935 space = 2;
936 size = 1;
937 break;
938
939 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
940 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
941 space = 2;
942 size = 1 + IMM2_SIZE;
943 break;
944
945 case OP_CLASS:
946 case OP_NCLASS:
947 size += 1 + 32 / sizeof(pcre_uchar);
948 space = get_class_iterator_size(cc + size);
949 break;
950
951 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
952 case OP_XCLASS:
953 size = GET(cc, 1);
954 space = get_class_iterator_size(cc + size);
955 break;
956 #endif
957
958 case OP_RECURSE:
959 /* Set its value only once. */
960 if (common->recursive_head_ptr == 0)
961 {
962 common->recursive_head_ptr = common->ovector_start;
963 common->ovector_start += sizeof(sljit_sw);
964 }
965 cc += 1 + LINK_SIZE;
966 break;
967
968 case OP_CALLOUT:
969 if (common->capture_last_ptr == 0)
970 {
971 common->capture_last_ptr = common->ovector_start;
972 common->ovector_start += sizeof(sljit_sw);
973 }
974 cc += 2 + 2 * LINK_SIZE;
975 break;
976
977 case OP_THEN_ARG:
978 common->has_then = TRUE;
979 common->control_head_ptr = 1;
980 /* Fall through. */
981
982 case OP_PRUNE_ARG:
983 common->needs_start_ptr = TRUE;
984 /* Fall through. */
985
986 case OP_MARK:
987 if (common->mark_ptr == 0)
988 {
989 common->mark_ptr = common->ovector_start;
990 common->ovector_start += sizeof(sljit_sw);
991 }
992 cc += 1 + 2 + cc[1];
993 break;
994
995 case OP_THEN:
996 common->has_then = TRUE;
997 common->control_head_ptr = 1;
998 /* Fall through. */
999
1000 case OP_PRUNE:
1001 case OP_SKIP:
1002 common->needs_start_ptr = TRUE;
1003 cc += 1;
1004 break;
1005
1006 case OP_SKIP_ARG:
1007 common->control_head_ptr = 1;
1008 common->has_skip_arg = TRUE;
1009 cc += 1 + 2 + cc[1];
1010 break;
1011
1012 default:
1013 cc = next_opcode(common, cc);
1014 if (cc == NULL)
1015 return -1;
1016 break;
1017 }
1018
1019 if (space > 0 && cc >= end)
1020 private_data_length += sizeof(sljit_sw) * space;
1021
1022 if (size != 0)
1023 {
1024 if (size < 0)
1025 {
1026 cc += -size;
1027 #ifdef SUPPORT_UTF
1028 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1029 #endif
1030 }
1031 else
1032 cc += size;
1033 }
1034
1035 if (bracketlen != 0)
1036 {
1037 if (cc >= end)
1038 {
1039 end = bracketend(cc);
1040 if (end[-1 - LINK_SIZE] == OP_KET)
1041 end = NULL;
1042 }
1043 cc += bracketlen;
1044 }
1045 }
1046 return private_data_length;
1047 }
1048
/* Hands out private data slots to the opcodes between common->start and ccend.
For every opcode that receives a slot, the current value of private_data_ptr is
stored in common->private_data_ptrs[], indexed by the offset of the opcode from
common->start, and private_data_ptr is advanced by sizeof(sljit_sw) for each
reserved word.

common            compiler state (start and private_data_ptrs are used)
private_data_ptr  offset of the first slot which may be assigned
ccend             the scan stops when this position is reached */
1049 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
1050 {
1051 pcre_uchar *cc = common->start;
1052 pcre_uchar *alternative;
1053 pcre_uchar *end = NULL;
1054 int space, size, bracketlen;
1055
1056 while (cc < ccend)
1057 {
/* space: number of words requested by an iterator.
   size: distance to the next opcode; when negative, -size units are skipped
   and the last skipped unit is checked for UTF extra length.
   bracketlen: length of a bracket header (non-zero only for brackets). */
1058 space = 0;
1059 size = 0;
1060 bracketlen = 0;
1061 switch(*cc)
1062 {
1063 case OP_ASSERT:
1064 case OP_ASSERT_NOT:
1065 case OP_ASSERTBACK:
1066 case OP_ASSERTBACK_NOT:
1067 case OP_ONCE:
1068 case OP_ONCE_NC:
1069 case OP_BRAPOS:
1070 case OP_SBRA:
1071 case OP_SBRAPOS:
1072 case OP_SCOND:
1073 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1074 private_data_ptr += sizeof(sljit_sw);
1075 bracketlen = 1 + LINK_SIZE;
1076 break;
1077
1078 case OP_CBRAPOS:
1079 case OP_SCBRAPOS:
1080 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1081 private_data_ptr += sizeof(sljit_sw);
1082 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1083 break;
1084
1085 case OP_COND:
1086 /* Might be a hidden SCOND. */
/* A slot is reserved only if the first alternative is closed by OP_KETRMAX
   or OP_KETRMIN. */
1087 alternative = cc + GET(cc, 1);
1088 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1089 {
1090 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1091 private_data_ptr += sizeof(sljit_sw);
1092 }
1093 bracketlen = 1 + LINK_SIZE;
1094 break;
1095
/* These brackets get no slot, only their header is stepped over. */
1096 case OP_BRA:
1097 bracketlen = 1 + LINK_SIZE;
1098 break;
1099
1100 case OP_CBRA:
1101 case OP_SCBRA:
1102 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1103 break;
1104
1105 CASE_ITERATOR_PRIVATE_DATA_1
1106 space = 1;
1107 size = -2;
1108 break;
1109
1110 CASE_ITERATOR_PRIVATE_DATA_2A
1111 space = 2;
1112 size = -2;
1113 break;
1114
1115 CASE_ITERATOR_PRIVATE_DATA_2B
1116 space = 2;
1117 size = -(2 + IMM2_SIZE);
1118 break;
1119
1120 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1121 space = 1;
1122 size = 1;
1123 break;
1124
/* Type iterators over OP_ANYNL and OP_EXTUNI request no slot. */
1125 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1126 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1127 space = 2;
1128 size = 1;
1129 break;
1130
1131 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1132 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1133 space = 2;
1134 size = 1 + IMM2_SIZE;
1135 break;
1136
/* Classes: the number of words depends on what get_class_iterator_size()
   reports for the code following the class. */
1137 case OP_CLASS:
1138 case OP_NCLASS:
1139 size += 1 + 32 / sizeof(pcre_uchar);
1140 space = get_class_iterator_size(cc + size);
1141 break;
1142
1143 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1144 case OP_XCLASS:
1145 size = GET(cc, 1);
1146 space = get_class_iterator_size(cc + size);
1147 break;
1148 #endif
1149
/* Everything else is stepped over; space, size and bracketlen stay zero. */
1150 default:
1151 cc = next_opcode(common, cc);
1152 SLJIT_ASSERT(cc != NULL);
1153 break;
1154 }
1155
/* Iterators located before 'end' (set by the bracket handling below) do not
   receive a slot. */
1156 if (space > 0 && cc >= end)
1157 {
1158 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1159 private_data_ptr += sizeof(sljit_sw) * space;
1160 }
1161
1162 if (size != 0)
1163 {
1164 if (size < 0)
1165 {
1166 cc += -size;
1167 #ifdef SUPPORT_UTF
1168 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1169 #endif
1170 }
1171 else
1172 cc += size;
1173 }
1174
1175 if (bracketlen > 0)
1176 {
/* When no enclosing bracket is being tracked, remember where this bracket
   ends, unless it is closed by a plain OP_KET, in which case 'end' is
   cleared. */
1177 if (cc >= end)
1178 {
1179 end = bracketend(cc);
1180 if (end[-1 - LINK_SIZE] == OP_KET)
1181 end = NULL;
1182 }
1183 cc += bracketlen;
1184 }
1185 }
1186 }
1187
1188 /* Returns with a frame_types (always < 0) if no need for frame. */
/* Computes the number of stack words required by the frame of the code between
cc and ccend. When ccend is NULL, cc must point to a bracket, and the body of
that bracket is scanned instead.

common              compiler state
cc, ccend           the code range (see above)
recursive           when TRUE, OP_SET_SOM and the mark opcodes add no words
needs_control_head  output: set to TRUE if OP_MARK, OP_PRUNE_ARG or OP_THEN_ARG
                    is found while common->control_head_ptr is in use (always
                    TRUE if DEBUG_FORCE_CONTROL_HEAD is enabled)

Returns length + 1 if frame items were found (NOTE(review): the extra word
presumably holds the closing zero stored by init_frame() - confirm). Otherwise
no_frame is returned if an opcode which requires the stack to be restored was
seen, and no_stack if not. */
1189 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1190 {
1191 int length = 0;
1192 int possessive = 0;
1193 BOOL stack_restore = FALSE;
1194 BOOL setsom_found = recursive;
1195 BOOL setmark_found = recursive;
1196 /* The last capture is a local variable even for recursions. */
1197 BOOL capture_last_found = FALSE;
1198
1199 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1200 SLJIT_ASSERT(common->control_head_ptr != 0);
1201 *needs_control_head = TRUE;
1202 #else
1203 *needs_control_head = FALSE;
1204 #endif
1205
1206 if (ccend == NULL)
1207 {
1208 ccend = bracketend(cc) - (1 + LINK_SIZE);
1209 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1210 {
/* Words used by the possessive capturing bracket itself: 3 for its capture,
   plus 2 if common->capture_last_ptr is in use. */
1211 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1212 /* This is correct regardless of common->capture_last_ptr. */
1213 capture_last_found = TRUE;
1214 }
1215 cc = next_opcode(common, cc);
1216 }
1217
1218 SLJIT_ASSERT(cc != NULL);
1219 while (cc < ccend)
1220 switch(*cc)
1221 {
/* Two words, counted only once. */
1222 case OP_SET_SOM:
1223 SLJIT_ASSERT(common->has_set_som);
1224 stack_restore = TRUE;
1225 if (!setsom_found)
1226 {
1227 length += 2;
1228 setsom_found = TRUE;
1229 }
1230 cc += 1;
1231 break;
1232
/* Two words for the mark, counted only once. */
1233 case OP_MARK:
1234 case OP_PRUNE_ARG:
1235 case OP_THEN_ARG:
1236 SLJIT_ASSERT(common->mark_ptr != 0);
1237 stack_restore = TRUE;
1238 if (!setmark_found)
1239 {
1240 length += 2;
1241 setmark_found = TRUE;
1242 }
1243 if (common->control_head_ptr != 0)
1244 *needs_control_head = TRUE;
1245 cc += 1 + 2 + cc[1];
1246 break;
1247
/* A recursion adds every item which is in use by the pattern and has not
   been counted yet. */
1248 case OP_RECURSE:
1249 stack_restore = TRUE;
1250 if (common->has_set_som && !setsom_found)
1251 {
1252 length += 2;
1253 setsom_found = TRUE;
1254 }
1255 if (common->mark_ptr != 0 && !setmark_found)
1256 {
1257 length += 2;
1258 setmark_found = TRUE;
1259 }
1260 if (common->capture_last_ptr != 0 && !capture_last_found)
1261 {
1262 length += 2;
1263 capture_last_found = TRUE;
1264 }
1265 cc += 1 + LINK_SIZE;
1266 break;
1267
/* Three words for each capturing bracket, plus two words (once) for the
   last capture if it is in use. */
1268 case OP_CBRA:
1269 case OP_CBRAPOS:
1270 case OP_SCBRA:
1271 case OP_SCBRAPOS:
1272 stack_restore = TRUE;
1273 if (common->capture_last_ptr != 0 && !capture_last_found)
1274 {
1275 length += 2;
1276 capture_last_found = TRUE;
1277 }
1278 length += 3;
1279 cc += 1 + LINK_SIZE + IMM2_SIZE;
1280 break;
1281
/* Any opcode which is not listed anywhere in this switch sets stack_restore.
   The opcodes listed below are only stepped over. */
1282 default:
1283 stack_restore = TRUE;
1284 /* Fall through. */
1285
1286 case OP_NOT_WORD_BOUNDARY:
1287 case OP_WORD_BOUNDARY:
1288 case OP_NOT_DIGIT:
1289 case OP_DIGIT:
1290 case OP_NOT_WHITESPACE:
1291 case OP_WHITESPACE:
1292 case OP_NOT_WORDCHAR:
1293 case OP_WORDCHAR:
1294 case OP_ANY:
1295 case OP_ALLANY:
1296 case OP_ANYBYTE:
1297 case OP_NOTPROP:
1298 case OP_PROP:
1299 case OP_ANYNL:
1300 case OP_NOT_HSPACE:
1301 case OP_HSPACE:
1302 case OP_NOT_VSPACE:
1303 case OP_VSPACE:
1304 case OP_EXTUNI:
1305 case OP_EODN:
1306 case OP_EOD:
1307 case OP_CIRC:
1308 case OP_CIRCM:
1309 case OP_DOLL:
1310 case OP_DOLLM:
1311 case OP_CHAR:
1312 case OP_CHARI:
1313 case OP_NOT:
1314 case OP_NOTI:
1315
1316 case OP_EXACT:
1317 case OP_POSSTAR:
1318 case OP_POSPLUS:
1319 case OP_POSQUERY:
1320 case OP_POSUPTO:
1321
1322 case OP_EXACTI:
1323 case OP_POSSTARI:
1324 case OP_POSPLUSI:
1325 case OP_POSQUERYI:
1326 case OP_POSUPTOI:
1327
1328 case OP_NOTEXACT:
1329 case OP_NOTPOSSTAR:
1330 case OP_NOTPOSPLUS:
1331 case OP_NOTPOSQUERY:
1332 case OP_NOTPOSUPTO:
1333
1334 case OP_NOTEXACTI:
1335 case OP_NOTPOSSTARI:
1336 case OP_NOTPOSPLUSI:
1337 case OP_NOTPOSQUERYI:
1338 case OP_NOTPOSUPTOI:
1339
1340 case OP_TYPEEXACT:
1341 case OP_TYPEPOSSTAR:
1342 case OP_TYPEPOSPLUS:
1343 case OP_TYPEPOSQUERY:
1344 case OP_TYPEPOSUPTO:
1345
1346 case OP_CLASS:
1347 case OP_NCLASS:
1348 case OP_XCLASS:
1349
1350 cc = next_opcode(common, cc);
1351 SLJIT_ASSERT(cc != NULL);
1352 break;
1353 }
1354
1355 /* Possessive quantifiers can use a special case. */
/* Nothing was added to the words of the possessive bracket itself. */
1356 if (SLJIT_UNLIKELY(possessive == length))
1357 return stack_restore ? no_frame : no_stack;
1358
1359 if (length > 0)
1360 return length + 1;
1361 return stack_restore ? no_frame : no_stack;
1362 }
1363
/* Emits the code which stores a frame on the stack for the code between cc and
ccend (or for the bracket at cc if ccend is NULL). Every saved item starts with
an identifier word, followed by the saved value(s):

  OP_SET_SOM           -OVECTOR(0), then the value of OVECTOR(0)
  mark opcodes         -common->mark_ptr, then the value of that local
  last capture         -common->capture_last_ptr, then the value of that local
  capturing brackets   OVECTOR(offset), then OVECTOR(offset) and
                       OVECTOR(offset + 1)

The frame is closed by a zero word. The items are selected by the same rules
as they are counted in get_framesize().

stackpos   index of the first stack word to write (converted by STACK())
stacktop   the position reached after the last item must be STACK(stacktop);
           only checked by an assertion
recursive  when TRUE, OP_SET_SOM and the mark opcodes save nothing */
1364 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1365 {
1366 DEFINE_COMPILER;
1367 BOOL setsom_found = recursive;
1368 BOOL setmark_found = recursive;
1369 /* The last capture is a local variable even for recursions. */
1370 BOOL capture_last_found = FALSE;
1371 int offset;
1372
1373 /* >= 1 + shortest item size (2) */
1374 SLJIT_UNUSED_ARG(stacktop);
1375 SLJIT_ASSERT(stackpos >= stacktop + 2);
1376
1377 stackpos = STACK(stackpos);
1378 if (ccend == NULL)
1379 {
1380 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* The opening bracket is stepped over, except for a non-recursive possessive
   capturing bracket: that one is processed by the loop below, so its own
   capture is saved as well. */
1381 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1382 cc = next_opcode(common, cc);
1383 }
1384
1385 SLJIT_ASSERT(cc != NULL);
1386 while (cc < ccend)
1387 switch(*cc)
1388 {
1389 case OP_SET_SOM:
1390 SLJIT_ASSERT(common->has_set_som);
1391 if (!setsom_found)
1392 {
1393 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1394 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1395 stackpos += (int)sizeof(sljit_sw);
1396 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1397 stackpos += (int)sizeof(sljit_sw);
1398 setsom_found = TRUE;
1399 }
1400 cc += 1;
1401 break;
1402
1403 case OP_MARK:
1404 case OP_PRUNE_ARG:
1405 case OP_THEN_ARG:
1406 SLJIT_ASSERT(common->mark_ptr != 0);
1407 if (!setmark_found)
1408 {
1409 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1410 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1411 stackpos += (int)sizeof(sljit_sw);
1412 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1413 stackpos += (int)sizeof(sljit_sw);
1414 setmark_found = TRUE;
1415 }
1416 cc += 1 + 2 + cc[1];
1417 break;
1418
/* A recursion saves every item which is in use by the pattern and has not
   been saved yet. */
1419 case OP_RECURSE:
1420 if (common->has_set_som && !setsom_found)
1421 {
1422 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1423 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1424 stackpos += (int)sizeof(sljit_sw);
1425 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1426 stackpos += (int)sizeof(sljit_sw);
1427 setsom_found = TRUE;
1428 }
1429 if (common->mark_ptr != 0 && !setmark_found)
1430 {
1431 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1432 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1433 stackpos += (int)sizeof(sljit_sw);
1434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1435 stackpos += (int)sizeof(sljit_sw);
1436 setmark_found = TRUE;
1437 }
1438 if (common->capture_last_ptr != 0 && !capture_last_found)
1439 {
1440 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1441 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1442 stackpos += (int)sizeof(sljit_sw);
1443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1444 stackpos += (int)sizeof(sljit_sw);
1445 capture_last_found = TRUE;
1446 }
1447 cc += 1 + LINK_SIZE;
1448 break;
1449
1450 case OP_CBRA:
1451 case OP_CBRAPOS:
1452 case OP_SCBRA:
1453 case OP_SCBRAPOS:
1454 if (common->capture_last_ptr != 0 && !capture_last_found)
1455 {
1456 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1458 stackpos += (int)sizeof(sljit_sw);
1459 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1460 stackpos += (int)sizeof(sljit_sw);
1461 capture_last_found = TRUE;
1462 }
/* The bracket number is doubled to get the index of its ovector pair. */
1463 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1464 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1465 stackpos += (int)sizeof(sljit_sw);
1466 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1467 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1469 stackpos += (int)sizeof(sljit_sw);
1470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1471 stackpos += (int)sizeof(sljit_sw);
1472
1473 cc += 1 + LINK_SIZE + IMM2_SIZE;
1474 break;
1475
1476 default:
1477 cc = next_opcode(common, cc);
1478 SLJIT_ASSERT(cc != NULL);
1479 break;
1480 }
1481
/* The zero word which closes the frame. */
1482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1483 SLJIT_ASSERT(stackpos == STACK(stacktop));
1484 }
1485
1486 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1487 {
1488 int private_data_length = needs_control_head ? 3 : 2;
1489 int size;
1490 pcre_uchar *alternative;
1491 /* Calculate the sum of the private machine words. */
1492 while (cc < ccend)
1493 {
1494 size = 0;
1495 switch(*cc)
1496 {
1497 case OP_ASSERT:
1498 case OP_ASSERT_NOT:
1499 case OP_ASSERTBACK:
1500 case OP_ASSERTBACK_NOT:
1501 case OP_ONCE:
1502 case OP_ONCE_NC:
1503 case OP_BRAPOS:
1504 case OP_SBRA:
1505 case OP_SBRAPOS:
1506 case OP_SCOND:
1507 private_data_length++;
1508 cc += 1 + LINK_SIZE;
1509 break;
1510
1511 case OP_CBRA:
1512 case OP_SCBRA:
1513 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1514 private_data_length++;
1515 cc += 1 + LINK_SIZE + IMM2_SIZE;
1516 break;
1517
1518 case OP_CBRAPOS:
1519 case OP_SCBRAPOS:
1520 private_data_length += 2;
1521 cc += 1 + LINK_SIZE + IMM2_SIZE;
1522 break;
1523
1524 case OP_COND:
1525 /* Might be a hidden SCOND. */
1526 alternative = cc + GET(cc, 1);
1527 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1528 private_data_length++;
1529 cc += 1 + LINK_SIZE;
1530 break;
1531
1532 CASE_ITERATOR_PRIVATE_DATA_1
1533 if (PRIVATE_DATA(cc))
1534 private_data_length++;
1535 cc += 2;
1536 #ifdef SUPPORT_UTF
1537 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1538 #endif
1539 break;
1540
1541 CASE_ITERATOR_PRIVATE_DATA_2A
1542 if (PRIVATE_DATA(cc))
1543 private_data_length += 2;
1544 cc += 2;
1545 #ifdef SUPPORT_UTF
1546 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1547 #endif
1548 break;
1549
1550 CASE_ITERATOR_PRIVATE_DATA_2B
1551 if (PRIVATE_DATA(cc))
1552 private_data_length += 2;
1553 cc += 2 + IMM2_SIZE;
1554 #ifdef SUPPORT_UTF
1555 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1556 #endif
1557 break;
1558
1559 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1560 if (PRIVATE_DATA(cc))
1561 private_data_length++;
1562 cc += 1;
1563 break;
1564
1565 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1566 if (PRIVATE_DATA(cc))
1567 private_data_length += 2;
1568 cc += 1;
1569 break;
1570
1571 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1572 if (PRIVATE_DATA(cc))
1573 private_data_length += 2;
1574 cc += 1 + IMM2_SIZE;
1575 break;
1576
1577 case OP_CLASS:
1578 case OP_NCLASS:
1579 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1580 case OP_XCLASS:
1581 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1582 #else
1583 size = 1 + 32 / (int)sizeof(pcre_uchar);
1584 #endif
1585 if (PRIVATE_DATA(cc))
1586 private_data_length += get_class_iterator_size(cc + size);
1587 cc += size;
1588 break;
1589
1590 default:
1591 cc = next_opcode(common, cc);
1592 SLJIT_ASSERT(cc != NULL);
1593 break;
1594 }
1595 }
1596 SLJIT_ASSERT(cc == ccend);
1597 return private_data_length;
1598 }
1599
/* Emits the code which copies common->recursive_head_ptr (and
common->control_head_ptr if needs_control_head is set) and the private data
words of the opcodes between cc and ccend between the local area and the stack.

save == TRUE   the locals are read and their values are stored on the stack
save == FALSE  the values are read from the stack and written to the locals

The words travel through TMP1 and TMP2 alternately. tmp1next tells which
register is used by the next transfer, while tmp1empty / tmp2empty tell whether
the register holds no pending value.

stackptr, stacktop  stack indexes on entry; both are converted to offsets by
                    STACK(). The transfer must finish exactly at stacktop
                    (checked by the final assertion). */
1600 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1601 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1602 {
1603 DEFINE_COMPILER;
1604 int srcw[2];
1605 int count, size;
1606 BOOL tmp1next = TRUE;
1607 BOOL tmp1empty = TRUE;
1608 BOOL tmp2empty = TRUE;
1609 pcre_uchar *alternative;
1610 enum {
1611 start,
1612 loop,
1613 end
1614 } status;
1615
/* Only the save direction passes through the 'start' state. */
1616 status = save ? start : loop;
1617 stackptr = STACK(stackptr - 2);
1618 stacktop = STACK(stacktop - 1);
1619
1620 if (!save)
1621 {
/* One or two words are stepped over here (NOTE(review): presumably the
   word(s) written by the 'start' state when saving - confirm), then TMP1 and
   TMP2 are preloaded from the stack. */
1622 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1623 if (stackptr < stacktop)
1624 {
1625 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1626 stackptr += sizeof(sljit_sw);
1627 tmp1empty = FALSE;
1628 }
1629 if (stackptr < stacktop)
1630 {
1631 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1632 stackptr += sizeof(sljit_sw);
1633 tmp2empty = FALSE;
1634 }
1635 /* The tmp1next must be TRUE in either way. */
1636 }
1637
1638 do
1639 {
/* Every step selects zero, one or two local offsets (srcw[]) to transfer. */
1640 count = 0;
1641 switch(status)
1642 {
1643 case start:
1644 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1645 count = 1;
1646 srcw[0] = common->recursive_head_ptr;
1647 if (needs_control_head)
1648 {
1649 SLJIT_ASSERT(common->control_head_ptr != 0);
1650 count = 2;
1651 srcw[1] = common->control_head_ptr;
1652 }
1653 status = loop;
1654 break;
1655
1656 case loop:
1657 if (cc >= ccend)
1658 {
1659 status = end;
1660 break;
1661 }
1662
1663 switch(*cc)
1664 {
1665 case OP_ASSERT:
1666 case OP_ASSERT_NOT:
1667 case OP_ASSERTBACK:
1668 case OP_ASSERTBACK_NOT:
1669 case OP_ONCE:
1670 case OP_ONCE_NC:
1671 case OP_BRAPOS:
1672 case OP_SBRA:
1673 case OP_SBRAPOS:
1674 case OP_SCOND:
1675 count = 1;
1676 srcw[0] = PRIVATE_DATA(cc);
1677 SLJIT_ASSERT(srcw[0] != 0);
1678 cc += 1 + LINK_SIZE;
1679 break;
1680
/* Capturing brackets which are not marked in common->optimized_cbracket
   transfer their OVECTOR_PRIV word. */
1681 case OP_CBRA:
1682 case OP_SCBRA:
1683 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1684 {
1685 count = 1;
1686 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1687 }
1688 cc += 1 + LINK_SIZE + IMM2_SIZE;
1689 break;
1690
1691 case OP_CBRAPOS:
1692 case OP_SCBRAPOS:
1693 count = 2;
1694 srcw[0] = PRIVATE_DATA(cc);
1695 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1696 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1697 cc += 1 + LINK_SIZE + IMM2_SIZE;
1698 break;
1699
1700 case OP_COND:
1701 /* Might be a hidden SCOND. */
1702 alternative = cc + GET(cc, 1);
1703 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1704 {
1705 count = 1;
1706 srcw[0] = PRIVATE_DATA(cc);
1707 SLJIT_ASSERT(srcw[0] != 0);
1708 }
1709 cc += 1 + LINK_SIZE;
1710 break;
1711
/* Iterators transfer their word(s) only if a private data slot was assigned
   to them. */
1712 CASE_ITERATOR_PRIVATE_DATA_1
1713 if (PRIVATE_DATA(cc))
1714 {
1715 count = 1;
1716 srcw[0] = PRIVATE_DATA(cc);
1717 }
1718 cc += 2;
1719 #ifdef SUPPORT_UTF
1720 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1721 #endif
1722 break;
1723
1724 CASE_ITERATOR_PRIVATE_DATA_2A
1725 if (PRIVATE_DATA(cc))
1726 {
1727 count = 2;
1728 srcw[0] = PRIVATE_DATA(cc);
1729 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1730 }
1731 cc += 2;
1732 #ifdef SUPPORT_UTF
1733 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1734 #endif
1735 break;
1736
1737 CASE_ITERATOR_PRIVATE_DATA_2B
1738 if (PRIVATE_DATA(cc))
1739 {
1740 count = 2;
1741 srcw[0] = PRIVATE_DATA(cc);
1742 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1743 }
1744 cc += 2 + IMM2_SIZE;
1745 #ifdef SUPPORT_UTF
1746 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1747 #endif
1748 break;
1749
1750 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1751 if (PRIVATE_DATA(cc))
1752 {
1753 count = 1;
1754 srcw[0] = PRIVATE_DATA(cc);
1755 }
1756 cc += 1;
1757 break;
1758
1759 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1760 if (PRIVATE_DATA(cc))
1761 {
1762 count = 2;
1763 srcw[0] = PRIVATE_DATA(cc);
1764 srcw[1] = srcw[0] + sizeof(sljit_sw);
1765 }
1766 cc += 1;
1767 break;
1768
1769 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1770 if (PRIVATE_DATA(cc))
1771 {
1772 count = 2;
1773 srcw[0] = PRIVATE_DATA(cc);
1774 srcw[1] = srcw[0] + sizeof(sljit_sw);
1775 }
1776 cc += 1 + IMM2_SIZE;
1777 break;
1778
/* Classes: one or two words, as reported by get_class_iterator_size() for
   the code which follows the class. */
1779 case OP_CLASS:
1780 case OP_NCLASS:
1781 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1782 case OP_XCLASS:
1783 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1784 #else
1785 size = 1 + 32 / (int)sizeof(pcre_uchar);
1786 #endif
1787 if (PRIVATE_DATA(cc))
1788 switch(get_class_iterator_size(cc + size))
1789 {
1790 case 1:
1791 count = 1;
1792 srcw[0] = PRIVATE_DATA(cc);
1793 break;
1794
1795 case 2:
1796 count = 2;
1797 srcw[0] = PRIVATE_DATA(cc);
1798 srcw[1] = srcw[0] + sizeof(sljit_sw);
1799 break;
1800
1801 default:
1802 SLJIT_ASSERT_STOP();
1803 break;
1804 }
1805 cc += size;
1806 break;
1807
1808 default:
1809 cc = next_opcode(common, cc);
1810 SLJIT_ASSERT(cc != NULL);
1811 break;
1812 }
1813 break;
1814
/* The loop below terminates as soon as status becomes 'end', so this state is
   never dispatched. */
1815 case end:
1816 SLJIT_ASSERT_STOP();
1817 break;
1818 }
1819
/* Transfer the selected locals. Note that srcw[] is consumed from the highest
   index downwards. */
1820 while (count > 0)
1821 {
1822 count--;
1823 if (save)
1824 {
/* Saving: the pending value of the chosen register is written to the stack
   first, then the register is loaded with the next local. */
1825 if (tmp1next)
1826 {
1827 if (!tmp1empty)
1828 {
1829 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1830 stackptr += sizeof(sljit_sw);
1831 }
1832 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1833 tmp1empty = FALSE;
1834 tmp1next = FALSE;
1835 }
1836 else
1837 {
1838 if (!tmp2empty)
1839 {
1840 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1841 stackptr += sizeof(sljit_sw);
1842 }
1843 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1844 tmp2empty = FALSE;
1845 tmp1next = TRUE;
1846 }
1847 }
1848 else
1849 {
/* Restoring: the preloaded register is written to the local, then it is
   refilled from the stack if any word is left there. */
1850 if (tmp1next)
1851 {
1852 SLJIT_ASSERT(!tmp1empty);
1853 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1854 tmp1empty = stackptr >= stacktop;
1855 if (!tmp1empty)
1856 {
1857 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1858 stackptr += sizeof(sljit_sw);
1859 }
1860 tmp1next = FALSE;
1861 }
1862 else
1863 {
1864 SLJIT_ASSERT(!tmp2empty);
1865 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1866 tmp2empty = stackptr >= stacktop;
1867 if (!tmp2empty)
1868 {
1869 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1870 stackptr += sizeof(sljit_sw);
1871 }
1872 tmp1next = TRUE;
1873 }
1874 }
1875 }
1876 }
1877 while (status != end);
1878
/* Saving: write out the values which are still held by the registers. The
   register which would be used next holds the older value, so it goes first. */
1879 if (save)
1880 {
1881 if (tmp1next)
1882 {
1883 if (!tmp1empty)
1884 {
1885 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1886 stackptr += sizeof(sljit_sw);
1887 }
1888 if (!tmp2empty)
1889 {
1890 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1891 stackptr += sizeof(sljit_sw);
1892 }
1893 }
1894 else
1895 {
1896 if (!tmp2empty)
1897 {
1898 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1899 stackptr += sizeof(sljit_sw);
1900 }
1901 if (!tmp1empty)
1902 {
1903 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1904 stackptr += sizeof(sljit_sw);
1905 }
1906 }
1907 }
1908 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1909 }
1910
1911 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1912 {
1913 pcre_uchar *end = bracketend(cc);
1914 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1915
1916 /* Assert captures then. */
1917 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1918 current_offset = NULL;
1919 /* Conditional block does not. */
1920 if (*cc == OP_COND || *cc == OP_SCOND)
1921 has_alternatives = FALSE;
1922
1923 cc = next_opcode(common, cc);
1924 if (has_alternatives)
1925 current_offset = common->then_offsets + (cc - common->start);
1926
1927 while (cc < end)
1928 {
1929 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1930 cc = set_then_offsets(common, cc, current_offset);
1931 else
1932 {
1933 if (*cc == OP_ALT && has_alternatives)
1934 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1935 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1936 *current_offset = 1;
1937 cc = next_opcode(common, cc);
1938 }
1939 }
1940
1941 return end;
1942 }
1943
1944 #undef CASE_ITERATOR_PRIVATE_DATA_1
1945 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1946 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1947 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1948 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1949 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1950
1951 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1952 {
1953 return (value & (value - 1)) == 0;
1954 }
1955
1956 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1957 {
1958 while (list)
1959 {
1960 /* sljit_set_label is clever enough to do nothing
1961 if either the jump or the label is NULL. */
1962 SET_LABEL(list->jump, label);
1963 list = list->next;
1964 }
1965 }
1966
1967 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1968 {
1969 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1970 if (list_item)
1971 {
1972 list_item->next = *list;
1973 list_item->jump = jump;
1974 *list = list_item;
1975 }
1976 }
1977
1978 static void add_stub(compiler_common *common, struct sljit_jump *start)
1979 {
1980 DEFINE_COMPILER;
1981 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1982
1983 if (list_item)
1984 {
1985 list_item->start = start;
1986 list_item->quit = LABEL();
1987 list_item->next = common->stubs;
1988 common->stubs = list_item;
1989 }
1990 }
1991
1992 static void flush_stubs(compiler_common *common)
1993 {
1994 DEFINE_COMPILER;
1995 stub_list* list_item = common->stubs;
1996
1997 while (list_item)
1998 {
1999 JUMPHERE(list_item->start);
2000 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2001 JUMPTO(SLJIT_JUMP, list_item->quit);
2002 list_item = list_item->next;
2003 }
2004 common->stubs = NULL;
2005 }
2006
2007 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
2008 {
2009 DEFINE_COMPILER;
2010
2011 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
2012 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2013 }
2014
2015 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2016 {
2017 /* May destroy all locals and registers except TMP2. */
2018 DEFINE_COMPILER;
2019
2020 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2021 #ifdef DESTROY_REGISTERS
2022 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2023 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2024 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2025 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2026 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2027 #endif
2028 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2029 }
2030
2031 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2032 {
2033 DEFINE_COMPILER;
2034 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2035 }
2036
2037 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2038 {
2039 DEFINE_COMPILER;
2040 struct sljit_label *loop;
2041 int i;
2042
2043 /* At this point we can freely use all temporary registers. */
2044 SLJIT_ASSERT(length > 1);
2045 /* TMP1 returns with begin - 1. */
2046 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2047 if (length < 8)
2048 {
2049 for (i = 1; i < length; i++)
2050 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2051 }
2052 else
2053 {
2054 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2055 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2056 loop = LABEL();
2057 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2058 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2059 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2060 }
2061 }
2062
2063 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2064 {
2065 DEFINE_COMPILER;
2066 struct sljit_label *loop;
2067 int i;
2068
2069 SLJIT_ASSERT(length > 1);
2070 /* OVECTOR(1) contains the "string begin - 1" constant. */
2071 if (length > 2)
2072 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2073 if (length < 8)
2074 {
2075 for (i = 2; i < length; i++)
2076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2077 }
2078 else
2079 {
2080 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2081 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2082 loop = LABEL();
2083 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2084 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2085 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2086 }
2087
2088 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2089 if (common->mark_ptr != 0)
2090 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2091 if (common->control_head_ptr != 0)
2092 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2093 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2094 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2095 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2096 }
2097
2098 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2099 {
2100 while (current != NULL)
2101 {
2102 switch (current[-2])
2103 {
2104 case type_then_trap:
2105 break;
2106
2107 case type_mark:
2108 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2109 return current[-4];
2110 break;
2111
2112 default:
2113 SLJIT_ASSERT_STOP();
2114 break;
2115 }
2116 current = (sljit_sw*)current[-1];
2117 }
2118 return -1;
2119 }
2120
2121 static sljit_sw SLJIT_CALL do_search_then_trap(sljit_sw *current, sljit_sw start)
2122 {
2123 do
2124 {
2125 SLJIT_ASSERT(current != NULL);
2126 switch (current[-2])
2127 {
2128 case type_then_trap:
2129 if (current[-3] == start)
2130 return (sljit_sw)current;
2131 break;
2132
2133 case type_mark:
2134 break;
2135
2136 default:
2137 SLJIT_ASSERT_STOP();
2138 break;
2139 }
2140 current = (sljit_sw*)current[-1];
2141 }
2142 while (TRUE);
2143 }
2144
/* Emits the code which copies the local ovector to the offsets array of
jit_arguments and computes the return value.

The emitted code stores STR_PTR in OVECTOR(1), copies the mark local to
jit_arguments.mark_ptr (if common->mark_ptr is in use), then converts
offset_count words of the local ovector to int values relative to
jit_arguments.begin and writes them to jit_arguments.offsets.

The return value is 1 if topbracket is not greater than 1. Otherwise the
capture pairs are scanned downwards from topbracket, and the scan stops at the
first pair whose first word differs from the previous content of OVECTOR(1).

common      compiler state
topbracket  the highest capturing bracket number to check */
2145 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2146 {
2147 DEFINE_COMPILER;
2148 struct sljit_label *loop;
2149 struct sljit_jump *early_quit;
2150
2151 /* At this point we can freely use all registers. */
/* The old content of OVECTOR(1) is kept in SLJIT_SAVED_REG3 for the scan at
   the end of this function. */
2152 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2154
2155 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2156 if (common->mark_ptr != 0)
2157 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2158 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2159 if (common->mark_ptr != 0)
2160 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* The output pointer starts one int before the offsets array, and the store in
   the loop uses a sizeof(int) displacement. NOTE(review): this relies on
   SLJIT_MOVU_SI updating the base register - confirm in the sljit
   documentation. */
2161 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2162 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2163 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2164 /* Unlikely, but possible */
2165 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
/* Copy loop: SLJIT_SCRATCH_REG2 counts the remaining words. */
2166 loop = LABEL();
2167 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2168 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2169 /* Copy the integer value to the output buffer */
2170 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2171 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2172 #endif
2173 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2174 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2175 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2176 JUMPHERE(early_quit);
2177
2178 /* Calculate the return value, which is the maximum ovector value. */
2179 if (topbracket > 1)
2180 {
/* SLJIT_SCRATCH_REG2 starts from topbracket + 1 and is decreased by one for
   every inspected pair; its value is returned when the loop ends. */
2181 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2182 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2183
2184 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2185 loop = LABEL();
2186 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2187 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2188 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2189 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2190 }
2191 else
2192 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2193 }
2194
/* Emits the code which returns with PCRE_ERROR_PARTIAL. The return register is
set first, and the emitted code jumps to quit right away if real_offset_count
is less than 2. Otherwise the following values are stored in
jit_arguments.offsets, each of them relative to jit_arguments.begin:

  offsets[0]  the value of the start_used_ptr local (hard partial mode) or of
              the hit_start local (soft partial mode)
  offsets[1]  STR_END
  offsets[2]  only if real_offset_count is at least 3: the value of the
              start_ptr local (hard partial mode) or of the local which
              follows hit_start (soft partial mode)

In the 16 and 32 bit libraries the values are shifted right by UCHAR_SHIFT
before they are stored.

common  compiler state
quit    the label where the emitted code continues */
2195 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2196 {
2197 DEFINE_COMPILER;
2198 struct sljit_jump *jump;
2199
/* SLJIT_SAVED_REG2 is overwritten below, while STR_END is also read there:
   the code depends on these two being the same register. */
2200 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2201 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2202 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2203
2204 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2205 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2206 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2207 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2208
2209 /* Store match begin and end. */
2210 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2211 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2212
/* offsets[2] is written only if there is room for at least three values. */
2213 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2214 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2215 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2216 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2217 #endif
2218 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2219 JUMPHERE(jump);
2220
/* offsets[1]: STR_END relative to begin. */
2221 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2222 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2223 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2224 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2225 #endif
2226 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2227
/* offsets[0]: the value loaded into SLJIT_SCRATCH_REG3 above, relative to
   begin. */
2228 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2229 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2230 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2231 #endif
2232 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2233
2234 JUMPTO(SLJIT_JUMP, quit);
2235 }
2236
2237 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2238 {
2239 /* May destroy TMP1. */
2240 DEFINE_COMPILER;
2241 struct sljit_jump *jump;
2242
2243 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2244 {
2245 /* The value of -1 must be kept for start_used_ptr! */
2246 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2247 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2248 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2249 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2250 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2251 JUMPHERE(jump);
2252 }
2253 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2254 {
2255 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2256 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2257 JUMPHERE(jump);
2258 }
2259 }
2260
2261 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2262 {
2263 /* Detects if the character has an othercase. */
2264 unsigned int c;
2265
2266 #ifdef SUPPORT_UTF
2267 if (common->utf)
2268 {
2269 GETCHAR(c, cc);
2270 if (c > 127)
2271 {
2272 #ifdef SUPPORT_UCP
2273 return c != UCD_OTHERCASE(c);
2274 #else
2275 return FALSE;
2276 #endif
2277 }
2278 #ifndef COMPILE_PCRE8
2279 return common->fcc[c] != c;
2280 #endif
2281 }
2282 else
2283 #endif
2284 c = *cc;
2285 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2286 }
2287
2288 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2289 {
2290 /* Returns with the othercase. */
2291 #ifdef SUPPORT_UTF
2292 if (common->utf && c > 127)
2293 {
2294 #ifdef SUPPORT_UCP
2295 return UCD_OTHERCASE(c);
2296 #else
2297 return c;
2298 #endif
2299 }
2300 #endif
2301 return TABLE_GET(c, common->fcc, c);
2302 }
2303
2304 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2305 {
2306 /* Detects if the character and its othercase has only 1 bit difference. */
2307 unsigned int c, oc, bit;
2308 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2309 int n;
2310 #endif
2311
2312 #ifdef SUPPORT_UTF
2313 if (common->utf)
2314 {
2315 GETCHAR(c, cc);
2316 if (c <= 127)
2317 oc = common->fcc[c];
2318 else
2319 {
2320 #ifdef SUPPORT_UCP
2321 oc = UCD_OTHERCASE(c);
2322 #else
2323 oc = c;
2324 #endif
2325 }
2326 }
2327 else
2328 {
2329 c = *cc;
2330 oc = TABLE_GET(c, common->fcc, c);
2331 }
2332 #else
2333 c = *cc;
2334 oc = TABLE_GET(c, common->fcc, c);
2335 #endif
2336
2337 SLJIT_ASSERT(c != oc);
2338
2339 bit = c ^ oc;
2340 /* Optimized for English alphabet. */
2341 if (c <= 127 && bit == 0x20)
2342 return (0 << 8) | 0x20;
2343
2344 /* Since c != oc, they must have at least 1 bit difference. */
2345 if (!is_powerof2(bit))
2346 return 0;
2347
2348 #if defined COMPILE_PCRE8
2349
2350 #ifdef SUPPORT_UTF
2351 if (common->utf && c > 127)
2352 {
2353 n = GET_EXTRALEN(*cc);
2354 while ((bit & 0x3f) == 0)
2355 {
2356 n--;
2357 bit >>= 6;
2358 }
2359 return (n << 8) | bit;
2360 }
2361 #endif /* SUPPORT_UTF */
2362 return (0 << 8) | bit;
2363
2364 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2365
2366 #ifdef SUPPORT_UTF
2367 if (common->utf && c > 65535)
2368 {
2369 if (bit >= (1 << 10))
2370 bit >>= 10;
2371 else
2372 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2373 }
2374 #endif /* SUPPORT_UTF */
2375 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2376
2377 #endif /* COMPILE_PCRE[8|16|32] */
2378 }
2379
2380 static void check_partial(compiler_common *common, BOOL force)
2381 {
2382 /* Checks whether a partial matching is occured. Does not modify registers. */
2383 DEFINE_COMPILER;
2384 struct sljit_jump *jump = NULL;
2385
2386 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2387
2388 if (common->mode == JIT_COMPILE)
2389 return;
2390
2391 if (!force)
2392 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2393 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2394 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2395
2396 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2397 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2398 else
2399 {
2400 if (common->partialmatchlabel != NULL)
2401 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2402 else
2403 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2404 }
2405
2406 if (jump != NULL)
2407 JUMPHERE(jump);
2408 }
2409
2410 static void check_str_end(compiler_common *common, jump_list **end_reached)
2411 {
2412 /* Does not affect registers. Usually used in a tight spot. */
2413 DEFINE_COMPILER;
2414 struct sljit_jump *jump;
2415
2416 if (common->mode == JIT_COMPILE)
2417 {
2418 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2419 return;
2420 }
2421
2422 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2423 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2424 {
2425 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2426 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2427 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2428 }
2429 else
2430 {
2431 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2432 if (common->partialmatchlabel != NULL)
2433 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2434 else
2435 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2436 }
2437 JUMPHERE(jump);
2438 }
2439
2440 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2441 {
2442 DEFINE_COMPILER;
2443 struct sljit_jump *jump;
2444
2445 if (common->mode == JIT_COMPILE)
2446 {
2447 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2448 return;
2449 }
2450
2451 /* Partial matching mode. */
2452 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2453 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2454 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2455 {
2456 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2457 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2458 }
2459 else
2460 {
2461 if (common->partialmatchlabel != NULL)
2462 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2463 else
2464 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2465 }
2466 JUMPHERE(jump);
2467 }
2468
2469 static void read_char(compiler_common *common)
2470 {
2471 /* Reads the character into TMP1, updates STR_PTR.
2472 Does not check STR_END. TMP2 Destroyed. */
2473 DEFINE_COMPILER;
2474 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2475 struct sljit_jump *jump;
2476 #endif
2477
2478 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2479 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2480 if (common->utf)
2481 {
2482 #if defined COMPILE_PCRE8
2483 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2484 #elif defined COMPILE_PCRE16
2485 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2486 #endif /* COMPILE_PCRE[8|16] */
2487 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2488 JUMPHERE(jump);
2489 }
2490 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2491 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2492 }
2493
2494 static void peek_char(compiler_common *common)
2495 {
2496 /* Reads the character into TMP1, keeps STR_PTR.
2497 Does not check STR_END. TMP2 Destroyed. */
2498 DEFINE_COMPILER;
2499 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2500 struct sljit_jump *jump;
2501 #endif
2502
2503 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2504 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2505 if (common->utf)
2506 {
2507 #if defined COMPILE_PCRE8
2508 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2509 #elif defined COMPILE_PCRE16
2510 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2511 #endif /* COMPILE_PCRE[8|16] */
2512 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2513 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2514 JUMPHERE(jump);
2515 }
2516 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2517 }
2518
2519 static void read_char8_type(compiler_common *common)
2520 {
2521 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2522 DEFINE_COMPILER;
2523 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2524 struct sljit_jump *jump;
2525 #endif
2526
2527 #ifdef SUPPORT_UTF
2528 if (common->utf)
2529 {
2530 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2531 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2532 #if defined COMPILE_PCRE8
2533 /* This can be an extra read in some situations, but hopefully
2534 it is needed in most cases. */
2535 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2536 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2537 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2538 JUMPHERE(jump);
2539 #elif defined COMPILE_PCRE16
2540 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2541 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2542 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2543 JUMPHERE(jump);
2544 /* Skip low surrogate if necessary. */
2545 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2546 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2547 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2548 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2549 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2550 #elif defined COMPILE_PCRE32
2551 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2552 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2553 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2554 JUMPHERE(jump);
2555 #endif /* COMPILE_PCRE[8|16|32] */
2556 return;
2557 }
2558 #endif /* SUPPORT_UTF */
2559 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2560 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2561 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2562 /* The ctypes array contains only 256 values. */
2563 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2564 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2565 #endif
2566 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2567 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2568 JUMPHERE(jump);
2569 #endif
2570 }
2571
2572 static void skip_char_back(compiler_common *common)
2573 {
2574 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2575 DEFINE_COMPILER;
2576 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2577 #if defined COMPILE_PCRE8
2578 struct sljit_label *label;
2579
2580 if (common->utf)
2581 {
2582 label = LABEL();
2583 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2584 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2585 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2586 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2587 return;
2588 }
2589 #elif defined COMPILE_PCRE16
2590 if (common->utf)
2591 {
2592 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2593 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2594 /* Skip low surrogate if necessary. */
2595 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2596 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2597 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2598 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2599 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2600 return;
2601 }
2602 #endif /* COMPILE_PCRE[8|16] */
2603 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2604 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2605 }
2606
2607 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2608 {
2609 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2610 DEFINE_COMPILER;
2611
2612 if (nltype == NLTYPE_ANY)
2613 {
2614 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2615 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2616 }
2617 else if (nltype == NLTYPE_ANYCRLF)
2618 {
2619 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2620 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2621 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2622 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2623 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2624 }
2625 else
2626 {
2627 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2628 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2629 }
2630 }
2631
2632 #ifdef SUPPORT_UTF
2633
2634 #if defined COMPILE_PCRE8
2635 static void do_utfreadchar(compiler_common *common)
2636 {
2637 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2638 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2639 DEFINE_COMPILER;
2640 struct sljit_jump *jump;
2641
2642 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2643 /* Searching for the first zero. */
2644 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2645 jump = JUMP(SLJIT_C_NOT_ZERO);
2646 /* Two byte sequence. */
2647 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2648 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2649 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2650 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2651 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2652 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2653 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2654 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2655 JUMPHERE(jump);
2656
2657 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2658 jump = JUMP(SLJIT_C_NOT_ZERO);
2659 /* Three byte sequence. */
2660 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2661 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2662 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2663 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2664 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2665 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2666 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2667 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2668 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2669 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2670 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2671 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2672 JUMPHERE(jump);
2673
2674 /* Four byte sequence. */
2675 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2676 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2677 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2678 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2679 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2680 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2681 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2682 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2683 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2684 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2685 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2686 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2687 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2688 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2689 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2690 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2691 }
2692
2693 static void do_utfreadtype8(compiler_common *common)
2694 {
2695 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2696 of the character (>= 0xc0). Return value in TMP1. */
2697 DEFINE_COMPILER;
2698 struct sljit_jump *jump;
2699 struct sljit_jump *compare;
2700
2701 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2702
2703 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2704 jump = JUMP(SLJIT_C_NOT_ZERO);
2705 /* Two byte sequence. */
2706 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2707 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2708 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2709 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2710 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2711 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2712 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2713 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2714 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2715
2716 JUMPHERE(compare);
2717 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2718 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2719 JUMPHERE(jump);
2720
2721 /* We only have types for characters less than 256. */
2722 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2723 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2724 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2725 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2726 }
2727
2728 #elif defined COMPILE_PCRE16
2729
2730 static void do_utfreadchar(compiler_common *common)
2731 {
2732 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2733 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2734 DEFINE_COMPILER;
2735 struct sljit_jump *jump;
2736
2737 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2738 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2739 /* Do nothing, only return. */
2740 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2741
2742 JUMPHERE(jump);
2743 /* Combine two 16 bit characters. */
2744 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2745 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2746 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2747 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2748 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2749 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2750 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2751 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2752 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2753 }
2754
2755 #endif /* COMPILE_PCRE[8|16] */
2756
2757 #endif /* SUPPORT_UTF */
2758
2759 #ifdef SUPPORT_UCP
2760
2761 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2762 #define UCD_BLOCK_MASK 127
2763 #define UCD_BLOCK_SHIFT 7
2764
2765 static void do_getucd(compiler_common *common)
2766 {
2767 /* Search the UCD record for the character comes in TMP1.
2768 Returns chartype in TMP1 and UCD offset in TMP2. */
2769 DEFINE_COMPILER;
2770
2771 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2772
2773 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2774 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2775 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2776 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2777 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2778 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2779 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2780 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2781 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2782 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2783 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2784 }
2785 #endif
2786
2787 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2788 {
2789 DEFINE_COMPILER;
2790 struct sljit_label *mainloop;
2791 struct sljit_label *newlinelabel = NULL;
2792 struct sljit_jump *start;
2793 struct sljit_jump *end = NULL;
2794 struct sljit_jump *nl = NULL;
2795 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2796 struct sljit_jump *singlechar;
2797 #endif
2798 jump_list *newline = NULL;
2799 BOOL newlinecheck = FALSE;
2800 BOOL readuchar = FALSE;
2801
2802 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2803 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2804 newlinecheck = TRUE;
2805
2806 if (firstline)
2807 {
2808 /* Search for the end of the first line. */
2809 SLJIT_ASSERT(common->first_line_end != 0);
2810 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2811
2812 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2813 {
2814 mainloop = LABEL();
2815 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2816 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2817 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2818 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2819 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2820 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2821 JUMPHERE(end);
2822 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2823 }
2824 else
2825 {
2826 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2827 mainloop = LABEL();
2828 /* Continual stores does not cause data dependency. */
2829 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2830 read_char(common);
2831 check_newlinechar(common, common->nltype, &newline, TRUE);
2832 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2833 JUMPHERE(end);
2834 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2835 set_jumps(newline, LABEL());
2836 }
2837
2838 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2839 }
2840
2841 start = JUMP(SLJIT_JUMP);
2842
2843 if (newlinecheck)
2844 {
2845 newlinelabel = LABEL();
2846 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2847 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2848 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2849 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2850 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2851 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2852 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2853 #endif
2854 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2855 nl = JUMP(SLJIT_JUMP);
2856 }
2857
2858 mainloop = LABEL();
2859
2860 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2861 #ifdef SUPPORT_UTF
2862 if (common->utf) readuchar = TRUE;
2863 #endif
2864 if (newlinecheck) readuchar = TRUE;
2865
2866 if (readuchar)
2867 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2868
2869 if (newlinecheck)
2870 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2871
2872 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2873 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2874 #if defined COMPILE_PCRE8
2875 if (common->utf)
2876 {
2877 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2878 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2879 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2880 JUMPHERE(singlechar);
2881 }
2882 #elif defined COMPILE_PCRE16
2883 if (common->utf)
2884 {
2885 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2886 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2887 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2888 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2889 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2890 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2891 JUMPHERE(singlechar);
2892 }
2893 #endif /* COMPILE_PCRE[8|16] */
2894 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2895 JUMPHERE(start);
2896
2897 if (newlinecheck)
2898 {
2899 JUMPHERE(end);
2900 JUMPHERE(nl);
2901 }
2902
2903 return mainloop;
2904 }
2905
2906 #define MAX_N_CHARS 3
2907
2908 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2909 {
2910 DEFINE_COMPILER;
2911 struct sljit_label *start;
2912 struct sljit_jump *quit;
2913 pcre_uint32 chars[MAX_N_CHARS * 2];
2914 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2915 int location = 0;
2916 pcre_int32 len, c, bit, caseless;
2917 int must_stop;
2918
2919 /* We do not support alternatives now. */
2920 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2921 return FALSE;
2922
2923 while (TRUE)
2924 {
2925 caseless = 0;
2926 must_stop = 1;
2927 switch(*cc)
2928 {
2929 case OP_CHAR:
2930 must_stop = 0;
2931 cc++;
2932 break;
2933
2934 case OP_CHARI:
2935 caseless = 1;
2936 must_stop = 0;
2937 cc++;
2938 break;
2939
2940 case OP_SOD:
2941 case OP_SOM:
2942 case OP_SET_SOM:
2943 case OP_NOT_WORD_BOUNDARY:
2944 case OP_WORD_BOUNDARY:
2945 case OP_EODN:
2946 case OP_EOD:
2947 case OP_CIRC:
2948 case OP_CIRCM:
2949 case OP_DOLL:
2950 case OP_DOLLM:
2951 /* Zero width assertions. */
2952 cc++;
2953 continue;
2954
2955 case OP_PLUS:
2956 case OP_MINPLUS:
2957 case OP_POSPLUS:
2958 cc++;
2959 break;
2960
2961 case OP_EXACT:
2962 cc += 1 + IMM2_SIZE;
2963 break;
2964
2965 case OP_PLUSI:
2966 case OP_MINPLUSI:
2967 case OP_POSPLUSI:
2968 caseless = 1;
2969 cc++;
2970 break;
2971
2972 case OP_EXACTI:
2973 caseless = 1;
2974 cc += 1 + IMM2_SIZE;
2975 break;
2976
2977 default:
2978 must_stop = 2;
2979 break;
2980 }
2981
2982 if (must_stop == 2)
2983 break;
2984
2985 len = 1;
2986 #ifdef SUPPORT_UTF
2987 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2988 #endif
2989
2990 if (caseless && char_has_othercase(common, cc))
2991 {
2992 caseless = char_get_othercase_bit(common, cc);
2993 if (caseless == 0)
2994 return FALSE;
2995 #ifdef COMPILE_PCRE8
2996 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2997 #else
2998 if ((caseless & 0x100) != 0)
2999 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
3000 else
3001 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
3002 #endif
3003 }
3004 else
3005 caseless = 0;
3006
3007 while (len > 0 && location < MAX_N_CHARS * 2)
3008 {
3009 c = *cc;
3010 bit = 0;
3011 if (len == (caseless & 0xff))
3012 {
3013 bit = caseless >> 8;
3014 c |= bit;
3015 }
3016
3017 chars[location] = c;
3018 chars[location + 1] = bit;
3019
3020 len--;
3021 location += 2;
3022 cc++;
3023 }
3024
3025 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3026 break;
3027 }
3028
3029 /* At least two characters are required. */
3030 if (location < 2 * 2)
3031 return FALSE;
3032
3033 if (firstline)
3034 {
3035 SLJIT_ASSERT(common->first_line_end != 0);
3036 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3037 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3038 }
3039 else
3040 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3041
3042 start = LABEL();
3043 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3044
3045 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3046 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3047 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3048 if (chars[1] != 0)
3049 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3050 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3051 if (location > 2 * 2)
3052 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3053 if (chars[3] != 0)
3054 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3055 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3056 if (location > 2 * 2)
3057 {
3058 if (chars[5] != 0)
3059 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3060 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3061 }
3062 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3063
3064 JUMPHERE(quit);
3065
3066 if (firstline)
3067 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3068 else
3069 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3070 return TRUE;
3071 }
3072
3073 #undef MAX_N_CHARS
3074
3075 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3076 {
3077 DEFINE_COMPILER;
3078 struct sljit_label *start;
3079 struct sljit_jump *quit;
3080 struct sljit_jump *found;
3081 pcre_uchar oc, bit;
3082
3083 if (firstline)
3084 {
3085 SLJIT_ASSERT(common->first_line_end != 0);
3086 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3087 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3088 }
3089
3090 start = LABEL();
3091 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3092 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3093
3094 oc = first_char;
3095 if (caseless)
3096 {
3097 oc = TABLE_GET(first_char, common->fcc, first_char);
3098 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3099 if (first_char > 127 && common->utf)
3100 oc = UCD_OTHERCASE(first_char);
3101 #endif
3102 }
3103 if (first_char == oc)
3104 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3105 else
3106 {
3107 bit = first_char ^ oc;
3108 if (is_powerof2(bit))
3109 {
3110 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3111 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3112 }
3113 else
3114 {
3115 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3116 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3117 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3118 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3119 found = JUMP(SLJIT_C_NOT_ZERO);
3120 }
3121 }
3122
3123 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3124 JUMPTO(SLJIT_JUMP, start);
3125 JUMPHERE(found);
3126 JUMPHERE(quit);
3127
3128 if (firstline)
3129 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3130 }
3131
3132 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3133 {
3134 DEFINE_COMPILER;
3135 struct sljit_label *loop;
3136 struct sljit_jump *lastchar;
3137 struct sljit_jump *firstchar;
3138 struct sljit_jump *quit;
3139 struct sljit_jump *foundcr = NULL;
3140 struct sljit_jump *notfoundnl;
3141 jump_list *newline = NULL;
3142
3143 if (firstline)
3144 {
3145 SLJIT_ASSERT(common->first_line_end != 0);
3146 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3147 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3148 }
3149
3150 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3151 {
3152 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3153 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3154 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3155 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3156 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3157
3158 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3159 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3160 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3161 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3162 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3163 #endif
3164 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3165
3166 loop = LABEL();
3167 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3168 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3169 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3170 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3171 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3172 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3173
3174 JUMPHERE(quit);
3175 JUMPHERE(firstchar);
3176 JUMPHERE(lastchar);
3177
3178 if (firstline)
3179 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3180 return;
3181 }
3182
3183 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3184 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3185 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3186 skip_char_back(common);
3187
3188 loop = LABEL();
3189 read_char(common);
3190 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3191 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3192 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3193 check_newlinechar(common, common->nltype, &newline, FALSE);
3194 set_jumps(newline, loop);
3195
3196 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3197 {
3198 quit = JUMP(SLJIT_JUMP);
3199 JUMPHERE(foundcr);
3200 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3201 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3202 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3203 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3204 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3205 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3206 #endif
3207 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3208 JUMPHERE(notfoundnl);
3209 JUMPHERE(quit);
3210 }
3211 JUMPHERE(lastchar);
3212 JUMPHERE(firstchar);
3213
3214 if (firstline)
3215 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3216 }
3217
3218 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3219
3220 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3221 {
3222 DEFINE_COMPILER;
3223 struct sljit_label *start;
3224 struct sljit_jump *quit;
3225 struct sljit_jump *found = NULL;
3226 jump_list *matches = NULL;
3227 pcre_uint8 inverted_start_bits[32];
3228 int i;
3229 #ifndef COMPILE_PCRE8
3230 struct sljit_jump *jump;
3231 #endif
3232
3233 for (i = 0; i < 32; ++i)
3234 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3235
3236 if (firstline)
3237 {
3238 SLJIT_ASSERT(common->first_line_end != 0);
3239 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3240 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3241 }
3242
3243 start = LABEL();
3244 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3245 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3246 #ifdef SUPPORT_UTF
3247 if (common->utf)
3248 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3249 #endif
3250
3251 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3252 {
3253 #ifndef COMPILE_PCRE8
3254 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3255 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3256 JUMPHERE(jump);
3257 #endif
3258 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3259 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3260 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3261 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3262 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3263 found = JUMP(SLJIT_C_NOT_ZERO);
3264 }
3265
3266 #ifdef SUPPORT_UTF
3267 if (common->utf)
3268 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3269 #endif
3270 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3271 #ifdef SUPPORT_UTF
3272 #if defined COMPILE_PCRE8
3273 if (common->utf)
3274 {
3275 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3276 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3277 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3278 }
3279 #elif defined COMPILE_PCRE16
3280 if (common->utf)
3281 {
3282 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3283 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3284 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3285 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3286 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3287 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3288 }
3289 #endif /* COMPILE_PCRE[8|16] */
3290 #endif /* SUPPORT_UTF */
3291 JUMPTO(SLJIT_JUMP, start);
3292 if (found != NULL)
3293 JUMPHERE(found);
3294 if (matches != NULL)
3295 set_jumps(matches, LABEL());
3296 JUMPHERE(quit);
3297
3298 if (firstline)
3299 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3300 }
3301
3302 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3303 {
3304 DEFINE_COMPILER;
3305 struct sljit_label *loop;
3306 struct sljit_jump *toolong;
3307 struct sljit_jump *alreadyfound;
3308 struct sljit_jump *found;
3309 struct sljit_jump *foundoc = NULL;
3310 struct sljit_jump *notfound;
3311 pcre_uint32 oc, bit;
3312
3313 SLJIT_ASSERT(common->req_char_ptr != 0);
3314 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3315 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3316 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3317 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3318
3319 if (has_firstchar)
3320 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3321 else
3322 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3323
3324 loop = LABEL();
3325 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3326
3327 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3328 oc = req_char;
3329 if (caseless)
3330 {
3331 oc = TABLE_GET(req_char, common->fcc, req_char);
3332 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3333 if (req_char > 127 && common->utf)
3334 oc = UCD_OTHERCASE(req_char);
3335 #endif
3336 }
3337 if (req_char == oc)
3338 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3339 else
3340 {
3341 bit = req_char ^ oc;
3342 if (is_powerof2(bit))
3343 {
3344 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3345 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3346 }
3347 else
3348 {
3349 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3350 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3351 }
3352 }
3353 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3354 JUMPTO(SLJIT_JUMP, loop);
3355
3356 JUMPHERE(found);
3357 if (foundoc)
3358 JUMPHERE(foundoc);
3359 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3360 JUMPHERE(alreadyfound);
3361 JUMPHERE(toolong);
3362 return notfound;
3363 }
3364
3365 static void do_revertframes(compiler_common *common)
3366 {
3367 DEFINE_COMPILER;
3368 struct sljit_jump *jump;
3369 struct sljit_label *mainloop;
3370
3371 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3372 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3373 GET_LOCAL_BASE(TMP3, 0, 0);
3374
3375 /* Drop frames until we reach STACK_TOP. */
3376 mainloop = LABEL();
3377 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3378 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3379 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3380
3381 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3382 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3383 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3384 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3385 JUMPTO(SLJIT_JUMP, mainloop);
3386
3387 JUMPHERE(jump);
3388 jump = JUMP(SLJIT_C_SIG_LESS);
3389 /* End of dropping frames. */
3390 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3391
3392 JUMPHERE(jump);
3393 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3394 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3395 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3396 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3397 JUMPTO(SLJIT_JUMP, mainloop);
3398 }
3399
3400 static void check_wordboundary(compiler_common *common)
3401 {
3402 DEFINE_COMPILER;
3403 struct sljit_jump *skipread;
3404 jump_list *skipread_list = NULL;
3405 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3406 struct sljit_jump *jump;
3407 #endif
3408
3409 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3410
3411 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3412 /* Get type of the previous char, and put it to LOCALS1. */
3413 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3414 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3415 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3416 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3417 skip_char_back(common);
3418 check_start_used_ptr(common);
3419 read_char(common);
3420
3421 /* Testing char type. */
3422 #ifdef SUPPORT_UCP
3423 if (common->use_ucp)
3424 {
3425 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3426 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3427 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3428 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3429 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3430 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3431 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3432 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3433 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3434 JUMPHERE(jump);
3435 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3436 }
3437 else
3438 #endif
3439 {
3440 #ifndef COMPILE_PCRE8
3441 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3442 #elif defined SUPPORT_UTF
3443 /* Here LOCALS1 has already been zeroed. */
3444 jump = NULL;
3445 if (common->utf)
3446 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3447 #endif /* COMPILE_PCRE8 */
3448 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3449 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3450 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3451 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3452 #ifndef COMPILE_PCRE8
3453 JUMPHERE(jump);
3454 #elif defined SUPPORT_UTF
3455 if (jump != NULL)
3456 JUMPHERE(jump);
3457 #endif /* COMPILE_PCRE8 */
3458 }
3459 JUMPHERE(skipread);
3460
3461 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3462 check_str_end(common, &skipread_list);
3463 peek_char(common);
3464
3465 /* Testing char type. This is a code duplication. */
3466 #ifdef SUPPORT_UCP
3467 if (common->use_ucp)
3468 {
3469 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3470 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3471 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3472 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3473 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3474 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3475 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3476 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3477 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3478 JUMPHERE(jump);
3479 }
3480 else
3481 #endif
3482 {
3483 #ifndef COMPILE_PCRE8
3484 /* TMP2 may be destroyed by peek_char. */
3485 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3486 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3487 #elif defined SUPPORT_UTF
3488 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3489 jump = NULL;
3490 if (common->utf)
3491 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3492 #endif
3493 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3494 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3495 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3496 #ifndef COMPILE_PCRE8
3497 JUMPHERE(jump);
3498 #elif defined SUPPORT_UTF
3499 if (jump != NULL)
3500 JUMPHERE(jump);
3501 #endif /* COMPILE_PCRE8 */
3502 }
3503 set_jumps(skipread_list, LABEL());
3504
3505 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3506 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3507 }
3508
/*
Range format:

ranges[0] = number of bit change positions stored (at most MAX_RANGE_SIZE; -1 means the range is invalid).
ranges[1] = value of the first bit (0 or 1).
ranges[2] .. ranges[1 + ranges[0]] = positions where the bit changes (i.e. where the current bit is not equal to the previous one).
*/
3516
3517 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3518 {
3519 DEFINE_COMPILER;
3520 struct sljit_jump *jump;
3521
3522 if (ranges[0] < 0)
3523 return FALSE;
3524
3525 switch(ranges[0])
3526 {
3527 case 1:
3528 if (readch)
3529 read_char(common);
3530 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3531 return TRUE;
3532
3533 case 2:
3534 if (readch)
3535 read_char(common);
3536 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3537 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3538 return TRUE;
3539
3540 case 4:
3541 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3542 {
3543 if (readch)
3544 read_char(common);
3545 if (ranges[1] != 0)
3546 {
3547 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3548 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3549 }
3550 else
3551 {
3552 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3553 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3554 JUMPHERE(jump);
3555 }
3556 return TRUE;
3557 }
3558 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3559 {
3560 if (readch)
3561 read_char(common);
3562 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3563 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3564 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3565 return TRUE;
3566 }
3567 return FALSE;
3568
3569 default:
3570 return FALSE;
3571 }
3572 }
3573
3574 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3575 {
3576 int i, bit, length;
3577 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3578
3579 bit = ctypes[0] & flag;
3580 ranges[0] = -1;
3581 ranges[1] = bit != 0 ? 1 : 0;
3582 length = 0;
3583
3584 for (i = 1; i < 256; i++)
3585 if ((ctypes[i] & flag) != bit)
3586 {
3587 if (length >= MAX_RANGE_SIZE)
3588 return;
3589 ranges[2 + length] = i;
3590 length++;
3591 bit ^= flag;
3592 }
3593
3594 if (bit != 0)
3595 {
3596 if (length >= MAX_RANGE_SIZE)
3597 return;
3598 ranges[2 + length] = 256;
3599 length++;
3600 }
3601 ranges[0] = length;
3602 }
3603
3604 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3605 {
3606 int ranges[2 + MAX_RANGE_SIZE];
3607 pcre_uint8 bit, cbit, all;
3608 int i, byte, length = 0;
3609
3610 bit = bits[0] & 0x1;
3611 ranges[1] = bit;
3612 /* Can be 0 or 255. */
3613 all = -bit;
3614
3615 for (i = 0; i < 256; )
3616 {
3617 byte = i >> 3;
3618 if ((i & 0x7) == 0 && bits[byte] == all)
3619 i += 8;
3620 else
3621 {
3622 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3623 if (cbit != bit)
3624 {
3625 if (length >= MAX_RANGE_SIZE)
3626 return FALSE;
3627 ranges[2 + length] = i;
3628 length++;
3629 bit = cbit;
3630 all = -cbit;
3631 }
3632 i++;
3633 }
3634 }
3635
3636 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3637 {
3638 if (length >= MAX_RANGE_SIZE)
3639 return FALSE;
3640 ranges[2 + length] = 256;
3641 length++;
3642 }
3643 ranges[0] = length;
3644
3645 return check_ranges(common, ranges, backtracks, FALSE);
3646 }
3647
3648 static void check_anynewline(compiler_common *common)
3649 {
3650 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3651 DEFINE_COMPILER;
3652
3653 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3654
3655 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3656 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3657 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3658 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3659 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3660 #ifdef COMPILE_PCRE8
3661 if (common->utf)
3662 {
3663 #endif
3664 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3665 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3666 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3667 #ifdef COMPILE_PCRE8
3668 }
3669 #endif
3670 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3671 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3672 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3673 }
3674
3675 static void check_hspace(compiler_common *common)
3676 {
3677 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3678 DEFINE_COMPILER;
3679
3680 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3681
3682 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3683 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3684 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3685 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3686 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3687 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3688 #ifdef COMPILE_PCRE8
3689 if (common->utf)
3690 {
3691 #endif
3692 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3693 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3694 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3695 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3696 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3697 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3698 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3699 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3700 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3701 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3702 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3703 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3704 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3705 #ifdef COMPILE_PCRE8
3706 }
3707 #endif
3708 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3709 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3710
3711 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3712 }
3713
3714 static void check_vspace(compiler_common *common)
3715 {
3716 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3717 DEFINE_COMPILER;
3718
3719 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3720
3721 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3722 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3723 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3724 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3725 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3726 #ifdef COMPILE_PCRE8
3727 if (common->utf)
3728 {
3729 #endif
3730 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3731 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3732 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3733 #ifdef COMPILE_PCRE8
3734 }
3735 #endif
3736 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3737 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3738
3739 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3740 }
3741
3742 #define CHAR1 STR_END
3743 #define CHAR2 STACK_TOP
3744
3745 static void do_casefulcmp(compiler_common *common)
3746 {
3747 DEFINE_COMPILER;
3748 struct sljit_jump *jump;
3749 struct sljit_label *label;
3750
3751 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3752 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3753 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3754 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3755 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3756 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3757
3758 label = LABEL();
3759 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3760 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3761 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3762 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3763 JUMPTO(SLJIT_C_NOT_ZERO, label);
3764
3765 JUMPHERE(jump);
3766 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3767 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3768 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3769 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3770 }
3771
3772 #define LCC_TABLE STACK_LIMIT
3773
3774 static void do_caselesscmp(compiler_common *common)
3775 {
3776 DEFINE_COMPILER;
3777 struct sljit_jump *jump;
3778 struct sljit_label *label;
3779
3780 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3781 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3782
3783 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3784 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3785 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3786 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3787 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3788 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3789
3790 label = LABEL();
3791 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3792 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3793 #ifndef COMPILE_PCRE8
3794 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3795 #endif
3796 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3797 #ifndef COMPILE_PCRE8
3798 JUMPHERE(jump);
3799 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3800 #endif
3801 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3802 #ifndef COMPILE_PCRE8
3803 JUMPHERE(jump);
3804 #endif
3805 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3806 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3807 JUMPTO(SLJIT_C_NOT_ZERO, label);
3808
3809 JUMPHERE(jump);
3810 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3811 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3812 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3813 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3814 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3815 }
3816
3817 #undef LCC_TABLE
3818 #undef CHAR1
3819 #undef CHAR2
3820
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF character sequences, called from the
generated code as an ordinary C function.

  src1  start of the first sequence
  args  supplies the second sequence: it starts at args->uchar_ptr and
        must not be read at or beyond args->end
  end1  end of the first sequence

Returns the position in the second sequence after the last compared
character when all characters of the first sequence matched, NULL on a
mismatch, and (pcre_uchar*)1 when the second sequence ran out first.

Two characters match when they are equal, when the first one is the other
case of the second one, or when the first one is a member of the caseless
set of the second one. */

static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
/* This function would be ineffective to do in JIT level. */
pcre_uint32 left, right;
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *record;
const pcre_uint32 *set;

while (src1 < end1)
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(left, src1);
  GETCHARINC(right, src2);
  record = GET_UCD(right);
  if (left == right || left == right + record->other_case)
    continue;

  /* Walk the caseless set; the walk gives up as soon as an entry greater
  than the wanted character is met. */
  set = PRIV(ucd_caseless_sets) + record->caseset;
  while (left != *set)
    {
    if (left < *set)
      return NULL;
    set++;
    }
  }
return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
3853
3854 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3855 compare_context* context, jump_list **backtracks)
3856 {
3857 DEFINE_COMPILER;
3858 unsigned int othercasebit = 0;
3859 pcre_uchar *othercasechar = NULL;
3860 #ifdef SUPPORT_UTF
3861 int utflength;
3862 #endif
3863
3864 if (caseless && char_has_othercase(common, cc))
3865 {
3866 othercasebit = char_get_othercase_bit(common, cc);
3867 SLJIT_ASSERT(othercasebit);
3868 /* Extracting bit difference info. */
3869 #if defined COMPILE_PCRE8
3870 othercasechar = cc + (othercasebit >> 8);
3871 othercasebit &= 0xff;
3872 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3873 /* Note that this code only handles characters in the BMP. If there
3874 ever are characters outside the BMP whose othercase differs in only one
3875 bit from itself (there currently are none), this code will need to be
3876 revised for COMPILE_PCRE32. */
3877 othercasechar = cc + (othercasebit >> 9);
3878 if ((othercasebit & 0x100) != 0)
3879 othercasebit = (othercasebit & 0xff) << 8;
3880 else
3881 othercasebit &= 0xff;
3882 #endif /* COMPILE_PCRE[8|16|32] */
3883 }
3884
3885 if (context->sourcereg == -1)
3886 {
3887 #if defined COMPILE_PCRE8
3888 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3889 if (context->length >= 4)
3890 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3891 else if (context->length >= 2)
3892 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3893 else
3894 #endif
3895 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3896 #elif defined COMPILE_PCRE16
3897 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3898 if (context->length >= 4)
3899 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3900 else
3901 #endif
3902 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3903 #elif defined COMPILE_PCRE32
3904 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3905 #endif /* COMPILE_PCRE[8|16|32] */
3906 context->sourcereg = TMP2;
3907 }
3908
3909 #ifdef SUPPORT_UTF
3910 utflength = 1;
3911 if (common->utf && HAS_EXTRALEN(*cc))
3912 utflength += GET_EXTRALEN(*cc);
3913
3914 do
3915 {
3916 #endif
3917
3918 context->length -= IN_UCHARS(1);
3919 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3920
3921 /* Unaligned read is supported. */
3922 if (othercasebit != 0 && othercasechar == cc)
3923 {
3924 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3925 context->oc.asuchars[context->ucharptr] = othercasebit;
3926 }
3927 else
3928 {
3929 context->c.asuchars[context->ucharptr] = *cc;
3930 context->oc.asuchars[context->ucharptr] = 0;
3931 }
3932 context->ucharptr++;
3933
3934 #if defined COMPILE_PCRE8
3935 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3936 #else
3937 if (context->ucharptr >= 2 || context->length == 0)
3938 #endif
3939 {
3940 if (context->length >= 4)
3941 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3942 else if (context->length >= 2)
3943 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3944 #if defined COMPILE_PCRE8
3945 else if (context->length >= 1)
3946 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3947 #endif /* COMPILE_PCRE8 */
3948 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3949
3950 switch(context->ucharptr)
3951 {
3952 case 4 / sizeof(pcre_uchar):
3953 if (context->oc.asint != 0)
3954 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3955 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3956 break;
3957
3958 case 2 / sizeof(pcre_uchar):
3959 if (context->oc.asushort != 0)
3960 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3961 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3962 break;
3963
3964 #ifdef COMPILE_PCRE8
3965 case 1:
3966 if (context->oc.asbyte != 0)
3967 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3968 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3969 break;
3970 #endif
3971
3972 default:
3973 SLJIT_ASSERT_STOP();
3974 break;
3975 }
3976 context->ucharptr = 0;
3977 }
3978
3979 #else
3980
3981 /* Unaligned read is unsupported or in 32 bit mode. */
3982 if (context->length >= 1)
3983 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3984
3985 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3986
3987 if (othercasebit != 0 && othercasechar == cc)
3988 {
3989 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3990 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3991 }
3992 else
3993 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3994
3995 #endif
3996
3997 cc++;
3998 #ifdef SUPPORT_UTF
3999 utflength--;
4000 }
4001 while (utflength > 0);
4002 #endif
4003
4004 return cc;
4005 }
4006
4007 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4008
/* The two macros below keep a register biased by a compile time offset, so
that a range test needs a single unsigned comparison. Each one emits the
SUB or ADD needed to move from the current bias to the requested one (nothing
when the bias is already right) and then records the new bias.

SET_TYPE_OFFSET works on the register named by the local variable typereg
and the local variable typeoffset; SET_CHAR_OFFSET works on TMP1 and the
local variable charoffset. Both expand to more than one statement and
evaluate their argument several times. */

#define SET_TYPE_OFFSET(value) \
  if ((value) > typeoffset) \
    { \
    OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    } \
  else if ((value) < typeoffset) \
    { \
    OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    } \
  typeoffset = (value);

#define SET_CHAR_OFFSET(value) \
  if ((value) > charoffset) \
    { \
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
    } \
  else if ((value) < charoffset) \
    { \
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
    } \
  charoffset = (value);
4028
4029 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4030 {
4031 DEFINE_COMPILER;
4032 jump_list *found = NULL;
4033 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
4034 pcre_int32 c, charoffset;
4035 const pcre_uint32 *other_cases;
4036 struct sljit_jump *jump = NULL;
4037 pcre_uchar *ccbegin;
4038 int compares, invertcmp, numberofcmps;
4039 #ifdef SUPPORT_UCP
4040 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4041 BOOL charsaved = FALSE;
4042 int typereg = TMP1, scriptreg = TMP1;
4043 pcre_int32 typeoffset;
4044 #endif
4045
4046 /* Although SUPPORT_UTF must be defined, we are
4047 not necessary in utf mode even in 8 bit mode. */
4048 detect_partial_match(common, backtracks);
4049 read_char(common);
4050
4051 if ((*cc++ & XCL_MAP) != 0)
4052 {
4053 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4054 #ifndef COMPILE_PCRE8
4055 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4056 #elif defined SUPPORT_UTF
4057 if (common->utf)
4058 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4059 #endif
4060
4061 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
4062 {
4063 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4064 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4065 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4066 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4067 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4068 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4069 }
4070
4071 #ifndef COMPILE_PCRE8
4072 JUMPHERE(jump);
4073 #elif defined SUPPORT_UTF
4074 if (common->utf)
4075 JUMPHERE(jump);
4076 #endif
4077 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4078 #ifdef SUPPORT_UCP
4079 charsaved = TRUE;
4080 #endif
4081 cc += 32 / sizeof(pcre_uchar);
4082 }
4083
4084 /* Scanning the necessary info. */
4085 ccbegin = cc;
4086 compares = 0;
4087 while (*cc != XCL_END)
4088 {
4089 compares++;
4090 if (*cc == XCL_SINGLE)
4091 {
4092 cc += 2;
4093 #ifdef SUPPORT_UTF
4094 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4095 #endif
4096 #ifdef SUPPORT_UCP
4097 needschar = TRUE;
4098 #endif
4099 }
4100 else if (*cc == XCL_RANGE)
4101 {
4102 cc += 2;
4103 #ifdef SUPPORT_UTF
4104 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4105 #endif
4106 cc++;
4107 #ifdef SUPPORT_UTF
4108 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4109 #endif
4110 #ifdef SUPPORT_UCP
4111 needschar = TRUE;
4112 #endif
4113 }
4114 #ifdef SUPPORT_UCP
4115 else
4116 {
4117 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4118 cc++;
4119 switch(*cc)
4120 {
4121 case PT_ANY:
4122 break;
4123
4124 case PT_LAMP:
4125 case PT_GC:
4126 case PT_PC:
4127 case PT_ALNUM:
4128 needstype = TRUE;
4129 break;
4130
4131 case PT_SC:
4132 needsscript = TRUE;
4133 break;
4134
4135 case PT_SPACE:
4136 case PT_PXSPACE:
4137 case PT_WORD:
4138 needstype = TRUE;
4139 needschar = TRUE;
4140 break;
4141
4142 case PT_CLIST:
4143 case PT_UCNC:
4144 needschar = TRUE;
4145 break;
4146
4147 default:
4148 SLJIT_ASSERT_STOP();
4149 break;
4150 }
4151 cc += 2;
4152 }
4153 #endif
4154 }
4155
4156 #ifdef SUPPORT_UCP
4157 /* Simple register allocation. TMP1 is preferred if possible. */
4158 if (needstype || needsscript)
4159 {
4160 if (needschar && !charsaved)
4161 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4162 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4163 if (needschar)
4164 {
4165 if (needstype)
4166 {
4167 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4168 typereg = RETURN_ADDR;
4169 }
4170
4171 if (needsscript)
4172 scriptreg = TMP3;
4173 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4174 }
4175 else if (needstype && needsscript)
4176 scriptreg = TMP3;
4177 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4178
4179 if (needsscript)
4180 {
4181 if (scriptreg == TMP1)
4182 {
4183 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4184 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4185 }
4186 else
4187 {
4188 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4189 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4190 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4191 }
4192 }
4193 }
4194 #endif
4195
4196 /* Generating code. */
4197 cc = ccbegin;
4198 charoffset = 0;
4199 numberofcmps = 0;
4200 #ifdef SUPPORT_UCP
4201 typeoffset = 0;
4202 #endif
4203
4204 while (*cc != XCL_END)
4205 {
4206 compares--;
4207 invertcmp = (compares == 0 && list != backtracks);
4208 jump = NULL;
4209
4210 if (*cc == XCL_SINGLE)
4211 {
4212 cc ++;
4213 #ifdef SUPPORT_UTF
4214 if (common->utf)
4215 {
4216 GETCHARINC(c, cc);
4217 }
4218 else
4219 #endif
4220 c = *cc++;
4221
4222 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4223 {
4224 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4225 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4226 numberofcmps++;
4227 }
4228 else if (numberofcmps > 0)
4229 {
4230 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4231 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4232 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4233 numberofcmps = 0;
4234 }
4235 else
4236 {
4237 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4238 numberofcmps = 0;
4239 }
4240 }
4241 else if (*cc == XCL_RANGE)
4242 {
4243 cc ++;
4244 #ifdef SUPPORT_UTF
4245 if (common->utf)
4246 {
4247 GETCHARINC(c, cc);
4248 }
4249 else
4250 #endif
4251 c = *cc++;
4252 SET_CHAR_OFFSET(c);
4253 #ifdef SUPPORT_UTF
4254 if (common->utf)
4255 {
4256 GETCHARINC(c, cc);
4257 }
4258 else
4259 #endif
4260 c = *cc++;
4261 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4262 {
4263 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4264 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4265 numberofcmps++;
4266 }
4267 else if (numberofcmps > 0)
4268 {
4269 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4270 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4271 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4272 numberofcmps = 0;
4273 }
4274 else
4275 {
4276 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4277 numberofcmps = 0;
4278 }
4279 }
4280 #ifdef SUPPORT_UCP
4281 else
4282 {
4283 if (*cc == XCL_NOTPROP)
4284 invertcmp ^= 0x1;
4285 cc++;
4286 switch(*cc)
4287 {
4288 case PT_ANY:
4289 if (list != backtracks)
4290 {
4291 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4292 continue;
4293 }
4294 else if (cc[-1] == XCL_NOTPROP)
4295 continue;
4296 jump = JUMP(SLJIT_JUMP);
4297 break;
4298
4299 case PT_LAMP:
4300 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4301 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4302 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4303 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4304 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4305 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4306 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4307 break;
4308
4309 case PT_GC:
4310 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4311 SET_TYPE_OFFSET(c);
4312 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4313 break;
4314
4315 case PT_PC:
4316 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4317 break;
4318
4319 case PT_SC:
4320 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4321 break;
4322
4323 case PT_SPACE:
4324 case PT_PXSPACE:
4325 if (*cc == PT_SPACE)
4326 {
4327 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4328 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4329 }
4330 SET_CHAR_OFFSET(9);
4331 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4332 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4333 if (*cc == PT_SPACE)
4334 JUMPHERE(jump);
4335
4336 SET_TYPE_OFFSET(ucp_Zl);
4337 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4338 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4339 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4340 break;
4341
4342 case PT_WORD:
4343 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4344 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4345 /* Fall through. */
4346
4347 case PT_ALNUM:
4348 SET_TYPE_OFFSET(ucp_Ll);
4349 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4350 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4351 SET_TYPE_OFFSET(ucp_Nd);
4352 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4353 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4354 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4355 break;
4356
4357 case PT_CLIST:
4358 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4359
4360 /* At least three characters are required.
4361 Otherwise this case would be handled by the normal code path. */
4362 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4363 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4364
4365 /* Optimizing character pairs, if their difference is power of 2. */
4366 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4367 {
4368 if (charoffset == 0)
4369 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4370 else
4371 {
4372 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4373 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4374 }
4375 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4376 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4377 other_cases += 2;
4378 }
4379 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4380 {
4381 if (charoffset == 0)
4382 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4383 else
4384 {
4385 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4386 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4387 }
4388 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4389 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4390
4391 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4392 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4393
4394 other_cases += 3;
4395 }
4396 else
4397 {
4398 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4399 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4400 }
4401
4402 while (*other_cases != NOTACHAR)
4403 {
4404 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4405 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4406 }
4407 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4408 break;
4409
4410 case PT_UCNC:
4411 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4412 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4413 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4414 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4415 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4416 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4417
4418 SET_CHAR_OFFSET(0xa0);
4419 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4420 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4421 SET_CHAR_OFFSET(0);
4422 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4423 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4424 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4425 break;
4426 }
4427 cc += 2;
4428 }
4429 #endif
4430
4431 if (jump != NULL)
4432 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4433 }
4434
4435 if (found != NULL)
4436 set_jumps(found, LABEL());
4437 }
4438
4439 #undef SET_TYPE_OFFSET
4440 #undef SET_CHAR_OFFSET
4441
4442 #endif
4443
4444 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4445 {
4446 DEFINE_COMPILER;
4447 int length;
4448 unsigned int c, oc, bit;
4449 compare_context context;
4450 struct sljit_jump *jump[4];
4451 jump_list *end_list;
4452 #ifdef SUPPORT_UTF
4453 struct sljit_label *label;
4454 #ifdef SUPPORT_UCP
4455 pcre_uchar propdata[5];
4456 #endif
4457 #endif
4458
4459 switch(type)
4460 {
4461 case OP_SOD:
4462 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4463 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4464 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4465 return cc;
4466
4467 case OP_SOM:
4468 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4470 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4471 return cc;
4472
4473 case OP_NOT_WORD_BOUNDARY:
4474 case OP_WORD_BOUNDARY:
4475 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4476 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4477 return cc;
4478
4479 case OP_NOT_DIGIT:
4480 case OP_DIGIT:
4481 /* Digits are usually 0-9, so it is worth to optimize them. */
4482 if (common->digits[0] == -2)
4483 get_ctype_ranges(common, ctype_digit, common->digits);
4484 detect_partial_match(common, backtracks);
4485 /* Flip the starting bit in the negative case. */
4486 if (type == OP_NOT_DIGIT)
4487 common->digits[1] ^= 1;
4488 if (!check_ranges(common, common->digits, backtracks, TRUE))
4489 {
4490 read_char8_type(common);
4491 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4492 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4493 }
4494 if (type == OP_NOT_DIGIT)
4495 common->digits[1] ^= 1;
4496 return cc;
4497
4498 case OP_NOT_WHITESPACE:
4499 case OP_WHITESPACE:
4500 detect_partial_match(common, backtracks);
4501 read_char8_type(common);
4502 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4503 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4504 return cc;
4505
4506 case OP_NOT_WORDCHAR:
4507 case OP_WORDCHAR:
4508 detect_partial_match(common, backtracks);
4509 read_char8_type(common);
4510 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4511 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4512 return cc;
4513
4514 case OP_ANY:
4515 detect_partial_match(common, backtracks);
4516 read_char(common);
4517 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4518 {
4519 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4520 end_list = NULL;
4521 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4522 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4523 else
4524 check_str_end(common, &end_list);
4525
4526 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4527 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4528 set_jumps(end_list, LABEL());
4529 JUMPHERE(jump[0]);
4530 }
4531 else
4532 check_newlinechar(common, common->nltype, backtracks, TRUE);
4533 return cc;
4534
4535 case OP_ALLANY:
4536 detect_partial_match(common, backtracks);
4537 #ifdef SUPPORT_UTF
4538 if (common->utf)
4539 {
4540 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4541 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4542 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4543 #if defined COMPILE_PCRE8
4544 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4545 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4546 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4547 #elif defined COMPILE_PCRE16
4548 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4549 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4550 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4551 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4552 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4553 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4554 #endif
4555 JUMPHERE(jump[0]);
4556 #endif /* COMPILE_PCRE[8|16] */
4557 return cc;
4558 }
4559 #endif
4560 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4561 return cc;
4562
4563 case OP_ANYBYTE:
4564 detect_partial_match(common, backtracks);
4565 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4566 return cc;
4567
4568 #ifdef SUPPORT_UTF
4569 #ifdef SUPPORT_UCP
4570 case OP_NOTPROP:
4571 case OP_PROP:
4572 propdata[0] = 0;
4573 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4574 propdata[2] = cc[0];
4575 propdata[3] = cc[1];
4576 propdata[4] = XCL_END;
4577 compile_xclass_matchingpath(common, propdata, backtracks);
4578 return cc + 2;
4579 #endif
4580 #endif
4581
4582 case OP_ANYNL:
4583 detect_partial_match(common, backtracks);
4584 read_char(common);
4585 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4586 /* We don't need to handle soft partial matching case. */
4587 end_list = NULL;
4588 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4589 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4590 else
4591 check_str_end(common, &end_list);
4592 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4593 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4594 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4595 jump[2] = JUMP(SLJIT_JUMP);
4596 JUMPHERE(jump[0]);
4597 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4598 set_jumps(end_list, LABEL());
4599 JUMPHERE(jump[1]);
4600 JUMPHERE(jump[2]);
4601 return cc;
4602
4603 case OP_NOT_HSPACE:
4604 case OP_HSPACE:
4605 detect_partial_match(common, backtracks);
4606 read_char(common);
4607 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4608 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4609 return cc;
4610
4611 case OP_NOT_VSPACE:
4612 case OP_VSPACE:
4613 detect_partial_match(common, backtracks);
4614 read_char(common);
4615 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4616 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4617 return cc;
4618
4619 #ifdef SUPPORT_UCP
4620 case OP_EXTUNI:
4621 detect_partial_match(common, backtracks);
4622 read_char(common);
4623 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4624 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4625 /* Optimize register allocation: use a real register. */
4626 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4627 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4628
4629 label = LABEL();
4630 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4631 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4632 read_char(common);
4633 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4634 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4635 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4636
4637 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4638 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4639 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4640 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4641 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4642 JUMPTO(SLJIT_C_NOT_ZERO, label);
4643
4644 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4645 JUMPHERE(jump[0]);
4646 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4647
4648 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4649 {
4650 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4651 /* Since we successfully read a char above, partial matching must occure. */
4652 check_partial(common, TRUE);
4653 JUMPHERE(jump[0]);
4654 }
4655 return cc;
4656 #endif
4657
4658 case OP_EODN:
4659 /* Requires rather complex checks. */
4660 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4661 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4662 {
4663 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4664 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4665 if (common->mode == JIT_COMPILE)
4666 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4667 else
4668 {
4669 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4670 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4671 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4672 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4673 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4674 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4675 check_partial(common, TRUE);
4676 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4677 JUMPHERE(jump[1]);
4678 }
4679 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4680 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4681 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4682 }
4683 else if (common->nltype == NLTYPE_FIXED)
4684 {
4685 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4686 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4687 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4688 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4689 }
4690 else
4691 {
4692 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4693 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4694 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4695 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4696 jump[2] = JUMP(SLJIT_C_GREATER);
4697 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4698 /* Equal. */
4699 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4700 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4701 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4702
4703 JUMPHERE(jump[1]);
4704 if (common->nltype == NLTYPE_ANYCRLF)
4705 {
4706 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4707 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4708 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4709 }
4710 else
4711 {
4712 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4713 read_char(common);
4714 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4715 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4716 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4717 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4718 }
4719 JUMPHERE(jump[2]);
4720 JUMPHERE(jump[3]);
4721 }
4722 JUMPHERE(jump[0]);
4723 check_partial(common, FALSE);
4724 return cc;
4725
4726 case OP_EOD:
4727 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4728 check_partial(common, FALSE);
4729 return cc;
4730
4731 case OP_CIRC:
4732 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4733 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4734 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4735 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4736 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4737 return cc;
4738
4739 case OP_CIRCM:
4740 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4741 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4742 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4743 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4744 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4745 jump[0] = JUMP(SLJIT_JUMP);
4746 JUMPHERE(jump[1]);
4747
4748 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4749 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4750 {
4751 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4752 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4753 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4754 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4755 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4756 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4757 }
4758 else
4759 {
4760 skip_char_back(common);
4761 read_char(common);
4762 check_newlinechar(common, common->nltype, backtracks, FALSE);
4763 }
4764 JUMPHERE(jump[0]);
4765 return cc;
4766
4767 case OP_DOLL:
4768 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4769 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4770 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4771
4772 if (!common->endonly)
4773 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4774 else
4775 {
4776 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4777 check_partial(common, FALSE);
4778 }
4779 return cc;
4780
4781 case OP_DOLLM:
4782 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4783 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4784 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4785 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4786 check_partial(common, FALSE);
4787 jump[0] = JUMP(SLJIT_JUMP);
4788 JUMPHERE(jump[1]);
4789
4790 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4791 {
4792 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4793 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4794 if (common->mode == JIT_COMPILE)
4795 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4796 else
4797 {
4798 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4799 /* STR_PTR = STR_END - IN_UCHARS(1) */
4800 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4801 check_partial(common, TRUE);
4802 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4803 JUMPHERE(jump[1]);
4804 }
4805
4806 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4807 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4808 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4809 }
4810 else
4811 {
4812 peek_char(common);
4813 check_newlinechar(common, common->nltype, backtracks, FALSE);
4814 }
4815 JUMPHERE(jump[0]);
4816 return cc;
4817
4818 case OP_CHAR:
4819 case OP_CHARI:
4820 length = 1;
4821 #ifdef SUPPORT_UTF
4822 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4823 #endif
4824 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4825 {
4826 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4827 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4828
4829 context.length = IN_UCHARS(length);
4830 context.sourcereg = -1;
4831 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4832 context.ucharptr = 0;
4833 #endif
4834 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4835 }
4836 detect_partial_match(common, backtracks);
4837 read_char(common);
4838 #ifdef SUPPORT_UTF
4839 if (common->utf)
4840 {
4841 GETCHAR(c, cc);
4842 }
4843 else
4844 #endif
4845 c = *cc;
4846 if (type == OP_CHAR || !char_has_othercase(common, cc))
4847 {
4848 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4849 return cc + length;
4850 }
4851 oc = char_othercase(common, c);
4852 bit = c ^ oc;
4853 if (is_powerof2(bit))
4854 {
4855 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4856 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4857 return cc + length;
4858 }
4859 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4860 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4861 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4862 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4863 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4864 return cc + length;
4865
4866 case OP_NOT:
4867 case OP_NOTI:
4868 detect_partial_match(common, backtracks);
4869 length = 1;
4870 #ifdef SUPPORT_UTF
4871 if (common->utf)
4872 {
4873 #ifdef COMPILE_PCRE8
4874 c = *cc;
4875 if (c < 128)
4876 {
4877 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4878 if (type == OP_NOT || !char_has_othercase(common, cc))
4879 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4880 else
4881 {
4882 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4883 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4884 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4885 }
4886 /* Skip the variable-length character. */
4887 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4888 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4889 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4890 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4891 JUMPHERE(jump[0]);
4892 return cc + 1;
4893 }
4894 else
4895 #endif /* COMPILE_PCRE8 */
4896 {
4897 GETCHARLEN(c, cc, length);
4898 read_char(common);
4899 }
4900 }
4901 else
4902 #endif /* SUPPORT_UTF */
4903 {
4904 read_char(common);
4905 c = *cc;
4906 }
4907
4908 if (type == OP_NOT || !char_has_othercase(common, cc))
4909 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4910 else
4911 {
4912 oc = char_othercase(common, c);
4913 bit = c ^ oc;
4914 if (is_powerof2(bit))
4915 {
4916 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4917 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4918 }
4919 else
4920 {
4921 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4922 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4923 }
4924 }
4925 return cc + length;
4926
4927 case OP_CLASS:
4928 case OP_NCLASS:
4929 detect_partial_match(common, backtracks);
4930 read_char(common);
4931 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4932 return cc + 32 / sizeof(pcre_uchar);
4933
4934 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4935 jump[0] = NULL;
4936 #ifdef COMPILE_PCRE8
4937 /* This check only affects 8 bit mode. In other modes, we
4938 always need to compare the value with 255. */
4939 if (common->utf)
4940 #endif /* COMPILE_PCRE8 */
4941 {
4942 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4943 if (type == OP_CLASS)
4944 {
4945 add_jump(compiler, backtracks, jump[0]);
4946 jump[0] = NULL;
4947 }
4948 }
4949 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4950 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4951 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4952 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4953 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4954 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4955 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4956 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4957 if (jump[0] != NULL)
4958 JUMPHERE(jump[0]);
4959 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4960 return cc + 32 / sizeof(pcre_uchar);
4961
4962 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4963 case OP_XCLASS:
4964 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4965 return cc + GET(cc, 0) - 1;
4966 #endif
4967
4968 case OP_REVERSE:
4969 length = GET(cc, 0);
4970 if (length == 0)
4971 return cc + LINK_SIZE;
4972 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4973 #ifdef SUPPORT_UTF
4974 if (common->utf)
4975 {
4976 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4977 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4978 label = LABEL();
4979 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4980 skip_char_back(common);
4981 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4982 JUMPTO(SLJIT_C_NOT_ZERO, label);
4983 }
4984 else
4985 #endif
4986 {
4987 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4988 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4989 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4990 }
4991 check_start_used_ptr(common);
4992 return cc + LINK_SIZE;
4993 }
4994 SLJIT_ASSERT_STOP();
4995 return cc;
4996 }
4997
/* Emits the matching path for a run of literal characters starting at cc.

   A first pass walks consecutive OP_CHAR / OP_CHARI opcodes (never past
   ccend) and accumulates their length, converted with IN_UCHARS(), in
   context.length. The scan stops at the first opcode that contributes
   size == 0 or once context.length exceeds 128.

   If anything was collected, one STR_PTR advance and one STR_END bounds check
   are emitted for the whole run, and byte_sequence_compare() is called
   repeatedly until context.length reaches 0. Otherwise the opcode at cc is
   handed to compile_char1_matchingpath().

   Failing comparisons are added to the backtracks list. Returns the code
   pointer returned by the last helper that was called. */
4998 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4999 {
5000 /* This function consumes at least one input character. */
5001 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5002 DEFINE_COMPILER;
5003 pcre_uchar *ccbegin = cc;
5004 compare_context context;
5005 int size;
5006
5007 context.length = 0;
/* Pass 1: measure the run. size is the number of code units that follow the
   opcode; size == 0 marks an opcode that cannot be part of the run. */
5008 do
5009 {
5010 if (cc >= ccend)
5011 break;
5012
5013 if (*cc == OP_CHAR)
5014 {
5015 size = 1;
5016 #ifdef SUPPORT_UTF
5017 if (common->utf && HAS_EXTRALEN(cc[1]))
5018 size += GET_EXTRALEN(cc[1]);
5019 #endif
5020 }
5021 else if (*cc == OP_CHARI)
5022 {
5023 size = 1;
/* A caseless character is excluded from the run (size = 0) when it has an
   other case and char_get_othercase_bit() returns 0 for it. */
5024 #ifdef SUPPORT_UTF
5025 if (common->utf)
5026 {
5027 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5028 size = 0;
5029 else if (HAS_EXTRALEN(cc[1]))
5030 size += GET_EXTRALEN(cc[1]);
5031 }
5032 else
5033 #endif
5034 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5035 size = 0;
5036 }
5037 else
5038 size = 0;
5039
5040 cc += 1 + size;
5041 context.length += IN_UCHARS(size);
5042 }
5043 while (size > 0 && context.length <= 128);
5044
/* Pass 2: rewind and emit the code for the measured run. */
5045 cc = ccbegin;
5046 if (context.length > 0)
5047 {
5048 /* We have a fixed-length byte sequence. */
/* Advance STR_PTR past the whole run first, so a single comparison against
   STR_END covers every character of the run. */
5049 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5050 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5051
5052 context.sourcereg = -1;
5053 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5054 context.ucharptr = 0;
5055 #endif
/* The loop ends when context.length reaches 0; presumably each
   byte_sequence_compare() call reduces it by the size of the character it
   handled - confirm against that helper. */
5056 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5057 return cc;
5058 }
5059
5060 /* A non-fixed length character will be checked if length == 0. */
5061 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5062 }
5063
/* Emits the checks that precede a repeated back reference and returns a jump
   the caller has to resolve.

   TMP1 is loaded with OVECTOR(offset), where offset is twice the group number
   read with GET2(cc, 1).

   When common->jscript_compat is not set:
   - backtracks == NULL: a single jump is returned that is taken when TMP1
     equals OVECTOR(1) or equals OVECTOR(offset + 1).
   - backtracks != NULL: a jump taken when TMP1 equals OVECTOR(1) is added to
     the backtracks list.

   In the remaining cases the returned jump is taken when TMP1 equals
   OVECTOR(offset + 1), i.e. when the two values stored for the group are
   equal. */
5064 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5065 {
5066 DEFINE_COMPILER;
5067 int offset = GET2(cc, 1) << 1;
5068
5069 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5070 if (!common->jscript_compat)
5071 {
5072 if (backtracks == NULL)
5073 {
5074 /* OVECTOR(1) contains the "string begin - 1" constant. */
/* TMP2 collects the result of the first comparison and is OR-ed with the
   result of the second one; the final OR also sets the flags the returned
   jump tests. */
5075 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5076 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5077 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5078 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5079 return JUMP(SLJIT_C_NOT_ZERO);
5080 }
5081 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5082 }
5083 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5084 }
5085
5086 /* Forward declarations. Both functions are called by code generators that
   follow (for example compile_recurse_matchingpath and
   compile_assert_matchingpath). */
5087 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5088 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5089
/* Allocates a zero-filled backtrack record of `size` bytes with
   sljit_alloc_memory() and makes it the new top of parent's backtrack list
   (the previous top is kept in backtrack->prev, ccstart in backtrack->cc).
   If the compiler reports an error after the allocation, the enclosing
   function returns `error`.
   The expansion uses the caller's local variables named compiler, backtrack
   and parent. */
5090 #define PUSH_BACKTRACK(size, ccstart, error) \
5091 do \
5092 { \
5093 backtrack = sljit_alloc_memory(compiler, (size)); \
5094 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5095 return error; \
5096 memset(backtrack, 0, size); \
5097 backtrack->prev = parent->top; \
5098 backtrack->cc = (ccstart); \
5099 parent->top = backtrack; \
5100 } \
5101 while (0)
5102
/* Same as PUSH_BACKTRACK, except that the enclosing function leaves with a
   plain `return;` when the compiler reports an error, so it is meant for
   functions that return no value.
   The expansion uses the caller's local variables named compiler, backtrack
   and parent. */
5103 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5104 do \
5105 { \
5106 backtrack = sljit_alloc_memory(compiler, (size)); \
5107 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5108 return; \
5109 memset(backtrack, 0, size); \
5110 backtrack->prev = parent->top; \
5111 backtrack->cc = (ccstart); \
5112 parent->top = backtrack; \
5113 } \
5114 while (0)
5115
/* Views the caller's local `backtrack` pointer as a pointer to `type`. */
5116 #define BACKTRACK_AS(type) ((type *)backtrack)
5117
/* Emits the code that compares the subject at STR_PTR with the text of the
   group referenced by the OP_REF / OP_REFI opcode at cc.

   withchecks: when TRUE, also emit the check against OVECTOR(1) (skipped when
               common->jscript_compat is set) and a jump for the case where the
               two values stored for the group are equal.
   emptyfail:  decides what happens with that "equal values" jump: TRUE adds it
               to backtracks, FALSE makes it land right after the comparison
               code.

   Mismatches are added to the backtracks list. When common->mode is not
   JIT_COMPILE, extra code handles a reference that reaches past STR_END and
   calls check_partial().
   Returns cc + 1 + IMM2_SIZE. */
5118 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5119 {
5120 DEFINE_COMPILER;
5121 int offset = GET2(cc, 1) << 1;
5122 struct sljit_jump *jump = NULL;
5123 struct sljit_jump *partial;
5124 struct sljit_jump *nopartial;
5125
5126 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5127 /* OVECTOR(1) contains the "string begin - 1" constant. */
5128 if (withchecks && !common->jscript_compat)
5129 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5130
5131 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5132 if (common->utf && *cc == OP_REFI)
5133 {
/* Caseless comparison in UTF mode is done by calling do_utf_caselesscmp().
   The assert documents the register assignment the call below relies on. */
5134 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5135 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5136 if (withchecks)
5137 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5138
5139 /* Needed to save important temporary registers. */
/* STACK_TOP lives in SLJIT_SCRATCH_REG2, which is loaded with ARGUMENTS for
   the call, so it is parked in LOCALS0 and restored afterwards. The current
   STR_PTR is passed through the uchar_ptr field of jit_arguments. */
5140 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5141 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5142 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5143 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5144 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return value handling: 0 always backtracks; 1 backtracks too, but outside
   JIT_COMPILE mode check_partial() is emitted first; any other value becomes
   the new STR_PTR. */
5145 if (common->mode == JIT_COMPILE)
5146 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5147 else
5148 {
5149 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5150 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5151 check_partial(common, FALSE);
5152 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5153 JUMPHERE(nopartial);
5154 }
5155 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5156 }
5157 else
5158 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5159 {
/* TMP2 = OVECTOR(offset + 1) - TMP1; the zero flag of this subtraction is
   what the withchecks jump tests. */
5160 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5161 if (withchecks)
5162 jump = JUMP(SLJIT_C_ZERO);
5163
/* Advance STR_PTR by that difference before comparing; running past STR_END
   is a plain failure in JIT_COMPILE mode and is handled separately below in
   the other modes. */
5164 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5165 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5166 if (common->mode == JIT_COMPILE)
5167 add_jump(compiler, backtracks, partial);
5168
/* The comparison itself is a fast call into the shared casefulcmp or
   caselesscmp routine; a non-zero TMP2 afterwards is treated as a mismatch. */
5169 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5170 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5171
5172 if (common->mode != JIT_COMPILE)
5173 {
5174 nopartial = JUMP(SLJIT_JUMP);
5175 JUMPHERE(partial);
5176 /* TMP2 -= STR_END - STR_PTR */
/* Only the part of the reference that still fits before STR_END is compared;
   if nothing fits (TMP2 == 0) the comparison is skipped. Either way
   check_partial() is emitted and the path then backtracks. */
5177 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5178 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5179 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5180 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5181 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5182 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5183 JUMPHERE(partial);
5184 check_partial(common, FALSE);
5185 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5186 JUMPHERE(nopartial);
5187 }
5188 }
5189
/* jump is only set when withchecks is TRUE. */
5190 if (jump != NULL)
5191 {
5192 if (emptyfail)
5193 add_jump(compiler, backtracks, jump);
5194 else
5195 JUMPHERE(jump);
5196 }
5197 return cc + 1 + IMM2_SIZE;
5198 }
5199
/* Emits the matching path of a back reference that is followed by a repeat
   opcode (OP_CRSTAR ... OP_CRMINRANGE).

   The repeat opcode is read from cc[1 + IMM2_SIZE]; odd opcode values select
   the minimizing variant. min and max hold the repeat limits, and max == 0 is
   the value used for the unbounded star and plus forms.

   An iterator_backtrack record is pushed onto parent, and its matchingpath
   label is set for use by the backtracking path.
   Returns the code pointer after the repeat opcode and its operands, or NULL
   if the backtrack record could not be allocated. */
5200 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5201 {
5202 DEFINE_COMPILER;
5203 backtrack_common *backtrack;
5204 pcre_uchar type;
5205 struct sljit_label *label;
5206 struct sljit_jump *zerolength;
5207 struct sljit_jump *jump = NULL;
5208 pcre_uchar *ccbegin = cc;
5209 int min = 0, max = 0;
5210 BOOL minimize;
5211
5212 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5213
5214 type = cc[1 + IMM2_SIZE];
5215 minimize = (type & 0x1) != 0;
/* Decode the repeat limits and step cc over the reference, the repeat opcode
   and, for ranges, the two limit operands. */
5216 switch(type)
5217 {
5218 case OP_CRSTAR:
5219 case OP_CRMINSTAR:
5220 min = 0;
5221 max = 0;
5222 cc += 1 + IMM2_SIZE + 1;
5223 break;
5224 case OP_CRPLUS:
5225 case OP_CRMINPLUS:
5226 min = 1;
5227 max = 0;
5228 cc += 1 + IMM2_SIZE + 1;
5229 break;
5230 case OP_CRQUERY:
5231 case OP_CRMINQUERY:
5232 min = 0;
5233 max = 1;
5234 cc += 1 + IMM2_SIZE + 1;
5235 break;
5236 case OP_CRRANGE:
5237 case OP_CRMINRANGE:
5238 min = GET2(cc, 1 + IMM2_SIZE + 1);
5239 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5240 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5241 break;
5242 default:
5243 SLJIT_ASSERT_STOP();
5244 break;
5245 }
5246
/* Greedy variant: match as many copies as allowed, pushing a STR_PTR onto the
   stack for each additional iteration. */
5247 if (!minimize)
5248 {
5249 if (min == 0)
5250 {
/* Zero iterations are allowed: the current STR_PTR is pushed as well, above
   the 0 stored in STACK(1). */
5251 allocate_stack(common, 2);
5252 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5253 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5254 /* Temporary release of STR_PTR. */
5255 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5256 zerolength = compile_ref_checks(common, ccbegin, NULL);
5257 /* Restore if not zero length. */
5258 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5259 }
5260 else
5261 {
5262 allocate_stack(common, 1);
5263 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5264 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5265 }
5266
/* POSSESSIVE0 is used as the iteration counter when a limit above 1 has to be
   enforced. */
5267 if (min > 1 || max > 1)
5268 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5269
5270 label = LABEL();
5271 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5272
5273 if (min > 1 || max > 1)
5274 {
5275 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5276 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5277 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below min the loop repeats without pushing STR_PTR. */
5278 if (min > 1)
5279 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5280 if (max > 1)
5281 {
5282 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5283 allocate_stack(common, 1);
5284 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5285 JUMPTO(SLJIT_JUMP, label);
5286 JUMPHERE(jump);
5287 }
5288 }
5289
5290 if (max == 0)
5291 {
5292 /* Includes min > 1 case as well. */
5293 allocate_stack(common, 1);
5294 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5295 JUMPTO(SLJIT_JUMP, label);
5296 }
5297
5298 JUMPHERE(zerolength);
5299 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5300
5301 decrease_call_count(common);
5302 return cc;
5303 }
5304
/* Minimizing variant: two stack slots are reserved; STACK(0) receives STR_PTR
   and STACK(1) serves as the iteration counter (it is not initialised for
   OP_CRMINSTAR). */
5305 allocate_stack(common, 2);
5306 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5307 if (type != OP_CRMINSTAR)
5308 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5309
5310 if (min == 0)
5311 {
/* With min == 0 the first pass skips the reference entirely: STR_PTR is saved
   and the code jumps past the matching code below. */
5312 zerolength = compile_ref_checks(common, ccbegin, NULL);
5313 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5314 jump = JUMP(SLJIT_JUMP);
5315 }
5316 else
5317 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5318
5319 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5320 if (max > 0)
5321 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5322
5323 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5324 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5325
5326 if (min > 1)
5327 {
5328 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5329 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5330 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5331 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5332 }
5333 else if (max > 0)
5334 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5335
5336 if (jump != NULL)
5337 JUMPHERE(jump);
5338 JUMPHERE(zerolength);
5339
5340 decrease_call_count(common);
5341 return cc;
5342 }
5343
/* Emits the matching path of a recursion / subroutine call whose target group
   starts at common->start + GET(cc, 1).

   If get_framesize() reports no_stack for the target, the group body is
   compiled inline and the backtrack record is flagged with inlined_pattern.
   Otherwise a recurse_entry for the target is looked up in common->entries
   (and appended if missing), the start-of-match value and/or the mark pointer
   are saved on the stack when the pattern uses them, and a fast call to the
   entry is emitted. After the call, TMP1 == 0 is treated as a failed match.

   Returns cc + 1 + LINK_SIZE, or NULL when an allocation fails. */
5344 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5345 {
5346 DEFINE_COMPILER;
5347 backtrack_common *backtrack;
5348 recurse_entry *entry = common->entries;
5349 recurse_entry *prev = NULL;
5350 sljit_sw start = GET(cc, 1);
5351 pcre_uchar *start_cc;
5352 BOOL needs_control_head;
5353
5354 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5355
5356 /* Inlining simple patterns. */
5357 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5358 {
5359 start_cc = common->start + start;
5360 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5361 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5362 return cc + 1 + LINK_SIZE;
5363 }
5364
/* Look for an existing entry with the same start offset; prev ends up as the
   last list element when none is found. */
5365 while (entry != NULL)
5366 {
5367 if (entry->start == start)
5368 break;
5369 prev = entry;
5370 entry = entry->next;
5371 }
5372
5373 if (entry == NULL)
5374 {
5375 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5376 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5377 return NULL;
5378 entry->next = NULL;
5379 entry->entry = NULL;
5380 entry->calls = NULL;
5381 entry->start = start;
5382
5383 if (prev != NULL)
5384 prev->next = entry;
5385 else
5386 common->entries = entry;
5387 }
5388
/* Save OVECTOR(0) and the mark pointer (both, one of them, or neither) so
   they can be restored after the call. */
5389 if (common->has_set_som && common->mark_ptr != 0)
5390 {
5391 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5392 allocate_stack(common, 2);
5393 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5394 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5395 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5396 }
5397 else if (common->has_set_som || common->mark_ptr != 0)
5398 {
5399 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5400 allocate_stack(common, 1);
5401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5402 }
5403
/* If the entry label is not known yet, the call is queued in entry->calls;
   otherwise it jumps straight to the label. */
5404 if (entry->entry == NULL)
5405 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5406 else
5407 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5408 /* Leave if the match is failed. */
5409 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5410 return cc + 1 + LINK_SIZE;
5411 }
5412
/* Helper called from the code emitted by compile_callout_matchingpath() to
   run the user supplied callout function.

   arguments:     the jit_arguments of the current match.
   callout_block: block partly filled in by the generated code. On entry its
                  subject and offset_vector fields temporarily carry two
                  pointers stored by that code; they are turned into
                  start_match and current_position (offsets from
                  arguments->begin) before the fields get their final values,
                  so the order of the assignments below must be kept.
   jit_ovector:   the JIT's capture vector, holding pointers.

   Returns 0 when no callout function is installed, otherwise the value
   returned by the callout function. */
5413 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5414 {
5415 const pcre_uchar *begin = arguments->begin;
5416 int *offset_vector = arguments->offsets;
5417 int offset_count = arguments->offset_count;
5418 int i;
5419
5420 if (PUBL(callout) == NULL)
5421 return 0;
5422
5423 callout_block->version = 2;
5424 callout_block->callout_data = arguments->callout_data;
5425
5426 /* Offsets in subject. */
5427 callout_block->subject_length = arguments->end - arguments->begin;
/* These two reads must happen before subject and offset_vector are
   overwritten further down. */
5428 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5429 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5430 #if defined COMPILE_PCRE8
5431 callout_block->subject = (PCRE_SPTR)begin;
5432 #elif defined COMPILE_PCRE16
5433 callout_block->subject = (PCRE_SPTR16)begin;
5434 #elif defined COMPILE_PCRE32
5435 callout_block->subject = (PCRE_SPTR32)begin;
5436 #endif
5437
5438 /* Convert and copy the JIT offset vector to the offset_vector array. */
5439 callout_block->capture_top = 0;
5440 callout_block->offset_vector = offset_vector;
/* Pair 0 is skipped here and set to -1 below. capture_top remembers the index
   of the last pair whose start pointer is not before begin.
   NOTE(review): entry i + 1 is written whenever i < offset_count, so this
   assumes offset_count is even - confirm against the caller. */
5441 for (i = 2; i < offset_count; i += 2)
5442 {
5443 offset_vector[i] = jit_ovector[i] - begin;
5444 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5445 if (jit_ovector[i] >= begin)
5446 callout_block->capture_top = i;
5447 }
5448
/* Turn the vector index into "highest pair number + 1". */
5449 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5450 if (offset_count > 0)
5451 offset_vector[0] = -1;
5452 if (offset_count > 1)
5453 offset_vector[1] = -1;
5454 return (*PUBL(callout))(callout_block);
5455 }
5456
5457 /* Size of the callout block, rounded up to a multiple of 8 bytes. */
5458 #define CALLOUT_ARG_SIZE \
5459 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5460
/* Offset of the callout block member `arg`, measured from a base that lies
   CALLOUT_ARG_SIZE bytes above the start of the block (hence the negative
   term). compile_callout_matchingpath uses it relative to STACK_TOP. */
5461 #define CALLOUT_ARG_OFFSET(arg) \
5462 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5463
/* Emits the matching path of a callout opcode at cc.

   The generated code reserves CALLOUT_ARG_SIZE bytes on the stack for a
   callout block, fills in callout_number (cc[1]), capture_last,
   pattern_position (GET(cc, 2)), next_item_length (GET(cc, 2 + LINK_SIZE))
   and mark, and then calls do_callout(). The reserved stack space is released
   after the call.

   Return value of do_callout() in the generated code: greater than zero adds
   a jump to the backtrack list, less than zero leaves through the forced quit
   path, zero continues on the matching path.

   Returns cc + 2 + 2 * LINK_SIZE, or NULL if the backtrack record could not
   be allocated. */
5464 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5465 {
5466 DEFINE_COMPILER;
5467 backtrack_common *backtrack;
5468
5469 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5470
5471 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5472
5473 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
/* TMP1 keeps ARGUMENTS until the call; it is also the base for reading
   mark_ptr below. */
5474 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5475 SLJIT_ASSERT(common->capture_last_ptr != 0);
5476 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5477 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5478
5479 /* These pointer sized fields temporarily store internal variables. */
/* offset_vector carries STR_PTR and subject carries OVECTOR(0); do_callout()
   converts them and then overwrites both fields. */
5480 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5481 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5483
/* mark is taken from jit_arguments.mark_ptr when the pattern has a mark
   pointer, otherwise an immediate 0 is stored. */
5484 if (common->mark_ptr != 0)
5485 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5486 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5487 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5488 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5489
5490 /* Needed to save important temporary registers. */
/* STACK_TOP is parked in LOCALS0 across the call. SLJIT_SCRATCH_REG2 receives
   the address of the callout block and SLJIT_SCRATCH_REG3 the address of the
   capture vector in the locals area. */
5491 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5492 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5493 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5494 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* do_callout() returns an int, hence the MOV_SI on the return register before
   it is compared. */
5495 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5496 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5497 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5498
5499 /* Check return value. */
5500 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5501 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5502 if (common->forced_quit_label == NULL)
5503 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5504 else
5505 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5506 return cc + 2 + 2 * LINK_SIZE;
5507 }
5508
/* The callout helper macros are only needed by compile_callout_matchingpath. */
5509 #undef CALLOUT_ARG_SIZE
5510 #undef CALLOUT_ARG_OFFSET
5511
/* Emits the matching path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
   OP_ASSERTBACK or OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO or
   OP_BRAMINZERO.

   backtrack:   assert_backtrack record of the assertion; framesize and
                private_data_ptr are stored in it, and matchingpath is set or
                used for the BRAZERO / BRAMINZERO forms.
   conditional: when TRUE the "assertion failed" jumps are collected in
                backtrack->condfailed instead of
                backtrack->common.topbacktracks. A BRAZERO / BRAMINZERO prefix
                is asserted not to occur in this case.

   While the alternatives are compiled, common->local_exit, then_trap,
   quit_label, accept_label, quit and accept are replaced by assertion-local
   values; the saved values are restored on every return path, including the
   error returns.

   Returns the code pointer 1 + LINK_SIZE past the opcode that ends the last
   alternative, or NULL when the compiler reports an error. */
5512 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5513 {
5514 DEFINE_COMPILER;
5515 int framesize;
5516 int extrasize;
5517 BOOL needs_control_head;
5518 int private_data_ptr;
5519 backtrack_common altbacktrack;
5520 pcre_uchar *ccbegin;
5521 pcre_uchar opcode;
5522 pcre_uchar bra = OP_BRA;
5523 jump_list *tmp = NULL;
5524 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5525 jump_list **found;
5526 /* Saving previous accept variables. */
5527 BOOL save_local_exit = common->local_exit;
5528 then_trap_backtrack *save_then_trap = common->then_trap;
5529 struct sljit_label *save_quit_label = common->quit_label;
5530 struct sljit_label *save_accept_label = common->accept_label;
5531 jump_list *save_quit = common->quit;
5532 jump_list *save_accept = common->accept;
5533 struct sljit_jump *jump;
5534 struct sljit_jump *brajump = NULL;
5535
5536 /* Assert captures then. */
5537 common->then_trap = NULL;
5538
5539 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5540 {
5541 SLJIT_ASSERT(!conditional);
5542 bra = *cc;
5543 cc++;
5544 }
5545 private_data_ptr = PRIVATE_DATA(cc);
5546 SLJIT_ASSERT(private_data_ptr != 0);
5547 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5548 backtrack->framesize = framesize;
5549 backtrack->private_data_ptr = private_data_ptr;
5550 opcode = *cc;
5551 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* `found` receives the jump emitted when an alternative matched. For positive
   assertions these jumps are gathered in tmp and bound at "Assert is
   successful" below; for negative assertions a matching alternative means the
   assertion failed, so they go straight to target. */
5552 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin .. cc delimit the current alternative. */
5553 ccbegin = cc;
5554 cc += GET(cc, 1);
5555
5556 if (bra == OP_BRAMINZERO)
5557 {
5558 /* This is a braminzero backtrack path. */
5559 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5560 free_stack(common, 1);
5561 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5562 }
5563
/* Set up the stack. Without a frame (framesize < 0) only STR_PTR and, when
   needed, the previous control head are pushed. With a frame, the previous
   value of the private data slot is pushed as well and init_frame() fills in
   the frame itself. In both layouts STR_PTR ends up in STACK(0). */
5564 if (framesize < 0)
5565 {
5566 extrasize = needs_control_head ? 2 : 1;
5567 if (framesize == no_frame)
5568 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5569 allocate_stack(common, extrasize);
5570 if (needs_control_head)
5571 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5572 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5573 if (needs_control_head)
5574 {
5575 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5576 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5577 }
5578 }
5579 else
5580 {
5581 extrasize = needs_control_head ? 3 : 2;
5582 allocate_stack(common, framesize + extrasize);
5583 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5584 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5585 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5586 if (needs_control_head)
5587 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5588 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5589 if (needs_control_head)
5590 {
5591 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5592 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5593 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5594 }
5595 else
5596 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5597 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
5598 }
5599
5600 memset(&altbacktrack, 0, sizeof(backtrack_common));
5601 common->local_exit = TRUE;
5602 common->quit_label = NULL;
5603 common->quit = NULL;
/* One iteration per alternative: compile its matching path, emit the code
   that runs when the alternative matched, then compile its backtracking
   path. The accept list is reset for every alternative; the quit list is
   shared by all of them. */
5604 while (1)
5605 {
5606 common->accept_label = NULL;
5607 common->accept = NULL;
5608 altbacktrack.top = NULL;
5609 altbacktrack.topbacktracks = NULL;
5610
/* Every alternative after the first starts from the saved STR_PTR. */
5611 if (*ccbegin == OP_ALT)
5612 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5613
5614 altbacktrack.cc = ccbegin;
5615 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5616 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5617 {
5618 common->local_exit = save_local_exit;
5619 common->then_trap = save_then_trap;
5620 common->quit_label = save_quit_label;
5621 common->accept_label = save_accept_label;
5622 common->quit = save_quit;
5623 common->accept = save_accept;
5624 return NULL;
5625 }
/* Accept jumps collected inside this alternative land here, at the start of
   the "alternative matched" code. */
5626 common->accept_label = LABEL();
5627 if (common->accept != NULL)
5628 set_jumps(common->accept, common->accept_label);
5629
5630 /* Reset stack. */
5631 if (framesize < 0)
5632 {
5633 if (framesize == no_frame)
5634 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5635 else
5636 free_stack(common, extrasize);
5637 if (needs_control_head)
5638 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5639 }
5640 else
5641 {
5642 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5643 {
5644 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5645 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5646 if (needs_control_head)
5647 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5648 }
5649 else
5650 {
5651 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5652 if (needs_control_head)
5653 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5654 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5655 }
5656 }
5657
/* For negative assertions the jump added to `found` below is the failure
   exit, so everything the failing path needs is restored before it. */
5658 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5659 {
5660 /* We know that STR_PTR was stored on the top of the stack. */
5661 if (conditional)
5662 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5663 else if (bra == OP_BRAZERO)
5664 {
5665 if (framesize < 0)
5666 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5667 else
5668 {
5669 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5670 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5672 }
5673 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5674 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5675 }
5676 else if (framesize >= 0)
5677 {
5678 /* For OP_BRA and OP_BRAMINZERO. */
5679 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5680 }
5681 }
5682 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5683
5684 compile_backtrackingpath(common, altbacktrack.top);
5685 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5686 {
5687 common->local_exit = save_local_exit;
5688 common->then_trap = save_then_trap;
5689 common->quit_label = save_quit_label;
5690 common->accept_label = save_accept_label;
5691 common->quit = save_quit;
5692 common->accept = save_accept;
5693 return NULL;
5694 }
/* Failures of this alternative continue here, i.e. with the next alternative
   or with the "none of them matched" code. */
5695 set_jumps(altbacktrack.topbacktracks, LABEL());
5696
5697 if (*cc != OP_ALT)
5698 break;
5699
5700 ccbegin = cc;
5701 cc += GET(cc, 1);
5702 }
5703
5704 /* None of them matched. */
/* Jumps gathered in common->quit while compiling the alternatives arrive
   here; the stack top is recomputed for them before they join the normal
   "no alternative matched" flow, which skips this fix-up. */
5705 if (common->quit != NULL)
5706 {
5707 jump = JUMP(SLJIT_JUMP);
5708 set_jumps(common->quit, LABEL());
5709 SLJIT_ASSERT(framesize != no_stack);
5710 if (framesize < 0)
5711 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5712 else
5713 {
5714 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5715 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5716 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5717 }
5718 JUMPHERE(jump);
5719 }
5720
/* Restore the control head that was saved in STACK(1) during setup. */
5721 if (needs_control_head)
5722 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5723
5724 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5725 {
5726 /* Assert is failed. */
5727 if (conditional || bra == OP_BRAZERO)
5728 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5729
5730 if (framesize < 0)
5731 {
5732 /* The topmost item should be 0. */
5733 if (bra == OP_BRAZERO)
5734 {
5735 if (extrasize == 2)
5736 free_stack(common, 1);
5737 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5738 }
5739 else
5740 free_stack(common, extrasize);
5741 }
5742 else
5743 {
5744 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5745 /* The topmost item should be 0. */
5746 if (bra == OP_BRAZERO)
5747 {
5748 free_stack(common, framesize + extrasize - 1);
5749 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5750 }
5751 else
5752 free_stack(common, framesize + extrasize);
5753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5754 }
/* The failure jump goes to target, except for BRAZERO where it is bound to
   the matching path label further down. */
5755 jump = JUMP(SLJIT_JUMP);
5756 if (bra != OP_BRAZERO)
5757 add_jump(compiler, target, jump);
5758
5759 /* Assert is successful. */
5760 set_jumps(tmp, LABEL());
5761 if (framesize < 0)
5762 {
5763 /* We know that STR_PTR was stored on the top of the stack. */
5764 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5765 /* Keep the STR_PTR on the top of the stack. */
5766 if (bra == OP_BRAZERO)
5767 {
5768 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5769 if (extrasize == 2)
5770 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5771 }
5772 else if (bra == OP_BRAMINZERO)
5773 {
5774 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5775 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5776 }
5777 }
5778 else
5779 {
5780 if (bra == OP_BRA)
5781 {
5782 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5783 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5784 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5785 }
5786 else
5787 {
5788 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5789 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5790 if (extrasize == 2)
5791 {
5792 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5793 if (bra == OP_BRAMINZERO)
5794 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5795 }
5796 else
5797 {
5798 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5799 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5800 }
5801 }
5802 }
5803
5804 if (bra == OP_BRAZERO)
5805 {
5806 backtrack->matchingpath = LABEL();
5807 SET_LABEL(jump, backtrack->matchingpath);
5808 }
5809 else if (bra == OP_BRAMINZERO)
5810 {
/* backtrack->matchingpath is used here as an already existing label; this
   function does not set it for the BRAMINZERO form. */
5811 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5812 JUMPHERE(brajump);
5813 if (framesize >= 0)
5814 {
5815 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5816 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5817 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5818 }
5819 set_jumps(backtrack->common.topbacktracks, LABEL());
5820 }
5821 }
5822 else
5823 {
5824 /* AssertNot is successful. */
5825 if (framesize < 0)
5826 {
5827 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5828 if (bra != OP_BRA)
5829 {
5830 if (extrasize == 2)
5831 free_stack(common, 1);
5832 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5833 }
5834 else
5835 free_stack(common, extrasize);
5836 }
5837 else
5838 {
5839 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5840 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5841 /* The topmost item should be 0. */
5842 if (bra != OP_BRA)
5843 {
5844 free_stack(common, framesize + extrasize - 1);
5845 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5846 }
5847 else
5848 free_stack(common, framesize + extrasize);
5849 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5850 }
5851
5852 if (bra == OP_BRAZERO)
5853 backtrack->matchingpath = LABEL();
5854 else if (bra == OP_BRAMINZERO)
5855 {
5856 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5857 JUMPHERE(brajump);
5858 }
5859
/* With a BRAZERO / BRAMINZERO prefix the failure jumps are bound right here
   and the list is cleared. */
5860 if (bra != OP_BRA)
5861 {
5862 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5863 set_jumps(backtrack->common.topbacktracks, LABEL());
5864 backtrack->common.topbacktracks = NULL;
5865 }
5866 }
5867
/* Restore the state saved at function entry. */
5868 common->local_exit = save_local_exit;
5869 common->then_trap = save_then_trap;
5870 common->quit_label = save_quit_label;
5871 common->accept_label = save_accept_label;
5872 common->quit = save_quit;
5873 common->accept = save_accept;
5874 return cc + 1 + LINK_SIZE;
5875 }
5876
5877 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5878 {
5879 int condition = FALSE;
5880 pcre_uchar *slotA = name_table;
5881 pcre_uchar *slotB;
5882 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5883 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5884 sljit_sw no_capture;
5885 int i;
5886
5887 locals += refno & 0xff;
5888 refno >>= 8;
5889 no_capture = locals[1];
5890
5891 for (i = 0; i < name_count; i++)
5892 {
5893 if (GET2(slotA, 0) == refno) break;
5894 slotA += name_entry_size;
5895 }
5896
5897 if (i < name_count)
5898 {
5899 /* Found a name for the number - there can be only one; duplicate names
5900 for different numbers are allowed, but not vice versa. First scan down
5901 for duplicates. */
5902
5903 slotB = slotA;
5904 while (slotB > name_table)
5905 {
5906 slotB -= name_entry_size;
5907 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5908 {
5909 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5910 if (condition) break;
5911 }
5912 else break;
5913 }
5914
5915 /* Scan up for duplicates */
5916 if (!condition)
5917 {
5918 slotB = slotA;
5919 for (i++; i < name_count; i++)
5920 {
5921 slotB += name_entry_size;
5922 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5923 {
5924 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5925 if (condition) break;
5926 }
5927 else break;
5928 }
5929 }
5930 }
5931 return condition;
5932 }
5933
5934 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5935 {
5936 int condition = FALSE;
5937 pcre_uchar *slotA = name_table;
5938 pcre_uchar *slotB;
5939 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5940 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5941 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5942 sljit_uw i;
5943
5944 for (i = 0; i < name_count; i++)
5945 {
5946 if (GET2(slotA, 0) == recno) break;
5947 slotA += name_entry_size;
5948 }
5949
5950 if (i < name_count)
5951 {
5952 /* Found a name for the number - there can be only one; duplicate
5953 names for different numbers are allowed, but not vice versa. First
5954 scan down for duplicates. */
5955
5956 slotB = slotA;
5957 while (slotB > name_table)
5958 {
5959 slotB -= name_entry_size;
5960 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5961 {
5962 condition = GET2(slotB, 0) == group_num;
5963 if (condition) break;
5964 }
5965 else break;
5966 }
5967
5968 /* Scan up for duplicates */
5969 if (!condition)
5970 {
5971 slotB = slotA;
5972 for (i++; i < name_count; i++)
5973 {
5974 slotB += name_entry_size;
5975 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5976 {
5977 condition = GET2(slotB, 0) == group_num;
5978 if (condition) break;
5979 }
5980 else break;
5981 }
5982 }
5983 }
5984 return condition;
5985 }
5986
5987 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
5988 {
5989 DEFINE_COMPILER;
5990 int stacksize;
5991
5992 if (framesize < 0)
5993 {
5994 if (framesize == no_frame)
5995 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5996 else
5997 {
5998 stacksize = needs_control_head ? 1 : 0;
5999 if (ket != OP_KET || has_alternatives)
6000 stacksize++;
6001 free_stack(common, stacksize);
6002 }
6003
6004 if (needs_control_head)
6005 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6006
6007 /* TMP2 which is set here used by OP_KETRMAX below. */
6008 if (ket == OP_KETRMAX)
6009 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6010 else if (ket == OP_KETRMIN)
6011 {
6012 /* Move the STR_PTR to the private_data_ptr. */
6013 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6014 }
6015 }
6016 else
6017 {
6018 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6019 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6020 if (needs_control_head)
6021 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6022
6023 if (ket == OP_KETRMAX)
6024 {
6025 /* TMP2 which is set here used by OP_KETRMAX below. */
6026 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6027 }
6028 }
6029 if (needs_control_head)
6030 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6031 }
6032
6033 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6034 {
6035 DEFINE_COMPILER;
6036
6037 if (common->capture_last_ptr != 0)
6038 {
6039 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6040 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6041 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6042 stacksize++;
6043 }
6044 if (common->optimized_cbracket[offset >> 1] == 0)
6045 {
6046 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6047 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6048 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6049 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6050 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6052 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6053 stacksize += 2;
6054 }
6055 return stacksize;
6056 }
6057
6058 /*
6059 Handling bracketed expressions is probably the most complex part.
6060
6061 Stack layout naming characters:
6062 S - Push the current STR_PTR
6063 0 - Push a 0 (NULL)
6064 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6065 before the next alternative. Not pushed if there are no alternatives.
6066 M - Any values pushed by the current alternative. Can be empty, or anything.
6067 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6068 L - Push the previous local (pointed by localptr) to the stack
6069 () - optional values stored on the stack
6070 ()* - optional, can be stored multiple times
6071
6072 The following list shows the regular expression templates, their PCRE byte codes
6073 and stack layout supported by pcre-sljit.
6074
6075 (?:) OP_BRA | OP_KET A M
6076 () OP_CBRA | OP_KET C M
6077 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6078 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6079 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6080 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6081 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6082 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6083 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6084 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6085 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6086 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6087 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6088 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6089 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6090 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6091 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6092 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6093 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6094 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6095 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6096 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6097
6098
6099 Stack layout naming characters:
6100 A - Push the alternative index (starting from 0) on the stack.
6101 Not pushed if there are no alternatives.
6102 M - Any values pushed by the current alternative. Can be empty, or anything.
6103
6104 The next list shows the possible content of a bracket:
6105 (|) OP_*BRA | OP_ALT ... M A
6106 (?()|) OP_*COND | OP_ALT M A
6107 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6108 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6109 Or nothing, if trace is unnecessary
6110 */
6111
/* Generates the matching path of a bracketed group and creates the
bracket_backtrack record which belongs to it.

The group may be preceded by OP_BRAZERO or OP_BRAMINZERO. The opcodes which
get special treatment below are OP_CBRA, OP_SCBRA, OP_SBRA, OP_ONCE,
OP_ONCE_NC, OP_COND and OP_SCOND; the group is closed by OP_KET, OP_KETRMAX
or OP_KETRMIN. Only the first alternative is compiled by this function, the
other alternatives are just skipped at the end.

Arguments:
  common  compiler state
  cc      the first opcode of the group (OP_BRAZERO / OP_BRAMINZERO or the
          opening bracket itself)
  parent  backtrack record of the enclosing construct; it is read as a
          braminzero_backtrack when the group starts with OP_BRAMINZERO

Returns:  pointer to the byte code after the closing KET, or NULL when the
          sljit compiler reports an error. For a conditional group whose
          condition is OP_DEF nothing is generated and bracketend(cc) is
          returned. */
6112 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6113 {
6114 DEFINE_COMPILER;
6115 backtrack_common *backtrack;
6116 pcre_uchar opcode;
6117 int private_data_ptr = 0;
6118 int offset = 0;
6119 int stacksize;
6120 pcre_uchar *ccbegin;
6121 pcre_uchar *matchingpath;
6122 pcre_uchar bra = OP_BRA;
6123 pcre_uchar ket;
6124 assert_backtrack *assert;
6125 BOOL has_alternatives;
6126 BOOL needs_control_head = FALSE;
6127 struct sljit_jump *jump;
6128 struct sljit_jump *skip;
6129 struct sljit_label *rmaxlabel = NULL;
6130 struct sljit_jump *braminzerojump = NULL;
6131
/* Presumably allocates the bracket_backtrack record and sets `backtrack';
TODO confirm against the macro definition. */
6132 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6133
6134 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6135 {
6136 bra = *cc;
6137 cc++;
/* NOTE(review): redundant, opcode is assigned again unconditionally right
after this block. */
6138 opcode = *cc;
6139 }
6140
6141 opcode = *cc;
6142 ccbegin = cc;
6143 matchingpath = ccbegin + 1 + LINK_SIZE;
6144
6145 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6146 {
6147 /* Drop this bracket_backtrack. */
6148 parent->top = backtrack->prev;
6149 return bracketend(cc);
6150 }
6151
6152 ket = *(bracketend(cc) - 1 - LINK_SIZE);
6153 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6154 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Follow the link of the opening bracket. The tests below expect OP_ALT or
one of the KET opcodes at the new position. */
6155 cc += GET(cc, 1);
6156
6157 has_alternatives = *cc == OP_ALT;
6158 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6159 {
6160 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
6161 if (*matchingpath == OP_NRREF)
6162 {
/* stacksize is only a scratch variable here: it holds the number stored in
the condition. */
6163 stacksize = GET2(matchingpath, 1);
6164 if (common->currententry == NULL || stacksize == RREF_ANY)
6165 has_alternatives = FALSE;
6166 else if (common->currententry->start == 0)
6167 has_alternatives = stacksize != 0;
6168 else
6169 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6170 }
6171 }
6172
/* An OP_COND closed by a repeating KET is compiled as OP_SCOND, and
OP_ONCE_NC is compiled as OP_ONCE. */
6173 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6174 opcode = OP_SCOND;
6175 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6176 opcode = OP_ONCE;
6177
6178 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6179 {
6180 /* Capturing brackets has a pre-allocated space. */
6181 offset = GET2(ccbegin, 1 + LINK_SIZE);
6182 if (common->optimized_cbracket[offset] == 0)
6183 {
6184 private_data_ptr = OVECTOR_PRIV(offset);
6185 offset <<= 1;
6186 }
6187 else
6188 {
6189 offset <<= 1;
6190 private_data_ptr = OVECTOR(offset);
6191 }
6192 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6193 matchingpath += IMM2_SIZE;
6194 }
6195 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6196 {
6197 /* Other brackets simply allocate the next entry. */
6198 private_data_ptr = PRIVATE_DATA(ccbegin);
6199 SLJIT_ASSERT(private_data_ptr != 0);
6200 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6201 if (opcode == OP_ONCE)
6202 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6203 }
6204
6205 /* Instructions before the first alternative. */
/* The number of needed stack slots is counted first, they are allocated in
one step, and they are filled afterwards in the same order. */
6206 stacksize = 0;
6207 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6208 stacksize++;
6209 if (bra == OP_BRAZERO)
6210 stacksize++;
6211
6212 if (stacksize > 0)
6213 allocate_stack(common, stacksize);
6214
6215 stacksize = 0;
6216 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6217 {
6218 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6219 stacksize++;
6220 }
6221
6222 if (bra == OP_BRAZERO)
6223 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6224
6225 if (bra == OP_BRAMINZERO)
6226 {
6227 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6228 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6229 if (ket != OP_KETRMIN)
6230 {
6231 free_stack(common, 1);
6232 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6233 }
6234 else
6235 {
6236 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6237 {
6238 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6239 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6240 /* Nothing stored during the first run. */
6241 skip = JUMP(SLJIT_JUMP);
6242 JUMPHERE(jump);
6243 /* Checking zero-length iteration. */
6244 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6245 {
6246 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6247 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6248 }
6249 else
6250 {
6251 /* Except when the whole stack frame must be saved. */
6252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6253 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6254 }
6255 JUMPHERE(skip);
6256 }
6257 else
6258 {
6259 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6260 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6261 JUMPHERE(jump);
6262 }
6263 }
6264 }
6265
6266 if (ket == OP_KETRMIN)
6267 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6268
6269 if (ket == OP_KETRMAX)
6270 {
6271 rmaxlabel = LABEL();
6272 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
6273 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
6274 }
6275
6276 /* Handling capturing brackets and alternatives. */
6277 if (opcode == OP_ONCE)
6278 {
6279 stacksize = 0;
6280 if (needs_control_head)
6281 {
/* The control head is loaded into TMP2 now and it is written to the stack
after the stack space has been allocated. */
6282 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6283 stacksize++;
6284 }
6285
6286 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6287 {
6288 /* Neither capturing brackets nor recursions are found in the block. */
6289 if (ket == OP_KETRMIN)
6290 {
6291 stacksize += 2;
6292 if (!needs_control_head)
6293 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6294 }
6295 else
6296 {
6297 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6298 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6299 if (ket == OP_KETRMAX || has_alternatives)
6300 stacksize++;
6301 }
6302
6303 if (stacksize > 0)
6304 allocate_stack(common, stacksize);
6305
6306 stacksize = 0;
6307 if (needs_control_head)
6308 {
6309 stacksize++;
6310 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6311 }
6312
6313 if (ket == OP_KETRMIN)
6314 {
6315 if (needs_control_head)
6316 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6317 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6318 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6319 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6320 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6321 }
6322 else if (ket == OP_KETRMAX || has_alternatives)
6323 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6324 }
6325 else
6326 {
6327 if (ket != OP_KET || has_alternatives)
6328 stacksize++;
6329
6330 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6331 allocate_stack(common, stacksize);
6332
6333 if (needs_control_head)
6334 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6335
6336 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6337 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6338
6339 stacksize = needs_control_head ? 1 : 0;
6340 if (ket != OP_KET || has_alternatives)
6341 {
6342 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6343 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6344 stacksize++;
6345 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6346 }
6347 else
6348 {
6349 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6350 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6351 }
6352 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6353 }
6354 }
6355 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6356 {
6357 /* Saving the previous values. */
6358 if (common->optimized_cbracket[offset >> 1] != 0)
6359 {
6360 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6361 allocate_stack(common, 2);
6362 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6363 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6364 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6365 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6366 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6367 }
6368 else
6369 {
6370 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6371 allocate_stack(common, 1);
6372 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6373 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6374 }
6375 }
6376 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6377 {
6378 /* Saving the previous value. */
6379 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6380 allocate_stack(common, 1);
6381 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6382 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6383 }
6384 else if (has_alternatives)
6385 {
6386 /* Pushing the starting string pointer. */
6387 allocate_stack(common, 1);
6388 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6389 }
6390
6391 /* Generating code for the first alternative. */
6392 if (opcode == OP_COND || opcode == OP_SCOND)
6393 {
6394 if (*matchingpath == OP_CREF)
6395 {
6396 SLJIT_ASSERT(has_alternatives);
/* The condition fails when the start word of the referenced group is equal
to OVECTOR(1). */
6397 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6398 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6399 matchingpath += 1 + IMM2_SIZE;
6400 }
6401 else if (*matchingpath == OP_NCREF)
6402 {
6403 SLJIT_ASSERT(has_alternatives);
/* stacksize is a scratch variable again: it holds the referenced group
number. do_searchovector is called only when the inline comparison finds
the start word of the group equal to OVECTOR(1). */
6404 stacksize = GET2(matchingpath, 1);
6405 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6406
/* STACK_TOP is saved in POSSESSIVE1 before the call and reloaded after it. */
6407 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6408 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6409 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6410 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6411 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6412 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6413 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6414 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6415 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6416
6417 JUMPHERE(jump);
6418 matchingpath += 1 + IMM2_SIZE;
6419 }
6420 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6421 {
6422 /* Never has other case. */
6423 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6424
/* stacksize is turned into a flag here: non-zero means that the condition
is known to be true at compile time. */
6425 stacksize = GET2(matchingpath, 1);
6426 if (common->currententry == NULL)
6427 stacksize = 0;
6428 else if (stacksize == RREF_ANY)
6429 stacksize = 1;
6430 else if (common->currententry->start == 0)
6431 stacksize = stacksize == 0;
6432 else
6433 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6434
6435 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6436 {
6437 SLJIT_ASSERT(!has_alternatives);
6438 if (stacksize != 0)
6439 matchingpath += 1 + IMM2_SIZE;
6440 else
6441 {
/* The condition is false: the code of the second alternative is compiled if
there is one, otherwise the range passed to compile_matchingpath is empty. */
6442 if (*cc == OP_ALT)
6443 {
6444 matchingpath = cc + 1 + LINK_SIZE;
6445 cc += GET(cc, 1);
6446 }
6447 else
6448 matchingpath = cc;
6449 }
6450 }
6451 else
6452 {
6453 SLJIT_ASSERT(has_alternatives);
6454
6455 stacksize = GET2(matchingpath, 1);
6456 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6457 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6458 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6459 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6460 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6461 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6462 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6463 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6464 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6465 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6466 matchingpath += 1 + IMM2_SIZE;
6467 }
6468 }
6469 else
6470 {
6471 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6472 /* Similar code as PUSH_BACKTRACK macro. */
6473 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6474 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6475 return NULL;
6476 memset(assert, 0, sizeof(assert_backtrack));
6477 assert->common.cc = matchingpath;
6478 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6479 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6480 }
6481 }
6482
6483 compile_matchingpath(common, matchingpath, cc, backtrack);
6484 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6485 return NULL;
6486
6487 if (opcode == OP_ONCE)
6488 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6489
/* The slots pushed after the body are counted first, allocated in one step,
and filled afterwards in the same order. */
6490 stacksize = 0;
6491 if (ket != OP_KET || bra != OP_BRA)
6492 stacksize++;
6493 if (offset != 0)
6494 {
6495 if (common->capture_last_ptr != 0)
6496 stacksize++;
6497 if (common->optimized_cbracket[offset >> 1] == 0)
6498 stacksize += 2;
6499 }
6500 if (has_alternatives && opcode != OP_ONCE)
6501 stacksize++;
6502
6503 if (stacksize > 0)
6504 allocate_stack(common, stacksize);
6505
6506 stacksize = 0;
6507 if (ket != OP_KET || bra != OP_BRA)
6508 {
6509 if (ket != OP_KET)
6510 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6511 else
6512 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6513 stacksize++;
6514 }
6515
6516 if (offset != 0)
6517 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6518
6519 if (has_alternatives)
6520 {
6521 if (opcode != OP_ONCE)
6522 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6523 if (ket != OP_KETRMAX)
6524 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6525 }
6526
6527 /* Must be after the matchingpath label. */
6528 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6529 {
6530 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6531 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6532 }
6533
6534 if (ket == OP_KETRMAX)
6535 {
6536 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6537 {
6538 if (has_alternatives)
6539 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6540 /* Checking zero-length iteration. */
6541 if (opcode != OP_ONCE)
6542 {
6543 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6544 /* Drop STR_PTR for greedy plus quantifier. */
6545 if (bra != OP_BRAZERO)
6546 free_stack(common, 1);
6547 }
6548 else
6549 /* TMP2 must contain the starting STR_PTR. */
6550 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6551 }
6552 else
6553 JUMPTO(SLJIT_JUMP, rmaxlabel);
6554 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6555 }
6556
6557 if (bra == OP_BRAZERO)
6558 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6559
6560 if (bra == OP_BRAMINZERO)
6561 {
6562 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
/* NOTE(review): parent is cast to braminzero_backtrack without a check;
presumably the caller always passes such a record for OP_BRAMINZERO -
confirm against the caller. */
6563 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6564 if (braminzerojump != NULL)
6565 {
6566 JUMPHERE(braminzerojump);
6567 /* We need to release the end pointer to perform the
6568 backtrack for the zero-length iteration. When
6569 framesize is < 0, OP_ONCE will do the release itself. */
6570 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6571 {
6572 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6573 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6574 }
6575 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6576 free_stack(common, 1);
6577 }
6578 /* Continue to the normal backtrack. */
6579 }
6580
6581 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6582 decrease_call_count(common);
6583
6584 /* Skip the other alternatives. */
6585 while (*cc == OP_ALT)
6586 cc += GET(cc, 1);
6587 cc += 1 + LINK_SIZE;
6588
6589 /* Temporarily encoding the needs_control_head in framesize. */
/* The frame size is shifted left by one bit and the lowest bit becomes the
needs_control_head flag. */
6590 if (opcode == OP_ONCE)
6591 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6592 return cc;
6593 }
6594
6595 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6596 {
6597 DEFINE_COMPILER;
6598 backtrack_common *backtrack;
6599 pcre_uchar opcode;
6600 int private_data_ptr;
6601 int cbraprivptr = 0;
6602 BOOL needs_control_head;
6603 int framesize;
6604 int stacksize;
6605 int offset = 0;
6606 BOOL zero = FALSE;
6607 pcre_uchar *ccbegin = NULL;
6608 int stack; /* Also contains the offset of control head. */
6609 struct sljit_label *loop = NULL;
6610 struct jump_list *emptymatch = NULL;
6611
6612 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6613 if (*cc == OP_BRAPOSZERO)
6614 {
6615 zero = TRUE;
6616 cc++;
6617 }
6618
6619 opcode = *cc;
6620 private_data_ptr = PRIVATE_DATA(cc);
6621 SLJIT_ASSERT(private_data_ptr != 0);
6622 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6623 switch(opcode)
6624 {
6625 case OP_BRAPOS:
6626 case OP_SBRAPOS:
6627 ccbegin = cc + 1 + LINK_SIZE;
6628 break;
6629
6630 case OP_CBRAPOS:
6631 case OP_SCBRAPOS:
6632 offset = GET2(cc, 1 + LINK_SIZE);
6633 /* This case cannot be optimized in the same was as
6634 normal capturing brackets. */
6635 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6636 cbraprivptr = OVECTOR_PRIV(offset);
6637 offset <<= 1;
6638 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6639 break;
6640
6641 default:
6642 SLJIT_ASSERT_STOP();
6643 break;
6644 }
6645
6646 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6647 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6648 if (framesize < 0)
6649 {
6650 if (offset != 0)
6651 {
6652 stacksize = 2;
6653 if (common->capture_last_ptr != 0)
6654 stacksize++;
6655 }
6656 else
6657 stacksize = 1;
6658
6659 if (needs_control_head)
6660 stacksize++;
6661 if (!zero)
6662 stacksize++;
6663
6664 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6665 allocate_stack(common, stacksize);
6666 if (framesize == no_frame)
6667 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6668
6669 stack = 0;
6670 if (offset != 0)
6671 {
6672 stack = 2;
6673 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6674 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6675 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6676 if (common->capture_last_ptr != 0)
6677 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6678 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6679 if (needs_control_head)
6680 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6681 if (common->capture_last_ptr != 0)
6682 {
6683 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6684 stack = 3;
6685 }
6686 }
6687 else
6688 {
6689 if (needs_control_head)
6690 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6691 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6692 stack = 1;
6693 }
6694
6695 if (needs_control_head)
6696 stack++;
6697 if (!zero)
6698 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
6699 if (needs_control_head)
6700 {
6701 stack--;
6702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6703 }
6704 }
6705 else
6706 {
6707 stacksize = framesize + 1;
6708 if (!zero)
6709 stacksize++;
6710 if (needs_control_head)
6711 stacksize++;
6712 if (offset == 0)
6713 stacksize++;
6714 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6715
6716 allocate_stack(common, stacksize);
6717 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6718 if (needs_control_head)
6719 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6720 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6721
6722 stack = 0;
6723 if (!zero)
6724 {
6725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6726 stack = 1;
6727 }
6728 if (needs_control_head)
6729 {
6730 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6731 stack++;
6732 }
6733 if (offset == 0)
6734 {
6735 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6736 stack++;
6737 }
6738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6739 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
6740 stack -= 1 + (offset == 0);
6741 }
6742
6743 if (offset != 0)
6744 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6745
6746 loop = LABEL();
6747 while (*cc != OP_KETRPOS)
6748 {
6749 backtrack->top = N