/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1279 - (show annotations)
Tue Mar 12 17:27:34 2013 UTC (6 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 302847 byte(s)
Experimental support of (*THEN) backtracking verb in the JIT compiler.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block passed to the compiled matcher: a jit_function takes a
single pointer to this structure. The members are deliberately grouped with
all pointers first and the integer / byte sized members after them.
NOTE(review): the member meanings are not visible in this part of the file;
the names suggest the subject string bounds, the output vector and the
match-time option flags - confirm against the code that fills the block. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 int real_offset_count;
172 int offset_count;
173 int call_limit;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Holds the generated code of one pattern. executable_funcs and
executable_sizes are parallel arrays with one slot per JIT compile mode
(JIT_NUMBER_OF_COMPILE_MODES entries each).
NOTE(review): callback / userdata look like the user supplied stack
callback and its argument, and top_bracket like the highest capturing
bracket number - confirm where the structure is filled in. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
186 } executable_functions;
187
/* Node of a singly linked list of sljit jumps (chained through next). */
188 typedef struct jump_list {
189 struct sljit_jump *jump;
190 struct jump_list *next;
191 } jump_list;
192
/* Node of a singly linked list of stubs (chained through next). Each node
pairs a jump (start) with a label (quit).
NOTE(review): presumably start enters the stub and quit is where it
resumes - confirm in the code that emits the stubs. */
193 typedef struct stub_list {
194 struct sljit_jump *start;
195 struct sljit_label *quit;
196 struct stub_list *next;
197 } stub_list;
198
/* Flags describing a byte code position. The values are distinct powers of
two, so they can be combined with bitwise OR.
NOTE(review): the consumers of these flags are outside this part of the
file; the names suggest "capturing bracket is optimized" and "position is
the start of a THEN target" - confirm.
No comma follows the last enumerator: a trailing comma is only accepted
from C99 onwards and is rejected by strict C90 compilers. */
enum bytecode_flag_types {
  flag_optimized_cbracket = 1,
  flag_then_start = 2
};
203
/* Negative sentinel values used in place of a frame size (get_framesize
returns one of these when no frame is needed). */
204 enum frame_types {
205 no_frame = -1,
206 no_stack = -2
207 };
208
/* Kinds of control entries; the names follow the backtracking control
verbs (*COMMIT), (*PRUNE), (*SKIP), (*SKIP:arg), (*MARK) and the trap used
for (*THEN).
NOTE(review): presumably these tag the entries of the control head chain -
confirm at the point where they are pushed. */
209 enum control_types {
210 type_commit = 0,
211 type_prune = 1,
212 type_skip = 2,
213 type_skip_arg = 3,
214 type_mark = 4,
215 type_then_trap = 5
216 };
217
/* Signature of a generated matcher: it receives a pointer to a
jit_arguments block and returns an int, using the SLJIT_CALL calling
convention. */
218 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
219
220 /* The following structure is the key data type for the recursive
221 code generator. It is allocated by compile_matchingpath, and contains
222 the arguments for compile_backtrackingpath. Must be the first member
223 of its descendants. */
224 typedef struct backtrack_common {
225 /* Concatenation stack. */
226 struct backtrack_common *prev;
227 jump_list *nextbacktracks;
228 /* Internal stack (for component operators). */
229 struct backtrack_common *top;
230 jump_list *topbacktracks;
231 /* Opcode pointer. */
232 pcre_uchar *cc;
233 } backtrack_common;
234
/* Backtrack record for assertions. Starts with a backtrack_common so it can
be handled through a backtrack_common pointer. */
235 typedef struct assert_backtrack {
236 backtrack_common common;
237 jump_list *condfailed;
238 /* Less than 0 if a frame is not needed. */
239 int framesize;
240 /* Points to our private memory word on the stack. */
241 int private_data_ptr;
242 /* For iterators. */
243 struct sljit_label *matchingpath;
244 } assert_backtrack;
245
/* Backtrack record for bracket groups. Starts with a backtrack_common so it
can be handled through a backtrack_common pointer. Only one member of the
union u is meaningful at a time; which one depends on the bracket opcode. */
246 typedef struct bracket_backtrack {
247 backtrack_common common;
248 /* Where to continue if an alternative is successfully matched. */
249 struct sljit_label *alternative_matchingpath;
250 /* For rmin and rmax iterators. */
251 struct sljit_label *recursive_matchingpath;
252 /* For greedy ? operator. */
253 struct sljit_label *zero_matchingpath;
254 /* Contains the branches of a failed condition. */
255 union {
256 /* Both for OP_COND, OP_SCOND. */
257 jump_list *condfailed;
258 assert_backtrack *assert;
259 /* For OP_ONCE. Less than 0 if not needed. */
260 int framesize;
261 } u;
262 /* Points to our private memory word on the stack. */
263 int private_data_ptr;
264 } bracket_backtrack;
265
/* Backtrack record for the *POS (possessive) bracket opcodes. Starts with a
backtrack_common so it can be handled through a backtrack_common pointer. */
266 typedef struct bracketpos_backtrack {
267 backtrack_common common;
268 /* Points to our private memory word on the stack. */
269 int private_data_ptr;
270 /* Reverting stack is needed. */
271 int framesize;
272 /* Allocated stack size. */
273 int stacksize;
274 } bracketpos_backtrack;
275
/* Backtrack record carrying a single matching path label; the name ties it
to OP_BRAMINZERO. Starts with a backtrack_common so it can be handled
through a backtrack_common pointer. */
276 typedef struct braminzero_backtrack {
277 backtrack_common common;
278 struct sljit_label *matchingpath;
279 } braminzero_backtrack;
280
/* Backtrack record for iterators. Starts with a backtrack_common so it can
be handled through a backtrack_common pointer. */
281 typedef struct iterator_backtrack {
282 backtrack_common common;
283 /* Next iteration. */
284 struct sljit_label *matchingpath;
285 } iterator_backtrack;
286
/* Describes one recursion target. Entries form a singly linked list
(chained through next). */
287 typedef struct recurse_entry {
288 struct recurse_entry *next;
289 /* Contains the function entry. */
290 struct sljit_label *entry;
291 /* Collects the calls until the function is not created. */
292 jump_list *calls;
293 /* Points to the starting opcode. */
294 int start;
295 } recurse_entry;
296
/* Backtrack record for recursion. Starts with a backtrack_common so it can
be handled through a backtrack_common pointer.
NOTE(review): inlined_pattern presumably tells whether the recursed
pattern was generated in place instead of being called - confirm. */
297 typedef struct recurse_backtrack {
298 backtrack_common common;
299 BOOL inlined_pattern;
300 } recurse_backtrack;
301
/* Backtrack record for a (*THEN) trap. Starts with a backtrack_common so it
can be handled through a backtrack_common pointer. The then_trap member
links to another record of the same type.
NOTE(review): presumably the link is the previously active trap (the
enclosing one) - confirm where compiler_common.then_trap is updated. */
302 typedef struct then_trap_backtrack {
303 backtrack_common common;
304 struct then_trap_backtrack *then_trap;
305 jump_list *quit;
306 int framesize;
307 } then_trap_backtrack;
308
/* Sizes the digits[] member of compiler_common (2 + MAX_RANGE_SIZE ints). */
309 #define MAX_RANGE_SIZE 6
310
/* State shared by all code generator functions while one pattern is being
compiled. The int members whose name ends in _ptr (and hit_start,
first_line_end) hold offsets rather than C pointers; several of them are
handed out from ovector_start on first use by get_private_data_length. */
311 typedef struct compiler_common {
312 /* The sljit generic compiler. */
313 struct sljit_compiler *compiler;
314 /* First byte code. */
315 pcre_uchar *start;
316 /* Maps private data offset to each opcode. */
317 int *private_data_ptrs;
318 /* Tells whether the capturing bracket is optimized. */
319 pcre_uint8 *optimized_cbracket;
320 /* Tells whether the starting offset is a target of then. */
321 pcre_uint8 *then_offsets;
322 /* Current position where a THEN must jump. */
323 then_trap_backtrack *then_trap;
324 /* Starting offset of private data for capturing brackets. */
325 int cbra_ptr;
326 /* Output vector starting point. Must be divisible by 2. */
327 int ovector_start;
328 /* Last known position of the requested byte. */
329 int req_char_ptr;
330 /* Head of the last recursion. */
331 int recursive_head_ptr;
332 /* First inspected character for partial matching. */
333 int start_used_ptr;
334 /* Starting pointer for partial soft matches. */
335 int hit_start;
336 /* End pointer of the first line. */
337 int first_line_end;
338 /* Points to the marked string. */
339 int mark_ptr;
340 /* Recursive control verb management chain. */
341 int control_head_ptr;
342 /* Points to the last matched capture block index. */
343 int capture_last_ptr;
344 /* Points to the starting position of the current match. */
345 int start_ptr;
346
347 /* Flipped and lower case tables. */
348 const pcre_uint8 *fcc;
349 sljit_sw lcc;
350 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351 int mode;
352 /* \K is found in the pattern. */
353 BOOL has_set_som;
354 /* (*SKIP:arg) is found in the pattern. */
355 BOOL has_skip_arg;
356 /* (*THEN) is found in the pattern. */
357 BOOL has_then;
358 /* Needs to know the start position anytime. */
359 BOOL needs_start_ptr;
360 /* Currently in recurse or assert. */
361 BOOL local_exit;
362 /* Newline control. */
363 int nltype;
364 int newline;
365 int bsr_nltype;
366 /* Dollar endonly. */
367 int endonly;
368 /* Tables. */
369 sljit_sw ctypes;
370 int digits[2 + MAX_RANGE_SIZE];
371 /* Named capturing brackets. */
372 sljit_uw name_table;
373 sljit_sw name_count;
374 sljit_sw name_entry_size;
375
376 /* Labels and jump lists. */
377 struct sljit_label *partialmatchlabel;
378 struct sljit_label *quit_label;
379 struct sljit_label *forced_quit_label;
380 struct sljit_label *accept_label;
381 stub_list *stubs;
382 recurse_entry *entries;
383 recurse_entry *currententry;
384 jump_list *partialmatch;
385 jump_list *quit;
386 jump_list *forced_quit;
387 jump_list *accept;
388 jump_list *calllimit;
389 jump_list *stackalloc;
390 jump_list *revertframes;
391 jump_list *wordboundary;
392 jump_list *anynewline;
393 jump_list *hspace;
394 jump_list *vspace;
395 jump_list *casefulcmp;
396 jump_list *caselesscmp;
397 jump_list *reset_match;
398 BOOL jscript_compat;
399 #ifdef SUPPORT_UTF
400 BOOL utf;
401 #ifdef SUPPORT_UCP
402 BOOL use_ucp;
403 #endif
404 #ifndef COMPILE_PCRE32
405 jump_list *utfreadchar;
406 #endif
407 #ifdef COMPILE_PCRE8
408 jump_list *utfreadtype8;
409 #endif
410 #endif /* SUPPORT_UTF */
411 #ifdef SUPPORT_UCP
412 jump_list *getucd;
413 #endif
414 } compiler_common;
415
416 /* For byte_sequence_compare. */
417
/* Only length and sourcereg exist on every target. When SLJIT_UNALIGNED is
set, the structure also carries ucharptr and two unions, c and oc, which
overlay the same storage as an int, an unsigned short and an array of code
units (4, 2 or 1 entries in the 8, 16 and 32 bit library respectively).
NOTE(review): c / oc presumably hold the characters to compare and their
other-case variants - confirm in byte_sequence_compare. */
418 typedef struct compare_context {
419 int length;
420 int sourcereg;
421 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
422 int ucharptr;
423 union {
424 sljit_si asint;
425 sljit_uh asushort;
426 #if defined COMPILE_PCRE8
427 sljit_ub asbyte;
428 sljit_ub asuchars[4];
429 #elif defined COMPILE_PCRE16
430 sljit_uh asuchars[2];
431 #elif defined COMPILE_PCRE32
432 sljit_ui asuchars[1];
433 #endif
434 } c;
435 union {
436 sljit_si asint;
437 sljit_uh asushort;
438 #if defined COMPILE_PCRE8
439 sljit_ub asbyte;
440 sljit_ub asuchars[4];
441 #elif defined COMPILE_PCRE16
442 sljit_uh asuchars[2];
443 #elif defined COMPILE_PCRE32
444 sljit_ui asuchars[1];
445 #endif
446 } oc;
447 #endif
448 } compare_context;
449
450 /* Undefine sljit macros. */
/* CMP is defined again below as one of the emitter shortcuts. */
451 #undef CMP
452
453 /* Used for accessing the elements of the stack. */
/* Yields a negative byte offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is
-2 * sizeof(sljit_sw), and so on. */
454 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
455
/* Symbolic names for the sljit registers used by the code generator. */
456 #define TMP1 SLJIT_SCRATCH_REG1
457 #define TMP2 SLJIT_SCRATCH_REG3
458 #define TMP3 SLJIT_TEMPORARY_EREG2
459 #define STR_PTR SLJIT_SAVED_REG1
460 #define STR_END SLJIT_SAVED_REG2
461 #define STACK_TOP SLJIT_SCRATCH_REG2
462 #define STACK_LIMIT SLJIT_SAVED_REG3
463 #define ARGUMENTS SLJIT_SAVED_EREG1
464 #define CALL_COUNT SLJIT_SAVED_EREG2
465 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
466
467 /* Local space layout. */
/* All values below are byte offsets, expressed in units of sizeof(sljit_sw). */
468 /* These two locals can be used by the current opcode. */
469 #define LOCALS0 (0 * sizeof(sljit_sw))
470 #define LOCALS1 (1 * sizeof(sljit_sw))
471 /* Two local variables for possessive quantifiers (char1 cannot use them). */
472 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
473 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
474 /* Max limit of recursions. */
475 #define CALL_LIMIT (4 * sizeof(sljit_sw))
476 /* The output vector is stored on the stack, and contains pointers
477 to characters. The vector data is divided into two groups: the first
478 group contains the start / end character pointers, and the second is
479 the start pointers when the end of the capturing group has not yet been reached. */
/* The macros below expand to expressions on a variable named common, so
they can only be used where a compiler_common pointer of that name is in
scope. PRIVATE_DATA(cc) looks up private_data_ptrs by the distance of cc
from common->start. */
480 #define OVECTOR_START (common->ovector_start)
481 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
482 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))
483 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
484
/* Select the sljit move opcodes that match the code unit width of the
library being built (8, 16 or 32 bit); any other configuration is a
compile time error. */
485 #if defined COMPILE_PCRE8
486 #define MOV_UCHAR SLJIT_MOV_UB
487 #define MOVU_UCHAR SLJIT_MOVU_UB
488 #elif defined COMPILE_PCRE16
489 #define MOV_UCHAR SLJIT_MOV_UH
490 #define MOVU_UCHAR SLJIT_MOVU_UH
491 #elif defined COMPILE_PCRE32
492 #define MOV_UCHAR SLJIT_MOV_UI
493 #define MOVU_UCHAR SLJIT_MOVU_UI
494 #else
495 #error Unsupported compiling mode
496 #endif
497
498 /* Shortcuts. */
/* DEFINE_COMPILER declares a local variable named compiler, taken from
common->compiler. Every other shortcut passes that local to the matching
sljit_* call, so DEFINE_COMPILER has to appear first in any function that
uses them. JUMPTO and CMPTO emit the jump / compare and bind it to an
existing label; JUMPHERE binds an earlier jump to a label emitted at the
current position. */
499 #define DEFINE_COMPILER \
500 struct sljit_compiler *compiler = common->compiler
501 #define OP1(op, dst, dstw, src, srcw) \
502 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
503 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
504 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
505 #define LABEL() \
506 sljit_emit_label(compiler)
507 #define JUMP(type) \
508 sljit_emit_jump(compiler, (type))
509 #define JUMPTO(type, label) \
510 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
511 #define JUMPHERE(jump) \
512 sljit_set_label((jump), sljit_emit_label(compiler))
513 #define SET_LABEL(jump, label) \
514 sljit_set_label((jump), (label))
515 #define CMP(type, src1, src1w, src2, src2w) \
516 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
517 #define CMPTO(type, src1, src1w, src2, src2w, label) \
518 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
519 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
520 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
521 #define GET_LOCAL_BASE(dst, dstw, offset) \
522 sljit_get_local_base(compiler, (dst), (dstw), (offset))
523
524 static pcre_uchar* bracketend(pcre_uchar* cc)
525 {
526 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
527 do cc += GET(cc, 1); while (*cc == OP_ALT);
528 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
529 cc += 1 + LINK_SIZE;
530 return cc;
531 }
532
533 /* Functions which might need modification for every newly supported opcode:
534 next_opcode
535 get_private_data_length
536 set_private_data_ptrs
537 get_framesize
538 init_frame
539 get_private_data_copy_length
540 copy_private_data
541 compile_matchingpath
542 compile_backtrackingpath
543 */
544
/* Returns a pointer to the opcode that follows the one at cc, or NULL when
the opcode at cc is not supported (any opcode not listed below, and
OP_ANYBYTE when common->utf is set). common is only consulted for the utf
flag, and only when SUPPORT_UTF is defined. */
545 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
546 {
547 SLJIT_UNUSED_ARG(common);
548 switch(*cc)
549 {
/* Opcodes whose full length is given by the OP_lengths table. */
550 case OP_SOD:
551 case OP_SOM:
552 case OP_SET_SOM:
553 case OP_NOT_WORD_BOUNDARY:
554 case OP_WORD_BOUNDARY:
555 case OP_NOT_DIGIT:
556 case OP_DIGIT:
557 case OP_NOT_WHITESPACE:
558 case OP_WHITESPACE:
559 case OP_NOT_WORDCHAR:
560 case OP_WORDCHAR:
561 case OP_ANY:
562 case OP_ALLANY:
563 case OP_NOTPROP:
564 case OP_PROP:
565 case OP_ANYNL:
566 case OP_NOT_HSPACE:
567 case OP_HSPACE:
568 case OP_NOT_VSPACE:
569 case OP_VSPACE:
570 case OP_EXTUNI:
571 case OP_EODN:
572 case OP_EOD:
573 case OP_CIRC:
574 case OP_CIRCM:
575 case OP_DOLL:
576 case OP_DOLLM:
577 case OP_CRSTAR:
578 case OP_CRMINSTAR:
579 case OP_CRPLUS:
580 case OP_CRMINPLUS:
581 case OP_CRQUERY:
582 case OP_CRMINQUERY:
583 case OP_CRRANGE:
584 case OP_CRMINRANGE:
585 case OP_CLASS:
586 case OP_NCLASS:
587 case OP_REF:
588 case OP_REFI:
589 case OP_RECURSE:
590 case OP_CALLOUT:
591 case OP_ALT:
592 case OP_KET:
593 case OP_KETRMAX:
594 case OP_KETRMIN:
595 case OP_KETRPOS:
596 case OP_REVERSE:
597 case OP_ASSERT:
598 case OP_ASSERT_NOT:
599 case OP_ASSERTBACK:
600 case OP_ASSERTBACK_NOT:
601 case OP_ONCE:
602 case OP_ONCE_NC:
603 case OP_BRA:
604 case OP_BRAPOS:
605 case OP_CBRA:
606 case OP_CBRAPOS:
607 case OP_COND:
608 case OP_SBRA:
609 case OP_SBRAPOS:
610 case OP_SCBRA:
611 case OP_SCBRAPOS:
612 case OP_SCOND:
613 case OP_CREF:
614 case OP_NCREF:
615 case OP_RREF:
616 case OP_NRREF:
617 case OP_DEF:
618 case OP_BRAZERO:
619 case OP_BRAMINZERO:
620 case OP_BRAPOSZERO:
621 case OP_PRUNE:
622 case OP_SKIP:
623 case OP_THEN:
624 case OP_COMMIT:
625 case OP_FAIL:
626 case OP_ACCEPT:
627 case OP_ASSERT_ACCEPT:
628 case OP_CLOSE:
629 case OP_SKIPZERO:
630 return cc + PRIV(OP_lengths)[*cc];
631
/* Opcodes that end with a character: the table length is used, and in UTF
mode the extra code units of that last character are skipped as well. */
632 case OP_CHAR:
633 case OP_CHARI:
634 case OP_NOT:
635 case OP_NOTI:
636 case OP_STAR:
637 case OP_MINSTAR:
638 case OP_PLUS:
639 case OP_MINPLUS:
640 case OP_QUERY:
641 case OP_MINQUERY:
642 case OP_UPTO:
643 case OP_MINUPTO:
644 case OP_EXACT:
645 case OP_POSSTAR:
646 case OP_POSPLUS:
647 case OP_POSQUERY:
648 case OP_POSUPTO:
649 case OP_STARI:
650 case OP_MINSTARI:
651 case OP_PLUSI:
652 case OP_MINPLUSI:
653 case OP_QUERYI:
654 case OP_MINQUERYI:
655 case OP_UPTOI:
656 case OP_MINUPTOI:
657 case OP_EXACTI:
658 case OP_POSSTARI:
659 case OP_POSPLUSI:
660 case OP_POSQUERYI:
661 case OP_POSUPTOI:
662 case OP_NOTSTAR:
663 case OP_NOTMINSTAR:
664 case OP_NOTPLUS:
665 case OP_NOTMINPLUS:
666 case OP_NOTQUERY:
667 case OP_NOTMINQUERY:
668 case OP_NOTUPTO:
669 case OP_NOTMINUPTO:
670 case OP_NOTEXACT:
671 case OP_NOTPOSSTAR:
672 case OP_NOTPOSPLUS:
673 case OP_NOTPOSQUERY:
674 case OP_NOTPOSUPTO:
675 case OP_NOTSTARI:
676 case OP_NOTMINSTARI:
677 case OP_NOTPLUSI:
678 case OP_NOTMINPLUSI:
679 case OP_NOTQUERYI:
680 case OP_NOTMINQUERYI:
681 case OP_NOTUPTOI:
682 case OP_NOTMINUPTOI:
683 case OP_NOTEXACTI:
684 case OP_NOTPOSSTARI:
685 case OP_NOTPOSPLUSI:
686 case OP_NOTPOSQUERYI:
687 case OP_NOTPOSUPTOI:
688 cc += PRIV(OP_lengths)[*cc];
689 #ifdef SUPPORT_UTF
690 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
691 #endif
692 return cc;
693
694 /* Special cases. */
/* The result stops one unit before the table length.
NOTE(review): presumably so that the character type opcode which ends
these items is visited as an opcode of its own - confirm. */
695 case OP_TYPESTAR:
696 case OP_TYPEMINSTAR:
697 case OP_TYPEPLUS:
698 case OP_TYPEMINPLUS:
699 case OP_TYPEQUERY:
700 case OP_TYPEMINQUERY:
701 case OP_TYPEUPTO:
702 case OP_TYPEMINUPTO:
703 case OP_TYPEEXACT:
704 case OP_TYPEPOSSTAR:
705 case OP_TYPEPOSPLUS:
706 case OP_TYPEPOSQUERY:
707 case OP_TYPEPOSUPTO:
708 return cc + PRIV(OP_lengths)[*cc] - 1;
709
710 case OP_ANYBYTE:
711 #ifdef SUPPORT_UTF
712 if (common->utf) return NULL;
713 #endif
714 return cc + 1;
715
716 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* The total length of an extended class is stored after the opcode. */
717 case OP_XCLASS:
718 return cc + GET(cc, 1);
719 #endif
720
/* Verbs with an argument: cc[1] is added to a fixed part of 1 + 2 units. */
721 case OP_MARK:
722 case OP_PRUNE_ARG:
723 case OP_SKIP_ARG:
724 case OP_THEN_ARG:
725 return cc + 1 + 2 + cc[1];
726
727 default:
728 return NULL;
729 }
730 }
731
/* Groups of case labels shared by get_private_data_length and
set_private_data_ptrs. In those functions the _1 groups are given one word
of private data and the _2A / _2B groups two words; the B groups are the
UPTO forms, which carry an extra IMM2_SIZE count. The TYPE groups are the
character type variants of the same iterators. */
732 #define CASE_ITERATOR_PRIVATE_DATA_1 \
733 case OP_MINSTAR: \
734 case OP_MINPLUS: \
735 case OP_QUERY: \
736 case OP_MINQUERY: \
737 case OP_MINSTARI: \
738 case OP_MINPLUSI: \
739 case OP_QUERYI: \
740 case OP_MINQUERYI: \
741 case OP_NOTMINSTAR: \
742 case OP_NOTMINPLUS: \
743 case OP_NOTQUERY: \
744 case OP_NOTMINQUERY: \
745 case OP_NOTMINSTARI: \
746 case OP_NOTMINPLUSI: \
747 case OP_NOTQUERYI: \
748 case OP_NOTMINQUERYI:
749
750 #define CASE_ITERATOR_PRIVATE_DATA_2A \
751 case OP_STAR: \
752 case OP_PLUS: \
753 case OP_STARI: \
754 case OP_PLUSI: \
755 case OP_NOTSTAR: \
756 case OP_NOTPLUS: \
757 case OP_NOTSTARI: \
758 case OP_NOTPLUSI:
759
760 #define CASE_ITERATOR_PRIVATE_DATA_2B \
761 case OP_UPTO: \
762 case OP_MINUPTO: \
763 case OP_UPTOI: \
764 case OP_MINUPTOI: \
765 case OP_NOTUPTO: \
766 case OP_NOTMINUPTO: \
767 case OP_NOTUPTOI: \
768 case OP_NOTMINUPTOI:
769
770 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
771 case OP_TYPEMINSTAR: \
772 case OP_TYPEMINPLUS: \
773 case OP_TYPEQUERY: \
774 case OP_TYPEMINQUERY:
775
776 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
777 case OP_TYPESTAR: \
778 case OP_TYPEPLUS:
779
780 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
781 case OP_TYPEUPTO: \
782 case OP_TYPEMINUPTO:
783
784 static int get_class_iterator_size(pcre_uchar *cc)
785 {
786 switch(*cc)
787 {
788 case OP_CRSTAR:
789 case OP_CRPLUS:
790 return 2;
791
792 case OP_CRMINSTAR:
793 case OP_CRMINPLUS:
794 case OP_CRQUERY:
795 case OP_CRMINQUERY:
796 return 1;
797
798 case OP_CRRANGE:
799 case OP_CRMINRANGE:
800 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
801 return 0;
802 return 2;
803
804 default:
805 return 0;
806 }
807 }
808
/* Scans the byte code in [cc, ccend) and returns the number of bytes of
private data it requires, or -1 if the pattern cannot be handled
(next_opcode returned NULL, or a conditional group starts with OP_CALLOUT).

Side effects on common while scanning:
 - has_set_som, has_then, has_skip_arg and needs_start_ptr are set to TRUE
   and control_head_ptr to 1 when the corresponding opcodes are seen;
 - optimized_cbracket[n] is cleared for groups reached through OP_REF,
   OP_REFI, OP_CREF, OP_NCREF and for OP_CBRAPOS / OP_SCBRAPOS groups;
 - recursive_head_ptr, capture_last_ptr and mark_ptr receive a slot taken
   from ovector_start (which is advanced) the first time they are needed.

Per opcode, space is the number of sljit_sw words an iterator needs, size
is the distance to advance cc (a negative value means the item ends with a
character, whose UTF extra units are skipped too), and bracketlen is the
length of a bracket header. */
809 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
810 {
811 int private_data_length = 0;
812 pcre_uchar *alternative;
813 pcre_uchar *name;
814 pcre_uchar *end = NULL;
815 int space, size, i;
816 pcre_uint32 bracketlen;
817
818 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
819 while (cc < ccend)
820 {
821 space = 0;
822 size = 0;
823 bracketlen = 0;
824 switch(*cc)
825 {
826 case OP_SET_SOM:
827 common->has_set_som = TRUE;
828 cc += 1;
829 break;
830
831 case OP_REF:
832 case OP_REFI:
833 common->optimized_cbracket[GET2(cc, 1)] = 0;
834 cc += 1 + IMM2_SIZE;
835 break;
836
837 case OP_ASSERT:
838 case OP_ASSERT_NOT:
839 case OP_ASSERTBACK:
840 case OP_ASSERTBACK_NOT:
841 case OP_ONCE:
842 case OP_ONCE_NC:
843 case OP_BRAPOS:
844 case OP_SBRA:
845 case OP_SBRAPOS:
846 private_data_length += sizeof(sljit_sw);
847 bracketlen = 1 + LINK_SIZE;
848 break;
849
850 case OP_CBRAPOS:
851 case OP_SCBRAPOS:
852 private_data_length += sizeof(sljit_sw);
853 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
854 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
855 break;
856
857 case OP_COND:
858 case OP_SCOND:
859 /* Only AUTO_CALLOUT can insert this opcode. We do
860 not intend to support this case. */
861 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
862 return -1;
863
864 if (*cc == OP_COND)
865 {
866 /* Might be a hidden SCOND. */
867 alternative = cc + GET(cc, 1);
868 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
869 private_data_length += sizeof(sljit_sw);
870 }
871 else
872 private_data_length += sizeof(sljit_sw);
873 bracketlen = 1 + LINK_SIZE;
874 break;
875
876 case OP_CREF:
877 i = GET2(cc, 1);
878 common->optimized_cbracket[i] = 0;
879 cc += 1 + IMM2_SIZE;
880 break;
881
882 case OP_NCREF:
/* bracketlen is borrowed as a temporary for the referenced group number;
it is reset to 0 before leaving this case. */
883 bracketlen = GET2(cc, 1);
884 name = (pcre_uchar *)common->name_table;
885 alternative = name;
/* Find the name table entry that belongs to that group number. */
886 for (i = 0; i < common->name_count; i++)
887 {
888 if (GET2(name, 0) == bracketlen) break;
889 name += common->name_entry_size;
890 }
891 SLJIT_ASSERT(i != common->name_count);
892
/* Clear the flag of every group whose name compares equal to it. */
893 for (i = 0; i < common->name_count; i++)
894 {
895 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
896 common->optimized_cbracket[GET2(alternative, 0)] = 0;
897 alternative += common->name_entry_size;
898 }
899 bracketlen = 0;
900 cc += 1 + IMM2_SIZE;
901 break;
902
903 case OP_BRA:
904 bracketlen = 1 + LINK_SIZE;
905 break;
906
907 case OP_CBRA:
908 case OP_SCBRA:
909 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
910 break;
911
912 CASE_ITERATOR_PRIVATE_DATA_1
913 space = 1;
914 size = -2;
915 break;
916
917 CASE_ITERATOR_PRIVATE_DATA_2A
918 space = 2;
919 size = -2;
920 break;
921
922 CASE_ITERATOR_PRIVATE_DATA_2B
923 space = 2;
924 size = -(2 + IMM2_SIZE);
925 break;
926
927 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
928 space = 1;
929 size = 1;
930 break;
931
932 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
/* No private data when the repeated type is OP_ANYNL or OP_EXTUNI. */
933 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
934 space = 2;
935 size = 1;
936 break;
937
938 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
939 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
940 space = 2;
941 size = 1 + IMM2_SIZE;
942 break;
943
944 case OP_CLASS:
945 case OP_NCLASS:
/* size skips the opcode and its 32 byte bitmap; the opcode found there
decides how many words are needed. */
946 size += 1 + 32 / sizeof(pcre_uchar);
947 space = get_class_iterator_size(cc + size);
948 break;
949
950 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
951 case OP_XCLASS:
952 size = GET(cc, 1);
953 space = get_class_iterator_size(cc + size);
954 break;
955 #endif
956
957 case OP_RECURSE:
958 /* Set its value only once. */
959 if (common->recursive_head_ptr == 0)
960 {
961 common->recursive_head_ptr = common->ovector_start;
962 common->ovector_start += sizeof(sljit_sw);
963 }
964 cc += 1 + LINK_SIZE;
965 break;
966
967 case OP_CALLOUT:
968 if (common->capture_last_ptr == 0)
969 {
970 common->capture_last_ptr = common->ovector_start;
971 common->ovector_start += sizeof(sljit_sw);
972 }
973 cc += 2 + 2 * LINK_SIZE;
974 break;
975
976 case OP_THEN_ARG:
977 common->has_then = TRUE;
978 /* Fall through. */
979
980 case OP_PRUNE_ARG:
981 common->needs_start_ptr = TRUE;
982 common->control_head_ptr = 1;
983 /* Fall through. */
984
985 case OP_MARK:
986 if (common->mark_ptr == 0)
987 {
988 common->mark_ptr = common->ovector_start;
989 common->ovector_start += sizeof(sljit_sw);
990 }
991 cc += 1 + 2 + cc[1];
992 break;
993
994 case OP_THEN:
995 common->has_then = TRUE;
996 /* Fall through. */
997
998 case OP_PRUNE:
999 case OP_SKIP:
1000 common->needs_start_ptr = TRUE;
1001 common->control_head_ptr = 1;
1002 cc += 1;
1003 break;
1004
1005 case OP_SKIP_ARG:
1006 common->control_head_ptr = 1;
1007 common->has_skip_arg = TRUE;
1008 cc += 1 + 2 + cc[1];
1009 break;
1010
1011 default:
1012 cc = next_opcode(common, cc);
1013 if (cc == NULL)
1014 return -1;
1015 break;
1016 }
1017
/* Iterator words are only counted when cc is not before end. end stays
NULL unless a bracket whose closing opcode is not OP_KET has been entered
(see the bracketlen handling below), so iterators inside such a bracket
are not counted here. */
1018 if (space > 0 && cc >= end)
1019 private_data_length += sizeof(sljit_sw) * space;
1020
1021 if (size != 0)
1022 {
1023 if (size < 0)
1024 {
1025 cc += -size;
1026 #ifdef SUPPORT_UTF
1027 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1028 #endif
1029 }
1030 else
1031 cc += size;
1032 }
1033
1034 if (bracketlen != 0)
1035 {
/* Entering a bracket outside any remembered range: remember where it
ends, unless it is closed by a plain OP_KET. */
1036 if (cc >= end)
1037 {
1038 end = bracketend(cc);
1039 if (end[-1 - LINK_SIZE] == OP_KET)
1040 end = NULL;
1041 }
1042 cc += bracketlen;
1043 }
1044 }
1045 return private_data_length;
1046 }
1047
/* Walks the byte code from common->start up to ccend and records, in
common->private_data_ptrs[] (indexed by the distance of the opcode from
common->start), the offset given to each opcode that owns private data.
Offsets are handed out starting at private_data_ptr and grow by
sizeof(sljit_sw) per word. The case analysis follows the same rules as
get_private_data_length, so both functions have to be kept in step. An
opcode that next_opcode cannot step over is only caught by SLJIT_ASSERT. */
1048 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
1049 {
1050 pcre_uchar *cc = common->start;
1051 pcre_uchar *alternative;
1052 pcre_uchar *end = NULL;
1053 int space, size, bracketlen;
1054
1055 while (cc < ccend)
1056 {
1057 space = 0;
1058 size = 0;
1059 bracketlen = 0;
1060 switch(*cc)
1061 {
1062 case OP_ASSERT:
1063 case OP_ASSERT_NOT:
1064 case OP_ASSERTBACK:
1065 case OP_ASSERTBACK_NOT:
1066 case OP_ONCE:
1067 case OP_ONCE_NC:
1068 case OP_BRAPOS:
1069 case OP_SBRA:
1070 case OP_SBRAPOS:
1071 case OP_SCOND:
1072 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1073 private_data_ptr += sizeof(sljit_sw);
1074 bracketlen = 1 + LINK_SIZE;
1075 break;
1076
1077 case OP_CBRAPOS:
1078 case OP_SCBRAPOS:
1079 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1080 private_data_ptr += sizeof(sljit_sw);
1081 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1082 break;
1083
1084 case OP_COND:
1085 /* Might be a hidden SCOND. */
1086 alternative = cc + GET(cc, 1);
1087 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1088 {
1089 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1090 private_data_ptr += sizeof(sljit_sw);
1091 }
1092 bracketlen = 1 + LINK_SIZE;
1093 break;
1094
1095 case OP_BRA:
1096 bracketlen = 1 + LINK_SIZE;
1097 break;
1098
1099 case OP_CBRA:
1100 case OP_SCBRA:
1101 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1102 break;
1103
1104 CASE_ITERATOR_PRIVATE_DATA_1
1105 space = 1;
1106 size = -2;
1107 break;
1108
1109 CASE_ITERATOR_PRIVATE_DATA_2A
1110 space = 2;
1111 size = -2;
1112 break;
1113
1114 CASE_ITERATOR_PRIVATE_DATA_2B
1115 space = 2;
1116 size = -(2 + IMM2_SIZE);
1117 break;
1118
1119 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1120 space = 1;
1121 size = 1;
1122 break;
1123
1124 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1125 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1126 space = 2;
1127 size = 1;
1128 break;
1129
1130 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1131 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1132 space = 2;
1133 size = 1 + IMM2_SIZE;
1134 break;
1135
1136 case OP_CLASS:
1137 case OP_NCLASS:
1138 size += 1 + 32 / sizeof(pcre_uchar);
1139 space = get_class_iterator_size(cc + size);
1140 break;
1141
1142 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1143 case OP_XCLASS:
1144 size = GET(cc, 1);
1145 space = get_class_iterator_size(cc + size);
1146 break;
1147 #endif
1148
1149 default:
1150 cc = next_opcode(common, cc);
1151 SLJIT_ASSERT(cc != NULL);
1152 break;
1153 }
1154
/* cc still points at the iterator opcode here (size is applied below), so
the offset is stored under the iterator itself. As in
get_private_data_length, nothing is assigned while cc is before end. */
1155 if (space > 0 && cc >= end)
1156 {
1157 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1158 private_data_ptr += sizeof(sljit_sw) * space;
1159 }
1160
1161 if (size != 0)
1162 {
1163 if (size < 0)
1164 {
1165 cc += -size;
1166 #ifdef SUPPORT_UTF
1167 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1168 #endif
1169 }
1170 else
1171 cc += size;
1172 }
1173
1174 if (bracketlen > 0)
1175 {
/* Remember the end of a bracket that is not closed by a plain OP_KET. */
1176 if (cc >= end)
1177 {
1178 end = bracketend(cc);
1179 if (end[-1 - LINK_SIZE] == OP_KET)
1180 end = NULL;
1181 }
1182 cc += bracketlen;
1183 }
1184 }
1185 }
1186
1187 /* Returns with a frame_types (always < 0) if no need for frame. */
1188 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1189 {
1190 int length = 0;
1191 int possessive = 0;
1192 BOOL stack_restore = FALSE;
1193 BOOL setsom_found = recursive;
1194 BOOL setmark_found = recursive;
1195 /* The last capture is a local variable even for recursions. */
1196 BOOL capture_last_found = FALSE;
1197
1198 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1199 SLJIT_ASSERT(common->control_head_ptr != 0);
1200 *needs_control_head = TRUE;
1201 #else
1202 *needs_control_head = FALSE;
1203 #endif
1204
1205 if (ccend == NULL)
1206 {
1207 ccend = bracketend(cc) - (1 + LINK_SIZE);
1208 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1209 {
1210 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1211 /* This is correct regardless of common->capture_last_ptr. */
1212 capture_last_found = TRUE;
1213 }
1214 cc = next_opcode(common, cc);
1215 }
1216
1217 SLJIT_ASSERT(cc != NULL);
1218 while (cc < ccend)
1219 switch(*cc)
1220 {
1221 case OP_SET_SOM:
1222 SLJIT_ASSERT(common->has_set_som);
1223 stack_restore = TRUE;
1224 if (!setsom_found)
1225 {
1226 length += 2;
1227 setsom_found = TRUE;
1228 }
1229 cc += 1;
1230 break;
1231
1232 case OP_MARK:
1233 case OP_PRUNE_ARG:
1234 case OP_THEN_ARG:
1235 SLJIT_ASSERT(common->mark_ptr != 0);
1236 stack_restore = TRUE;
1237 if (!setmark_found)
1238 {
1239 length += 2;
1240 setmark_found = TRUE;
1241 }
1242 if (common->control_head_ptr != 0)
1243 *needs_control_head = TRUE;
1244 cc += 1 + 2 + cc[1];
1245 break;
1246
1247 case OP_RECURSE:
1248 stack_restore = TRUE;
1249 if (common->has_set_som && !setsom_found)
1250 {
1251 length += 2;
1252 setsom_found = TRUE;
1253 }
1254 if (common->mark_ptr != 0 && !setmark_found)
1255 {
1256 length += 2;
1257 setmark_found = TRUE;
1258 }
1259 if (common->capture_last_ptr != 0 && !capture_last_found)
1260 {
1261 length += 2;
1262 capture_last_found = TRUE;
1263 }
1264 cc += 1 + LINK_SIZE;
1265 break;
1266
1267 case OP_CBRA:
1268 case OP_CBRAPOS:
1269 case OP_SCBRA:
1270 case OP_SCBRAPOS:
1271 stack_restore = TRUE;
1272 if (common->capture_last_ptr != 0 && !capture_last_found)
1273 {
1274 length += 2;
1275 capture_last_found = TRUE;
1276 }
1277 length += 3;
1278 cc += 1 + LINK_SIZE + IMM2_SIZE;
1279 break;
1280
1281 case OP_PRUNE:
1282 case OP_SKIP:
1283 case OP_SKIP_ARG:
1284 case OP_COMMIT:
1285 if (common->control_head_ptr != 0)
1286 *needs_control_head = TRUE;
1287 /* Fall through. */
1288
1289 default:
1290 stack_restore = TRUE;
1291 /* Fall through. */
1292
1293 case OP_NOT_WORD_BOUNDARY:
1294 case OP_WORD_BOUNDARY:
1295 case OP_NOT_DIGIT:
1296 case OP_DIGIT:
1297 case OP_NOT_WHITESPACE:
1298 case OP_WHITESPACE:
1299 case OP_NOT_WORDCHAR:
1300 case OP_WORDCHAR:
1301 case OP_ANY:
1302 case OP_ALLANY:
1303 case OP_ANYBYTE:
1304 case OP_NOTPROP:
1305 case OP_PROP:
1306 case OP_ANYNL:
1307 case OP_NOT_HSPACE:
1308 case OP_HSPACE:
1309 case OP_NOT_VSPACE:
1310 case OP_VSPACE:
1311 case OP_EXTUNI:
1312 case OP_EODN:
1313 case OP_EOD:
1314 case OP_CIRC:
1315 case OP_CIRCM:
1316 case OP_DOLL:
1317 case OP_DOLLM:
1318 case OP_CHAR:
1319 case OP_CHARI:
1320 case OP_NOT:
1321 case OP_NOTI:
1322
1323 case OP_EXACT:
1324 case OP_POSSTAR:
1325 case OP_POSPLUS:
1326 case OP_POSQUERY:
1327 case OP_POSUPTO:
1328
1329 case OP_EXACTI:
1330 case OP_POSSTARI:
1331 case OP_POSPLUSI:
1332 case OP_POSQUERYI:
1333 case OP_POSUPTOI:
1334
1335 case OP_NOTEXACT:
1336 case OP_NOTPOSSTAR:
1337 case OP_NOTPOSPLUS:
1338 case OP_NOTPOSQUERY:
1339 case OP_NOTPOSUPTO:
1340
1341 case OP_NOTEXACTI:
1342 case OP_NOTPOSSTARI:
1343 case OP_NOTPOSPLUSI:
1344 case OP_NOTPOSQUERYI:
1345 case OP_NOTPOSUPTOI:
1346
1347 case OP_TYPEEXACT:
1348 case OP_TYPEPOSSTAR:
1349 case OP_TYPEPOSPLUS:
1350 case OP_TYPEPOSQUERY:
1351 case OP_TYPEPOSUPTO:
1352
1353 case OP_CLASS:
1354 case OP_NCLASS:
1355 case OP_XCLASS:
1356
1357 cc = next_opcode(common, cc);
1358 SLJIT_ASSERT(cc != NULL);
1359 break;
1360 }
1361
1362 /* Possessive quantifiers can use a special case. */
1363 if (SLJIT_UNLIKELY(possessive == length))
1364 return stack_restore ? no_frame : no_stack;
1365
1366 if (length > 0)
1367 return length + 1;
1368 return stack_restore ? no_frame : no_stack;
1369 }
1370
1371 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1372 {
1373 DEFINE_COMPILER;
1374 BOOL setsom_found = recursive;
1375 BOOL setmark_found = recursive;
1376 /* The last capture is a local variable even for recursions. */
1377 BOOL capture_last_found = FALSE;
1378 int offset;
1379
1380 /* >= 1 + shortest item size (2) */
1381 SLJIT_UNUSED_ARG(stacktop);
1382 SLJIT_ASSERT(stackpos >= stacktop + 2);
1383
1384 stackpos = STACK(stackpos);
1385 if (ccend == NULL)
1386 {
1387 ccend = bracketend(cc) - (1 + LINK_SIZE);
1388 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1389 cc = next_opcode(common, cc);
1390 }
1391
1392 SLJIT_ASSERT(cc != NULL);
1393 while (cc < ccend)
1394 switch(*cc)
1395 {
1396 case OP_SET_SOM:
1397 SLJIT_ASSERT(common->has_set_som);
1398 if (!setsom_found)
1399 {
1400 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1402 stackpos += (int)sizeof(sljit_sw);
1403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1404 stackpos += (int)sizeof(sljit_sw);
1405 setsom_found = TRUE;
1406 }
1407 cc += 1;
1408 break;
1409
1410 case OP_MARK:
1411 case OP_PRUNE_ARG:
1412 case OP_THEN_ARG:
1413 SLJIT_ASSERT(common->mark_ptr != 0);
1414 if (!setmark_found)
1415 {
1416 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1417 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1418 stackpos += (int)sizeof(sljit_sw);
1419 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1420 stackpos += (int)sizeof(sljit_sw);
1421 setmark_found = TRUE;
1422 }
1423 cc += 1 + 2 + cc[1];
1424 break;
1425
1426 case OP_RECURSE:
1427 if (common->has_set_som && !setsom_found)
1428 {
1429 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1430 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1431 stackpos += (int)sizeof(sljit_sw);
1432 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1433 stackpos += (int)sizeof(sljit_sw);
1434 setsom_found = TRUE;
1435 }
1436 if (common->mark_ptr != 0 && !setmark_found)
1437 {
1438 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1439 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1440 stackpos += (int)sizeof(sljit_sw);
1441 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1442 stackpos += (int)sizeof(sljit_sw);
1443 setmark_found = TRUE;
1444 }
1445 if (common->capture_last_ptr != 0 && !capture_last_found)
1446 {
1447 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1448 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1449 stackpos += (int)sizeof(sljit_sw);
1450 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1451 stackpos += (int)sizeof(sljit_sw);
1452 capture_last_found = TRUE;
1453 }
1454 cc += 1 + LINK_SIZE;
1455 break;
1456
1457 case OP_CBRA:
1458 case OP_CBRAPOS:
1459 case OP_SCBRA:
1460 case OP_SCBRAPOS:
1461 if (common->capture_last_ptr != 0 && !capture_last_found)
1462 {
1463 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1464 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1465 stackpos += (int)sizeof(sljit_sw);
1466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1467 stackpos += (int)sizeof(sljit_sw);
1468 capture_last_found = TRUE;
1469 }
1470 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1472 stackpos += (int)sizeof(sljit_sw);
1473 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1474 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1476 stackpos += (int)sizeof(sljit_sw);
1477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1478 stackpos += (int)sizeof(sljit_sw);
1479
1480 cc += 1 + LINK_SIZE + IMM2_SIZE;
1481 break;
1482
1483 default:
1484 cc = next_opcode(common, cc);
1485 SLJIT_ASSERT(cc != NULL);
1486 break;
1487 }
1488
1489 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1490 SLJIT_ASSERT(stackpos == STACK(stacktop));
1491 }
1492
1493 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1494 {
1495 int private_data_length = needs_control_head ? 3 : 2;
1496 int size;
1497 pcre_uchar *alternative;
1498 /* Calculate the sum of the private machine words. */
1499 while (cc < ccend)
1500 {
1501 size = 0;
1502 switch(*cc)
1503 {
1504 case OP_ASSERT:
1505 case OP_ASSERT_NOT:
1506 case OP_ASSERTBACK:
1507 case OP_ASSERTBACK_NOT:
1508 case OP_ONCE:
1509 case OP_ONCE_NC:
1510 case OP_BRAPOS:
1511 case OP_SBRA:
1512 case OP_SBRAPOS:
1513 case OP_SCOND:
1514 private_data_length++;
1515 cc += 1 + LINK_SIZE;
1516 break;
1517
1518 case OP_CBRA:
1519 case OP_SCBRA:
1520 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1521 private_data_length++;
1522 cc += 1 + LINK_SIZE + IMM2_SIZE;
1523 break;
1524
1525 case OP_CBRAPOS:
1526 case OP_SCBRAPOS:
1527 private_data_length += 2;
1528 cc += 1 + LINK_SIZE + IMM2_SIZE;
1529 break;
1530
1531 case OP_COND:
1532 /* Might be a hidden SCOND. */
1533 alternative = cc + GET(cc, 1);
1534 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1535 private_data_length++;
1536 cc += 1 + LINK_SIZE;
1537 break;
1538
1539 CASE_ITERATOR_PRIVATE_DATA_1
1540 if (PRIVATE_DATA(cc))
1541 private_data_length++;
1542 cc += 2;
1543 #ifdef SUPPORT_UTF
1544 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1545 #endif
1546 break;
1547
1548 CASE_ITERATOR_PRIVATE_DATA_2A
1549 if (PRIVATE_DATA(cc))
1550 private_data_length += 2;
1551 cc += 2;
1552 #ifdef SUPPORT_UTF
1553 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1554 #endif
1555 break;
1556
1557 CASE_ITERATOR_PRIVATE_DATA_2B
1558 if (PRIVATE_DATA(cc))
1559 private_data_length += 2;
1560 cc += 2 + IMM2_SIZE;
1561 #ifdef SUPPORT_UTF
1562 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1563 #endif
1564 break;
1565
1566 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1567 if (PRIVATE_DATA(cc))
1568 private_data_length++;
1569 cc += 1;
1570 break;
1571
1572 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1573 if (PRIVATE_DATA(cc))
1574 private_data_length += 2;
1575 cc += 1;
1576 break;
1577
1578 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1579 if (PRIVATE_DATA(cc))
1580 private_data_length += 2;
1581 cc += 1 + IMM2_SIZE;
1582 break;
1583
1584 case OP_CLASS:
1585 case OP_NCLASS:
1586 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1587 case OP_XCLASS:
1588 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1589 #else
1590 size = 1 + 32 / (int)sizeof(pcre_uchar);
1591 #endif
1592 if (PRIVATE_DATA(cc))
1593 private_data_length += get_class_iterator_size(cc + size);
1594 cc += size;
1595 break;
1596
1597 default:
1598 cc = next_opcode(common, cc);
1599 SLJIT_ASSERT(cc != NULL);
1600 break;
1601 }
1602 }
1603 SLJIT_ASSERT(cc == ccend);
1604 return private_data_length;
1605 }
1606
1607 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1608 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1609 {
1610 DEFINE_COMPILER;
1611 int srcw[2];
1612 int count, size;
1613 BOOL tmp1next = TRUE;
1614 BOOL tmp1empty = TRUE;
1615 BOOL tmp2empty = TRUE;
1616 pcre_uchar *alternative;
1617 enum {
1618 start,
1619 loop,
1620 end
1621 } status;
1622
1623 status = save ? start : loop;
1624 stackptr = STACK(stackptr - 2);
1625 stacktop = STACK(stacktop - 1);
1626
1627 if (!save)
1628 {
1629 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1630 if (stackptr < stacktop)
1631 {
1632 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1633 stackptr += sizeof(sljit_sw);
1634 tmp1empty = FALSE;
1635 }
1636 if (stackptr < stacktop)
1637 {
1638 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1639 stackptr += sizeof(sljit_sw);
1640 tmp2empty = FALSE;
1641 }
1642 /* The tmp1next must be TRUE in either way. */
1643 }
1644
1645 do
1646 {
1647 count = 0;
1648 switch(status)
1649 {
1650 case start:
1651 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1652 count = 1;
1653 srcw[0] = common->recursive_head_ptr;
1654 if (needs_control_head)
1655 {
1656 SLJIT_ASSERT(common->control_head_ptr != 0);
1657 count = 2;
1658 srcw[1] = common->control_head_ptr;
1659 }
1660 status = loop;
1661 break;
1662
1663 case loop:
1664 if (cc >= ccend)
1665 {
1666 status = end;
1667 break;
1668 }
1669
1670 switch(*cc)
1671 {
1672 case OP_ASSERT:
1673 case OP_ASSERT_NOT:
1674 case OP_ASSERTBACK:
1675 case OP_ASSERTBACK_NOT:
1676 case OP_ONCE:
1677 case OP_ONCE_NC:
1678 case OP_BRAPOS:
1679 case OP_SBRA:
1680 case OP_SBRAPOS:
1681 case OP_SCOND:
1682 count = 1;
1683 srcw[0] = PRIVATE_DATA(cc);
1684 SLJIT_ASSERT(srcw[0] != 0);
1685 cc += 1 + LINK_SIZE;
1686 break;
1687
1688 case OP_CBRA:
1689 case OP_SCBRA:
1690 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1691 {
1692 count = 1;
1693 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1694 }
1695 cc += 1 + LINK_SIZE + IMM2_SIZE;
1696 break;
1697
1698 case OP_CBRAPOS:
1699 case OP_SCBRAPOS:
1700 count = 2;
1701 srcw[0] = PRIVATE_DATA(cc);
1702 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1703 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1704 cc += 1 + LINK_SIZE + IMM2_SIZE;
1705 break;
1706
1707 case OP_COND:
1708 /* Might be a hidden SCOND. */
1709 alternative = cc + GET(cc, 1);
1710 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1711 {
1712 count = 1;
1713 srcw[0] = PRIVATE_DATA(cc);
1714 SLJIT_ASSERT(srcw[0] != 0);
1715 }
1716 cc += 1 + LINK_SIZE;
1717 break;
1718
1719 CASE_ITERATOR_PRIVATE_DATA_1
1720 if (PRIVATE_DATA(cc))
1721 {
1722 count = 1;
1723 srcw[0] = PRIVATE_DATA(cc);
1724 }
1725 cc += 2;
1726 #ifdef SUPPORT_UTF
1727 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1728 #endif
1729 break;
1730
1731 CASE_ITERATOR_PRIVATE_DATA_2A
1732 if (PRIVATE_DATA(cc))
1733 {
1734 count = 2;
1735 srcw[0] = PRIVATE_DATA(cc);
1736 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1737 }
1738 cc += 2;
1739 #ifdef SUPPORT_UTF
1740 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1741 #endif
1742 break;
1743
1744 CASE_ITERATOR_PRIVATE_DATA_2B
1745 if (PRIVATE_DATA(cc))
1746 {
1747 count = 2;
1748 srcw[0] = PRIVATE_DATA(cc);
1749 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1750 }
1751 cc += 2 + IMM2_SIZE;
1752 #ifdef SUPPORT_UTF
1753 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1754 #endif
1755 break;
1756
1757 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1758 if (PRIVATE_DATA(cc))
1759 {
1760 count = 1;
1761 srcw[0] = PRIVATE_DATA(cc);
1762 }
1763 cc += 1;
1764 break;
1765
1766 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1767 if (PRIVATE_DATA(cc))
1768 {
1769 count = 2;
1770 srcw[0] = PRIVATE_DATA(cc);
1771 srcw[1] = srcw[0] + sizeof(sljit_sw);
1772 }
1773 cc += 1;
1774 break;
1775
1776 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1777 if (PRIVATE_DATA(cc))
1778 {
1779 count = 2;
1780 srcw[0] = PRIVATE_DATA(cc);
1781 srcw[1] = srcw[0] + sizeof(sljit_sw);
1782 }
1783 cc += 1 + IMM2_SIZE;
1784 break;
1785
1786 case OP_CLASS:
1787 case OP_NCLASS:
1788 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1789 case OP_XCLASS:
1790 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1791 #else
1792 size = 1 + 32 / (int)sizeof(pcre_uchar);
1793 #endif
1794 if (PRIVATE_DATA(cc))
1795 switch(get_class_iterator_size(cc + size))
1796 {
1797 case 1:
1798 count = 1;
1799 srcw[0] = PRIVATE_DATA(cc);
1800 break;
1801
1802 case 2:
1803 count = 2;
1804 srcw[0] = PRIVATE_DATA(cc);
1805 srcw[1] = srcw[0] + sizeof(sljit_sw);
1806 break;
1807
1808 default:
1809 SLJIT_ASSERT_STOP();
1810 break;
1811 }
1812 cc += size;
1813 break;
1814
1815 default:
1816 cc = next_opcode(common, cc);
1817 SLJIT_ASSERT(cc != NULL);
1818 break;
1819 }
1820 break;
1821
1822 case end:
1823 SLJIT_ASSERT_STOP();
1824 break;
1825 }
1826
1827 while (count > 0)
1828 {
1829 count--;
1830 if (save)
1831 {
1832 if (tmp1next)
1833 {
1834 if (!tmp1empty)
1835 {
1836 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1837 stackptr += sizeof(sljit_sw);
1838 }
1839 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1840 tmp1empty = FALSE;
1841 tmp1next = FALSE;
1842 }
1843 else
1844 {
1845 if (!tmp2empty)
1846 {
1847 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1848 stackptr += sizeof(sljit_sw);
1849 }
1850 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1851 tmp2empty = FALSE;
1852 tmp1next = TRUE;
1853 }
1854 }
1855 else
1856 {
1857 if (tmp1next)
1858 {
1859 SLJIT_ASSERT(!tmp1empty);
1860 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1861 tmp1empty = stackptr >= stacktop;
1862 if (!tmp1empty)
1863 {
1864 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1865 stackptr += sizeof(sljit_sw);
1866 }
1867 tmp1next = FALSE;
1868 }
1869 else
1870 {
1871 SLJIT_ASSERT(!tmp2empty);
1872 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1873 tmp2empty = stackptr >= stacktop;
1874 if (!tmp2empty)
1875 {
1876 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1877 stackptr += sizeof(sljit_sw);
1878 }
1879 tmp1next = TRUE;
1880 }
1881 }
1882 }
1883 }
1884 while (status != end);
1885
1886 if (save)
1887 {
1888 if (tmp1next)
1889 {
1890 if (!tmp1empty)
1891 {
1892 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1893 stackptr += sizeof(sljit_sw);
1894 }
1895 if (!tmp2empty)
1896 {
1897 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1898 stackptr += sizeof(sljit_sw);
1899 }
1900 }
1901 else
1902 {
1903 if (!tmp2empty)
1904 {
1905 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1906 stackptr += sizeof(sljit_sw);
1907 }
1908 if (!tmp1empty)
1909 {
1910 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1911 stackptr += sizeof(sljit_sw);
1912 }
1913 }
1914 }
1915 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1916 }
1917
1918 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1919 {
1920 pcre_uchar *end = bracketend(cc);
1921 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1922
1923 /* Assert captures then. */
1924 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1925 current_offset = NULL;
1926 /* Conditional block does not. */
1927 if (*cc == OP_COND || *cc == OP_SCOND)
1928 has_alternatives = FALSE;
1929
1930 cc = next_opcode(common, cc);
1931 if (has_alternatives)
1932 current_offset = common->then_offsets + (cc - common->start);
1933
1934 while (cc < end)
1935 {
1936 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1937 cc = set_then_offsets(common, cc, current_offset);
1938 else
1939 {
1940 if (*cc == OP_ALT && has_alternatives)
1941 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1942 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1943 *current_offset = 1;
1944 cc = next_opcode(common, cc);
1945 }
1946 }
1947
1948 return end;
1949 }
1950
1951 #undef CASE_ITERATOR_PRIVATE_DATA_1
1952 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1953 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1954 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1955 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1956 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1957
1958 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1959 {
1960 return (value & (value - 1)) == 0;
1961 }
1962
1963 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1964 {
1965 while (list)
1966 {
1967 /* sljit_set_label is clever enough to do nothing
1968 if either the jump or the label is NULL. */
1969 SET_LABEL(list->jump, label);
1970 list = list->next;
1971 }
1972 }
1973
1974 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1975 {
1976 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1977 if (list_item)
1978 {
1979 list_item->next = *list;
1980 list_item->jump = jump;
1981 *list = list_item;
1982 }
1983 }
1984
1985 static void add_stub(compiler_common *common, struct sljit_jump *start)
1986 {
1987 DEFINE_COMPILER;
1988 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1989
1990 if (list_item)
1991 {
1992 list_item->start = start;
1993 list_item->quit = LABEL();
1994 list_item->next = common->stubs;
1995 common->stubs = list_item;
1996 }
1997 }
1998
1999 static void flush_stubs(compiler_common *common)
2000 {
2001 DEFINE_COMPILER;
2002 stub_list* list_item = common->stubs;
2003
2004 while (list_item)
2005 {
2006 JUMPHERE(list_item->start);
2007 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2008 JUMPTO(SLJIT_JUMP, list_item->quit);
2009 list_item = list_item->next;
2010 }
2011 common->stubs = NULL;
2012 }
2013
2014 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
2015 {
2016 DEFINE_COMPILER;
2017
2018 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
2019 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2020 }
2021
2022 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2023 {
2024 /* May destroy all locals and registers except TMP2. */
2025 DEFINE_COMPILER;
2026
2027 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2028 #ifdef DESTROY_REGISTERS
2029 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2030 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2031 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2032 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2033 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2034 #endif
2035 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2036 }
2037
2038 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2039 {
2040 DEFINE_COMPILER;
2041 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2042 }
2043
2044 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2045 {
2046 DEFINE_COMPILER;
2047 struct sljit_label *loop;
2048 int i;
2049
2050 /* At this point we can freely use all temporary registers. */
2051 SLJIT_ASSERT(length > 1);
2052 /* TMP1 returns with begin - 1. */
2053 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2054 if (length < 8)
2055 {
2056 for (i = 1; i < length; i++)
2057 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2058 }
2059 else
2060 {
2061 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2062 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2063 loop = LABEL();
2064 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2065 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2066 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2067 }
2068 }
2069
2070 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2071 {
2072 DEFINE_COMPILER;
2073 struct sljit_label *loop;
2074 int i;
2075
2076 SLJIT_ASSERT(length > 1);
2077 /* OVECTOR(1) contains the "string begin - 1" constant. */
2078 if (length > 2)
2079 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2080 if (length < 8)
2081 {
2082 for (i = 2; i < length; i++)
2083 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2084 }
2085 else
2086 {
2087 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2088 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2089 loop = LABEL();
2090 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2091 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2092 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2093 }
2094
2095 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2096 if (common->mark_ptr != 0)
2097 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2098 SLJIT_ASSERT(common->control_head_ptr != 0);
2099 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2100 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2101 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2102 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2103 }
2104
2105 static sljit_sw SLJIT_CALL do_check_control_chain(sljit_sw *current)
2106 {
2107 sljit_sw return_value = 0;
2108 const pcre_uchar *skip_arg = NULL;
2109
2110 SLJIT_ASSERT(current != NULL);
2111 do
2112 {
2113 switch (current[-2])
2114 {
2115 case type_commit:
2116 /* Commit overwrites all. */
2117 return -1;
2118
2119 case type_prune:
2120 case type_then_trap:
2121 break;
2122
2123 case type_skip:
2124 /* Overwrites prune, but not other skips. */
2125 if (return_value == 0 && skip_arg == NULL)
2126 return_value = current[-3];
2127 break;
2128
2129 case type_skip_arg:
2130 if (return_value == 0 && skip_arg == NULL)
2131 skip_arg = (pcre_uchar *)current[-3];
2132 break;
2133
2134 case type_mark:
2135 if (return_value == 0 && skip_arg != NULL)
2136 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2137 return_value = current[-4];
2138 break;
2139
2140 default:
2141 SLJIT_ASSERT_STOP();
2142 break;
2143 }
2144 current = (sljit_sw*)current[-1];
2145 }
2146 while (current != NULL);
2147 return (return_value != 0 || skip_arg == NULL) ? return_value : -2;
2148 }
2149
2150 static sljit_sw SLJIT_CALL do_search_then_trap(sljit_sw *current)
2151 {
2152 do
2153 {
2154 switch (current[-2])
2155 {
2156 case type_commit:
2157 /* Commit overwrites all. */
2158 return 0;
2159
2160 case type_then_trap:
2161 return (sljit_sw)current;
2162
2163 case type_prune:
2164 case type_skip:
2165 case type_skip_arg:
2166 case type_mark:
2167 break;
2168
2169 default:
2170 SLJIT_ASSERT_STOP();
2171 break;
2172 }
2173 current = (sljit_sw*)current[-1];
2174 SLJIT_ASSERT(current != NULL);
2175 }
2176 while (TRUE);
2177 }
2178
2179 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2180 {
2181 DEFINE_COMPILER;
2182 struct sljit_label *loop;
2183 struct sljit_jump *early_quit;
2184
2185 /* At this point we can freely use all registers. */
2186 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2187 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2188
2189 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2190 if (common->mark_ptr != 0)
2191 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2192 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2193 if (common->mark_ptr != 0)
2194 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2195 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2196 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2197 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2198 /* Unlikely, but possible */
2199 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2200 loop = LABEL();
2201 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2202 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2203 /* Copy the integer value to the output buffer */
2204 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2205 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2206 #endif
2207 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2208 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2209 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2210 JUMPHERE(early_quit);
2211
2212 /* Calculate the return value, which is the maximum ovector value. */
2213 if (topbracket > 1)
2214 {
2215 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2216 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2217
2218 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2219 loop = LABEL();
2220 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2221 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2222 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2223 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2224 }
2225 else
2226 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2227 }
2228
2229 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2230 {
2231 DEFINE_COMPILER;
2232 struct sljit_jump *jump;
2233
2234 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2235 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2236 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2237
2238 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2239 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2240 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2241 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2242
2243 /* Store match begin and end. */
2244 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2245 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2246
2247 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2248 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2249 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2250 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2251 #endif
2252 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2253 JUMPHERE(jump);
2254
2255 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2256 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2257 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2258 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2259 #endif
2260 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2261
2262 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2263 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2264 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2265 #endif
2266 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2267
2268 JUMPTO(SLJIT_JUMP, quit);
2269 }
2270
2271 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2272 {
2273 /* May destroy TMP1. */
2274 DEFINE_COMPILER;
2275 struct sljit_jump *jump;
2276
2277 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2278 {
2279 /* The value of -1 must be kept for start_used_ptr! */
2280 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2281 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2282 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2283 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2284 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2285 JUMPHERE(jump);
2286 }
2287 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2288 {
2289 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2290 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2291 JUMPHERE(jump);
2292 }
2293 }
2294
2295 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2296 {
2297 /* Detects if the character has an othercase. */
2298 unsigned int c;
2299
2300 #ifdef SUPPORT_UTF
2301 if (common->utf)
2302 {
2303 GETCHAR(c, cc);
2304 if (c > 127)
2305 {
2306 #ifdef SUPPORT_UCP
2307 return c != UCD_OTHERCASE(c);
2308 #else
2309 return FALSE;
2310 #endif
2311 }
2312 #ifndef COMPILE_PCRE8
2313 return common->fcc[c] != c;
2314 #endif
2315 }
2316 else
2317 #endif
2318 c = *cc;
2319 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2320 }
2321
2322 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2323 {
2324 /* Returns with the othercase. */
2325 #ifdef SUPPORT_UTF
2326 if (common->utf && c > 127)
2327 {
2328 #ifdef SUPPORT_UCP
2329 return UCD_OTHERCASE(c);
2330 #else
2331 return c;
2332 #endif
2333 }
2334 #endif
2335 return TABLE_GET(c, common->fcc, c);
2336 }
2337
2338 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2339 {
2340 /* Detects if the character and its othercase has only 1 bit difference. */
2341 unsigned int c, oc, bit;
2342 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2343 int n;
2344 #endif
2345
2346 #ifdef SUPPORT_UTF
2347 if (common->utf)
2348 {
2349 GETCHAR(c, cc);
2350 if (c <= 127)
2351 oc = common->fcc[c];
2352 else
2353 {
2354 #ifdef SUPPORT_UCP
2355 oc = UCD_OTHERCASE(c);
2356 #else
2357 oc = c;
2358 #endif
2359 }
2360 }
2361 else
2362 {
2363 c = *cc;
2364 oc = TABLE_GET(c, common->fcc, c);
2365 }
2366 #else
2367 c = *cc;
2368 oc = TABLE_GET(c, common->fcc, c);
2369 #endif
2370
2371 SLJIT_ASSERT(c != oc);
2372
2373 bit = c ^ oc;
2374 /* Optimized for English alphabet. */
2375 if (c <= 127 && bit == 0x20)
2376 return (0 << 8) | 0x20;
2377
2378 /* Since c != oc, they must have at least 1 bit difference. */
2379 if (!is_powerof2(bit))
2380 return 0;
2381
2382 #if defined COMPILE_PCRE8
2383
2384 #ifdef SUPPORT_UTF
2385 if (common->utf && c > 127)
2386 {
2387 n = GET_EXTRALEN(*cc);
2388 while ((bit & 0x3f) == 0)
2389 {
2390 n--;
2391 bit >>= 6;
2392 }
2393 return (n << 8) | bit;
2394 }
2395 #endif /* SUPPORT_UTF */
2396 return (0 << 8) | bit;
2397
2398 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2399
2400 #ifdef SUPPORT_UTF
2401 if (common->utf && c > 65535)
2402 {
2403 if (bit >= (1 << 10))
2404 bit >>= 10;
2405 else
2406 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2407 }
2408 #endif /* SUPPORT_UTF */
2409 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2410
2411 #endif /* COMPILE_PCRE[8|16|32] */
2412 }
2413
2414 static void check_partial(compiler_common *common, BOOL force)
2415 {
2416 /* Checks whether a partial matching is occured. Does not modify registers. */
2417 DEFINE_COMPILER;
2418 struct sljit_jump *jump = NULL;
2419
2420 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2421
2422 if (common->mode == JIT_COMPILE)
2423 return;
2424
2425 if (!force)
2426 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2427 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2428 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2429
2430 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2431 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2432 else
2433 {
2434 if (common->partialmatchlabel != NULL)
2435 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2436 else
2437 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2438 }
2439
2440 if (jump != NULL)
2441 JUMPHERE(jump);
2442 }
2443
2444 static void check_str_end(compiler_common *common, jump_list **end_reached)
2445 {
2446 /* Does not affect registers. Usually used in a tight spot. */
2447 DEFINE_COMPILER;
2448 struct sljit_jump *jump;
2449
2450 if (common->mode == JIT_COMPILE)
2451 {
2452 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2453 return;
2454 }
2455
2456 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2457 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2458 {
2459 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2460 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2461 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2462 }
2463 else
2464 {
2465 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2466 if (common->partialmatchlabel != NULL)
2467 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2468 else
2469 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2470 }
2471 JUMPHERE(jump);
2472 }
2473
2474 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2475 {
2476 DEFINE_COMPILER;
2477 struct sljit_jump *jump;
2478
2479 if (common->mode == JIT_COMPILE)
2480 {
2481 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2482 return;
2483 }
2484
2485 /* Partial matching mode. */
2486 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2487 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2488 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2489 {
2490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2491 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2492 }
2493 else
2494 {
2495 if (common->partialmatchlabel != NULL)
2496 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2497 else
2498 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2499 }
2500 JUMPHERE(jump);
2501 }
2502
2503 static void read_char(compiler_common *common)
2504 {
2505 /* Reads the character into TMP1, updates STR_PTR.
2506 Does not check STR_END. TMP2 Destroyed. */
2507 DEFINE_COMPILER;
2508 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2509 struct sljit_jump *jump;
2510 #endif
2511
2512 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2513 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2514 if (common->utf)
2515 {
2516 #if defined COMPILE_PCRE8
2517 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2518 #elif defined COMPILE_PCRE16
2519 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2520 #endif /* COMPILE_PCRE[8|16] */
2521 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2522 JUMPHERE(jump);
2523 }
2524 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2525 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2526 }
2527
2528 static void peek_char(compiler_common *common)
2529 {
2530 /* Reads the character into TMP1, keeps STR_PTR.
2531 Does not check STR_END. TMP2 Destroyed. */
2532 DEFINE_COMPILER;
2533 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2534 struct sljit_jump *jump;
2535 #endif
2536
2537 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2538 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2539 if (common->utf)
2540 {
2541 #if defined COMPILE_PCRE8
2542 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2543 #elif defined COMPILE_PCRE16
2544 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2545 #endif /* COMPILE_PCRE[8|16] */
2546 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2547 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2548 JUMPHERE(jump);
2549 }
2550 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2551 }
2552
2553 static void read_char8_type(compiler_common *common)
2554 {
2555 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2556 DEFINE_COMPILER;
2557 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2558 struct sljit_jump *jump;
2559 #endif
2560
2561 #ifdef SUPPORT_UTF
2562 if (common->utf)
2563 {
2564 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2565 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2566 #if defined COMPILE_PCRE8
2567 /* This can be an extra read in some situations, but hopefully
2568 it is needed in most cases. */
2569 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2570 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2571 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2572 JUMPHERE(jump);
2573 #elif defined COMPILE_PCRE16
2574 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2575 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2576 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2577 JUMPHERE(jump);
2578 /* Skip low surrogate if necessary. */
2579 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2580 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2581 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2582 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2583 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2584 #elif defined COMPILE_PCRE32
2585 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2586 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2587 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2588 JUMPHERE(jump);
2589 #endif /* COMPILE_PCRE[8|16|32] */
2590 return;
2591 }
2592 #endif /* SUPPORT_UTF */
2593 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2594 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2595 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2596 /* The ctypes array contains only 256 values. */
2597 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2598 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2599 #endif
2600 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2601 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2602 JUMPHERE(jump);
2603 #endif
2604 }
2605
2606 static void skip_char_back(compiler_common *common)
2607 {
2608 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2609 DEFINE_COMPILER;
2610 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2611 #if defined COMPILE_PCRE8
2612 struct sljit_label *label;
2613
2614 if (common->utf)
2615 {
2616 label = LABEL();
2617 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2618 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2619 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2620 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2621 return;
2622 }
2623 #elif defined COMPILE_PCRE16
2624 if (common->utf)
2625 {
2626 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2627 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2628 /* Skip low surrogate if necessary. */
2629 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2630 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2631 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2632 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2633 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2634 return;
2635 }
2636 #endif /* COMPILE_PCRE[8|16] */
2637 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2638 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2639 }
2640
2641 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2642 {
2643 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2644 DEFINE_COMPILER;
2645
2646 if (nltype == NLTYPE_ANY)
2647 {
2648 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2649 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2650 }
2651 else if (nltype == NLTYPE_ANYCRLF)
2652 {
2653 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2654 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2655 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2656 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2657 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2658 }
2659 else
2660 {
2661 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2662 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2663 }
2664 }
2665
2666 #ifdef SUPPORT_UTF
2667
2668 #if defined COMPILE_PCRE8
/* Emits the shared out-of-line routine that finishes decoding a multi-byte
UTF-8 character (8 bit library). The routine is entered with
sljit_emit_fast_enter and every path leaves through sljit_emit_fast_return
using RETURN_ADDR. The lead byte in TMP1 is classified by testing bit 0x20 and
then bit 0x10, selecting the two, three or four byte path. Each path advances
STR_PTR by the number of trailing bytes it consumed (1, 2 or 3 code units) and
loads the same amount into TMP2. The trailing bytes are only masked with 0x3f;
no validity check is emitted. */
2669 static void do_utfreadchar(compiler_common *common)
2670 {
2671 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2672 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2673 DEFINE_COMPILER;
2674 struct sljit_jump *jump;
2675
2676 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2677 /* Searching for the first zero. */
2678 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2679 jump = JUMP(SLJIT_C_NOT_ZERO);
2680 /* Two byte sequence. */
/* TMP1 = ((lead & 0x1f) << 6) | (byte1 & 0x3f). */
2681 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2682 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2683 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2684 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2685 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2686 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2687 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2688 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2689 JUMPHERE(jump);
2690
/* Bit 0x20 was set: bit 0x10 separates three byte from four byte leads. */
2691 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2692 jump = JUMP(SLJIT_C_NOT_ZERO);
2693 /* Three byte sequence. */
/* TMP1 = ((lead & 0x0f) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f). */
2694 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2695 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2696 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2697 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2698 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2699 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2700 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2701 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2702 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2703 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2704 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2705 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2706 JUMPHERE(jump);
2707
2708 /* Four byte sequence. */
/* TMP1 = ((lead & 0x07) << 18) | ((byte1 & 0x3f) << 12)
| ((byte2 & 0x3f) << 6) | (byte3 & 0x3f). Every lead byte with bits 0x20 and
0x10 both set ends up here. */
2709 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2710 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2711 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2712 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2713 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2714 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2715 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2716 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2717 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2718 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2719 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2720 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2721 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2722 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2723 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2724 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2725 }
2726
/* Emits the shared out-of-line routine that produces the ctypes entry for a
multi-byte UTF-8 character (8 bit library). Entered with
sljit_emit_fast_enter, left through sljit_emit_fast_return. The trailing byte
is read at offset 0 from STR_PTR, so STR_PTR is expected to point just past
the lead byte on entry (read_char8_type advances it before making the call).
Only two byte sequences can decode to a value below 256; every other result is
reported as type 0. */
2727 static void do_utfreadtype8(compiler_common *common)
2728 {
2729 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2730 of the character (>= 0xc0). Return value in TMP1. */
2731 DEFINE_COMPILER;
2732 struct sljit_jump *jump;
2733 struct sljit_jump *compare;
2734
2735 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2736
/* Bit 0x20 set means the sequence is longer than two bytes. */
2737 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2738 jump = JUMP(SLJIT_C_NOT_ZERO);
2739 /* Two byte sequence. */
/* TMP2 = ((lead & 0x1f) << 6) | (trail & 0x3f); STR_PTR steps over the
trailing byte. */
2740 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2741 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2742 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2743 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2744 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2745 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* Decoded values up to 255 are looked up in ctypes; larger ones get 0. */
2746 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2747 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2748 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2749
2750 JUMPHERE(compare);
2751 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2752 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2753 JUMPHERE(jump);
2754
2755 /* We only have types for characters less than 256. */
/* Longer sequences: the byte loaded from utf8_table4[lead - 0xc0] is added to
STR_PTR (presumably the number of trailing bytes; the table itself is defined
elsewhere) and the type is reported as 0. */
2756 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2757 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2758 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2759 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2760 }
2761
2762 #elif defined COMPILE_PCRE16
2763
/* Emits the shared out-of-line routine that finishes decoding a UTF-16
character (16 bit library). Entered with sljit_emit_fast_enter, left through
sljit_emit_fast_return. A first unit of 0xdc00 or above returns immediately
with TMP1 unchanged; note that TMP2 is not written on that path. A first unit
below 0xdc00 is combined with the following unit and STR_PTR is advanced by
one code unit. */
2764 static void do_utfreadchar(compiler_common *common)
2765 {
2766 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2767 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2768 DEFINE_COMPILER;
2769 struct sljit_jump *jump;
2770
2771 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2772 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2773 /* Do nothing, only return. */
2774 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2775
2776 JUMPHERE(jump);
2777 /* Combine two 16 bit characters. */
/* TMP1 = (((first & 0x3ff) << 10) | (second & 0x3ff)) + 0x10000, and
TMP2 = IN_UCHARS(1). The second unit is only masked, not validated. */
2778 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2779 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2780 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2781 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2782 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2783 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2784 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2785 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2786 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2787 }
2788
2789 #endif /* COMPILE_PCRE[8|16] */
2790
2791 #endif /* SUPPORT_UTF */
2792
2793 #ifdef SUPPORT_UCP
2794
2795 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2796 #define UCD_BLOCK_MASK 127
2797 #define UCD_BLOCK_SHIFT 7
2798
/* Emits the shared out-of-line routine that fetches the chartype field of
the UCD record belonging to the character in TMP1. Entered with
sljit_emit_fast_enter, left through sljit_emit_fast_return. The lookup walks
ucd_stage1, then ucd_stage2, then ucd_records; the hard-coded shifts rely on
the block size of 128 and the record size of 8 checked by the assert. */
2799 static void do_getucd(compiler_common *common)
2800 {
2801 /* Search the UCD record for the character comes in TMP1.
2802 Returns chartype in TMP1 and UCD offset in TMP2. */
2803 DEFINE_COMPILER;
2804
2805 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2806
2807 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (byte load). */
2808 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2809 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (TMP1 & UCD_BLOCK_MASK) + (TMP2 << UCD_BLOCK_SHIFT): the index into
ucd_stage2. */
2810 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2811 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2812 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = 16 bit value loaded from ucd_stage2; the index register is passed
with a shift of 1 (NOTE(review): taken to mean an index scaled by 2, as in
the sljit two-register addressing form - confirm against sljitLir.h). */
2813 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2814 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = byte at the chartype offset inside ucd_records, with TMP2 as the
index register and a shift of 3 (matching the asserted record size of 8). */
2815 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2816 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2817 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2818 }
2819 #endif
2820
/* Emits the entry of the main matching loop and returns the label that
advances STR_PTR by one character.

Parameters:
  common     compiler state (newline settings, utf flag, local offsets)
  hascrorlf  together with firstline, disables the two-unit newline skip
  firstline  when TRUE, code is emitted first that finds the end of the first
             line and stores it in the first_line_end local

Layout of the emitted code: optional first line scan, an unconditional jump
("start") over the advance code so that the first attempt happens at the
initial STR_PTR, the optional newline skip block, the returned mainloop label
followed by the advance code, and finally the landing point of the pending
jumps. */
2821 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2822 {
2823 DEFINE_COMPILER;
2824 struct sljit_label *mainloop;
2825 struct sljit_label *newlinelabel = NULL;
2826 struct sljit_jump *start;
2827 struct sljit_jump *end = NULL;
2828 struct sljit_jump *nl = NULL;
2829 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2830 struct sljit_jump *singlechar;
2831 #endif
2832 jump_list *newline = NULL;
2833 BOOL newlinecheck = FALSE;
2834 BOOL readuchar = FALSE;
2835
/* The newline skip is only generated when neither flag is set and the newline
type is ANY, ANYCRLF or a value above 255. */
2836 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2837 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2838 newlinecheck = TRUE;
2839
2840 if (firstline)
2841 {
2842 /* Search for the end of the first line. */
2843 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is parked in TMP3 during the scan and restored afterwards. */
2844 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2845
2846 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2847 {
/* Fixed newline above 255: compare the unit before STR_PTR with the high
byte of common->newline and the unit at STR_PTR with its low byte. The loop
ends on a match or when STR_PTR reaches STR_END; first_line_end receives
STR_PTR minus one code unit. */
2848 mainloop = LABEL();
2849 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2850 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2851 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2852 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2853 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2854 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2855 JUMPHERE(end);
2856 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2857 }
2858 else
2859 {
/* Other newline types: remember the position before each character, read
the character and leave the loop through the newline jump list when it is a
newline. When the subject ends first, first_line_end is set to STR_PTR. */
2860 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2861 mainloop = LABEL();
2862 /* Continual stores does not cause data dependency. */
2863 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2864 read_char(common);
2865 check_newlinechar(common, common->nltype, &newline, TRUE);
2866 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2867 JUMPHERE(end);
2868 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2869 set_jumps(newline, LABEL());
2870 }
2871
2872 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2873 }
2874
/* Skips everything up to the JUMPHERE(start) near the end of the function. */
2875 start = JUMP(SLJIT_JUMP);
2876
2877 if (newlinecheck)
2878 {
/* Reached from the mainloop when the current unit equals the high byte of
common->newline: step over it and, unless the subject ends there, step over
the next unit as well when it equals the low byte. The comparison result
(0 or 1) is scaled by UCHAR_SHIFT in the 16 and 32 bit libraries and added to
STR_PTR. */
2879 newlinelabel = LABEL();
2880 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2881 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2882 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2883 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2884 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2885 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2886 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2887 #endif
2888 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2889 nl = JUMP(SLJIT_JUMP);
2890 }
2891
/* This label is the function's return value (the earlier uses of the
variable inside the first line scan were only local loop heads). */
2892 mainloop = LABEL();
2893
2894 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current code unit only has to be loaded when UTF decoding or the
newline check needs to look at it. */
2895 #ifdef SUPPORT_UTF
2896 if (common->utf) readuchar = TRUE;
2897 #endif
2898 if (newlinecheck) readuchar = TRUE;
2899
2900 if (readuchar)
2901 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2902
2903 if (newlinecheck)
2904 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2905
2906 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2907 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2908 #if defined COMPILE_PCRE8
2909 if (common->utf)
2910 {
/* Lead bytes (>= 0xc0) add the byte found in utf8_table4[lead - 0xc0] to
STR_PTR. */
2911 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2912 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2913 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2914 JUMPHERE(singlechar);
2915 }
2916 #elif defined COMPILE_PCRE16
2917 if (common->utf)
2918 {
/* Units of 0xd800 and above whose top six bits equal 0xd800 add a further
2 bytes to STR_PTR. */
2919 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2920 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2921 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2922 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2923 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2924 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2925 JUMPHERE(singlechar);
2926 }
2927 #endif /* COMPILE_PCRE[8|16] */
2928 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Landing point of the initial jump and of both exits of the newline skip. */
2929 JUMPHERE(start);
2930
2931 if (newlinecheck)
2932 {
2933 JUMPHERE(end);
2934 JUMPHERE(nl);
2935 }
2936
2937 return mainloop;
2938 }
2939
2940 #define MAX_N_CHARS 3
2941
/* Tries to emit a fast-forward loop that searches the subject for the first
two or three code units with which every match must begin.

The first part runs at compile time: it walks the opcodes that follow the
opening bracket and collects up to MAX_N_CHARS code units into chars[], stored
as pairs (chars[2*i] = value, chars[2*i + 1] = case mask or 0).

Returns FALSE without emitting anything when the pattern has a top level
alternative, when a caseless character differs from its other case in more
than one bit, or when fewer than two code units were collected. Otherwise the
search loop is emitted and TRUE is returned.

Parameters:
  common     compiler state
  firstline  when TRUE the search is limited by the first_line_end local and
             STR_END is saved in TMP3 meanwhile */
2942 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2943 {
2944 DEFINE_COMPILER;
2945 struct sljit_label *start;
2946 struct sljit_jump *quit;
2947 pcre_uint32 chars[MAX_N_CHARS * 2];
2948 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2949 int location = 0;
2950 pcre_int32 len, c, bit, caseless;
2951 int must_stop;
2952
2953 /* We do not support alternatives now. */
2954 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2955 return FALSE;
2956
2957 while (TRUE)
2958 {
2959 caseless = 0;
/* must_stop: 0 = plain character, scanning may continue afterwards;
1 = take the character of this repeat opcode and stop; 2 = unsupported
opcode, stop without taking anything. */
2960 must_stop = 1;
2961 switch(*cc)
2962 {
2963 case OP_CHAR:
2964 must_stop = 0;
2965 cc++;
2966 break;
2967
2968 case OP_CHARI:
2969 caseless = 1;
2970 must_stop = 0;
2971 cc++;
2972 break;
2973
2974 case OP_SOD:
2975 case OP_SOM:
2976 case OP_SET_SOM:
2977 case OP_NOT_WORD_BOUNDARY:
2978 case OP_WORD_BOUNDARY:
2979 case OP_EODN:
2980 case OP_EOD:
2981 case OP_CIRC:
2982 case OP_CIRCM:
2983 case OP_DOLL:
2984 case OP_DOLLM:
2985 /* Zero width assertions. */
2986 cc++;
2987 continue;
2988
2989 case OP_PLUS:
2990 case OP_MINPLUS:
2991 case OP_POSPLUS:
2992 cc++;
2993 break;
2994
2995 case OP_EXACT:
2996 cc += 1 + IMM2_SIZE;
2997 break;
2998
2999 case OP_PLUSI:
3000 case OP_MINPLUSI:
3001 case OP_POSPLUSI:
3002 caseless = 1;
3003 cc++;
3004 break;
3005
3006 case OP_EXACTI:
3007 caseless = 1;
3008 cc += 1 + IMM2_SIZE;
3009 break;
3010
3011 default:
3012 must_stop = 2;
3013 break;
3014 }
3015
3016 if (must_stop == 2)
3017 break;
3018
/* len = number of code units that make up the character at cc. */
3019 len = 1;
3020 #ifdef SUPPORT_UTF
3021 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
3022 #endif
3023
3024 if (caseless && char_has_othercase(common, cc))
3025 {
3026 caseless = char_get_othercase_bit(common, cc);
3027 if (caseless == 0)
3028 return FALSE;
/* Repack the result: the low byte becomes the value of len at which the mask
has to be applied in the copy loop below, the bits above it hold the mask
itself (moved up by a further 8 bits when bit 0x100 of the helper result is
set). */
3029 #ifdef COMPILE_PCRE8
3030 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3031 #else
3032 if ((caseless & 0x100) != 0)
3033 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
3034 else
3035 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
3036 #endif
3037 }
3038 else
3039 caseless = 0;
3040
/* Copy the code units of this character while there is room in chars[]. The
stored value already has the case mask OR-ed in. */
3041 while (len > 0 && location < MAX_N_CHARS * 2)
3042 {
3043 c = *cc;
3044 bit = 0;
3045 if (len == (caseless & 0xff))
3046 {
3047 bit = caseless >> 8;
3048 c |= bit;
3049 }
3050
3051 chars[location] = c;
3052 chars[location + 1] = bit;
3053
3054 len--;
3055 location += 2;
3056 cc++;
3057 }
3058
3059 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3060 break;
3061 }
3062
3063 /* At least two characters are required. */
3064 if (location < 2 * 2)
3065 return FALSE;
3066
/* Code emission starts here. STR_END is lowered by (number of collected units
- 1) code units for the duration of the loop, presumably so that the
look-ahead loads below stay inside the searched range. */
3067 if (firstline)
3068 {
3069 SLJIT_ASSERT(common->first_line_end != 0);
3070 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3071 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3072 }
3073 else
3074 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3075
3076 start = LABEL();
3077 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3078
/* TMP1 = first unit, TMP2 = second unit; STR_PTR is then advanced by one, so
a mismatch can jump straight back to start for the next position. */
3079 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3080 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3081 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3082 if (chars[1] != 0)
3083 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3084 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* With three collected units, TMP1 is reloaded with the third one (offset 1
from the already advanced STR_PTR). */
3085 if (location > 2 * 2)
3086 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3087 if (chars[3] != 0)
3088 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3089 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3090 if (location > 2 * 2)
3091 {
3092 if (chars[5] != 0)
3093 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3094 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3095 }
/* Every unit matched: move STR_PTR back to the first unit. */
3096 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3097
3098 JUMPHERE(quit);
3099
/* Give STR_END its original value back. */
3100 if (firstline)
3101 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3102 else
3103 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3104 return TRUE;
3105 }
3106
3107 #undef MAX_N_CHARS
3108
3109 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3110 {
3111 DEFINE_COMPILER;
3112 struct sljit_label *start;
3113 struct sljit_jump *quit;
3114 struct sljit_jump *found;
3115 pcre_uchar oc, bit;
3116
3117 if (firstline)
3118 {
3119 SLJIT_ASSERT(common->first_line_end != 0);
3120 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3121 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3122 }
3123
3124 start = LABEL();
3125 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3126 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3127
3128 oc = first_char;
3129 if (caseless)
3130 {
3131 oc = TABLE_GET(first_char, common->fcc, first_char);
3132 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3133 if (first_char > 127 && common->utf)
3134 oc = UCD_OTHERCASE(first_char);
3135 #endif
3136 }
3137 if (first_char == oc)
3138 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3139 else
3140 {
3141 bit = first_char ^ oc;
3142 if (is_powerof2(bit))
3143 {
3144 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3145 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3146 }
3147 else
3148 {
3149 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3150 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3151 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3152 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3153 found = JUMP(SLJIT_C_NOT_ZERO);
3154 }
3155 }
3156
3157 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3158 JUMPTO(SLJIT_JUMP, start);
3159 JUMPHERE(found);
3160 JUMPHERE(quit);
3161
3162 if (firstline)
3163 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3164 }
3165
3166 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3167 {
3168 DEFINE_COMPILER;
3169 struct sljit_label *loop;
3170 struct sljit_jump *lastchar;
3171 struct sljit_jump *firstchar;
3172 struct sljit_jump *quit;
3173 struct sljit_jump *foundcr = NULL;
3174 struct sljit_jump *notfoundnl;
3175 jump_list *newline = NULL;
3176
3177 if (firstline)
3178 {
3179 SLJIT_ASSERT(common->first_line_end != 0);
3180 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3181 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3182 }
3183
3184 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3185 {
3186 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3187 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3188 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3189 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3190 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3191
3192 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3193 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3194 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3195 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3196 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3197 #endif
3198 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3199
3200 loop = LABEL();
3201 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3202 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3203 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3204 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3205 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3206 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3207
3208 JUMPHERE(quit);
3209 JUMPHERE(firstchar);
3210 JUMPHERE(lastchar);
3211
3212 if (firstline)
3213 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3214 return;
3215 }
3216
3217 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3218 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3219 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3220 skip_char_back(common);
3221
3222 loop = LABEL();
3223 read_char(common);
3224 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3225 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3226 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3227 check_newlinechar(common, common->nltype, &newline, FALSE);
3228 set_jumps(newline, loop);
3229
3230 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3231 {
3232 quit = JUMP(SLJIT_JUMP);
3233 JUMPHERE(foundcr);
3234 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3235 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3236 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3237 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3238 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3239 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3240 #endif
3241 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3242 JUMPHERE(notfoundnl);
3243 JUMPHERE(quit);
3244 }
3245 JUMPHERE(lastchar);
3246 JUMPHERE(firstchar);
3247
3248 if (firstline)
3249 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3250 }
3251
3252 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3253
3254 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3255 {
3256 DEFINE_COMPILER;
3257 struct sljit_label *start;
3258 struct sljit_jump *quit;
3259 struct sljit_jump *found = NULL;
3260 jump_list *matches = NULL;
3261 pcre_uint8 inverted_start_bits[32];
3262 int i;
3263 #ifndef COMPILE_PCRE8
3264 struct sljit_jump *jump;
3265 #endif
3266
3267 for (i = 0; i < 32; ++i)
3268 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3269
3270 if (firstline)
3271 {
3272 SLJIT_ASSERT(common->first_line_end != 0);
3273 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3274 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3275 }
3276
3277 start = LABEL();
3278 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3279 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3280 #ifdef SUPPORT_UTF
3281 if (common->utf)
3282 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3283 #endif
3284
3285 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3286 {
3287 #ifndef COMPILE_PCRE8
3288 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3289 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3290 JUMPHERE(jump);
3291 #endif
3292 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3293 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3294 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3295 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3296 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3297 found = JUMP(SLJIT_C_NOT_ZERO);
3298 }
3299
3300 #ifdef SUPPORT_UTF
3301 if (common->utf)
3302 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3303 #endif
3304 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3305 #ifdef SUPPORT_UTF
3306 #if defined COMPILE_PCRE8
3307 if (common->utf)
3308 {
3309 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3310 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3311 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3312 }
3313 #elif defined COMPILE_PCRE16
3314 if (common->utf)
3315 {
3316 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3317 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3318 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3319 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3320 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3321 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3322 }
3323 #endif /* COMPILE_PCRE[8|16] */
3324 #endif /* SUPPORT_UTF */
3325 JUMPTO(SLJIT_JUMP, start);
3326 if (found != NULL)
3327 JUMPHERE(found);
3328 if (matches != NULL)
3329 set_jumps(matches, LABEL());
3330 JUMPHERE(quit);
3331
3332 if (firstline)
3333 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3334 }
3335
3336 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3337 {
3338 DEFINE_COMPILER;
3339 struct sljit_label *loop;
3340 struct sljit_jump *toolong;
3341 struct sljit_jump *alreadyfound;
3342 struct sljit_jump *found;
3343 struct sljit_jump *foundoc = NULL;
3344 struct sljit_jump *notfound;
3345 pcre_uint32 oc, bit;
3346
3347 SLJIT_ASSERT(common->req_char_ptr != 0);
3348 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3349 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3350 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3351 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3352
3353 if (has_firstchar)
3354 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3355 else
3356 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3357
3358 loop = LABEL();
3359 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3360
3361 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3362 oc = req_char;
3363 if (caseless)
3364 {
3365 oc = TABLE_GET(req_char, common->fcc, req_char);
3366 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3367 if (req_char > 127 && common->utf)
3368 oc = UCD_OTHERCASE(req_char);
3369 #endif
3370 }
3371 if (req_char == oc)
3372 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3373 else
3374 {
3375 bit = req_char ^ oc;
3376 if (is_powerof2(bit))
3377 {
3378 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3379 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3380 }
3381 else
3382 {
3383 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3384 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3385 }
3386 }
3387 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3388 JUMPTO(SLJIT_JUMP, loop);
3389
3390 JUMPHERE(found);
3391 if (foundoc)
3392 JUMPHERE(foundoc);
3393 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3394 JUMPHERE(alreadyfound);
3395 JUMPHERE(toolong);
3396 return notfound;
3397 }
3398
3399 static void do_revertframes(compiler_common *common)
3400 {
3401 DEFINE_COMPILER;
3402 struct sljit_jump *jump;
3403 struct sljit_label *mainloop;
3404
3405 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3406 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3407 GET_LOCAL_BASE(TMP3, 0, 0);
3408
3409 /* Drop frames until we reach STACK_TOP. */
3410 mainloop = LABEL();
3411 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3412 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3413 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3414
3415 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3416 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3417 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3418 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3419 JUMPTO(SLJIT_JUMP, mainloop);
3420
3421 JUMPHERE(jump);
3422 jump = JUMP(SLJIT_C_SIG_LESS);
3423 /* End of dropping frames. */
3424 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3425
3426 JUMPHERE(jump);
3427 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3428 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3429 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3430 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3431 JUMPTO(SLJIT_JUMP, mainloop);
3432 }
3433
3434 static void check_wordboundary(compiler_common *common)
3435 {
3436 DEFINE_COMPILER;
3437 struct sljit_jump *skipread;
3438 jump_list *skipread_list = NULL;
3439 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3440 struct sljit_jump *jump;
3441 #endif
3442
3443 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3444
3445 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3446 /* Get type of the previous char, and put it to LOCALS1. */
3447 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3448 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3449 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3450 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3451 skip_char_back(common);
3452 check_start_used_ptr(common);
3453 read_char(common);
3454
3455 /* Testing char type. */
3456 #ifdef SUPPORT_UCP
3457 if (common->use_ucp)
3458 {
3459 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3460 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3461 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3462 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3463 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3464 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3465 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3466 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3467 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3468 JUMPHERE(jump);
3469 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3470 }
3471 else
3472 #endif
3473 {
3474 #ifndef COMPILE_PCRE8
3475 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3476 #elif defined SUPPORT_UTF
3477 /* Here LOCALS1 has already been zeroed. */
3478 jump = NULL;
3479 if (common->utf)
3480 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3481 #endif /* COMPILE_PCRE8 */
3482 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3483 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3484 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3485 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3486 #ifndef COMPILE_PCRE8
3487 JUMPHERE(jump);
3488 #elif defined SUPPORT_UTF
3489 if (jump != NULL)
3490 JUMPHERE(jump);
3491 #endif /* COMPILE_PCRE8 */
3492 }
3493 JUMPHERE(skipread);
3494
3495 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3496 check_str_end(common, &skipread_list);
3497 peek_char(common);
3498
3499 /* Testing char type. This is a code duplication. */
3500 #ifdef SUPPORT_UCP
3501 if (common->use_ucp)
3502 {
3503 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3504 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3505 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3506 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3507 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3508 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3509 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3510 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3511 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3512 JUMPHERE(jump);
3513 }
3514 else
3515 #endif
3516 {
3517 #ifndef COMPILE_PCRE8
3518 /* TMP2 may be destroyed by peek_char. */
3519 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3520 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3521 #elif defined SUPPORT_UTF
3522 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3523 jump = NULL;
3524 if (common->utf)
3525 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3526 #endif
3527 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3528 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3529 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3530 #ifndef COMPILE_PCRE8
3531 JUMPHERE(jump);
3532 #elif defined SUPPORT_UTF
3533 if (jump != NULL)
3534 JUMPHERE(jump);
3535 #endif /* COMPILE_PCRE8 */
3536 }
3537 set_jumps(skipread_list, LABEL());
3538
3539 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3540 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3541 }
3542
/*
Range format:

  ranges[0] = number of bit-change positions stored (at most MAX_RANGE_SIZE;
              -1 means the range is invalid).
  ranges[1] = value of the first bit (0 or 1).
  ranges[2] .. ranges[ranges[0] + 1] = the positions at which the bit value
              changes, i.e. where the current bit differs from the previous one.
*/
3550
3551 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3552 {
3553 DEFINE_COMPILER;
3554 struct sljit_jump *jump;
3555
3556 if (ranges[0] < 0)
3557 return FALSE;
3558
3559 switch(ranges[0])
3560 {
3561 case 1:
3562 if (readch)
3563 read_char(common);
3564 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3565 return TRUE;
3566
3567 case 2:
3568 if (readch)
3569 read_char(common);
3570 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3571 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3572 return TRUE;
3573
3574 case 4:
3575 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3576 {
3577 if (readch)
3578 read_char(common);
3579 if (ranges[1] != 0)
3580 {
3581 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3582 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3583 }
3584 else
3585 {
3586 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3587 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3588 JUMPHERE(jump);
3589 }
3590 return TRUE;
3591 }
3592 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3593 {
3594 if (readch)
3595 read_char(common);
3596 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3597 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3598 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3599 return TRUE;
3600 }
3601 return FALSE;
3602
3603 default:
3604 return FALSE;
3605 }
3606 }
3607
3608 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3609 {
3610 int i, bit, length;
3611 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3612
3613 bit = ctypes[0] & flag;
3614 ranges[0] = -1;
3615 ranges[1] = bit != 0 ? 1 : 0;
3616 length = 0;
3617
3618 for (i = 1; i < 256; i++)
3619 if ((ctypes[i] & flag) != bit)
3620 {
3621 if (length >= MAX_RANGE_SIZE)
3622 return;
3623 ranges[2 + length] = i;
3624 length++;
3625 bit ^= flag;
3626 }
3627
3628 if (bit != 0)
3629 {
3630 if (length >= MAX_RANGE_SIZE)
3631 return;
3632 ranges[2 + length] = 256;
3633 length++;
3634 }
3635 ranges[0] = length;
3636 }
3637
3638 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3639 {
3640 int ranges[2 + MAX_RANGE_SIZE];
3641 pcre_uint8 bit, cbit, all;
3642 int i, byte, length = 0;
3643
3644 bit = bits[0] & 0x1;
3645 ranges[1] = bit;
3646 /* Can be 0 or 255. */
3647 all = -bit;
3648
3649 for (i = 0; i < 256; )
3650 {
3651 byte = i >> 3;
3652 if ((i & 0x7) == 0 && bits[byte] == all)
3653 i += 8;
3654 else
3655 {
3656 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3657 if (cbit != bit)
3658 {
3659 if (length >= MAX_RANGE_SIZE)
3660 return FALSE;
3661 ranges[2 + length] = i;
3662 length++;
3663 bit = cbit;
3664 all = -cbit;
3665 }
3666 i++;
3667 }
3668 }
3669
3670 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3671 {
3672 if (length >= MAX_RANGE_SIZE)
3673 return FALSE;
3674 ranges[2 + length] = 256;
3675 length++;
3676 }
3677 ranges[0] = length;
3678
3679 return check_ranges(common, ranges, backtracks, FALSE);
3680 }
3681
3682 static void check_anynewline(compiler_common *common)
3683 {
3684 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3685 DEFINE_COMPILER;
3686
3687 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3688
3689 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3690 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3691 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3692 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3693 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3694 #ifdef COMPILE_PCRE8
3695 if (common->utf)
3696 {
3697 #endif
3698 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3699 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3700 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3701 #ifdef COMPILE_PCRE8
3702 }
3703 #endif
3704 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3705 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3706 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3707 }
3708
3709 static void check_hspace(compiler_common *common)
3710 {
3711 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3712 DEFINE_COMPILER;
3713
3714 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3715
3716 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3717 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3718 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3719 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3720 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3721 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3722 #ifdef COMPILE_PCRE8
3723 if (common->utf)
3724 {
3725 #endif
3726 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3727 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3728 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3729 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3730 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3731 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3732 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3733 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3734 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3735 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3736 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3737 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3738 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3739 #ifdef COMPILE_PCRE8
3740 }
3741 #endif
3742 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3743 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3744
3745 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3746 }
3747
3748 static void check_vspace(compiler_common *common)
3749 {
3750 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3751 DEFINE_COMPILER;
3752
3753 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3754
3755 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3756 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3757 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3758 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3759 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3760 #ifdef COMPILE_PCRE8
3761 if (common->utf)
3762 {
3763 #endif
3764 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3765 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3766 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3767 #ifdef COMPILE_PCRE8
3768 }
3769 #endif
3770 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3771 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3772
3773 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3774 }
3775
3776 #define CHAR1 STR_END
3777 #define CHAR2 STACK_TOP
3778
3779 static void do_casefulcmp(compiler_common *common)
3780 {
3781 DEFINE_COMPILER;
3782 struct sljit_jump *jump;
3783 struct sljit_label *label;
3784
3785 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3786 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3787 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3788 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3789 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3790 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3791
3792 label = LABEL();
3793 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3794 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3795 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3796 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3797 JUMPTO(SLJIT_C_NOT_ZERO, label);
3798
3799 JUMPHERE(jump);
3800 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3801 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3802 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3803 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3804 }
3805
3806 #define LCC_TABLE STACK_LIMIT
3807
3808 static void do_caselesscmp(compiler_common *common)
3809 {
3810 DEFINE_COMPILER;
3811 struct sljit_jump *jump;
3812 struct sljit_label *label;
3813
3814 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3815 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3816
3817 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3818 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3819 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3820 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3821 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3822 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3823
3824 label = LABEL();
3825 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3826 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3827 #ifndef COMPILE_PCRE8
3828 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3829 #endif
3830 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3831 #ifndef COMPILE_PCRE8
3832 JUMPHERE(jump);
3833 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3834 #endif
3835 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3836 #ifndef COMPILE_PCRE8
3837 JUMPHERE(jump);
3838 #endif
3839 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3840 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3841 JUMPTO(SLJIT_C_NOT_ZERO, label);
3842
3843 JUMPHERE(jump);
3844 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3845 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3846 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3847 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3848 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3849 }
3850
3851 #undef LCC_TABLE
3852 #undef CHAR1
3853 #undef CHAR2
3854
3855 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3856
3857 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3858 {
3859 /* This function would be ineffective to do in JIT level. */
3860 pcre_uint32 c1, c2;
3861 const pcre_uchar *src2 = args->uchar_ptr;
3862 const pcre_uchar *end2 = args->end;
3863 const ucd_record *ur;
3864 const pcre_uint32 *pp;
3865
3866 while (src1 < end1)
3867 {
3868 if (src2 >= end2)
3869 return (pcre_uchar*)1;
3870 GETCHARINC(c1, src1);
3871 GETCHARINC(c2, src2);
3872 ur = GET_UCD(c2);
3873 if (c1 != c2 && c1 != c2 + ur->other_case)
3874 {
3875 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3876 for (;;)
3877 {
3878 if (c1 < *pp) return NULL;
3879 if (c1 == *pp++) break;
3880 }
3881 }
3882 }
3883 return src2;
3884 }
3885
3886 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3887
3888 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3889 compare_context* context, jump_list **backtracks)
3890 {
3891 DEFINE_COMPILER;
3892 unsigned int othercasebit = 0;
3893 pcre_uchar *othercasechar = NULL;
3894 #ifdef SUPPORT_UTF
3895 int utflength;
3896 #endif
3897
3898 if (caseless && char_has_othercase(common, cc))
3899 {
3900 othercasebit = char_get_othercase_bit(common, cc);
3901 SLJIT_ASSERT(othercasebit);
3902 /* Extracting bit difference info. */
3903 #if defined COMPILE_PCRE8
3904 othercasechar = cc + (othercasebit >> 8);
3905 othercasebit &= 0xff;
3906 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3907 /* Note that this code only handles characters in the BMP. If there
3908 ever are characters outside the BMP whose othercase differs in only one
3909 bit from itself (there currently are none), this code will need to be
3910 revised for COMPILE_PCRE32. */
3911 othercasechar = cc + (othercasebit >> 9);
3912 if ((othercasebit & 0x100) != 0)
3913 othercasebit = (othercasebit & 0xff) << 8;
3914 else
3915 othercasebit &= 0xff;
3916 #endif /* COMPILE_PCRE[8|16|32] */
3917 }
3918
3919 if (context->sourcereg == -1)
3920 {
3921 #if defined COMPILE_PCRE8
3922 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3923 if (context->length >= 4)
3924 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3925 else if (context->length >= 2)
3926 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3927 else
3928 #endif
3929 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3930 #elif defined COMPILE_PCRE16
3931 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3932 if (context->length >= 4)
3933 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3934 else
3935 #endif
3936 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3937 #elif defined COMPILE_PCRE32
3938 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3939 #endif /* COMPILE_PCRE[8|16|32] */
3940 context->sourcereg = TMP2;
3941 }
3942
3943 #ifdef SUPPORT_UTF
3944 utflength = 1;
3945 if (common->utf && HAS_EXTRALEN(*cc))
3946 utflength += GET_EXTRALEN(*cc);
3947
3948 do
3949 {
3950 #endif
3951
3952 context->length -= IN_UCHARS(1);
3953 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3954
3955 /* Unaligned read is supported. */
3956 if (othercasebit != 0 && othercasechar == cc)
3957 {
3958 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3959 context->oc.asuchars[context->ucharptr] = othercasebit;
3960 }
3961 else
3962 {
3963 context->c.asuchars[context->ucharptr] = *cc;
3964 context->oc.asuchars[context->ucharptr] = 0;
3965 }
3966 context->ucharptr++;
3967
3968 #if defined COMPILE_PCRE8
3969 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3970 #else
3971 if (context->ucharptr >= 2 || context->length == 0)
3972 #endif
3973 {
3974 if (context->length >= 4)
3975 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3976 else if (context->length >= 2)
3977 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3978 #if defined COMPILE_PCRE8
3979 else if (context->length >= 1)
3980 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3981 #endif /* COMPILE_PCRE8 */
3982 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3983
3984 switch(context->ucharptr)
3985 {
3986 case 4 / sizeof(pcre_uchar):
3987 if (context->oc.asint != 0)
3988 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3989 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3990 break;
3991
3992 case 2 / sizeof(pcre_uchar):
3993 if (context->oc.asushort != 0)
3994 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3995 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3996 break;
3997
3998 #ifdef COMPILE_PCRE8
3999 case 1:
4000 if (context->oc.asbyte != 0)
4001 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4002 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4003 break;
4004 #endif
4005
4006 default:
4007 SLJIT_ASSERT_STOP();
4008 break;
4009 }
4010 context->ucharptr = 0;
4011 }
4012
4013 #else
4014
4015 /* Unaligned read is unsupported or in 32 bit mode. */
4016 if (context->length >= 1)
4017 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4018
4019 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4020
4021 if (othercasebit != 0 && othercasechar == cc)
4022 {
4023 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4024 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4025 }
4026 else
4027 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4028
4029 #endif
4030
4031 cc++;
4032 #ifdef SUPPORT_UTF
4033 utflength--;
4034 }
4035 while (utflength > 0);
4036 #endif
4037
4038 return cc;
4039 }
4040
4041 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4042
/* SET_TYPE_OFFSET(value): when value differs from the current typeoffset,
emits one SUB or ADD on typereg for the difference, then records value as the
new typeoffset. Expects 'typereg' and 'typeoffset' to be in scope, and value
to be free of side effects (it is evaluated more than once).

The body is wrapped in do/while(0) so that the macro expands to exactly one
statement; without it the trailing assignment would escape an unbraced if or
else. Invoke it with a terminating semicolon. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

/* SET_CHAR_OFFSET(value): the same adjustment for the character kept in TMP1,
tracked by 'charoffset' (which must be in scope). value is evaluated more than
once. Invoke it with a terminating semicolon. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4062
/* Emits the matching-path code for an extended character class.

   cc points at the class flag unit (XCL_NOT / XCL_MAP bits). It is followed
   by an optional 32-byte bitmap (present when XCL_MAP is set) and by a list
   of XCL_SINGLE, XCL_RANGE, XCL_PROP and XCL_NOTPROP items terminated by
   XCL_END. One subject character is read; failure jumps are appended to
   *backtracks.

   For a non-negated class the per-item "matched" jumps are collected in the
   local list `found` and bound to the end of the generated code; for a
   negated class (XCL_NOT) a matching item jumps straight to *backtracks.
   The item list is walked twice: the first pass only records which data
   (character, type, script) the items need, the second pass emits the
   compares. */
4063 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4064 {
4065 DEFINE_COMPILER;
4066 jump_list *found = NULL;
4067 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
4068 pcre_int32 c, charoffset;
4069 const pcre_uint32 *other_cases;
4070 struct sljit_jump *jump = NULL;
4071 pcre_uchar *ccbegin;
4072 int compares, invertcmp, numberofcmps;
4073 #ifdef SUPPORT_UCP
4074 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4075 BOOL charsaved = FALSE;
4076 int typereg = TMP1, scriptreg = TMP1;
4077 pcre_int32 typeoffset;
4078 #endif
4079
4080 /* Although SUPPORT_UTF must be defined, we are
4081 not necessarily in utf mode, even in 8 bit mode. */
4082 detect_partial_match(common, backtracks);
4083 read_char(common);
4084
/* Optional bitmap: the character is saved in TMP3 while TMP1/TMP2 are used
   for the bit test, and restored afterwards. Characters above 255 skip the
   bitmap test (always in non-8-bit builds, only in utf mode in 8-bit builds). */
4085 if ((*cc++ & XCL_MAP) != 0)
4086 {
4087 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4088 #ifndef COMPILE_PCRE8
4089 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4090 #elif defined SUPPORT_UTF
4091 if (common->utf)
4092 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4093 #endif
4094
4095 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
4096 {
/* Generic bitmap lookup: byte index is (ch >> 3), bit mask is 1 << (ch & 7). */
4097 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4098 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4099 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4100 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4101 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4102 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4103 }
4104
4105 #ifndef COMPILE_PCRE8
4106 JUMPHERE(jump);
4107 #elif defined SUPPORT_UTF
4108 if (common->utf)
4109 JUMPHERE(jump);
4110 #endif
4111 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4112 #ifdef SUPPORT_UCP
4113 charsaved = TRUE;
4114 #endif
4115 cc += 32 / sizeof(pcre_uchar);
4116 }
4117
4118 /* Scanning the necessary info. */
4119 ccbegin = cc;
4120 compares = 0;
4121 while (*cc != XCL_END)
4122 {
4123 compares++;
4124 if (*cc == XCL_SINGLE)
4125 {
4126 cc += 2;
4127 #ifdef SUPPORT_UTF
4128 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4129 #endif
4130 #ifdef SUPPORT_UCP
4131 needschar = TRUE;
4132 #endif
4133 }
4134 else if (*cc == XCL_RANGE)
4135 {
4136 cc += 2;
4137 #ifdef SUPPORT_UTF
4138 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4139 #endif
4140 cc++;
4141 #ifdef SUPPORT_UTF
4142 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4143 #endif
4144 #ifdef SUPPORT_UCP
4145 needschar = TRUE;
4146 #endif
4147 }
4148 #ifdef SUPPORT_UCP
4149 else
4150 {
4151 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4152 cc++;
4153 switch(*cc)
4154 {
4155 case PT_ANY:
4156 break;
4157
4158 case PT_LAMP:
4159 case PT_GC:
4160 case PT_PC:
4161 case PT_ALNUM:
4162 needstype = TRUE;
4163 break;
4164
4165 case PT_SC:
4166 needsscript = TRUE;
4167 break;
4168
4169 case PT_SPACE:
4170 case PT_PXSPACE:
4171 case PT_WORD:
4172 needstype = TRUE;
4173 needschar = TRUE;
4174 break;
4175
4176 case PT_CLIST:
4177 case PT_UCNC:
4178 needschar = TRUE;
4179 break;
4180
4181 default:
4182 SLJIT_ASSERT_STOP();
4183 break;
4184 }
4185 cc += 2;
4186 }
4187 #endif
4188 }
4189
4190 #ifdef SUPPORT_UCP
4191 /* Simple register allocation. TMP1 is preferred if possible. */
4192 if (needstype || needsscript)
4193 {
4194 if (needschar && !charsaved)
4195 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4196 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4197 if (needschar)
4198 {
4199 if (needstype)
4200 {
4201 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4202 typereg = RETURN_ADDR;
4203 }
4204
4205 if (needsscript)
4206 scriptreg = TMP3;
4207 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4208 }
4209 else if (needstype && needsscript)
4210 scriptreg = TMP3;
4211 /* In all other cases only one of them was specified, and that can go in TMP1. */
4212
4213 if (needsscript)
4214 {
4215 if (scriptreg == TMP1)
4216 {
4217 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4218 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4219 }
4220 else
4221 {
4222 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4223 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4224 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4225 }
4226 }
4227 }
4228 #endif
4229
4230 /* Generating code. */
4231 cc = ccbegin;
4232 charoffset = 0;
4233 numberofcmps = 0;
4234 #ifdef SUPPORT_UCP
4235 typeoffset = 0;
4236 #endif
4237
4238 while (*cc != XCL_END)
4239 {
4240 compares--;
/* The last item of a non-negated class is inverted: instead of jumping to
   `found` on a match, it jumps to *backtracks on a mismatch. */
4241 invertcmp = (compares == 0 && list != backtracks);
4242 jump = NULL;
4243
4244 if (*cc == XCL_SINGLE)
4245 {
4246 cc ++;
4247 #ifdef SUPPORT_UTF
4248 if (common->utf)
4249 {
4250 GETCHARINC(c, cc);
4251 }
4252 else
4253 #endif
4254 c = *cc++;
4255
/* Up to three consecutive SINGLE/RANGE results are OR-ed together in TMP2
   before a jump is emitted. */
4256 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4257 {
4258 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4259 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4260 numberofcmps++;
4261 }
4262 else if (numberofcmps > 0)
4263 {
4264 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4265 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4266 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4267 numberofcmps = 0;
4268 }
4269 else
4270 {
4271 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4272 numberofcmps = 0;
4273 }
4274 }
4275 else if (*cc == XCL_RANGE)
4276 {
4277 cc ++;
4278 #ifdef SUPPORT_UTF
4279 if (common->utf)
4280 {
4281 GETCHARINC(c, cc);
4282 }
4283 else
4284 #endif
4285 c = *cc++;
/* Bias TMP1 by the range start so the range test becomes one unsigned
   "less or equal" compare against (end - start). */
4286 SET_CHAR_OFFSET(c);
4287 #ifdef SUPPORT_UTF
4288 if (common->utf)
4289 {
4290 GETCHARINC(c, cc);
4291 }
4292 else
4293 #endif
4294 c = *cc++;
4295 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4296 {
4297 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4298 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4299 numberofcmps++;
4300 }
4301 else if (numberofcmps > 0)
4302 {
4303 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4304 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4305 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4306 numberofcmps = 0;
4307 }
4308 else
4309 {
4310 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4311 numberofcmps = 0;
4312 }
4313 }
4314 #ifdef SUPPORT_UCP
4315 else
4316 {
4317 if (*cc == XCL_NOTPROP)
4318 invertcmp ^= 0x1;
4319 cc++;
4320 switch(*cc)
4321 {
4322 case PT_ANY:
4323 if (list != backtracks)
4324 {
4325 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4326 continue;
4327 }
4328 else if (cc[-1] == XCL_NOTPROP)
4329 continue;
4330 jump = JUMP(SLJIT_JUMP);
4331 break;
4332
4333 case PT_LAMP:
4334 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4335 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4336 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4337 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4338 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4339 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4340 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4341 break;
4342
4343 case PT_GC:
4344 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4345 SET_TYPE_OFFSET(c);
4346 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4347 break;
4348
4349 case PT_PC:
4350 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4351 break;
4352
4353 case PT_SC:
4354 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4355 break;
4356
4357 case PT_SPACE:
4358 case PT_PXSPACE:
/* Characters 9..13 count as space; for PT_SPACE character 11 is skipped
   around the range test with TMP2 preset to 0. */
4359 if (*cc == PT_SPACE)
4360 {
4361 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4362 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4363 }
4364 SET_CHAR_OFFSET(9);
4365 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4366 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4367 if (*cc == PT_SPACE)
4368 JUMPHERE(jump);
4369
4370 SET_TYPE_OFFSET(ucp_Zl);
4371 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4372 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4373 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4374 break;
4375
4376 case PT_WORD:
4377 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4378 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4379 /* Fall through. */
4380
4381 case PT_ALNUM:
4382 SET_TYPE_OFFSET(ucp_Ll);
4383 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4384 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4385 SET_TYPE_OFFSET(ucp_Nd);
4386 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4387 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4388 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4389 break;
4390
4391 case PT_CLIST:
4392 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4393
4394 /* At least three characters are required.
4395 Otherwise this case would be handled by the normal code path. */
4396 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4397 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4398
4399 /* Optimizing character pairs, if their difference is power of 2. */
4400 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4401 {
4402 if (charoffset == 0)
4403 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4404 else
4405 {
4406 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4407 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4408 }
4409 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4410 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4411 other_cases += 2;
4412 }
4413 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4414 {
4415 if (charoffset == 0)
4416 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4417 else
4418 {
4419 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
/* The mask must be the single bit that distinguishes other_cases[1] from
   other_cases[2] (the pair tested by this branch), exactly as in the
   charoffset == 0 path above. */
4420 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4421 }
4422 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4423 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4424
4425 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4426 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4427
4428 other_cases += 3;
4429 }
4430 else
4431 {
4432 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4433 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4434 }
4435
/* Remaining characters of the set are compared one by one; only the last
   OR sets the flags tested by the jump below. */
4436 while (*other_cases != NOTACHAR)
4437 {
4438 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4439 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4440 }
4441 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4442 break;
4443
4444 case PT_UCNC:
4445 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4446 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4447 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4448 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4449 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4450 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4451
4452 SET_CHAR_OFFSET(0xa0);
4453 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4454 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4455 SET_CHAR_OFFSET(0);
4456 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4457 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4458 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4459 break;
4460 }
4461 cc += 2;
4462 }
4463 #endif
4464
4465 if (jump != NULL)
4466 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4467 }
4468
4469 if (found != NULL)
4470 set_jumps(found, LABEL());
4471 }
4472
4473 #undef SET_TYPE_OFFSET
4474 #undef SET_CHAR_OFFSET
4475
4476 #endif
4477
/* Emits the matching-path code for a single opcode.

   type is the opcode to compile and cc points at its operand data (the
   caller has already stepped over the opcode itself). Jumps that must be
   taken when the item does not match are appended to *backtracks.

   Returns a pointer just past the operand data consumed for this opcode
   (cc itself for opcodes without operands). An opcode that is not handled
   by the switch reaches SLJIT_ASSERT_STOP(). */
4478 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4479 {
4480 DEFINE_COMPILER;
4481 int length;
4482 unsigned int c, oc, bit;
4483 compare_context context;
4484 struct sljit_jump *jump[4];
4485 jump_list *end_list;
4486 #ifdef SUPPORT_UTF
4487 struct sljit_label *label;
4488 #ifdef SUPPORT_UCP
4489 pcre_uchar propdata[5];
4490 #endif
4491 #endif
4492
4493 switch(type)
4494 {
4495 case OP_SOD:
/* Backtrack unless STR_PTR equals the `begin` field of jit_arguments. */
4496 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4497 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4498 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4499 return cc;
4500
4501 case OP_SOM:
/* Backtrack unless STR_PTR equals the `str` field of jit_arguments. */
4502 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4503 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4504 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4505 return cc;
4506
4507 case OP_NOT_WORD_BOUNDARY:
4508 case OP_WORD_BOUNDARY:
/* The shared wordboundary routine is reached through a fast call; its
   outcome is tested through the zero flag afterwards. */
4509 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4510 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4511 return cc;
4512
4513 case OP_NOT_DIGIT:
4514 case OP_DIGIT:
4515 /* Digits are usually 0-9, so it is worth to optimize them. */
4516 if (common->digits[0] == -2)
4517 get_ctype_ranges(common, ctype_digit, common->digits);
4518 detect_partial_match(common, backtracks);
4519 /* Flip the starting bit in the negative case. */
4520 if (type == OP_NOT_DIGIT)
4521 common->digits[1] ^= 1;
4522 if (!check_ranges(common, common->digits, backtracks, TRUE))
4523 {
4524 read_char8_type(common);
4525 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4526 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4527 }
/* Undo the temporary flip so common->digits is left unchanged. */
4528 if (type == OP_NOT_DIGIT)
4529 common->digits[1] ^= 1;
4530 return cc;
4531
4532 case OP_NOT_WHITESPACE:
4533 case OP_WHITESPACE:
4534 detect_partial_match(common, backtracks);
4535 read_char8_type(common);
4536 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4537 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4538 return cc;
4539
4540 case OP_NOT_WORDCHAR:
4541 case OP_WORDCHAR:
4542 detect_partial_match(common, backtracks);
4543 read_char8_type(common);
4544 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4545 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4546 return cc;
4547
4548 case OP_ANY:
4549 detect_partial_match(common, backtracks);
4550 read_char(common);
4551 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4552 {
/* Two-unit fixed newline: the first unit is (newline >> 8) & 0xff, the
   second is newline & 0xff. Backtrack only when both units are present. */
4553 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4554 end_list = NULL;
4555 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4556 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4557 else
4558 check_str_end(common, &end_list);
4559
4560 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4561 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4562 set_jumps(end_list, LABEL());
4563 JUMPHERE(jump[0]);
4564 }
4565 else
4566 check_newlinechar(common, common->nltype, backtracks, TRUE);
4567 return cc;
4568
4569 case OP_ALLANY:
4570 detect_partial_match(common, backtracks);
4571 #ifdef SUPPORT_UTF
4572 if (common->utf)
4573 {
/* Step over the first code unit, then over any trailing units of a
   multi-unit character. */
4574 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4575 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4576 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4577 #if defined COMPILE_PCRE8
/* Lead units >= 0xc0: the extra length is looked up in utf8_table4. */
4578 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4579 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4580 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4581 #elif defined COMPILE_PCRE16
/* Units whose top six bits equal 0xd800 add one more 16-bit unit
   (flag value 1 shifted left by 1 = 2 bytes). */
4582 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4583 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4584 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4585 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4586 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4587 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4588 #endif
4589 JUMPHERE(jump[0]);
4590 #endif /* COMPILE_PCRE[8|16] */
4591 return cc;
4592 }
4593 #endif
4594 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4595 return cc;
4596
4597 case OP_ANYBYTE:
4598 detect_partial_match(common, backtracks);
4599 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4600 return cc;
4601
4602 #ifdef SUPPORT_UTF
4603 #ifdef SUPPORT_UCP
4604 case OP_NOTPROP:
4605 case OP_PROP:
/* Build a one-item extended class in propdata (flag unit 0, the property
   item with its two operand units, XCL_END) and reuse the xclass compiler. */
4606 propdata[0] = 0;
4607 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4608 propdata[2] = cc[0];
4609 propdata[3] = cc[1];
4610 propdata[4] = XCL_END;
4611 compile_xclass_matchingpath(common, propdata, backtracks);
4612 return cc + 2;
4613 #endif
4614 #endif
4615
4616 case OP_ANYNL:
4617 detect_partial_match(common, backtracks);
4618 read_char(common);
4619 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4620 /* We don't need to handle soft partial matching case. */
4621 end_list = NULL;
4622 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4623 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4624 else
4625 check_str_end(common, &end_list);
/* After a CR, also consume an immediately following NL. */
4626 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4627 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4628 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4629 jump[2] = JUMP(SLJIT_JUMP);
4630 JUMPHERE(jump[0]);
4631 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4632 set_jumps(end_list, LABEL());
4633 JUMPHERE(jump[1]);
4634 JUMPHERE(jump[2]);
4635 return cc;
4636
4637 case OP_NOT_HSPACE:
4638 case OP_HSPACE:
4639 detect_partial_match(common, backtracks);
4640 read_char(common);
4641 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4642 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4643 return cc;
4644
4645 case OP_NOT_VSPACE:
4646 case OP_VSPACE:
4647 detect_partial_match(common, backtracks);
4648 read_char(common);
4649 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4650 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4651 return cc;
4652
4653 #ifdef SUPPORT_UCP
4654 case OP_EXTUNI:
4655 detect_partial_match(common, backtracks);
4656 read_char(common);
4657 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4658 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4659 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and reused to hold the gbprop value of the
   previous character. */
4660 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4661 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4662
/* Loop: read the next character (its start is kept in TMP3) and continue
   while bit (1 << its gbprop) is set in the ucp_gbtable entry of the
   previous character's gbprop. */
4663 label = LABEL();
4664 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4665 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4666 read_char(common);
4667 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4668 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4669 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4670
4671 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4672 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4673 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4674 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4675 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4676 JUMPTO(SLJIT_C_NOT_ZERO, label);
4677
/* The last character read is not part of the sequence: step back to its
   start, then restore STACK_TOP. */
4678 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4679 JUMPHERE(jump[0]);
4680 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4681
4682 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4683 {
4684 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4685 /* Since we successfully read a char above, partial matching must occur. */
4686 check_partial(common, TRUE);
4687 JUMPHERE(jump[0]);
4688 }
4689 return cc;
4690 #endif
4691
4692 case OP_EODN:
4693 /* Requires rather complex checks. */
/* jump[0] is taken when STR_PTR is already at STR_END; otherwise the code
   below requires that exactly one newline remains before STR_END. */
4694 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4695 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4696 {
4697 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4698 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4699 if (common->mode == JIT_COMPILE)
4700 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4701 else
4702 {
4703 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4704 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4705 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4706 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4707 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4708 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4709 check_partial(common, TRUE);
4710 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4711 JUMPHERE(jump[1]);
4712 }
4713 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4714 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4715 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4716 }
4717 else if (common->nltype == NLTYPE_FIXED)
4718 {
4719 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4720 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4721 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4722 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4723 }
4724 else
4725 {
4726 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4727 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4728 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4729 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4730 jump[2] = JUMP(SLJIT_C_GREATER);
4731 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4732 /* Equal. */
4733 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4734 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4735 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4736
4737 JUMPHERE(jump[1]);
4738 if (common->nltype == NLTYPE_ANYCRLF)
4739 {
4740 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4741 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4742 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4743 }
4744 else
4745 {
/* STR_PTR is preserved in LOCALS1 around the read so the assertion does not
   consume input. */
4746 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4747 read_char(common);
4748 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4749 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4750 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4751 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4752 }
4753 JUMPHERE(jump[2]);
4754 JUMPHERE(jump[3]);
4755 }
4756 JUMPHERE(jump[0]);
4757 check_partial(common, FALSE);
4758 return cc;
4759
4760 case OP_EOD:
4761 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4762 check_partial(common, FALSE);
4763 return cc;
4764
4765 case OP_CIRC:
/* Backtrack when STR_PTR is past `begin` or when the notbol flag is set. */
4766 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4767 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4768 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4769 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4770 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4771 return cc;
4772
4773 case OP_CIRCM:
/* At `begin` only the notbol flag is checked; elsewhere (and not at
   STR_END) the preceding character(s) must form a newline. */
4774 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4775 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4776 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4777 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4778 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4779 jump[0] = JUMP(SLJIT_JUMP);
4780 JUMPHERE(jump[1]);
4781
4782 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4783 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4784 {
4785 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4786 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4787 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4788 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4789 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4790 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4791 }
4792 else
4793 {
4794 skip_char_back(common);
4795 read_char(common);
4796 check_newlinechar(common, common->nltype, backtracks, FALSE);
4797 }
4798 JUMPHERE(jump[0]);
4799 return cc;
4800
4801 case OP_DOLL:
/* Backtrack when the noteol flag is set; otherwise behave like OP_EODN,
   or like a plain end-of-subject test when common->endonly is set. */
4802 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4803 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4804 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4805
4806 if (!common->endonly)
4807 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4808 else
4809 {
4810 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4811 check_partial(common, FALSE);
4812 }
4813 return cc;
4814
4815 case OP_DOLLM:
/* At STR_END only the noteol flag is checked; before STR_END a newline must
   start at STR_PTR. */
4816 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4817 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4818 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4819 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4820 check_partial(common, FALSE);
4821 jump[0] = JUMP(SLJIT_JUMP);
4822 JUMPHERE(jump[1]);
4823
4824 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4825 {
4826 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4827 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4828 if (common->mode == JIT_COMPILE)
4829 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4830 else
4831 {
4832 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4833 /* STR_PTR = STR_END - IN_UCHARS(1) */
4834 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4835 check_partial(common, TRUE);
4836 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4837 JUMPHERE(jump[1]);
4838 }
4839
4840 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4841 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4842 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4843 }
4844 else
4845 {
4846 peek_char(common);
4847 check_newlinechar(common, common->nltype, backtracks, FALSE);
4848 }
4849 JUMPHERE(jump[0]);
4850 return cc;
4851
4852 case OP_CHAR:
4853 case OP_CHARI:
4854 length = 1;
4855 #ifdef SUPPORT_UTF
4856 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4857 #endif
/* Fast path (non-partial mode): advance STR_PTR by the whole character and
   compare its code units with byte_sequence_compare. Used for OP_CHAR, and
   for OP_CHARI when the character has no other case or
   char_get_othercase_bit() returns non-zero. */
4858 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4859 {
4860 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4861 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4862
4863 context.length = IN_UCHARS(length);
4864 context.sourcereg = -1;
4865 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4866 context.ucharptr = 0;
4867 #endif
4868 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4869 }
/* General path: read one character and compare it with c (and with its
   other case for OP_CHARI). */
4870 detect_partial_match(common, backtracks);
4871 read_char(common);
4872 #ifdef SUPPORT_UTF
4873 if (common->utf)
4874 {
4875 GETCHAR(c, cc);
4876 }
4877 else
4878 #endif
4879 c = *cc;
4880 if (type == OP_CHAR || !char_has_othercase(common, cc))
4881 {
4882 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4883 return cc + length;
4884 }
4885 oc = char_othercase(common, c);
4886 bit = c ^ oc;
/* When the two cases differ in a single bit, forcing that bit on lets one
   compare cover both. */
4887 if (is_powerof2(bit))
4888 {
4889 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4890 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4891 return cc + length;
4892 }
4893 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4894 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4895 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4896 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4897 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4898 return cc + length;
4899
4900 case OP_NOT:
4901 case OP_NOTI:
4902 detect_partial_match(common, backtracks);
4903 length = 1;
4904 #ifdef SUPPORT_UTF
4905 if (common->utf)
4906 {
4907 #ifdef COMPILE_PCRE8
4908 c = *cc;
/* For a pattern character below 128 only the first subject byte is
   compared; the rest of the subject character is skipped afterwards. */
4909 if (c < 128)
4910 {
4911 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4912 if (type == OP_NOT || !char_has_othercase(common, cc))
4913 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4914 else
4915 {
4916 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4917 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4918 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4919 }
4920 /* Skip the variable-length character. */
4921 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4922 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4923 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4924 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4925 JUMPHERE(jump[0]);
4926 return cc + 1;
4927 }
4928 else
4929 #endif /* COMPILE_PCRE8 */
4930 {
4931 GETCHARLEN(c, cc, length);
4932 read_char(common);
4933 }
4934 }
4935 else
4936 #endif /* SUPPORT_UTF */
4937 {
4938 read_char(common);
4939 c = *cc;
4940 }
4941
/* Backtrack when the subject character equals c (or, for OP_NOTI, its
   other case). */
4942 if (type == OP_NOT || !char_has_othercase(common, cc))
4943 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4944 else
4945 {
4946 oc = char_othercase(common, c);
4947 bit = c ^ oc;
4948 if (is_powerof2(bit))
4949 {
4950 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4951 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4952 }
4953 else
4954 {
4955 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4956 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4957 }
4958 }
4959 return cc + length;
4960
4961 case OP_CLASS:
4962 case OP_NCLASS:
4963 detect_partial_match(common, backtracks);
4964 read_char(common);
/* The operand is a 32-byte bitmap; check_class_ranges returns non-zero when
   it has already emitted the whole test. */
4965 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4966 return cc + 32 / sizeof(pcre_uchar);
4967
4968 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4969 jump[0] = NULL;
4970 #ifdef COMPILE_PCRE8
4971 /* This check only affects 8 bit mode. In other modes, we
4972 always need to compare the value with 255. */
4973 if (common->utf)
4974 #endif /* COMPILE_PCRE8 */
4975 {
/* Characters above 255 are outside the bitmap: OP_CLASS backtracks on them,
   OP_NCLASS skips the bitmap test. */
4976 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4977 if (type == OP_CLASS)
4978 {
4979 add_jump(compiler, backtracks, jump[0]);
4980 jump[0] = NULL;
4981 }
4982 }
4983 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Bitmap lookup: byte index is (ch >> 3), bit mask is 1 << (ch & 7). */
4984 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4985 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4986 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4987 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4988 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4989 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4990 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4991 if (jump[0] != NULL)
4992 JUMPHERE(jump[0]);
4993 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4994 return cc + 32 / sizeof(pcre_uchar);
4995
4996 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4997 case OP_XCLASS:
/* The item starts with a LINK_SIZE length field (read with GET); the class
   data follows it. */
4998 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4999 return cc + GET(cc, 0) - 1;
5000 #endif
5001
5002 case OP_REVERSE:
/* Move STR_PTR back by `length` characters, backtracking if that would
   pass the `begin` field of jit_arguments. */
5003 length = GET(cc, 0);
5004 if (length == 0)
5005 return cc + LINK_SIZE;
5006 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5007 #ifdef SUPPORT_UTF
5008 if (common->utf)
5009 {
/* In utf mode characters have variable width, so step back one character
   at a time with TMP2 as the loop counter. */
5010 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5011 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5012 label = LABEL();
5013 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5014 skip_char_back(common);
5015 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5016 JUMPTO(SLJIT_C_NOT_ZERO, label);
5017 }
5018 else
5019 #endif
5020 {
5021 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5022 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5023 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5024 }
5025 check_start_used_ptr(common);
5026 return cc + LINK_SIZE;
5027 }
/* Reached only for an opcode the switch does not handle. */
5028 SLJIT_ASSERT_STOP();
5029 return cc;
5030 }
5031
5032 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5033 {
5034 /* This function consumes at least one input character. */
5035 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5036 DEFINE_COMPILER;
5037 pcre_uchar *ccbegin = cc;
5038 compare_context context;
5039 int size;
5040
5041 context.length = 0;
5042 do
5043 {
5044 if (cc >= ccend)
5045 break;
5046
5047 if (*cc == OP_CHAR)
5048 {
5049 size = 1;
5050 #ifdef SUPPORT_UTF
5051 if (common->utf && HAS_EXTRALEN(cc[1]))
5052 size += GET_EXTRALEN(cc[1]);
5053 #endif
5054 }
5055 else if (*cc == OP_CHARI)
5056 {
5057 size = 1;
5058 #ifdef SUPPORT_UTF
5059 if (common->utf)
5060 {
5061 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5062 size = 0;
5063 else if (HAS_EXTRALEN(cc[1]))
5064 size += GET_EXTRALEN(cc[1]);
5065 }
5066 else
5067 #endif
5068 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5069 size = 0;
5070 }
5071 else
5072 size = 0;
5073
5074 cc += 1 + size;
5075 context.length += IN_UCHARS(size);
5076 }
5077 while (size > 0 && context.length <= 128);
5078
5079 cc = ccbegin;
5080 if (context.length > 0)
5081 {
5082 /* We have a fixed-length byte sequence. */
5083 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5084 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5085
5086 context.sourcereg = -1;
5087 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5088 context.ucharptr = 0;
5089 #endif
5090 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5091 return cc;
5092 }
5093
5094 /* A non-fixed length character will be checked if length == 0. */
5095 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5096 }
5097
5098 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5099 {
5100 DEFINE_COMPILER;
5101 int offset = GET2(cc, 1) << 1;
5102
5103 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5104 if (!common->jscript_compat)
5105 {
5106 if (backtracks == NULL)
5107 {
5108 /* OVECTOR(1) contains the "string begin - 1" constant. */
5109 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5110 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5111 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5112 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5113 return JUMP(SLJIT_C_NOT_ZERO);
5114 }
5115 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5116 }
5117 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5118 }
5119
5120 /* Forward declarations. */
5121 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5122 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5123
/* Shared implementation of the PUSH_BACKTRACK macros.

Allocates a zero-filled backtrack record of "size" bytes from the compiler's
memory pool, stores "ccstart" in its cc field, and links it in front of
parent->top. When the compiler reports an error after the allocation,
"exit_statement" (a return statement) is executed instead.

The expansion relies on these names being in scope at the point of use:
compiler, backtrack and parent. Note that "size" is evaluated twice. */
#define PUSH_BACKTRACK_WITH_EXIT(size, ccstart, exit_statement) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      exit_statement; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* For functions returning a value: "error" is returned on compiler error. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  PUSH_BACKTRACK_WITH_EXIT(size, ccstart, return error)

/* For functions returning void. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  PUSH_BACKTRACK_WITH_EXIT(size, ccstart, return)

/* Views the current backtrack record as a more specific backtrack type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5151
5152 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5153 {
5154 DEFINE_COMPILER;
5155 int offset = GET2(cc, 1) << 1;
5156 struct sljit_jump *jump = NULL;
5157 struct sljit_jump *partial;
5158 struct sljit_jump *nopartial;
5159
5160 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5161 /* OVECTOR(1) contains the "string begin - 1" constant. */
5162 if (withchecks && !common->jscript_compat)
5163 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5164
5165 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5166 if (common->utf && *cc == OP_REFI)
5167 {
5168 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5169 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5170 if (withchecks)
5171 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5172
5173 /* Needed to save important temporary registers. */
5174 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5175 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5177 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5178 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5179 if (common->mode == JIT_COMPILE)
5180 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5181 else
5182 {
5183 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5184 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5185 check_partial(common, FALSE);
5186 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5187 JUMPHERE(nopartial);
5188 }
5189 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5190 }
5191 else
5192 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5193 {
5194 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5195 if (withchecks)
5196 jump = JUMP(SLJIT_C_ZERO);
5197
5198 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5199 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5200 if (common->mode == JIT_COMPILE)
5201 add_jump(compiler, backtracks, partial);
5202
5203 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5204 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5205
5206 if (common->mode != JIT_COMPILE)
5207 {
5208 nopartial = JUMP(SLJIT_JUMP);
5209 JUMPHERE(partial);
5210 /* TMP2 -= STR_END - STR_PTR */
5211 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5212 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5213 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5214 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5215 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5216 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5217 JUMPHERE(partial);
5218 check_partial(common, FALSE);
5219 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5220 JUMPHERE(nopartial);
5221 }
5222 }
5223
5224 if (jump != NULL)
5225 {
5226 if (emptyfail)
5227 add_jump(compiler, backtracks, jump);
5228 else
5229 JUMPHERE(jump);
5230 }
5231 return cc + 1 + IMM2_SIZE;
5232 }
5233
5234 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5235 {
5236 DEFINE_COMPILER;
5237 backtrack_common *backtrack;
5238 pcre_uchar type;
5239 struct sljit_label *label;
5240 struct sljit_jump *zerolength;
5241 struct sljit_jump *jump = NULL;
5242 pcre_uchar *ccbegin = cc;
5243 int min = 0, max = 0;
5244 BOOL minimize;
5245
5246 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5247
5248 type = cc[1 + IMM2_SIZE];
5249 minimize = (type & 0x1) != 0;
5250 switch(type)
5251 {
5252 case OP_CRSTAR:
5253 case OP_CRMINSTAR:
5254 min = 0;
5255 max = 0;
5256 cc += 1 + IMM2_SIZE + 1;
5257 break;
5258 case OP_CRPLUS:
5259 case OP_CRMINPLUS:
5260 min = 1;
5261 max = 0;
5262 cc += 1 + IMM2_SIZE + 1;
5263 break;
5264 case OP_CRQUERY:
5265 case OP_CRMINQUERY:
5266 min = 0;
5267 max = 1;
5268 cc += 1 + IMM2_SIZE + 1;
5269 break;
5270 case OP_CRRANGE:
5271 case OP_CRMINRANGE:
5272 min = GET2(cc, 1 + IMM2_SIZE + 1);
5273 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5274 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5275 break;
5276 default:
5277 SLJIT_ASSERT_STOP();
5278 break;
5279 }
5280
5281 if (!minimize)
5282 {
5283 if (min == 0)
5284 {
5285 allocate_stack(common, 2);
5286 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5287 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5288 /* Temporary release of STR_PTR. */
5289 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5290 zerolength = compile_ref_checks(common, ccbegin, NULL);
5291 /* Restore if not zero length. */
5292 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5293 }
5294 else
5295 {
5296 allocate_stack(common, 1);
5297 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5298 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5299 }
5300
5301 if (min > 1 || max > 1)
5302 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5303
5304 label = LABEL();
5305 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5306
5307 if (min > 1 || max > 1)
5308 {
5309 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5310 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5311 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5312 if (min > 1)
5313 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5314 if (max > 1)
5315 {
5316 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5317 allocate_stack(common, 1);
5318 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5319 JUMPTO(SLJIT_JUMP, label);
5320 JUMPHERE(jump);
5321 }
5322 }
5323
5324 if (max == 0)
5325 {
5326 /* Includes min > 1 case as well. */
5327 allocate_stack(common, 1);
5328 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5329 JUMPTO(SLJIT_JUMP, label);
5330 }
5331
5332 JUMPHERE(zerolength);
5333 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5334
5335 decrease_call_count(common);
5336 return cc;
5337 }
5338
5339 allocate_stack(common, 2);
5340 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5341 if (type != OP_CRMINSTAR)
5342 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5343
5344 if (min == 0)
5345 {
5346 zerolength = compile_ref_checks(common, ccbegin, NULL);
5347 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5348 jump = JUMP(SLJIT_JUMP);
5349 }
5350 else
5351 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5352
5353 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5354 if (max > 0)
5355 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5356
5357 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5358 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5359
5360 if (min > 1)
5361 {
5362 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5363 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5364 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5365 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5366 }
5367 else if (max > 0)
5368 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5369
5370 if (jump != NULL)
5371 JUMPHERE(jump);
5372 JUMPHERE(zerolength);
5373
5374 decrease_call_count(common);
5375 return cc;
5376 }
5377
5378 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5379 {
5380 DEFINE_COMPILER;
5381 backtrack_common *backtrack;
5382 recurse_entry *entry = common->entries;
5383 recurse_entry *prev = NULL;
5384 int start = GET(cc, 1);
5385 pcre_uchar *start_cc;
5386 BOOL needs_control_head;
5387
5388 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5389
5390 /* Inlining simple patterns. */
5391 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5392 {
5393 start_cc = common->start + start;
5394 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5395 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5396 return cc + 1 + LINK_SIZE;
5397 }
5398
5399 while (entry != NULL)
5400 {
5401 if (entry->start == start)
5402 break;
5403 prev = entry;
5404 entry = entry->next;
5405 }
5406
5407 if (entry == NULL)
5408 {
5409 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5410 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5411 return NULL;
5412 entry->next = NULL;
5413 entry->entry = NULL;
5414 entry->calls = NULL;
5415 entry->start = start;
5416
5417 if (prev != NULL)
5418 prev->next = entry;
5419 else
5420 common->entries = entry;
5421 }
5422
5423 if (common->has_set_som && common->mark_ptr != 0)
5424 {
5425 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5426 allocate_stack(common, 2);
5427 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5429 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5430 }
5431 else if (common->has_set_som || common->mark_ptr != 0)
5432 {
5433 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5434 allocate_stack(common, 1);
5435 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5436 }
5437
5438 if (entry->entry == NULL)
5439 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5440 else
5441 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5442 /* Leave if the match is failed. */
5443 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5444 return cc + 1 + LINK_SIZE;
5445 }
5446
5447 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5448 {
5449 const pcre_uchar *begin = arguments->begin;
5450 int *offset_vector = arguments->offsets;
5451 int offset_count = arguments->offset_count;
5452 int i;
5453
5454 if (PUBL(callout) == NULL)
5455 return 0;
5456
5457 callout_block->version = 2;
5458 callout_block->callout_data = arguments->callout_data;
5459
5460 /* Offsets in subject. */
5461 callout_block->subject_length = arguments->end - arguments->begin;
5462 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5463 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5464 #if defined COMPILE_PCRE8
5465 callout_block->subject = (PCRE_SPTR)begin;
5466 #elif defined COMPILE_PCRE16
5467 callout_block->subject = (PCRE_SPTR16)begin;
5468 #elif defined COMPILE_PCRE32
5469 callout_block->subject = (PCRE_SPTR32)begin;
5470 #endif
5471
5472 /* Convert and copy the JIT offset vector to the offset_vector array. */
5473 callout_block->capture_top = 0;
5474 callout_block->offset_vector = offset_vector;
5475 for (i = 2; i < offset_count; i += 2)
5476 {
5477 offset_vector[i] = jit_ovector[i] - begin;
5478 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5479 if (jit_ovector[i] >= begin)
5480 callout_block->capture_top = i;
5481 }
5482
5483 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5484 if (offset_count > 0)
5485 offset_vector[0] = -1;
5486 if (offset_count > 1)
5487 offset_vector[1] = -1;
5488 return (*PUBL(callout))(callout_block);
5489 }
5490
5491 /* Aligning to 8 byte. */
5492 #define CALLOUT_ARG_SIZE \
5493 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5494
5495 #define CALLOUT_ARG_OFFSET(arg) \
5496 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5497
5498 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5499 {
5500 DEFINE_COMPILER;
5501 backtrack_common *backtrack;
5502
5503 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5504
5505 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5506
5507 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5508 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5509 SLJIT_ASSERT(common->capture_last_ptr != 0);
5510 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5511 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5512
5513 /* These pointer sized fields temporarly stores internal variables. */
5514 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5515 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5516 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5517
5518 if (common->mark_ptr != 0)
5519 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5520 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5521 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5522 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5523
5524 /* Needed to save important temporary registers. */
5525 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5526 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5527 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5528 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5529 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5530 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5531 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5532
5533 /* Check return value. */
5534 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5535 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5536 if (common->forced_quit_label == NULL)
5537 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5538 else
5539 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5540 return cc + 2 + 2 * LINK_SIZE;
5541 }
5542
5543 #undef CALLOUT_ARG_SIZE
5544 #undef CALLOUT_ARG_OFFSET
5545
/* Emits the matching path of an assertion. The opcode at cc (after an optional
OP_BRAZERO / OP_BRAMINZERO prefix) must be in the OP_ASSERT .. OP_ASSERTBACK_NOT
range; every alternative of the assertion is compiled in turn.

Arguments:
  common       the compiler state
  cc           the assertion opcode, or the OP_BRAZERO / OP_BRAMINZERO before it
  backtrack    the assert_backtrack record of this assertion; its framesize,
               private_data_ptr and (for the zero-repeat forms) matchingpath
               fields are filled in here
  conditional  when TRUE, failure jumps are collected in backtrack->condfailed
               instead of backtrack->common.topbacktracks; a zero-repeat prefix
               is not allowed in this case

The local_exit, then_trap, quit_label, accept_label, quit and accept members
of common are saved on entry and restored on every exit path.

Returns the code pointer after the last alternative (cc + 1 + LINK_SIZE), or
NULL when the compiler reports an error. */
5546 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5547 {
5548 DEFINE_COMPILER;
5549 int framesize;
5550 int extrasize;
5551 BOOL needs_control_head;
5552 int private_data_ptr;
5553 backtrack_common altbacktrack;
5554 pcre_uchar *ccbegin;
5555 pcre_uchar opcode;
5556 pcre_uchar bra = OP_BRA;
5557 jump_list *tmp = NULL;
5558 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5559 jump_list **found;
5560 /* Saving previous accept variables. */
5561 BOOL save_local_exit = common->local_exit;
5562 then_trap_backtrack *save_then_trap = common->then_trap;
5563 struct sljit_label *save_quit_label = common->quit_label;
5564 struct sljit_label *save_accept_label = common->accept_label;
5565 jump_list *save_quit = common->quit;
5566 jump_list *save_accept = common->accept;
5567 struct sljit_jump *jump;
5568 struct sljit_jump *brajump = NULL;
5569
5570 /* Assert captures then. */
5571 common->then_trap = NULL;
5572
5573 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5574 {
5575 SLJIT_ASSERT(!conditional);
5576 bra = *cc;
5577 cc++;
5578 }
5579 private_data_ptr = PRIVATE_DATA(cc);
5580 SLJIT_ASSERT(private_data_ptr != 0);
5581 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5582 backtrack->framesize = framesize;
5583 backtrack->private_data_ptr = private_data_ptr;
5584 opcode = *cc;
5585 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A matching alternative jumps to "found": the local list "tmp" for the
positive assertions, the failure list "target" for the negative ones. */
5586 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* From here ccbegin .. cc delimits the alternative being compiled. */
5587 ccbegin = cc;
5588 cc += GET(cc, 1);
5589
5590 if (bra == OP_BRAMINZERO)
5591 {
5592 /* This is a braminzero backtrack path. */
5593 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5594 free_stack(common, 1);
5595 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5596 }
5597
/* Save STR_PTR (and, when needs_control_head is set, the control head) on the
stack. With a non-negative framesize the previous private data value is
saved as well and a frame is initialized. */
5598 if (framesize < 0)
5599 {
5600 extrasize = needs_control_head ? 2 : 1;
5601 if (framesize == no_frame)
5602 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5603 allocate_stack(common, extrasize);
5604 if (needs_control_head)
5605 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5606 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5607 if (needs_control_head)
5608 {
5609 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5610 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5611 }
5612 }
5613 else
5614 {
5615 extrasize = needs_control_head ? 3 : 2;
5616 allocate_stack(common, framesize + extrasize);
5617 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5618 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5619 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5620 if (needs_control_head)
5621 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5622 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5623 if (needs_control_head)
5624 {
5625 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5626 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5627 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5628 }
5629 else
5630 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5631 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
5632 }
5633
5634 memset(&altbacktrack, 0, sizeof(backtrack_common));
5635 common->local_exit = TRUE;
5636 common->quit_label = NULL;
5637 common->quit = NULL;
/* One iteration per alternative; the loop ends when the item after the
current alternative is not OP_ALT. */
5638 while (1)
5639 {
5640 common->accept_label = NULL;
5641 common->accept = NULL;
5642 altbacktrack.top = NULL;
5643 altbacktrack.topbacktracks = NULL;
5644
5645 if (*ccbegin == OP_ALT)
5646 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5647
5648 altbacktrack.cc = ccbegin;
5649 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5650 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5651 {
5652 common->local_exit = save_local_exit;
5653 common->then_trap = save_then_trap;
5654 common->quit_label = save_quit_label;
5655 common->accept_label = save_accept_label;
5656 common->quit = save_quit;
5657 common->accept = save_accept;
5658 return NULL;
5659 }
5660 common->accept_label = LABEL();
5661 if (common->accept != NULL)
5662 set_jumps(common->accept, common->accept_label);
5663
5664 /* Reset stack. */
5665 if (framesize < 0)
5666 {
5667 if (framesize == no_frame)
5668 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5669 else
5670 free_stack(common, extrasize);
5671 if (needs_control_head)
5672 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5673 }
5674 else
5675 {
5676 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5677 {
5678 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5679 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5680 if (needs_control_head)
5681 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5682 }
5683 else
5684 {
5685 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5686 if (needs_control_head)
5687 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5688 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5689 }
5690 }
5691
5692 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5693 {
5694 /* We know that STR_PTR was stored on the top of the stack. */
5695 if (conditional)
5696 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5697 else if (bra == OP_BRAZERO)
5698 {
5699 if (framesize < 0)
5700 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5701 else
5702 {
5703 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5704 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5705 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5706 }
5707 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5708 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5709 }
5710 else if (framesize >= 0)
5711 {
5712 /* For OP_BRA and OP_BRAMINZERO. */
5713 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5714 }
5715 }
/* This alternative matched: leave through the "found" list. */
5716 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5717
5718 compile_backtrackingpath(common, altbacktrack.top);
5719 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5720 {
5721 common->local_exit = save_local_exit;
5722 common->then_trap = save_then_trap;
5723 common->quit_label = save_quit_label;
5724 common->accept_label = save_accept_label;
5725 common->quit = save_quit;
5726 common->accept = save_accept;
5727 return NULL;
5728 }
5729 set_jumps(altbacktrack.topbacktracks, LABEL());
5730
5731 if (*cc != OP_ALT)
5732 break;
5733
5734 ccbegin = cc;
5735 cc += GET(cc, 1);
5736 }
5737
5738 /* None of them matched. */
5739 if (common->quit != NULL)
5740 {
5741 jump = JUMP(SLJIT_JUMP);
5742 set_jumps(common->quit, LABEL());
5743 SLJIT_ASSERT(framesize != no_stack);
5744 if (framesize < 0)
5745 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5746 else
5747 {
5748 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5749 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5750 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5751 }
5752 JUMPHERE(jump);
5753 }
5754
5755 if (needs_control_head)
5756 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5757
5758 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5759 {
5760 /* Assert is failed. */
5761 if (conditional || bra == OP_BRAZERO)
5762 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5763
5764 if (framesize < 0)
5765 {
5766 /* The topmost item should be 0. */
5767 if (bra == OP_BRAZERO)
5768 {
5769 if (extrasize == 2)
5770 free_stack(common, 1);
5771 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5772 }
5773 else
5774 free_stack(common, extrasize);
5775 }
5776 else
5777 {
5778 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5779 /* The topmost item should be 0. */
5780 if (bra == OP_BRAZERO)
5781 {
5782 free_stack(common, framesize + extrasize - 1);
5783 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5784 }
5785 else
5786 free_stack(common, framesize + extrasize);
5787 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5788 }
/* With OP_BRAZERO the failure is not reported to "target"; the jump is bound
to backtrack->matchingpath further below instead. */
5789 jump = JUMP(SLJIT_JUMP);
5790 if (bra != OP_BRAZERO)
5791 add_jump(compiler, target, jump);
5792
5793 /* Assert is successful. */
5794 set_jumps(tmp, LABEL());
5795 if (framesize < 0)
5796 {
5797 /* We know that STR_PTR was stored on the top of the stack. */
5798 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5799 /* Keep the STR_PTR on the top of the stack. */
5800 if (bra == OP_BRAZERO)
5801 {
5802 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5803 if (extrasize == 2)
5804 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5805 }
5806 else if (bra == OP_BRAMINZERO)
5807 {
5808 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5809 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5810 }
5811 }
5812 else
5813 {
5814 if (bra == OP_BRA)
5815 {
5816 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5817 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5818 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5819 }
5820 else
5821 {
5822 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5823 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5824 if (extrasize == 2)
5825 {
5826 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5827 if (bra == OP_BRAMINZERO)
5828 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5829 }
5830 else
5831 {
5832 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5833 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5834 }
5835 }
5836 }
5837
5838 if (bra == OP_BRAZERO)
5839 {
5840 backtrack->matchingpath = LABEL();
5841 SET_LABEL(jump, backtrack->matchingpath);
5842 }
5843 else if (bra == OP_BRAMINZERO)
5844 {
5845 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5846 JUMPHERE(brajump);
5847 if (framesize >= 0)
5848 {
5849 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5850 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5851 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5852 }
5853 set_jumps(backtrack->common.topbacktracks, LABEL());
5854 }
5855 }
5856 else
5857 {
5858 /* AssertNot is successful. */
5859 if (framesize < 0)
5860 {
5861 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5862 if (bra != OP_BRA)
5863 {
5864 if (extrasize == 2)
5865 free_stack(common, 1);
5866 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5867 }
5868 else
5869 free_stack(common, extrasize);
5870 }
5871 else
5872 {
5873 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5874 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5875 /* The topmost item should be 0. */
5876 if (bra != OP_BRA)
5877 {
5878 free_stack(common, framesize + extrasize - 1);
5879 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5880 }
5881 else
5882 free_stack(common, framesize + extrasize);
5883 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5884 }
5885
5886 if (bra == OP_BRAZERO)
5887 backtrack->matchingpath = LABEL();
5888 else if (bra == OP_BRAMINZERO)
5889 {
5890 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5891 JUMPHERE(brajump);
5892 }
5893
5894 if (bra != OP_BRA)
5895 {
5896 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5897 set_jumps(backtrack->common.topbacktracks, LABEL());
5898 backtrack->common.topbacktracks = NULL;
5899 }
5900 }
5901
/* Restore the accept / quit / then state saved on entry. */
5902 common->local_exit = save_local_exit;
5903 common->then_trap = save_then_trap;
5904 common->quit_label = save_quit_label;
5905 common->accept_label = save_accept_label;
5906 common->quit = save_quit;
5907 common->accept = save_accept;
5908 return cc + 1 + LINK_SIZE;
5909 }
5910
5911 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5912 {
5913 int condition = FALSE;
5914 pcre_uchar *slotA = name_table;
5915 pcre_uchar *slotB;
5916 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5917 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5918 sljit_sw no_capture;
5919 int i;
5920
5921 locals += refno & 0xff;
5922 refno >>= 8;
5923 no_capture = locals[1];
5924
5925 for (i = 0; i < name_count; i++)
5926 {
5927 if (GET2(slotA, 0) == refno) break;
5928 slotA += name_entry_size;
5929 }
5930
5931 if (i < name_count)
5932 {
5933 /* Found a name for the number - there can be only one; duplicate names
5934 for different numbers are allowed, but not vice versa. First scan down
5935 for duplicates. */
5936
5937 slotB = slotA;
5938 while (slotB > name_table)
5939 {
5940 slotB -= name_entry_size;
5941 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5942 {
5943 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5944 if (condition) break;
5945 }
5946 else break;
5947 }
5948
5949 /* Scan up for duplicates */
5950 if (!condition)
5951 {
5952 slotB = slotA;
5953 for (i++; i < name_count; i++)
5954 {
5955 slotB += name_entry_size;
5956 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5957 {
5958 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5959 if (condition) break;
5960 }
5961 else break;
5962 }
5963 }
5964 }
5965 return condition;
5966 }
5967
5968 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5969 {
5970 int condition = FALSE;
5971 pcre_uchar *slotA = name_table;
5972 pcre_uchar *slotB;
5973 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5974 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5975 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5976 sljit_uw i;
5977
5978 for (i = 0; i < name_count; i++)
5979 {
5980 if (GET2(slotA, 0) == recno) break;
5981 slotA += name_entry_size;
5982 }
5983
5984 if (i < name_count)
5985 {
5986 /* Found a name for the number - there can be only one; duplicate
5987 names for different numbers are allowed, but not vice versa. First
5988 scan down for duplicates. */
5989
5990 slotB = slotA;
5991 while (slotB > name_table)
5992 {
5993 slotB -= name_entry_size;
5994 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5995 {
5996 condition = GET2(slotB, 0) == group_num;
5997 if (condition) break;
5998 }
5999 else break;
6000 }
6001
6002 /* Scan up for duplicates */
6003 if (!condition)
6004 {
6005 slotB = slotA;
6006 for (i++; i < name_count; i++)
6007 {
6008 slotB += name_entry_size;
6009 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
6010 {
6011 condition = GET2(slotB, 0) == group_num;
6012 if (condition) break;
6013 }
6014 else break;
6015 }
6016 }
6017 }
6018 return condition;
6019 }
6020
6021 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6022 {
6023 DEFINE_COMPILER;
6024 int stacksize;
6025
6026 if (framesize < 0)
6027 {
6028 if (framesize == no_frame)
6029 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6030 else
6031 {
6032 stacksize = needs_control_head ? 1 : 0;
6033 if (ket != OP_KET || has_alternatives)
6034 stacksize++;
6035 free_stack(common, stacksize);
6036 }
6037
6038 if (needs_control_head)
6039 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6040
6041 /* TMP2 which is set here used by OP_KETRMAX below. */
6042 if (ket == OP_KETRMAX)
6043 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6044 else if (ket == OP_KETRMIN)
6045 {
6046 /* Move the STR_PTR to the private_data_ptr. */
6047 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6048 }
6049 }
6050 else
6051 {
6052 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6053 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6054 if (needs_control_head)
6055 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6056
6057 if (ket == OP_KETRMAX)
6058 {
6059 /* TMP2 which is set here used by OP_KETRMAX below. */
6060 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6061 }
6062 }
6063 if (needs_control_head)
6064 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6065 }
6066
6067 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6068 {
6069 DEFINE_COMPILER;
6070
6071 if (common->capture_last_ptr != 0)
6072 {
6073 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6074 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6075 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6076 stacksize++;
6077 }
6078 if (common->optimized_cbracket[offset >> 1] == 0)
6079 {
6080 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6081 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6082 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6083 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6084 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6085 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6086 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6087 stacksize += 2;
6088 }
6089 return stacksize;
6090 }
6091
6092 /*
6093 Handling bracketed expressions is probably the most complex part.
6094
6095 Stack layout naming characters:
6096 S - Push the current STR_PTR
6097 0 - Push a 0 (NULL)
6098 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6099 before the next alternative. Not pushed if there are no alternatives.
6100 M - Any values pushed by the current alternative. Can be empty, or anything.
6101 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6102 L - Push the previous local (pointed by localptr) to the stack
6103 () - optional values stored on the stack
6104 ()* - optional, can be stored multiple times
6105
6106 The following list shows the regular expression templates, their PCRE byte codes
6107 and stack layout supported by pcre-sljit.
6108
6109 (?:) OP_BRA | OP_KET A M
6110 () OP_CBRA | OP_KET C M
6111 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6112 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6113 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6114 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6115 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6116 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6117 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6118 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6119 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6120 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6121 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6122 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6123 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6124 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6125 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6126 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6127 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6128 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6129 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6130 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6131
6132
6133 Stack layout naming characters:
6134 A - Push the alternative index (starting from 0) on the stack.
6135 Not pushed if there are no alternatives.
6136 M - Any values pushed by the current alternative. Can be empty, or anything.
6137
6138 The next list shows the possible content of a bracket:
6139 (|) OP_*BRA | OP_ALT ... M A
6140 (?()|) OP_*COND | OP_ALT M A
6141 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6142 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6143 Or nothing, if trace is unnecessary
6144 */
6145
/* Emits the matching path of a bracketed group: plain, capturing, OP_ONCE
(OP_ONCE_NC is handled as OP_ONCE) and conditional brackets, optionally
preceded by OP_BRAZERO or OP_BRAMINZERO and closed by OP_KET, OP_KETRMAX or
OP_KETRMIN. Only the first alternative is compiled here; the remaining ones
are skipped at the end of the function.

  common  shared compiler state
  cc      points to the bracket opcode, or to the OP_BRAZERO / OP_BRAMINZERO
          opcode in front of it
  parent  backtrack record passed on to PUSH_BACKTRACK; when the group starts
          with OP_BRAMINZERO it is also read as a braminzero_backtrack

Returns the position right after the closing KET of the group, or NULL when
the sljit compiler reports an error. A conditional group whose condition is
OP_DEF emits no code: its backtrack record is dropped and the end of the
bracket is returned. */
6146 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6147 {
6148 DEFINE_COMPILER;
6149 backtrack_common *backtrack;
6150 pcre_uchar opcode;
6151 int private_data_ptr = 0;
6152 int offset = 0;
6153 int stacksize;
6154 pcre_uchar *ccbegin;
6155 pcre_uchar *matchingpath;
6156 pcre_uchar bra = OP_BRA;
6157 pcre_uchar ket;
6158 assert_backtrack *assert;
6159 BOOL has_alternatives;
6160 BOOL needs_control_head = FALSE;
6161 struct sljit_jump *jump;
6162 struct sljit_jump *skip;
6163 struct sljit_label *rmaxlabel = NULL;
6164 struct sljit_jump *braminzerojump = NULL;
6165
6166 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6167
/* bra remembers the optional zero-repeat prefix; it stays OP_BRA without one. */
6168 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6169 {
6170 bra = *cc;
6171 cc++;
/* NOTE(review): redundant store, opcode is assigned again right after this block. */
6172 opcode = *cc;
6173 }
6174
6175 opcode = *cc;
6176 ccbegin = cc;
6177 matchingpath = ccbegin + 1 + LINK_SIZE;
6178
6179 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6180 {
6181 /* Drop this bracket_backtrack. */
6182 parent->top = backtrack->prev;
6183 return bracketend(cc);
6184 }
6185
/* ket is the opcode which closes the group. Afterwards cc is advanced by the
link value of the bracket, so it points to the first OP_ALT or to the KET. */
6186 ket = *(bracketend(cc) - 1 - LINK_SIZE);
6187 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6188 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6189 cc += GET(cc, 1);
6190
6191 has_alternatives = *cc == OP_ALT;
/* For conditional groups has_alternatives is recomputed from the type of the
condition. stacksize is only a scratch variable here: it holds the number
read from the OP_NRREF condition. */
6192 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6193 {
6194 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
6195 if (*matchingpath == OP_NRREF)
6196 {
6197 stacksize = GET2(matchingpath, 1);
6198 if (common->currententry == NULL || stacksize == RREF_ANY)
6199 has_alternatives = FALSE;
6200 else if (common->currententry->start == 0)
6201 has_alternatives = stacksize != 0;
6202 else
6203 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6204 }
6205 }
6206
/* From here on a repeated OP_COND is handled as OP_SCOND, and OP_ONCE_NC as OP_ONCE. */
6207 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6208 opcode = OP_SCOND;
6209 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6210 opcode = OP_ONCE;
6211
6212 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6213 {
6214 /* Capturing brackets has a pre-allocated space. */
6215 offset = GET2(ccbegin, 1 + LINK_SIZE);
6216 if (common->optimized_cbracket[offset] == 0)
6217 {
6218 private_data_ptr = OVECTOR_PRIV(offset);
6219 offset <<= 1;
6220 }
6221 else
6222 {
6223 offset <<= 1;
6224 private_data_ptr = OVECTOR(offset);
6225 }
6226 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6227 matchingpath += IMM2_SIZE;
6228 }
6229 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6230 {
6231 /* Other brackets simply allocate the next entry. */
6232 private_data_ptr = PRIVATE_DATA(ccbegin);
6233 SLJIT_ASSERT(private_data_ptr != 0);
6234 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6235 if (opcode == OP_ONCE)
6236 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6237 }
6238
6239 /* Instructions before the first alternative. */
/* First the number of needed stack slots is counted, then the slots are
allocated at once and filled in the same order. */
6240 stacksize = 0;
6241 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6242 stacksize++;
6243 if (bra == OP_BRAZERO)
6244 stacksize++;
6245
6246 if (stacksize > 0)
6247 allocate_stack(common, stacksize);
6248
6249 stacksize = 0;
6250 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6251 {
6252 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6253 stacksize++;
6254 }
6255
6256 if (bra == OP_BRAZERO)
6257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6258
6259 if (bra == OP_BRAMINZERO)
6260 {
6261 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6262 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6263 if (ket != OP_KETRMIN)
6264 {
6265 free_stack(common, 1);
6266 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6267 }
6268 else
6269 {
6270 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6271 {
6272 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6273 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6274 /* Nothing stored during the first run. */
6275 skip = JUMP(SLJIT_JUMP);
6276 JUMPHERE(jump);
6277 /* Checking zero-length iteration. */
6278 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6279 {
6280 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6281 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6282 }
6283 else
6284 {
6285 /* Except when the whole stack frame must be saved. */
6286 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6287 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6288 }
6289 JUMPHERE(skip);
6290 }
6291 else
6292 {
6293 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6294 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6295 JUMPHERE(jump);
6296 }
6297 }
6298 }
6299
/* Labels which the repeating KET variants jump back to. */
6300 if (ket == OP_KETRMIN)
6301 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6302
6303 if (ket == OP_KETRMAX)
6304 {
6305 rmaxlabel = LABEL();
6306 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
6307 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
6308 }
6309
6310 /* Handling capturing brackets and alternatives. */
6311 if (opcode == OP_ONCE)
6312 {
6313 stacksize = 0;
6314 if (needs_control_head)
6315 {
6316 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6317 stacksize++;
6318 }
6319
6320 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6321 {
6322 /* Neither capturing brackets nor recursions are found in the block. */
6323 if (ket == OP_KETRMIN)
6324 {
6325 stacksize += 2;
6326 if (!needs_control_head)
6327 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6328 }
6329 else
6330 {
6331 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6332 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6333 if (ket == OP_KETRMAX || has_alternatives)
6334 stacksize++;
6335 }
6336
6337 if (stacksize > 0)
6338 allocate_stack(common, stacksize);
6339
6340 stacksize = 0;
6341 if (needs_control_head)
6342 {
6343 stacksize++;
6344 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6345 }
6346
6347 if (ket == OP_KETRMIN)
6348 {
6349 if (needs_control_head)
6350 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6351 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6352 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6353 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6354 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6355 }
6356 else if (ket == OP_KETRMAX || has_alternatives)
6357 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6358 }
6359 else
6360 {
/* A frame is saved: room is allocated for the optional control head, the
optional STR_PTR, the previous private data value and the frame itself. */
6361 if (ket != OP_KET || has_alternatives)
6362 stacksize++;
6363
6364 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6365 allocate_stack(common, stacksize);
6366
6367 if (needs_control_head)
6368 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6369
6370 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6371 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6372
6373 stacksize = needs_control_head ? 1 : 0;
6374 if (ket != OP_KET || has_alternatives)
6375 {
6376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6377 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6378 stacksize++;
6379 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6380 }
6381 else
6382 {
6383 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6384 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6385 }
6386 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6387 }
6388 }
6389 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6390 {
6391 /* Saving the previous values. */
6392 if (common->optimized_cbracket[offset >> 1] != 0)
6393 {
6394 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6395 allocate_stack(common, 2);
6396 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6397 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6398 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6399 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6400 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6401 }
6402 else
6403 {
6404 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6405 allocate_stack(common, 1);
6406 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6407 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6408 }
6409 }
6410 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6411 {
6412 /* Saving the previous value. */
6413 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6414 allocate_stack(common, 1);
6415 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6416 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6417 }
6418 else if (has_alternatives)
6419 {
6420 /* Pushing the starting string pointer. */
6421 allocate_stack(common, 1);
6422 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6423 }
6424
6425 /* Generating code for the first alternative. */
/* For conditional groups the condition is compiled first; a failing
condition is recorded in u.condfailed (or in u.assert for assertions). */
6426 if (opcode == OP_COND || opcode == OP_SCOND)
6427 {
6428 if (*matchingpath == OP_CREF)
6429 {
6430 SLJIT_ASSERT(has_alternatives);
6431 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6432 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6433 matchingpath += 1 + IMM2_SIZE;
6434 }
6435 else if (*matchingpath == OP_NCREF)
6436 {
6437 SLJIT_ASSERT(has_alternatives);
/* stacksize temporarily holds the referenced group number. The helper is
called only when that group itself compares equal to OVECTOR(1). */
6438 stacksize = GET2(matchingpath, 1);
6439 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6440
/* STACK_TOP is saved in POSSESSIVE1 before the call and reloaded after it;
the arguments of do_searchovector are passed in LOCALS0, LOCALS1 and the
three scratch registers. */
6441 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6442 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6443 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6444 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6445 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6446 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6447 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6448 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6449 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6450
6451 JUMPHERE(jump);
6452 matchingpath += 1 + IMM2_SIZE;
6453 }
6454 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6455 {
6456 /* Never has other case. */
6457 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6458
/* stacksize is reused as the compile time result of the recursion check. */
6459 stacksize = GET2(matchingpath, 1);
6460 if (common->currententry == NULL)
6461 stacksize = 0;
6462 else if (stacksize == RREF_ANY)
6463 stacksize = 1;
6464 else if (common->currententry->start == 0)
6465 stacksize = stacksize == 0;
6466 else
6467 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6468
6469 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6470 {
6471 SLJIT_ASSERT(!has_alternatives);
/* The result is known at compile time: either the first branch is compiled,
or the second one (if present), or nothing at all. */
6472 if (stacksize != 0)
6473 matchingpath += 1 + IMM2_SIZE;
6474 else
6475 {
6476 if (*cc == OP_ALT)
6477 {
6478 matchingpath = cc + 1 + LINK_SIZE;
6479 cc += GET(cc, 1);
6480 }
6481 else
6482 matchingpath = cc;
6483 }
6484 }
6485 else
6486 {
6487 SLJIT_ASSERT(has_alternatives);
6488
/* Same calling sequence as for OP_NCREF above, but do_searchgroups is called
and the number read from the current recursion entry is passed in POSSESSIVE0. */
6489 stacksize = GET2(matchingpath, 1);
6490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6491 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6492 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6493 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6494 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6495 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6496 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6497 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6498 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6499 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6500 matchingpath += 1 + IMM2_SIZE;
6501 }
6502 }
6503 else
6504 {
6505 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6506 /* Similar code as PUSH_BACKTRACK macro. */
6507 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6508 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6509 return NULL;
6510 memset(assert, 0, sizeof(assert_backtrack));
6511 assert->common.cc = matchingpath;
6512 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6513 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6514 }
6515 }
6516
/* Compile the body of the selected alternative, from matchingpath up to cc. */
6517 compile_matchingpath(common, matchingpath, cc, backtrack);
6518 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6519 return NULL;
6520
6521 if (opcode == OP_ONCE)
6522 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6523
/* Count the stack slots which are pushed after the alternative has matched;
they are allocated at once and filled in the same order below. */
6524 stacksize = 0;
6525 if (ket != OP_KET || bra != OP_BRA)
6526 stacksize++;
6527 if (offset != 0)
6528 {
6529 if (common->capture_last_ptr != 0)
6530 stacksize++;
6531 if (common->optimized_cbracket[offset >> 1] == 0)
6532 stacksize += 2;
6533 }
6534 if (has_alternatives && opcode != OP_ONCE)
6535 stacksize++;
6536
6537 if (stacksize > 0)
6538 allocate_stack(common, stacksize);
6539
6540 stacksize = 0;
6541 if (ket != OP_KET || bra != OP_BRA)
6542 {
6543 if (ket != OP_KET)
6544 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6545 else
6546 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6547 stacksize++;
6548 }
6549
6550 if (offset != 0)
6551 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6552
6553 if (has_alternatives)
6554 {
6555 if (opcode != OP_ONCE)
6556 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6557 if (ket != OP_KETRMAX)
6558 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6559 }
6560
6561 /* Must be after the matchingpath label. */
6562 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6563 {
6564 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6565 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6566 }
6567
6568 if (ket == OP_KETRMAX)
6569 {
6570 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6571 {
6572 if (has_alternatives)
6573 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6574 /* Checking zero-length iteration. */
6575 if (opcode != OP_ONCE)
6576 {
6577 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6578 /* Drop STR_PTR for greedy plus quantifier. */
6579 if (bra != OP_BRAZERO)
6580 free_stack(common, 1);
6581 }
6582 else
6583 /* TMP2 must contain the starting STR_PTR. */
6584 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6585 }
6586 else
6587 JUMPTO(SLJIT_JUMP, rmaxlabel);
6588 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6589 }
6590
6591 if (bra == OP_BRAZERO)
6592 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6593
6594 if (bra == OP_BRAMINZERO)
6595 {
6596 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6597 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6598 if (braminzerojump != NULL)
6599 {
6600 JUMPHERE(braminzerojump);
6601 /* We need to release the end pointer to perform the
6602 backtrack for the zero-length iteration. When
6603 framesize is < 0, OP_ONCE will do the release itself. */
6604 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6605 {
6606 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6607 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6608 }
6609 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6610 free_stack(common, 1);
6611 }
6612 /* Continue to the normal backtrack. */
6613 }
6614
6615 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6616 decrease_call_count(common);
6617
6618 /* Skip the other alternatives. */
6619 while (*cc == OP_ALT)
6620 cc += GET(cc, 1);
6621 cc += 1 + LINK_SIZE;
6622
6623 /* Temporarily encoding the needs_control_head in framesize. */
/* The lowest bit carries needs_control_head, the bits above it the framesize. */
6624 if (opcode == OP_ONCE)
6625 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6626 return cc;
6627 }
6628
6629 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6630 {
6631 DEFINE_COMPILER;
6632 backtrack_common *backtrack;
6633 pcre_uchar opcode;
6634 int private_data_ptr;
6635 int cbraprivptr = 0;
6636 BOOL needs_control_head;
6637 int framesize;
6638 int stacksize;
6639 int offset = 0;
6640 BOOL zero = FALSE;
6641 pcre_uchar *ccbegin = NULL;
6642 int stack; /* Also contains the offset of control head. */
6643 struct sljit_label *loop = NULL;
6644 struct jump_list *emptymatch = NULL;
6645
6646 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6647 if (*cc == OP_BRAPOSZERO)
6648 {
6649 zero = TRUE;
6650 cc++;
6651 }
6652
6653 opcode = *cc;
6654 private_data_ptr = PRIVATE_DATA(cc);
6655 SLJIT_ASSERT(private_data_ptr != 0);
6656 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6657 switch(opcode)
6658 {
6659 case OP_BRAPOS:
6660 case OP_SBRAPOS:
6661 ccbegin = cc + 1 + LINK_SIZE;
6662 break;
6663
6664 case OP_CBRAPOS:
6665 case OP_SCBRAPOS:
6666 offset = GET2(cc, 1 + LINK_SIZE);
6667 /* This case cannot be optimized in the same was as
6668 normal capturing brackets. */
6669 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6670 cbraprivptr = OVECTOR_PRIV(offset);
6671 offset <<= 1;
6672 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6673 break;
6674
6675 default:
6676 SLJIT_ASSERT_STOP();
6677 break;
6678 }
6679
6680 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6681 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6682 if (framesize < 0)
6683 {
6684 if (offset != 0)
6685 {
6686 stacksize = 2;
6687 if (common->capture_last_ptr != 0)
6688 stacksize++;
6689 }
6690 else
6691 stacksize = 1;
6692
6693 if (needs_control_head)
6694 stacksize++;
6695 if (!zero)
6696 stacksize++;
6697
6698 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6699 allocate_stack(common, stacksize);
6700 if (framesize == no_frame)
6701 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6702
6703 stack = 0;
6704 if (offset != 0)
6705 {
6706 stack = 2;
6707 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6708 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6709 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6710 if (common->capture_last_ptr != 0)
6711 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6712 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6713 if (needs_control_head)
6714 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6715 if (common->capture_last_ptr != 0)
6716 {
6717 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6718 stack = 3;
6719 }
6720 }
6721 else
6722 {
6723 if (needs_control_head)
6724 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6726 stack = 1;
6727 }
6728
6729 if (needs_control_head)
6730 stack++;
6731 if (!zero)
6732 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
6733 if (needs_control_head)
6734 {
6735 stack--;
6736 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6737 }
6738 }
6739 else
6740 {
6741 stacksize = framesize + 1;
6742 if (!zero)
6743 stacksize++;
6744 if (needs_control_head)
6745 stacksize++;
6746 if (offset == 0)
6747 stacksize++;
6748 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6749
6750 allocate_stack(common, stacksize);
6751 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr)