/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1291 - (show annotations)
Sun Mar 17 05:27:48 2013 UTC (6 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 300006 byte(s)
Improve performance of then verb in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries.
Only takes effect when SLJIT_DEBUG is defined and nonzero; this file
defines SLJIT_DEBUG as 0 before including sljitLir.c. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block of the generated code: the jit_function type declared
later in this file takes a pointer to this structure as its only parameter.
Members are grouped with all pointers first, followed by the ints and the
single-byte flags. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 int real_offset_count;
172 int offset_count;
173 int call_limit;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Per-pattern record with one slot per compile mode
(JIT_NUMBER_OF_COMPILE_MODES) in both executable_funcs and
executable_sizes, plus a callback / userdata pair and top_bracket.
NOTE(review): presumably executable_sizes[i] is the code size of
executable_funcs[i] - confirm against the code that fills them in. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
186 } executable_functions;
187
/* Node of a singly linked list of sljit jumps. Used throughout this file
to collect jumps (see the jump_list members of compiler_common and of the
backtrack structures). */
188 typedef struct jump_list {
189 struct sljit_jump *jump;
190 struct jump_list *next;
191 } jump_list;
192
/* Node of a singly linked list pairing a start jump with a quit label.
The list head is compiler_common.stubs. */
193 typedef struct stub_list {
194 struct sljit_jump *start;
195 struct sljit_label *quit;
196 struct stub_list *next;
197 } stub_list;
198
/* Negative sentinel values. get_framesize() is documented as returning a
frame_types value (always < 0) when no frame is needed. */
199 enum frame_types {
200 no_frame = -1,
201 no_stack = -2
202 };
203
/* Tags with the values 0 and 1.
NOTE(review): presumably these distinguish the entries of the control verb
chain (see control_head_ptr in compiler_common) - confirm at the use sites. */
204 enum control_types {
205 type_mark = 0,
206 type_then_trap = 1
207 };
208
/* Signature of a generated function: called with the SLJIT_CALL convention,
takes a pointer to a jit_arguments block and returns an int. */
209 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
210
211 /* The following structure is the key data type for the recursive
212 code generator. It is allocated by compile_matchingpath, and contains
213 the arguments for compile_backtrackingpath. Must be the first member
214 of its descendants (every *_backtrack structure below starts with a
backtrack_common member named common). */
215 typedef struct backtrack_common {
216 /* Concatenation stack. */
217 struct backtrack_common *prev;
218 jump_list *nextbacktracks;
219 /* Internal stack (for component operators). */
220 struct backtrack_common *top;
221 jump_list *topbacktracks;
222 /* Opcode pointer. */
223 pcre_uchar *cc;
224 } backtrack_common;
225
/* Backtrack record extending backtrack_common with a condfailed jump list,
a frame size, a private data offset and a matching path label.
NOTE(review): going by the name, used for the assertion opcodes - confirm. */
226 typedef struct assert_backtrack {
227 backtrack_common common;
228 jump_list *condfailed;
229 /* Less than 0 if a frame is not needed. */
230 int framesize;
231 /* Points to our private memory word on the stack. */
232 int private_data_ptr;
233 /* For iterators. */
234 struct sljit_label *matchingpath;
235 } assert_backtrack;
236
/* Backtrack record for brackets. The union u holds exactly one of three
things; the member comments below name the opcodes each one is used for. */
237 typedef struct bracket_backtrack {
238 backtrack_common common;
239 /* Where to continue if an alternative is successfully matched. */
240 struct sljit_label *alternative_matchingpath;
241 /* For rmin and rmax iterators. */
242 struct sljit_label *recursive_matchingpath;
243 /* For greedy ? operator. */
244 struct sljit_label *zero_matchingpath;
245 /* Contains the branches of a failed condition. */
246 union {
247 /* Both for OP_COND, OP_SCOND. */
248 jump_list *condfailed;
249 assert_backtrack *assert;
250 /* For OP_ONCE. Less than 0 if not needed. */
251 int framesize;
252 } u;
253 /* Points to our private memory word on the stack. */
254 int private_data_ptr;
255 } bracket_backtrack;
256
/* Backtrack record extending backtrack_common with a private data offset,
a frame size and an allocated stack size.
NOTE(review): going by the name, used for the possessive bracket opcodes
(OP_BRAPOS and friends) - confirm. */
257 typedef struct bracketpos_backtrack {
258 backtrack_common common;
259 /* Points to our private memory word on the stack. */
260 int private_data_ptr;
261 /* Reverting stack is needed. */
262 int framesize;
263 /* Allocated stack size. */
264 int stacksize;
265 } bracketpos_backtrack;
266
/* Backtrack record extending backtrack_common with a single matching path
label. NOTE(review): going by the name, used for OP_BRAMINZERO - confirm. */
267 typedef struct braminzero_backtrack {
268 backtrack_common common;
269 struct sljit_label *matchingpath;
270 } braminzero_backtrack;
271
/* Backtrack record for iterators: backtrack_common plus the label at which
the next iteration starts. */
272 typedef struct iterator_backtrack {
273 backtrack_common common;
274 /* Next iteration. */
275 struct sljit_label *matchingpath;
276 } iterator_backtrack;
277
/* Node of a singly linked list of recursion targets. compiler_common keeps
the list head in entries and the one being processed in currententry. */
278 typedef struct recurse_entry {
279 struct recurse_entry *next;
280 /* Contains the function entry. */
281 struct sljit_label *entry;
282 /* Collects the calls made before the function has been created. */
283 jump_list *calls;
284 /* Points to the starting opcode. */
285 sljit_sw start;
286 } recurse_entry;
287
/* Backtrack record extending backtrack_common with one flag.
NOTE(review): presumably inlined_pattern records that the recursed pattern
was compiled in place instead of being called - confirm at the use sites. */
288 typedef struct recurse_backtrack {
289 backtrack_common common;
290 BOOL inlined_pattern;
291 } recurse_backtrack;
292
/* Pseudo opcode with the value OP_TABLE_LENGTH.
NOTE(review): presumably chosen as the first value after the real opcodes
so that it cannot clash with any of them - confirm in pcre_internal.h. */
293 #define OP_THEN_TRAP OP_TABLE_LENGTH
294
/* Backtrack record for a then trap. compiler_common.then_trap points to a
structure of this type ("current position where a THEN must jump"). */
295 typedef struct then_trap_backtrack {
296 backtrack_common common;
297 /* If then_trap is not NULL, this structure contains the real
298 then_trap for the backtracking path. */
299 struct then_trap_backtrack *then_trap;
300 /* Points to the starting opcode. */
301 sljit_sw start;
302 /* Exit point for the then opcodes of this alternative. */
303 jump_list *quit;
304 /* Frame size of the current alternative. */
305 int framesize;
306 } then_trap_backtrack;
307
/* Sizes the digits[] table of compiler_common, which has
2 + MAX_RANGE_SIZE entries. */
308 #define MAX_RANGE_SIZE 6
309
/* State of one compilation, passed as the `common` argument to the
functions of this file. The int members whose names end in _ptr, together
with hit_start and first_line_end, hold offsets rather than C pointers:
get_private_data_length() assigns several of them from ovector_start and
uses the value 0 to mean "not allocated yet". */
310 typedef struct compiler_common {
311 /* The sljit generic compiler. */
312 struct sljit_compiler *compiler;
313 /* First byte code. */
314 pcre_uchar *start;
315 /* Maps private data offset to each opcode. */
316 int *private_data_ptrs;
317 /* Tells whether the capturing bracket is optimized. */
318 pcre_uint8 *optimized_cbracket;
319 /* Tells whether the starting offset is a target of then. */
320 pcre_uint8 *then_offsets;
321 /* Current position where a THEN must jump. */
322 then_trap_backtrack *then_trap;
323 /* Starting offset of private data for capturing brackets. */
324 int cbra_ptr;
325 /* Output vector starting point. Must be divisible by 2. */
326 int ovector_start;
327 /* Last known position of the requested byte. */
328 int req_char_ptr;
329 /* Head of the last recursion. */
330 int recursive_head_ptr;
331 /* First inspected character for partial matching. */
332 int start_used_ptr;
333 /* Starting pointer for partial soft matches. */
334 int hit_start;
335 /* End pointer of the first line. */
336 int first_line_end;
337 /* Points to the marked string. */
338 int mark_ptr;
339 /* Recursive control verb management chain. */
340 int control_head_ptr;
341 /* Points to the last matched capture block index. */
342 int capture_last_ptr;
343 /* Points to the starting position of the current match. */
344 int start_ptr;
345
346 /* Flipped and lower case tables. */
347 const pcre_uint8 *fcc;
348 sljit_sw lcc;
349 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
350 int mode;
351 /* \K is found in the pattern. */
352 BOOL has_set_som;
353 /* (*SKIP:arg) is found in the pattern. */
354 BOOL has_skip_arg;
355 /* (*THEN) is found in the pattern. */
356 BOOL has_then;
357 /* Needs to know the start position anytime. */
358 BOOL needs_start_ptr;
359 /* Currently in recurse or assert. */
360 BOOL local_exit;
361 /* Newline control. */
362 int nltype;
363 int newline;
364 int bsr_nltype;
365 /* Dollar endonly. */
366 int endonly;
367 /* Tables. */
368 sljit_sw ctypes;
369 int digits[2 + MAX_RANGE_SIZE];
370 /* Named capturing brackets. */
371 sljit_uw name_table;
372 sljit_sw name_count;
373 sljit_sw name_entry_size;
374
375 /* Labels and jump lists. */
376 struct sljit_label *partialmatchlabel;
377 struct sljit_label *quit_label;
378 struct sljit_label *forced_quit_label;
379 struct sljit_label *accept_label;
380 stub_list *stubs;
381 recurse_entry *entries;
382 recurse_entry *currententry;
383 jump_list *partialmatch;
384 jump_list *quit;
385 jump_list *forced_quit;
386 jump_list *accept;
387 jump_list *calllimit;
388 jump_list *stackalloc;
389 jump_list *revertframes;
390 jump_list *wordboundary;
391 jump_list *anynewline;
392 jump_list *hspace;
393 jump_list *vspace;
394 jump_list *casefulcmp;
395 jump_list *caselesscmp;
396 jump_list *reset_match;
397 BOOL jscript_compat;
/* The members below exist only in builds with the matching SUPPORT_* and
COMPILE_PCRE* settings. */
398 #ifdef SUPPORT_UTF
399 BOOL utf;
400 #ifdef SUPPORT_UCP
401 BOOL use_ucp;
402 #endif
403 #ifndef COMPILE_PCRE32
404 jump_list *utfreadchar;
405 #endif
406 #ifdef COMPILE_PCRE8
407 jump_list *utfreadtype8;
408 #endif
409 #endif /* SUPPORT_UTF */
410 #ifdef SUPPORT_UCP
411 jump_list *getucd;
412 #endif
413 } compiler_common;
414
415 /* For byte_sequence_compare. */
416
/* length and sourcereg are always present. When SLJIT_UNALIGNED is defined
and nonzero, the structure also carries ucharptr and two unions, c and oc,
with the same layout: each overlays an int, an unsigned short and an array
of code units (4, 2 or 1 entries for the 8-, 16- and 32-bit libraries, so
the array spans 4 bytes if the sljit types have the widths their names
suggest).
NOTE(review): presumably c and oc hold a character chunk and its other-case
counterpart - confirm in byte_sequence_compare. */
417 typedef struct compare_context {
418 int length;
419 int sourcereg;
420 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
421 int ucharptr;
422 union {
423 sljit_si asint;
424 sljit_uh asushort;
425 #if defined COMPILE_PCRE8
426 sljit_ub asbyte;
427 sljit_ub asuchars[4];
428 #elif defined COMPILE_PCRE16
429 sljit_uh asuchars[2];
430 #elif defined COMPILE_PCRE32
431 sljit_ui asuchars[1];
432 #endif
433 } c;
434 union {
435 sljit_si asint;
436 sljit_uh asushort;
437 #if defined COMPILE_PCRE8
438 sljit_ub asbyte;
439 sljit_ub asuchars[4];
440 #elif defined COMPILE_PCRE16
441 sljit_uh asuchars[2];
442 #elif defined COMPILE_PCRE32
443 sljit_ui asuchars[1];
444 #endif
445 } oc;
446 #endif
447 } compare_context;
448
449 /* Undefine sljit macros. CMP is redefined among the shortcut macros
further down, where it expands to sljit_emit_cmp on the local compiler. */
450 #undef CMP
451
452 /* Used for accessing the elements of the stack. Yields a negative byte
offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw),
and so on. */
453 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
454
/* Symbolic names for the sljit registers used by the generated code.
Note that the numbering is not sequential: TMP2 is SLJIT_SCRATCH_REG3 while
STACK_TOP is SLJIT_SCRATCH_REG2. */
455 #define TMP1 SLJIT_SCRATCH_REG1
456 #define TMP2 SLJIT_SCRATCH_REG3
457 #define TMP3 SLJIT_TEMPORARY_EREG2
458 #define STR_PTR SLJIT_SAVED_REG1
459 #define STR_END SLJIT_SAVED_REG2
460 #define STACK_TOP SLJIT_SCRATCH_REG2
461 #define STACK_LIMIT SLJIT_SAVED_REG3
462 #define ARGUMENTS SLJIT_SAVED_EREG1
463 #define CALL_COUNT SLJIT_SAVED_EREG2
464 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
465
466 /* Local space layout. The first five words (byte offsets 0 to
4 * sizeof(sljit_sw)) have fixed roles; the macros further down compute
offsets from fields of a compiler_common variable that must be named
`common` at the point of use. */
467 /* These two locals can be used by the current opcode. */
468 #define LOCALS0 (0 * sizeof(sljit_sw))
469 #define LOCALS1 (1 * sizeof(sljit_sw))
470 /* Two local variables for possessive quantifiers (char1 cannot use them). */
471 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
472 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
473 /* Max limit of recursions. */
474 #define CALL_LIMIT (4 * sizeof(sljit_sw))
475 /* The output vector is stored on the stack, and contains pointers
476 to characters. The vector data is divided into two groups: the first
477 group contains the start / end character pointers, and the second is
478 the start pointers when the end of the capturing group has not yet reached. */
479 #define OVECTOR_START (common->ovector_start)
480 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
481 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc; the table is indexed
by the distance of cc from common->start. */
482 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
483
/* Selects the sljit move opcodes used for one pcre_uchar: the _UB, _UH or
_UI variants for the 8-, 16- and 32-bit libraries respectively. Exactly one
of COMPILE_PCRE8 / COMPILE_PCRE16 / COMPILE_PCRE32 must be defined,
otherwise compilation stops with an #error. */
484 #if defined COMPILE_PCRE8
485 #define MOV_UCHAR SLJIT_MOV_UB
486 #define MOVU_UCHAR SLJIT_MOVU_UB
487 #elif defined COMPILE_PCRE16
488 #define MOV_UCHAR SLJIT_MOV_UH
489 #define MOVU_UCHAR SLJIT_MOVU_UH
490 #elif defined COMPILE_PCRE32
491 #define MOV_UCHAR SLJIT_MOV_UI
492 #define MOVU_UCHAR SLJIT_MOVU_UI
493 #else
494 #error Unsupported compiling mode
495 #endif
496
497 /* Shortcuts. Every emitting macro below expands to an sljit call on a
local variable named `compiler`, so a function must use DEFINE_COMPILER
(which in turn needs a `common` pointer in scope) before using them. */
498 #define DEFINE_COMPILER \
499 struct sljit_compiler *compiler = common->compiler
500 #define OP1(op, dst, dstw, src, srcw) \
501 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
502 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
503 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
504 #define LABEL() \
505 sljit_emit_label(compiler)
506 #define JUMP(type) \
507 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
508 #define JUMPTO(type, label) \
509 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an earlier jump to a label emitted at the current position. */
510 #define JUMPHERE(jump) \
511 sljit_set_label((jump), sljit_emit_label(compiler))
512 #define SET_LABEL(jump, label) \
513 sljit_set_label((jump), (label))
514 #define CMP(type, src1, src1w, src2, src2w) \
515 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
516 #define CMPTO(type, src1, src1w, src2, src2w, label) \
517 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
518 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
519 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
520 #define GET_LOCAL_BASE(dst, dstw, offset) \
521 sljit_get_local_base(compiler, (dst), (dstw), (offset))
522
523 static pcre_uchar* bracketend(pcre_uchar* cc)
524 {
525 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
526 do cc += GET(cc, 1); while (*cc == OP_ALT);
527 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
528 cc += 1 + LINK_SIZE;
529 return cc;
530 }
531
532 /* Functions which might need modification for every newly supported opcode:
533 next_opcode
534 get_private_data_length
535 set_private_data_ptrs
536 get_framesize
537 init_frame
538 get_private_data_copy_length
539 copy_private_data
540 compile_matchingpath
541 compile_backtrackingpath
542 */
543
/* Returns the address of the opcode that follows the one at cc, or NULL
when the opcode cannot be handled: OP_ANYBYTE in UTF mode, or an opcode
that is not listed here (which also triggers SLJIT_ASSERT_STOP).
common is only consulted for its utf flag, and only in SUPPORT_UTF builds;
get_private_data_length() turns a NULL result into a -1 return. */
544 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
545 {
546 SLJIT_UNUSED_ARG(common);
547 switch(*cc)
548 {
/* Opcodes whose length is taken unchanged from the OP_lengths table. */
549 case OP_SOD:
550 case OP_SOM:
551 case OP_SET_SOM:
552 case OP_NOT_WORD_BOUNDARY:
553 case OP_WORD_BOUNDARY:
554 case OP_NOT_DIGIT:
555 case OP_DIGIT:
556 case OP_NOT_WHITESPACE:
557 case OP_WHITESPACE:
558 case OP_NOT_WORDCHAR:
559 case OP_WORDCHAR:
560 case OP_ANY:
561 case OP_ALLANY:
562 case OP_NOTPROP:
563 case OP_PROP:
564 case OP_ANYNL:
565 case OP_NOT_HSPACE:
566 case OP_HSPACE:
567 case OP_NOT_VSPACE:
568 case OP_VSPACE:
569 case OP_EXTUNI:
570 case OP_EODN:
571 case OP_EOD:
572 case OP_CIRC:
573 case OP_CIRCM:
574 case OP_DOLL:
575 case OP_DOLLM:
576 case OP_CRSTAR:
577 case OP_CRMINSTAR:
578 case OP_CRPLUS:
579 case OP_CRMINPLUS:
580 case OP_CRQUERY:
581 case OP_CRMINQUERY:
582 case OP_CRRANGE:
583 case OP_CRMINRANGE:
584 case OP_CLASS:
585 case OP_NCLASS:
586 case OP_REF:
587 case OP_REFI:
588 case OP_RECURSE:
589 case OP_CALLOUT:
590 case OP_ALT:
591 case OP_KET:
592 case OP_KETRMAX:
593 case OP_KETRMIN:
594 case OP_KETRPOS:
595 case OP_REVERSE:
596 case OP_ASSERT:
597 case OP_ASSERT_NOT:
598 case OP_ASSERTBACK:
599 case OP_ASSERTBACK_NOT:
600 case OP_ONCE:
601 case OP_ONCE_NC:
602 case OP_BRA:
603 case OP_BRAPOS:
604 case OP_CBRA:
605 case OP_CBRAPOS:
606 case OP_COND:
607 case OP_SBRA:
608 case OP_SBRAPOS:
609 case OP_SCBRA:
610 case OP_SCBRAPOS:
611 case OP_SCOND:
612 case OP_CREF:
613 case OP_NCREF:
614 case OP_RREF:
615 case OP_NRREF:
616 case OP_DEF:
617 case OP_BRAZERO:
618 case OP_BRAMINZERO:
619 case OP_BRAPOSZERO:
620 case OP_PRUNE:
621 case OP_SKIP:
622 case OP_THEN:
623 case OP_COMMIT:
624 case OP_FAIL:
625 case OP_ACCEPT:
626 case OP_ASSERT_ACCEPT:
627 case OP_CLOSE:
628 case OP_SKIPZERO:
629 return cc + PRIV(OP_lengths)[*cc];
630
/* Opcodes that end with a character. After advancing by the table length,
the last code unit skipped (cc[-1]) is tested in UTF mode and any extra
code units it announces are skipped as well. */
631 case OP_CHAR:
632 case OP_CHARI:
633 case OP_NOT:
634 case OP_NOTI:
635 case OP_STAR:
636 case OP_MINSTAR:
637 case OP_PLUS:
638 case OP_MINPLUS:
639 case OP_QUERY:
640 case OP_MINQUERY:
641 case OP_UPTO:
642 case OP_MINUPTO:
643 case OP_EXACT:
644 case OP_POSSTAR:
645 case OP_POSPLUS:
646 case OP_POSQUERY:
647 case OP_POSUPTO:
648 case OP_STARI:
649 case OP_MINSTARI:
650 case OP_PLUSI:
651 case OP_MINPLUSI:
652 case OP_QUERYI:
653 case OP_MINQUERYI:
654 case OP_UPTOI:
655 case OP_MINUPTOI:
656 case OP_EXACTI:
657 case OP_POSSTARI:
658 case OP_POSPLUSI:
659 case OP_POSQUERYI:
660 case OP_POSUPTOI:
661 case OP_NOTSTAR:
662 case OP_NOTMINSTAR:
663 case OP_NOTPLUS:
664 case OP_NOTMINPLUS:
665 case OP_NOTQUERY:
666 case OP_NOTMINQUERY:
667 case OP_NOTUPTO:
668 case OP_NOTMINUPTO:
669 case OP_NOTEXACT:
670 case OP_NOTPOSSTAR:
671 case OP_NOTPOSPLUS:
672 case OP_NOTPOSQUERY:
673 case OP_NOTPOSUPTO:
674 case OP_NOTSTARI:
675 case OP_NOTMINSTARI:
676 case OP_NOTPLUSI:
677 case OP_NOTMINPLUSI:
678 case OP_NOTQUERYI:
679 case OP_NOTMINQUERYI:
680 case OP_NOTUPTOI:
681 case OP_NOTMINUPTOI:
682 case OP_NOTEXACTI:
683 case OP_NOTPOSSTARI:
684 case OP_NOTPOSPLUSI:
685 case OP_NOTPOSQUERYI:
686 case OP_NOTPOSUPTOI:
687 cc += PRIV(OP_lengths)[*cc];
688 #ifdef SUPPORT_UTF
689 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
690 #endif
691 return cc;
692
693 /* Special cases. */
/* The type repeats advance by one unit less than the table length.
NOTE(review): presumably so that the trailing type opcode is visited as an
opcode of its own on the next step - confirm against the OP_lengths table. */
694 case OP_TYPESTAR:
695 case OP_TYPEMINSTAR:
696 case OP_TYPEPLUS:
697 case OP_TYPEMINPLUS:
698 case OP_TYPEQUERY:
699 case OP_TYPEMINQUERY:
700 case OP_TYPEUPTO:
701 case OP_TYPEMINUPTO:
702 case OP_TYPEEXACT:
703 case OP_TYPEPOSSTAR:
704 case OP_TYPEPOSPLUS:
705 case OP_TYPEPOSQUERY:
706 case OP_TYPEPOSUPTO:
707 return cc + PRIV(OP_lengths)[*cc] - 1;
708
/* OP_ANYBYTE is rejected (NULL) in UTF mode, otherwise it is one unit long. */
709 case OP_ANYBYTE:
710 #ifdef SUPPORT_UTF
711 if (common->utf) return NULL;
712 #endif
713 return cc + 1;
714
/* OP_XCLASS stores its own total length at offset 1. */
715 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
716 case OP_XCLASS:
717 return cc + GET(cc, 1);
718 #endif
719
/* Verbs with an argument: cc[1] is read as a length and three further
units are skipped on top of it.
NOTE(review): presumably the opcode, the length unit and a terminator after
the name - confirm against the compiler's encoding. */
720 case OP_MARK:
721 case OP_PRUNE_ARG:
722 case OP_SKIP_ARG:
723 case OP_THEN_ARG:
724 return cc + 1 + 2 + cc[1];
725
726 default:
727 /* All opcodes are supported now! */
728 SLJIT_ASSERT_STOP();
729 return NULL;
730 }
731 }
732
/* Groups of case labels shared by get_private_data_length() and
set_private_data_ptrs(). The suffix follows the number of private data
words (`space`) those functions reserve for the group: 1, or 2 for the
2A / 2B groups. */

/* Character iterators given one word; the opcode is skipped with size -2. */
733 #define CASE_ITERATOR_PRIVATE_DATA_1 \
734 case OP_MINSTAR: \
735 case OP_MINPLUS: \
736 case OP_QUERY: \
737 case OP_MINQUERY: \
738 case OP_MINSTARI: \
739 case OP_MINPLUSI: \
740 case OP_QUERYI: \
741 case OP_MINQUERYI: \
742 case OP_NOTMINSTAR: \
743 case OP_NOTMINPLUS: \
744 case OP_NOTQUERY: \
745 case OP_NOTMINQUERY: \
746 case OP_NOTMINSTARI: \
747 case OP_NOTMINPLUSI: \
748 case OP_NOTQUERYI: \
749 case OP_NOTMINQUERYI:
750
/* Character iterators given two words; the opcode is skipped with size -2. */
751 #define CASE_ITERATOR_PRIVATE_DATA_2A \
752 case OP_STAR: \
753 case OP_PLUS: \
754 case OP_STARI: \
755 case OP_PLUSI: \
756 case OP_NOTSTAR: \
757 case OP_NOTPLUS: \
758 case OP_NOTSTARI: \
759 case OP_NOTPLUSI:
760
/* Character iterators given two words; the opcode is skipped with size
-(2 + IMM2_SIZE). */
761 #define CASE_ITERATOR_PRIVATE_DATA_2B \
762 case OP_UPTO: \
763 case OP_MINUPTO: \
764 case OP_UPTOI: \
765 case OP_MINUPTOI: \
766 case OP_NOTUPTO: \
767 case OP_NOTMINUPTO: \
768 case OP_NOTUPTOI: \
769 case OP_NOTMINUPTOI:
770
/* Type iterators given one word; the opcode is skipped with size 1. */
771 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
772 case OP_TYPEMINSTAR: \
773 case OP_TYPEMINPLUS: \
774 case OP_TYPEQUERY: \
775 case OP_TYPEMINQUERY:
776
/* Type iterators given two words unless the repeated type is OP_ANYNL or
OP_EXTUNI; the opcode is skipped with size 1. */
777 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
778 case OP_TYPESTAR: \
779 case OP_TYPEPLUS:
780
/* As 2A, but the opcode is skipped with size 1 + IMM2_SIZE and the repeated
type is read at offset 1 + IMM2_SIZE. */
781 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
782 case OP_TYPEUPTO: \
783 case OP_TYPEMINUPTO:
784
785 static int get_class_iterator_size(pcre_uchar *cc)
786 {
787 switch(*cc)
788 {
789 case OP_CRSTAR:
790 case OP_CRPLUS:
791 return 2;
792
793 case OP_CRMINSTAR:
794 case OP_CRMINPLUS:
795 case OP_CRQUERY:
796 case OP_CRMINQUERY:
797 return 1;
798
799 case OP_CRRANGE:
800 case OP_CRMINRANGE:
801 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
802 return 0;
803 return 2;
804
805 default:
806 return 0;
807 }
808 }
809
/* First pass over the byte code in [cc, ccend).

Returns the number of bytes of private data the opcodes need, or -1 when
the pattern cannot be compiled: a conditional group whose first item is
OP_CALLOUT, or an opcode for which next_opcode() returns NULL.

Side effects on common: clears optimized_cbracket[] entries for referenced
or possessive capturing brackets, sets the has_set_som, has_then,
has_skip_arg and needs_start_ptr flags, and allocates recursive_head_ptr,
capture_last_ptr and mark_ptr by taking the next word from ovector_start
the first time they are needed. control_head_ptr is only set to 1 here.
NOTE(review): presumably 1 is a "needed" marker that is replaced by a real
offset later - confirm in the caller.

Per-opcode working variables:
  space      - private data words wanted by an iterator;
  size       - units to advance cc by; a negative value marks an opcode
               ending in a character, for which the extra UTF units are
               skipped as well;
  bracketlen - length of a bracket header to step over (also borrowed as
               scratch storage in the OP_NCREF case);
  end        - end of the enclosing bracket when that bracket does not
               close with a plain OP_KET, otherwise NULL. Iterator words
               are only counted while cc >= end. */
810 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
811 {
812 int private_data_length = 0;
813 pcre_uchar *alternative;
814 pcre_uchar *name;
815 pcre_uchar *end = NULL;
816 int space, size, i;
817 pcre_uint32 bracketlen;
818
819 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
820 while (cc < ccend)
821 {
822 space = 0;
823 size = 0;
824 bracketlen = 0;
825 switch(*cc)
826 {
827 case OP_SET_SOM:
828 common->has_set_som = TRUE;
829 cc += 1;
830 break;
831
832 case OP_REF:
833 case OP_REFI:
834 common->optimized_cbracket[GET2(cc, 1)] = 0;
835 cc += 1 + IMM2_SIZE;
836 break;
837
/* Groups that always get one private data word. */
838 case OP_ASSERT:
839 case OP_ASSERT_NOT:
840 case OP_ASSERTBACK:
841 case OP_ASSERTBACK_NOT:
842 case OP_ONCE:
843 case OP_ONCE_NC:
844 case OP_BRAPOS:
845 case OP_SBRA:
846 case OP_SBRAPOS:
847 private_data_length += sizeof(sljit_sw);
848 bracketlen = 1 + LINK_SIZE;
849 break;
850
851 case OP_CBRAPOS:
852 case OP_SCBRAPOS:
853 private_data_length += sizeof(sljit_sw);
854 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
855 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
856 break;
857
858 case OP_COND:
859 case OP_SCOND:
860 /* Only AUTO_CALLOUT can insert this opcode. We do
861 not intend to support this case. */
862 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
863 return -1;
864
865 if (*cc == OP_COND)
866 {
867 /* Might be a hidden SCOND. */
868 alternative = cc + GET(cc, 1);
869 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
870 private_data_length += sizeof(sljit_sw);
871 }
872 else
873 private_data_length += sizeof(sljit_sw);
874 bracketlen = 1 + LINK_SIZE;
875 break;
876
877 case OP_CREF:
878 i = GET2(cc, 1);
879 common->optimized_cbracket[i] = 0;
880 cc += 1 + IMM2_SIZE;
881 break;
882
883 case OP_NCREF:
/* bracketlen temporarily holds the referenced group number. The first
loop finds the name table entry with that number; the second clears
optimized_cbracket[] for every entry carrying the same name. */
884 bracketlen = GET2(cc, 1);
885 name = (pcre_uchar *)common->name_table;
886 alternative = name;
887 for (i = 0; i < common->name_count; i++)
888 {
889 if (GET2(name, 0) == bracketlen) break;
890 name += common->name_entry_size;
891 }
892 SLJIT_ASSERT(i != common->name_count);
893
894 for (i = 0; i < common->name_count; i++)
895 {
896 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
897 common->optimized_cbracket[GET2(alternative, 0)] = 0;
898 alternative += common->name_entry_size;
899 }
/* Reset the borrowed variable so that the bracket handling after the
switch is not triggered. */
900 bracketlen = 0;
901 cc += 1 + IMM2_SIZE;
902 break;
903
904 case OP_BRA:
905 bracketlen = 1 + LINK_SIZE;
906 break;
907
908 case OP_CBRA:
909 case OP_SCBRA:
910 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
911 break;
912
913 CASE_ITERATOR_PRIVATE_DATA_1
914 space = 1;
915 size = -2;
916 break;
917
918 CASE_ITERATOR_PRIVATE_DATA_2A
919 space = 2;
920 size = -2;
921 break;
922
923 CASE_ITERATOR_PRIVATE_DATA_2B
924 space = 2;
925 size = -(2 + IMM2_SIZE);
926 break;
927
928 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
929 space = 1;
930 size = 1;
931 break;
932
933 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
934 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
935 space = 2;
936 size = 1;
937 break;
938
939 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
940 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
941 space = 2;
942 size = 1 + IMM2_SIZE;
943 break;
944
/* Classes: the words wanted depend on the repeat opcode that follows the
class data. */
945 case OP_CLASS:
946 case OP_NCLASS:
947 size += 1 + 32 / sizeof(pcre_uchar);
948 space = get_class_iterator_size(cc + size);
949 break;
950
951 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
952 case OP_XCLASS:
953 size = GET(cc, 1);
954 space = get_class_iterator_size(cc + size);
955 break;
956 #endif
957
958 case OP_RECURSE:
959 /* Set its value only once. */
960 if (common->recursive_head_ptr == 0)
961 {
962 common->recursive_head_ptr = common->ovector_start;
963 common->ovector_start += sizeof(sljit_sw);
964 }
965 cc += 1 + LINK_SIZE;
966 break;
967
968 case OP_CALLOUT:
969 if (common->capture_last_ptr == 0)
970 {
971 common->capture_last_ptr = common->ovector_start;
972 common->ovector_start += sizeof(sljit_sw);
973 }
974 cc += 2 + 2 * LINK_SIZE;
975 break;
976
977 case OP_THEN_ARG:
978 common->has_then = TRUE;
979 common->control_head_ptr = 1;
980 /* Fall through. */
981
982 case OP_PRUNE_ARG:
983 common->needs_start_ptr = TRUE;
984 /* Fall through. */
985
986 case OP_MARK:
987 if (common->mark_ptr == 0)
988 {
989 common->mark_ptr = common->ovector_start;
990 common->ovector_start += sizeof(sljit_sw);
991 }
992 cc += 1 + 2 + cc[1];
993 break;
994
995 case OP_THEN:
996 common->has_then = TRUE;
997 common->control_head_ptr = 1;
998 /* Fall through. */
999
1000 case OP_PRUNE:
1001 case OP_SKIP:
1002 common->needs_start_ptr = TRUE;
1003 cc += 1;
1004 break;
1005
1006 case OP_SKIP_ARG:
1007 common->control_head_ptr = 1;
1008 common->has_skip_arg = TRUE;
1009 cc += 1 + 2 + cc[1];
1010 break;
1011
1012 default:
1013 cc = next_opcode(common, cc);
1014 if (cc == NULL)
1015 return -1;
1016 break;
1017 }
1018
/* Iterators only get private data outside the bracket remembered in end
(end is NULL when there is none, so the test then always passes). */
1019 if (space > 0 && cc >= end)
1020 private_data_length += sizeof(sljit_sw) * space;
1021
1022 if (size != 0)
1023 {
1024 if (size < 0)
1025 {
1026 cc += -size;
1027 #ifdef SUPPORT_UTF
1028 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1029 #endif
1030 }
1031 else
1032 cc += size;
1033 }
1034
1035 if (bracketlen != 0)
1036 {
/* Entering a bracket outside the remembered region: remember its end,
unless it closes with a plain OP_KET. */
1037 if (cc >= end)
1038 {
1039 end = bracketend(cc);
1040 if (end[-1 - LINK_SIZE] == OP_KET)
1041 end = NULL;
1042 }
1043 cc += bracketlen;
1044 }
1045 }
1046 return private_data_length;
1047 }
1048
/* Second pass, walking the byte code from common->start to ccend with the
same opcode grouping as get_private_data_length().

Each opcode that needs private data gets the current value of
private_data_ptr stored in common->private_data_ptrs[], indexed by the
opcode's distance from common->start; private_data_ptr then advances by the
number of bytes reserved. space, size, bracketlen and end have the same
meaning as in get_private_data_length(), so iterators inside a bracket that
does not close with a plain OP_KET get no entry. */
1049 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
1050 {
1051 pcre_uchar *cc = common->start;
1052 pcre_uchar *alternative;
1053 pcre_uchar *end = NULL;
1054 int space, size, bracketlen;
1055
1056 while (cc < ccend)
1057 {
1058 space = 0;
1059 size = 0;
1060 bracketlen = 0;
1061 switch(*cc)
1062 {
/* Groups that always get one private data word. */
1063 case OP_ASSERT:
1064 case OP_ASSERT_NOT:
1065 case OP_ASSERTBACK:
1066 case OP_ASSERTBACK_NOT:
1067 case OP_ONCE:
1068 case OP_ONCE_NC:
1069 case OP_BRAPOS:
1070 case OP_SBRA:
1071 case OP_SBRAPOS:
1072 case OP_SCOND:
1073 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1074 private_data_ptr += sizeof(sljit_sw);
1075 bracketlen = 1 + LINK_SIZE;
1076 break;
1077
1078 case OP_CBRAPOS:
1079 case OP_SCBRAPOS:
1080 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1081 private_data_ptr += sizeof(sljit_sw);
1082 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1083 break;
1084
1085 case OP_COND:
1086 /* Might be a hidden SCOND. */
1087 alternative = cc + GET(cc, 1);
1088 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1089 {
1090 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1091 private_data_ptr += sizeof(sljit_sw);
1092 }
1093 bracketlen = 1 + LINK_SIZE;
1094 break;
1095
1096 case OP_BRA:
1097 bracketlen = 1 + LINK_SIZE;
1098 break;
1099
1100 case OP_CBRA:
1101 case OP_SCBRA:
1102 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1103 break;
1104
1105 CASE_ITERATOR_PRIVATE_DATA_1
1106 space = 1;
1107 size = -2;
1108 break;
1109
1110 CASE_ITERATOR_PRIVATE_DATA_2A
1111 space = 2;
1112 size = -2;
1113 break;
1114
1115 CASE_ITERATOR_PRIVATE_DATA_2B
1116 space = 2;
1117 size = -(2 + IMM2_SIZE);
1118 break;
1119
1120 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1121 space = 1;
1122 size = 1;
1123 break;
1124
1125 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1126 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1127 space = 2;
1128 size = 1;
1129 break;
1130
1131 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1132 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1133 space = 2;
1134 size = 1 + IMM2_SIZE;
1135 break;
1136
1137 case OP_CLASS:
1138 case OP_NCLASS:
1139 size += 1 + 32 / sizeof(pcre_uchar);
1140 space = get_class_iterator_size(cc + size);
1141 break;
1142
1143 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1144 case OP_XCLASS:
1145 size = GET(cc, 1);
1146 space = get_class_iterator_size(cc + size);
1147 break;
1148 #endif
1149
1150 default:
/* The first pass has already rejected unsupported opcodes, so a NULL
result is only asserted against here. */
1151 cc = next_opcode(common, cc);
1152 SLJIT_ASSERT(cc != NULL);
1153 break;
1154 }
1155
/* cc still points at the iterator opcode here: it is only advanced by the
size handling below. */
1156 if (space > 0 && cc >= end)
1157 {
1158 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1159 private_data_ptr += sizeof(sljit_sw) * space;
1160 }
1161
1162 if (size != 0)
1163 {
1164 if (size < 0)
1165 {
1166 cc += -size;
1167 #ifdef SUPPORT_UTF
1168 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1169 #endif
1170 }
1171 else
1172 cc += size;
1173 }
1174
1175 if (bracketlen > 0)
1176 {
1177 if (cc >= end)
1178 {
1179 end = bracketend(cc);
1180 if (end[-1 - LINK_SIZE] == OP_KET)
1181 end = NULL;
1182 }
1183 cc += bracketlen;
1184 }
1185 }
1186 }
1187
/* Computes the number of stack words a frame needs for the opcodes between cc
and ccend.

The following items are counted:
  - 2 words, once, for the start of match (OP_SET_SOM, or OP_RECURSE when
    common->has_set_som is set)
  - 2 words, once, for the mark (OP_MARK, OP_PRUNE_ARG, OP_THEN_ARG, or
    OP_RECURSE when common->mark_ptr is set)
  - 2 words, once, for the last capture (capturing brackets or OP_RECURSE
    when common->capture_last_ptr is set)
  - 3 words for every capturing bracket

Arguments:
  common              compiler state
  cc                  first opcode; when ccend is NULL this is the opening
                      opcode of a bracket and the bracket body is scanned
  ccend               end of the range, or NULL
  recursive           when TRUE, the start of match and the mark are treated
                      as already handled
  needs_control_head  output; set to TRUE when a mark-like opcode is found and
                      common->control_head_ptr is in use (or always, when
                      DEBUG_FORCE_CONTROL_HEAD is enabled)

Returns:              length + 1 when something must be saved, otherwise
                      no_frame or no_stack depending on whether any opcode
                      outside the simple list below was seen
*/
1188 /* Returns with a frame_types (always < 0) if no need for frame. */
1189 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1190 {
1191 int length = 0;
1192 int possessive = 0;
1193 BOOL stack_restore = FALSE;
1194 BOOL setsom_found = recursive;
1195 BOOL setmark_found = recursive;
1196 /* The last capture is a local variable even for recursions. */
1197 BOOL capture_last_found = FALSE;
1198
1199 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1200 SLJIT_ASSERT(common->control_head_ptr != 0);
1201 *needs_control_head = TRUE;
1202 #else
1203 *needs_control_head = FALSE;
1204 #endif
1205
1206 if (ccend == NULL)
1207 {
1208 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* For a non-recursive (S)CBRAPOS the words of the bracket itself are counted
   up front; 'possessive' remembers this base so that the special case at the
   end can detect that nothing else was added. */
1209 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1210 {
1211 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1212 /* This is correct regardless of common->capture_last_ptr. */
1213 capture_last_found = TRUE;
1214 }
1215 cc = next_opcode(common, cc);
1216 }
1217
1218 SLJIT_ASSERT(cc != NULL);
1219 while (cc < ccend)
1220 switch(*cc)
1221 {
1222 case OP_SET_SOM:
1223 SLJIT_ASSERT(common->has_set_som);
1224 stack_restore = TRUE;
1225 if (!setsom_found)
1226 {
1227 length += 2;
1228 setsom_found = TRUE;
1229 }
1230 cc += 1;
1231 break;
1232
1233 case OP_MARK:
1234 case OP_PRUNE_ARG:
1235 case OP_THEN_ARG:
1236 SLJIT_ASSERT(common->mark_ptr != 0);
1237 stack_restore = TRUE;
1238 if (!setmark_found)
1239 {
1240 length += 2;
1241 setmark_found = TRUE;
1242 }
1243 if (common->control_head_ptr != 0)
1244 *needs_control_head = TRUE;
/* Skip the opcode, the two units around the name and the cc[1] units of the
   name itself. */
1245 cc += 1 + 2 + cc[1];
1246 break;
1247
1248 case OP_RECURSE:
1249 stack_restore = TRUE;
1250 if (common->has_set_som && !setsom_found)
1251 {
1252 length += 2;
1253 setsom_found = TRUE;
1254 }
1255 if (common->mark_ptr != 0 && !setmark_found)
1256 {
1257 length += 2;
1258 setmark_found = TRUE;
1259 }
1260 if (common->capture_last_ptr != 0 && !capture_last_found)
1261 {
1262 length += 2;
1263 capture_last_found = TRUE;
1264 }
1265 cc += 1 + LINK_SIZE;
1266 break;
1267
1268 case OP_CBRA:
1269 case OP_CBRAPOS:
1270 case OP_SCBRA:
1271 case OP_SCBRAPOS:
1272 stack_restore = TRUE;
1273 if (common->capture_last_ptr != 0 && !capture_last_found)
1274 {
1275 length += 2;
1276 capture_last_found = TRUE;
1277 }
1278 length += 3;
1279 cc += 1 + LINK_SIZE + IMM2_SIZE;
1280 break;
1281
/* Any opcode not listed anywhere in this switch sets stack_restore and then
   shares the skipping code with the simple opcodes listed below, which leave
   stack_restore untouched. */
1282 default:
1283 stack_restore = TRUE;
1284 /* Fall through. */
1285
1286 case OP_NOT_WORD_BOUNDARY:
1287 case OP_WORD_BOUNDARY:
1288 case OP_NOT_DIGIT:
1289 case OP_DIGIT:
1290 case OP_NOT_WHITESPACE:
1291 case OP_WHITESPACE:
1292 case OP_NOT_WORDCHAR:
1293 case OP_WORDCHAR:
1294 case OP_ANY:
1295 case OP_ALLANY:
1296 case OP_ANYBYTE:
1297 case OP_NOTPROP:
1298 case OP_PROP:
1299 case OP_ANYNL:
1300 case OP_NOT_HSPACE:
1301 case OP_HSPACE:
1302 case OP_NOT_VSPACE:
1303 case OP_VSPACE:
1304 case OP_EXTUNI:
1305 case OP_EODN:
1306 case OP_EOD:
1307 case OP_CIRC:
1308 case OP_CIRCM:
1309 case OP_DOLL:
1310 case OP_DOLLM:
1311 case OP_CHAR:
1312 case OP_CHARI:
1313 case OP_NOT:
1314 case OP_NOTI:
1315
1316 case OP_EXACT:
1317 case OP_POSSTAR:
1318 case OP_POSPLUS:
1319 case OP_POSQUERY:
1320 case OP_POSUPTO:
1321
1322 case OP_EXACTI:
1323 case OP_POSSTARI:
1324 case OP_POSPLUSI:
1325 case OP_POSQUERYI:
1326 case OP_POSUPTOI:
1327
1328 case OP_NOTEXACT:
1329 case OP_NOTPOSSTAR:
1330 case OP_NOTPOSPLUS:
1331 case OP_NOTPOSQUERY:
1332 case OP_NOTPOSUPTO:
1333
1334 case OP_NOTEXACTI:
1335 case OP_NOTPOSSTARI:
1336 case OP_NOTPOSPLUSI:
1337 case OP_NOTPOSQUERYI:
1338 case OP_NOTPOSUPTOI:
1339
1340 case OP_TYPEEXACT:
1341 case OP_TYPEPOSSTAR:
1342 case OP_TYPEPOSPLUS:
1343 case OP_TYPEPOSQUERY:
1344 case OP_TYPEPOSUPTO:
1345
1346 case OP_CLASS:
1347 case OP_NCLASS:
1348 case OP_XCLASS:
1349
1350 cc = next_opcode(common, cc);
1351 SLJIT_ASSERT(cc != NULL);
1352 break;
1353 }
1354
1355 /* Possessive quantifiers can use a special case. */
1356 if (SLJIT_UNLIKELY(possessive == length))
1357 return stack_restore ? no_frame : no_stack;
1358
/* One extra word is added to a non-empty frame (init_frame writes a closing
   zero word after the saved items). */
1359 if (length > 0)
1360 return length + 1;
1361 return stack_restore ? no_frame : no_stack;
1362 }
1363
/* Emits the machine code which fills a frame on the stack.

The same opcode range is scanned as in get_framesize, and a record is written
for every item that must be saved. A record is a tag word followed by the
current value(s):
  - start of match:  -OVECTOR(0), then the value of OVECTOR(0)
  - mark:            -common->mark_ptr, then the value of the mark local
  - last capture:    -common->capture_last_ptr, then the value of that local
  - capture bracket: OVECTOR(offset), then both ovector words of the bracket
The frame is closed by a zero word.

Arguments:
  common     compiler state
  cc         first opcode; when ccend is NULL this is the opening opcode of a
             bracket
  ccend      end of the range, or NULL to scan the body of the bracket at cc
  stackpos   index of the first stack word of the frame
  stacktop   index where the closing zero word must end up (only checked by
             the assertions)
  recursive  when TRUE, the start of match and the mark are not saved
*/
1364 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1365 {
1366 DEFINE_COMPILER;
1367 BOOL setsom_found = recursive;
1368 BOOL setmark_found = recursive;
1369 /* The last capture is a local variable even for recursions. */
1370 BOOL capture_last_found = FALSE;
1371 int offset;
1372
1373 /* >= 1 + shortest item size (2) */
1374 SLJIT_UNUSED_ARG(stacktop);
1375 SLJIT_ASSERT(stackpos >= stacktop + 2);
1376
/* From here on stackpos is the STACK() offset and is advanced by
   sizeof(sljit_sw) after every emitted store. */
1377 stackpos = STACK(stackpos);
1378 if (ccend == NULL)
1379 {
1380 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive (S)CBRAPOS is not skipped: the loop below processes it as
   a capturing bracket, so its own capture is saved as well. */
1381 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1382 cc = next_opcode(common, cc);
1383 }
1384
1385 SLJIT_ASSERT(cc != NULL);
1386 while (cc < ccend)
1387 switch(*cc)
1388 {
1389 case OP_SET_SOM:
1390 SLJIT_ASSERT(common->has_set_som);
1391 if (!setsom_found)
1392 {
1393 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1394 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1395 stackpos += (int)sizeof(sljit_sw);
1396 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1397 stackpos += (int)sizeof(sljit_sw);
1398 setsom_found = TRUE;
1399 }
1400 cc += 1;
1401 break;
1402
1403 case OP_MARK:
1404 case OP_PRUNE_ARG:
1405 case OP_THEN_ARG:
1406 SLJIT_ASSERT(common->mark_ptr != 0);
1407 if (!setmark_found)
1408 {
1409 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1410 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1411 stackpos += (int)sizeof(sljit_sw);
1412 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1413 stackpos += (int)sizeof(sljit_sw);
1414 setmark_found = TRUE;
1415 }
1416 cc += 1 + 2 + cc[1];
1417 break;
1418
/* A recursion saves every single-instance item which is in use and has not
   been saved yet. */
1419 case OP_RECURSE:
1420 if (common->has_set_som && !setsom_found)
1421 {
1422 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1423 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1424 stackpos += (int)sizeof(sljit_sw);
1425 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1426 stackpos += (int)sizeof(sljit_sw);
1427 setsom_found = TRUE;
1428 }
1429 if (common->mark_ptr != 0 && !setmark_found)
1430 {
1431 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1432 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1433 stackpos += (int)sizeof(sljit_sw);
1434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1435 stackpos += (int)sizeof(sljit_sw);
1436 setmark_found = TRUE;
1437 }
1438 if (common->capture_last_ptr != 0 && !capture_last_found)
1439 {
1440 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1441 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1442 stackpos += (int)sizeof(sljit_sw);
1443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1444 stackpos += (int)sizeof(sljit_sw);
1445 capture_last_found = TRUE;
1446 }
1447 cc += 1 + LINK_SIZE;
1448 break;
1449
1450 case OP_CBRA:
1451 case OP_CBRAPOS:
1452 case OP_SCBRA:
1453 case OP_SCBRAPOS:
1454 if (common->capture_last_ptr != 0 && !capture_last_found)
1455 {
1456 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1458 stackpos += (int)sizeof(sljit_sw);
1459 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1460 stackpos += (int)sizeof(sljit_sw);
1461 capture_last_found = TRUE;
1462 }
/* Three words per capturing bracket: the (positive) tag and the two ovector
   values belonging to the bracket number. */
1463 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1464 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1465 stackpos += (int)sizeof(sljit_sw);
1466 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1467 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1469 stackpos += (int)sizeof(sljit_sw);
1470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1471 stackpos += (int)sizeof(sljit_sw);
1472
1473 cc += 1 + LINK_SIZE + IMM2_SIZE;
1474 break;
1475
1476 default:
1477 cc = next_opcode(common, cc);
1478 SLJIT_ASSERT(cc != NULL);
1479 break;
1480 }
1481
/* The closing zero word; the assertion checks that exactly the expected
   number of words were written. */
1482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1483 SLJIT_ASSERT(stackpos == STACK(stacktop));
1484 }
1485
/* Returns the number of machine words which belong to the opcodes between cc
and ccend and are handled by copy_private_data.

The count starts from 2 (3 when needs_control_head is set), and every opcode
with private data adds the number of its words. The cases and their
conditions are the same as in copy_private_data, so the two functions must be
kept in sync.

Arguments:
  common              compiler state
  cc                  first opcode of the range
  ccend               end of the range; the scan must finish exactly here
  needs_control_head  TRUE when one more word is required for the control head

Returns:              the number of words
*/
1486 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1487 {
1488 int private_data_length = needs_control_head ? 3 : 2;
1489 int size;
1490 pcre_uchar *alternative;
1491 /* Calculate the sum of the private machine words. */
1492 while (cc < ccend)
1493 {
1494 size = 0;
1495 switch(*cc)
1496 {
1497 case OP_ASSERT:
1498 case OP_ASSERT_NOT:
1499 case OP_ASSERTBACK:
1500 case OP_ASSERTBACK_NOT:
1501 case OP_ONCE:
1502 case OP_ONCE_NC:
1503 case OP_BRAPOS:
1504 case OP_SBRA:
1505 case OP_SBRAPOS:
1506 case OP_SCOND:
1507 private_data_length++;
1508 cc += 1 + LINK_SIZE;
1509 break;
1510
/* A capturing bracket is counted only when its optimized_cbracket entry is
   zero. */
1511 case OP_CBRA:
1512 case OP_SCBRA:
1513 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1514 private_data_length++;
1515 cc += 1 + LINK_SIZE + IMM2_SIZE;
1516 break;
1517
1518 case OP_CBRAPOS:
1519 case OP_SCBRAPOS:
1520 private_data_length += 2;
1521 cc += 1 + LINK_SIZE + IMM2_SIZE;
1522 break;
1523
1524 case OP_COND:
1525 /* Might be a hidden SCOND. */
1526 alternative = cc + GET(cc, 1);
1527 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1528 private_data_length++;
1529 cc += 1 + LINK_SIZE;
1530 break;
1531
/* Iterators are counted only when a private data slot was assigned to them
   (PRIVATE_DATA(cc) is non-zero). Character iterators also skip the UTF
   extra length of their character. */
1532 CASE_ITERATOR_PRIVATE_DATA_1
1533 if (PRIVATE_DATA(cc))
1534 private_data_length++;
1535 cc += 2;
1536 #ifdef SUPPORT_UTF
1537 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1538 #endif
1539 break;
1540
1541 CASE_ITERATOR_PRIVATE_DATA_2A
1542 if (PRIVATE_DATA(cc))
1543 private_data_length += 2;
1544 cc += 2;
1545 #ifdef SUPPORT_UTF
1546 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1547 #endif
1548 break;
1549
1550 CASE_ITERATOR_PRIVATE_DATA_2B
1551 if (PRIVATE_DATA(cc))
1552 private_data_length += 2;
1553 cc += 2 + IMM2_SIZE;
1554 #ifdef SUPPORT_UTF
1555 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1556 #endif
1557 break;
1558
1559 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1560 if (PRIVATE_DATA(cc))
1561 private_data_length++;
1562 cc += 1;
1563 break;
1564
1565 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1566 if (PRIVATE_DATA(cc))
1567 private_data_length += 2;
1568 cc += 1;
1569 break;
1570
1571 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1572 if (PRIVATE_DATA(cc))
1573 private_data_length += 2;
1574 cc += 1 + IMM2_SIZE;
1575 break;
1576
/* The word count of a class depends on the repeat opcode following the class
   data. */
1577 case OP_CLASS:
1578 case OP_NCLASS:
1579 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1580 case OP_XCLASS:
1581 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1582 #else
1583 size = 1 + 32 / (int)sizeof(pcre_uchar);
1584 #endif
1585 if (PRIVATE_DATA(cc))
1586 private_data_length += get_class_iterator_size(cc + size);
1587 cc += size;
1588 break;
1589
1590 default:
1591 cc = next_opcode(common, cc);
1592 SLJIT_ASSERT(cc != NULL);
1593 break;
1594 }
1595 }
1596 SLJIT_ASSERT(cc == ccend);
1597 return private_data_length;
1598 }
1599
/* Emits the machine code which saves the private data words of the opcodes
between cc and ccend to the stack (save == TRUE), or loads them back from the
stack (save == FALSE).

The words are moved through TMP1 and TMP2, which are used alternately: when
saving, a register is written to the stack just before it is reloaded with
the next local; when restoring, a register is stored into its local and then
refilled from the stack. The set of words and their order is the same in both
directions, and it must match get_private_data_copy_length.

Arguments:
  common              compiler state
  cc                  first opcode of the range
  ccend               end of the range
  save                TRUE: locals -> stack, FALSE: stack -> locals
  stackptr            stack index where the copied area starts (see the
                      adjustments below)
  stacktop            stack index where the copied area ends
  needs_control_head  TRUE when the control head is part of the saved area
*/
1600 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1601 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1602 {
1603 DEFINE_COMPILER;
1604 int srcw[2];
1605 int count, size;
1606 BOOL tmp1next = TRUE;
1607 BOOL tmp1empty = TRUE;
1608 BOOL tmp2empty = TRUE;
1609 pcre_uchar *alternative;
1610 enum {
1611 start,
1612 loop,
1613 end
1614 } status;
1615
/* The 'start' state, which handles the recursive head and the control head,
   is used only when saving. */
1616 status = save ? start : loop;
1617 stackptr = STACK(stackptr - 2);
1618 stacktop = STACK(stacktop - 1);
1619
1620 if (!save)
1621 {
/* When restoring, the leading one or two words are skipped (they are not
   written back by this function), then up to two words are preloaded into
   TMP1 and TMP2. */
1622 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1623 if (stackptr < stacktop)
1624 {
1625 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1626 stackptr += sizeof(sljit_sw);
1627 tmp1empty = FALSE;
1628 }
1629 if (stackptr < stacktop)
1630 {
1631 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1632 stackptr += sizeof(sljit_sw);
1633 tmp2empty = FALSE;
1634 }
1635 /* The tmp1next must be TRUE in either way. */
1636 }
1637
/* Every iteration selects at most two locals (srcw[0..count-1]) and then
   moves them by the inner loop at the bottom. */
1638 do
1639 {
1640 count = 0;
1641 switch(status)
1642 {
1643 case start:
1644 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1645 count = 1;
1646 srcw[0] = common->recursive_head_ptr;
1647 if (needs_control_head)
1648 {
1649 SLJIT_ASSERT(common->control_head_ptr != 0);
1650 count = 2;
1651 srcw[1] = common->control_head_ptr;
1652 }
1653 status = loop;
1654 break;
1655
1656 case loop:
1657 if (cc >= ccend)
1658 {
1659 status = end;
1660 break;
1661 }
1662
1663 switch(*cc)
1664 {
1665 case OP_ASSERT:
1666 case OP_ASSERT_NOT:
1667 case OP_ASSERTBACK:
1668 case OP_ASSERTBACK_NOT:
1669 case OP_ONCE:
1670 case OP_ONCE_NC:
1671 case OP_BRAPOS:
1672 case OP_SBRA:
1673 case OP_SBRAPOS:
1674 case OP_SCOND:
1675 count = 1;
1676 srcw[0] = PRIVATE_DATA(cc);
1677 SLJIT_ASSERT(srcw[0] != 0);
1678 cc += 1 + LINK_SIZE;
1679 break;
1680
1681 case OP_CBRA:
1682 case OP_SCBRA:
1683 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1684 {
1685 count = 1;
1686 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1687 }
1688 cc += 1 + LINK_SIZE + IMM2_SIZE;
1689 break;
1690
1691 case OP_CBRAPOS:
1692 case OP_SCBRAPOS:
1693 count = 2;
1694 srcw[0] = PRIVATE_DATA(cc);
1695 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1696 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1697 cc += 1 + LINK_SIZE + IMM2_SIZE;
1698 break;
1699
1700 case OP_COND:
1701 /* Might be a hidden SCOND. */
1702 alternative = cc + GET(cc, 1);
1703 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1704 {
1705 count = 1;
1706 srcw[0] = PRIVATE_DATA(cc);
1707 SLJIT_ASSERT(srcw[0] != 0);
1708 }
1709 cc += 1 + LINK_SIZE;
1710 break;
1711
1712 CASE_ITERATOR_PRIVATE_DATA_1
1713 if (PRIVATE_DATA(cc))
1714 {
1715 count = 1;
1716 srcw[0] = PRIVATE_DATA(cc);
1717 }
1718 cc += 2;
1719 #ifdef SUPPORT_UTF
1720 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1721 #endif
1722 break;
1723
/* Two word iterators occupy two consecutive private data words. */
1724 CASE_ITERATOR_PRIVATE_DATA_2A
1725 if (PRIVATE_DATA(cc))
1726 {
1727 count = 2;
1728 srcw[0] = PRIVATE_DATA(cc);
1729 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1730 }
1731 cc += 2;
1732 #ifdef SUPPORT_UTF
1733 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1734 #endif
1735 break;
1736
1737 CASE_ITERATOR_PRIVATE_DATA_2B
1738 if (PRIVATE_DATA(cc))
1739 {
1740 count = 2;
1741 srcw[0] = PRIVATE_DATA(cc);
1742 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1743 }
1744 cc += 2 + IMM2_SIZE;
1745 #ifdef SUPPORT_UTF
1746 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1747 #endif
1748 break;
1749
1750 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1751 if (PRIVATE_DATA(cc))
1752 {
1753 count = 1;
1754 srcw[0] = PRIVATE_DATA(cc);
1755 }
1756 cc += 1;
1757 break;
1758
1759 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1760 if (PRIVATE_DATA(cc))
1761 {
1762 count = 2;
1763 srcw[0] = PRIVATE_DATA(cc);
1764 srcw[1] = srcw[0] + sizeof(sljit_sw);
1765 }
1766 cc += 1;
1767 break;
1768
1769 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1770 if (PRIVATE_DATA(cc))
1771 {
1772 count = 2;
1773 srcw[0] = PRIVATE_DATA(cc);
1774 srcw[1] = srcw[0] + sizeof(sljit_sw);
1775 }
1776 cc += 1 + IMM2_SIZE;
1777 break;
1778
1779 case OP_CLASS:
1780 case OP_NCLASS:
1781 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1782 case OP_XCLASS:
1783 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1784 #else
1785 size = 1 + 32 / (int)sizeof(pcre_uchar);
1786 #endif
1787 if (PRIVATE_DATA(cc))
1788 switch(get_class_iterator_size(cc + size))
1789 {
1790 case 1:
1791 count = 1;
1792 srcw[0] = PRIVATE_DATA(cc);
1793 break;
1794
1795 case 2:
1796 count = 2;
1797 srcw[0] = PRIVATE_DATA(cc);
1798 srcw[1] = srcw[0] + sizeof(sljit_sw);
1799 break;
1800
1801 default:
1802 SLJIT_ASSERT_STOP();
1803 break;
1804 }
1805 cc += size;
1806 break;
1807
1808 default:
1809 cc = next_opcode(common, cc);
1810 SLJIT_ASSERT(cc != NULL);
1811 break;
1812 }
1813 break;
1814
1815 case end:
1816 SLJIT_ASSERT_STOP();
1817 break;
1818 }
1819
/* Move the selected words. Note that count is decreased before it is used as
   an index, so srcw[1] (if present) is processed before srcw[0]. */
1820 while (count > 0)
1821 {
1822 count--;
1823 if (save)
1824 {
/* Saving: flush the previous content of the chosen register to the stack,
   then load the next local into it. */
1825 if (tmp1next)
1826 {
1827 if (!tmp1empty)
1828 {
1829 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1830 stackptr += sizeof(sljit_sw);
1831 }
1832 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1833 tmp1empty = FALSE;
1834 tmp1next = FALSE;
1835 }
1836 else
1837 {
1838 if (!tmp2empty)
1839 {
1840 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1841 stackptr += sizeof(sljit_sw);
1842 }
1843 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1844 tmp2empty = FALSE;
1845 tmp1next = TRUE;
1846 }
1847 }
1848 else
1849 {
/* Restoring: store the chosen register into the local, then refill it from
   the stack unless the end of the saved area has been reached. */
1850 if (tmp1next)
1851 {
1852 SLJIT_ASSERT(!tmp1empty);
1853 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1854 tmp1empty = stackptr >= stacktop;
1855 if (!tmp1empty)
1856 {
1857 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1858 stackptr += sizeof(sljit_sw);
1859 }
1860 tmp1next = FALSE;
1861 }
1862 else
1863 {
1864 SLJIT_ASSERT(!tmp2empty);
1865 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1866 tmp2empty = stackptr >= stacktop;
1867 if (!tmp2empty)
1868 {
1869 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1870 stackptr += sizeof(sljit_sw);
1871 }
1872 tmp1next = TRUE;
1873 }
1874 }
1875 }
1876 }
1877 while (status != end);
1878
/* When saving, the registers may still hold words which are not on the stack
   yet. The register which would be used next holds the older word, so it is
   written first. */
1879 if (save)
1880 {
1881 if (tmp1next)
1882 {
1883 if (!tmp1empty)
1884 {
1885 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1886 stackptr += sizeof(sljit_sw);
1887 }
1888 if (!tmp2empty)
1889 {
1890 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1891 stackptr += sizeof(sljit_sw);
1892 }
1893 }
1894 else
1895 {
1896 if (!tmp2empty)
1897 {
1898 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1899 stackptr += sizeof(sljit_sw);
1900 }
1901 if (!tmp1empty)
1902 {
1903 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1904 stackptr += sizeof(sljit_sw);
1905 }
1906 }
1907 }
/* The whole opcode range and the whole stack area must have been consumed. */
1908 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1909 }
1910
1911 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1912 {
1913 pcre_uchar *end = bracketend(cc);
1914 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1915
1916 /* Assert captures then. */
1917 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1918 current_offset = NULL;
1919 /* Conditional block does not. */
1920 if (*cc == OP_COND || *cc == OP_SCOND)
1921 has_alternatives = FALSE;
1922
1923 cc = next_opcode(common, cc);
1924 if (has_alternatives)
1925 current_offset = common->then_offsets + (cc - common->start);
1926
1927 while (cc < end)
1928 {
1929 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1930 cc = set_then_offsets(common, cc, current_offset);
1931 else
1932 {
1933 if (*cc == OP_ALT && has_alternatives)
1934 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1935 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1936 *current_offset = 1;
1937 cc = next_opcode(common, cc);
1938 }
1939 }
1940
1941 return end;
1942 }
1943
1944 #undef CASE_ITERATOR_PRIVATE_DATA_1
1945 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1946 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1947 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1948 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1949 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1950
1951 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1952 {
1953 return (value & (value - 1)) == 0;
1954 }
1955
1956 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1957 {
1958 while (list)
1959 {
1960 /* sljit_set_label is clever enough to do nothing
1961 if either the jump or the label is NULL. */
1962 SET_LABEL(list->jump, label);
1963 list = list->next;
1964 }
1965 }
1966
1967 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1968 {
1969 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1970 if (list_item)
1971 {
1972 list_item->next = *list;
1973 list_item->jump = jump;
1974 *list = list_item;
1975 }
1976 }
1977
1978 static void add_stub(compiler_common *common, struct sljit_jump *start)
1979 {
1980 DEFINE_COMPILER;
1981 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1982
1983 if (list_item)
1984 {
1985 list_item->start = start;
1986 list_item->quit = LABEL();
1987 list_item->next = common->stubs;
1988 common->stubs = list_item;
1989 }
1990 }
1991
1992 static void flush_stubs(compiler_common *common)
1993 {
1994 DEFINE_COMPILER;
1995 stub_list* list_item = common->stubs;
1996
1997 while (list_item)
1998 {
1999 JUMPHERE(list_item->start);
2000 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2001 JUMPTO(SLJIT_JUMP, list_item->quit);
2002 list_item = list_item->next;
2003 }
2004 common->stubs = NULL;
2005 }
2006
2007 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
2008 {
2009 DEFINE_COMPILER;
2010
2011 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
2012 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2013 }
2014
2015 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2016 {
2017 /* May destroy all locals and registers except TMP2. */
2018 DEFINE_COMPILER;
2019
2020 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2021 #ifdef DESTROY_REGISTERS
2022 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2023 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2024 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2025 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2026 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2027 #endif
2028 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2029 }
2030
2031 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2032 {
2033 DEFINE_COMPILER;
2034 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2035 }
2036
2037 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2038 {
2039 DEFINE_COMPILER;
2040 struct sljit_label *loop;
2041 int i;
2042
2043 /* At this point we can freely use all temporary registers. */
2044 SLJIT_ASSERT(length > 1);
2045 /* TMP1 returns with begin - 1. */
2046 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2047 if (length < 8)
2048 {
2049 for (i = 1; i < length; i++)
2050 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2051 }
2052 else
2053 {
2054 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2055 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2056 loop = LABEL();
2057 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2058 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2059 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2060 }
2061 }
2062
2063 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2064 {
2065 DEFINE_COMPILER;
2066 struct sljit_label *loop;
2067 int i;
2068
2069 SLJIT_ASSERT(length > 1);
2070 /* OVECTOR(1) contains the "string begin - 1" constant. */
2071 if (length > 2)
2072 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2073 if (length < 8)
2074 {
2075 for (i = 2; i < length; i++)
2076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2077 }
2078 else
2079 {
2080 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2081 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2082 loop = LABEL();
2083 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2084 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2085 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2086 }
2087
2088 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2089 if (common->mark_ptr != 0)
2090 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2091 if (common->control_head_ptr != 0)
2092 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2093 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2094 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2095 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2096 }
2097
2098 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2099 {
2100 while (current != NULL)
2101 {
2102 switch (current[-2])
2103 {
2104 case type_then_trap:
2105 break;
2106
2107 case type_mark:
2108 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2109 return current[-4];
2110 break;
2111
2112 default:
2113 SLJIT_ASSERT_STOP();
2114 break;
2115 }
2116 current = (sljit_sw*)current[-1];
2117 }
2118 return -1;
2119 }
2120
/* Emits the code which copies the result of a match to the caller.

The generated code performs the following steps:
  - the previous value of OVECTOR(1) is kept in SLJIT_SAVED_REG3 and
    OVECTOR(1) is set to STR_PTR
  - when marks are in use, the mark local is stored into the mark_ptr field
    of jit_arguments
  - offset_count ovector words are converted to offsets relative to the begin
    field of jit_arguments and written as ints through the offsets field
  - the return value is computed: 1 when topbracket is not greater than 1,
    otherwise the ovector is scanned downwards from topbracket until a value
    different from the one kept in SLJIT_SAVED_REG3 is found

Arguments:
  common      compiler state
  topbracket  the scan for the return value starts from this bracket
*/
2121 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2122 {
2123 DEFINE_COMPILER;
2124 struct sljit_label *loop;
2125 struct sljit_jump *early_quit;
2126
2127 /* At this point we can freely use all registers. */
2128 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2129 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2130
/* SLJIT_SCRATCH_REG1 holds the arguments pointer until 'begin' is loaded
   into it; SLJIT_SCRATCH_REG3 is used for the mark first, then for the
   output pointer. */
2131 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2132 if (common->mark_ptr != 0)
2133 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2134 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2135 if (common->mark_ptr != 0)
2136 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* The output pointer starts one int before the offsets array, since the
   store in the loop uses an offset of sizeof(int). */
2137 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2138 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2139 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2140 /* Unlikely, but possible */
2141 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
/* Copy loop: SLJIT_SCRATCH_REG2 counts the remaining words. */
2142 loop = LABEL();
2143 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2144 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2145 /* Copy the integer value to the output buffer */
2146 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2147 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2148 #endif
2149 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2150 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2151 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2152 JUMPHERE(early_quit);
2153
2154 /* Calculate the return value, which is the maximum ovector value. */
2155 if (topbracket > 1)
2156 {
/* The scan starts just after the words of topbracket and moves down by two
   words at a time; the counter starts from topbracket + 1 and is decreased
   in every step. */
2157 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2158 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2159
2160 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2161 loop = LABEL();
2162 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2163 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2164 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2165 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2166 }
2167 else
2168 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2169 }
2170
/* Emits the code which returns with PCRE_ERROR_PARTIAL.

The return register is set to PCRE_ERROR_PARTIAL. When the real_offset_count
field of jit_arguments is less than 2, the generated code jumps to quit at
once. Otherwise the following ints are written through the offsets field
(all values are relative to the begin field, and shifted by UCHAR_SHIFT in
16 and 32 bit modes):
  offsets[2]  only when real_offset_count is at least 3: the start_ptr local
              in hard partial mode, otherwise the word after hit_start
  offsets[1]  STR_END
  offsets[0]  the start_used_ptr local in hard partial mode, otherwise the
              hit_start local

Arguments:
  common  compiler state
  quit    the label where the generated code continues
*/
2171 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2172 {
2173 DEFINE_COMPILER;
2174 struct sljit_jump *jump;
2175
/* STR_END is overwritten below through its SLJIT_SAVED_REG2 alias. */
2176 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2177 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2178 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2179
2180 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2181 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2182 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2183 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2184
2185 /* Store match begin and end. */
2186 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2187 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2188
/* The third int is written only when there is room for it. */
2189 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2190 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2191 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2192 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2193 #endif
2194 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2195 JUMPHERE(jump);
2196
/* The value for offsets[0] is loaded first, then offsets[1] is computed and
   stored. */
2197 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2198 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2199 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2200 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2201 #endif
2202 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2203
2204 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2205 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2206 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2207 #endif
2208 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2209
2210 JUMPTO(SLJIT_JUMP, quit);
2211 }
2212
2213 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2214 {
2215 /* May destroy TMP1. */
2216 DEFINE_COMPILER;
2217 struct sljit_jump *jump;
2218
2219 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2220 {
2221 /* The value of -1 must be kept for start_used_ptr! */
2222 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2223 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2224 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2225 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2226 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2227 JUMPHERE(jump);
2228 }
2229 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2230 {
2231 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2232 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2233 JUMPHERE(jump);
2234 }
2235 }
2236
2237 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2238 {
2239 /* Detects if the character has an othercase. */
2240 unsigned int c;
2241
2242 #ifdef SUPPORT_UTF
2243 if (common->utf)
2244 {
2245 GETCHAR(c, cc);
2246 if (c > 127)
2247 {
2248 #ifdef SUPPORT_UCP
2249 return c != UCD_OTHERCASE(c);
2250 #else
2251 return FALSE;
2252 #endif
2253 }
2254 #ifndef COMPILE_PCRE8
2255 return common->fcc[c] != c;
2256 #endif
2257 }
2258 else
2259 #endif
2260 c = *cc;
2261 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2262 }
2263
2264 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2265 {
2266 /* Returns with the othercase. */
2267 #ifdef SUPPORT_UTF
2268 if (common->utf && c > 127)
2269 {
2270 #ifdef SUPPORT_UCP
2271 return UCD_OTHERCASE(c);
2272 #else
2273 return c;
2274 #endif
2275 }
2276 #endif
2277 return TABLE_GET(c, common->fcc, c);
2278 }
2279
2280 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2281 {
2282 /* Detects if the character and its othercase has only 1 bit difference. */
2283 unsigned int c, oc, bit;
2284 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2285 int n;
2286 #endif
2287
2288 #ifdef SUPPORT_UTF
2289 if (common->utf)
2290 {
2291 GETCHAR(c, cc);
2292 if (c <= 127)
2293 oc = common->fcc[c];
2294 else
2295 {
2296 #ifdef SUPPORT_UCP
2297 oc = UCD_OTHERCASE(c);
2298 #else
2299 oc = c;
2300 #endif
2301 }
2302 }
2303 else
2304 {
2305 c = *cc;
2306 oc = TABLE_GET(c, common->fcc, c);
2307 }
2308 #else
2309 c = *cc;
2310 oc = TABLE_GET(c, common->fcc, c);
2311 #endif
2312
2313 SLJIT_ASSERT(c != oc);
2314
2315 bit = c ^ oc;
2316 /* Optimized for English alphabet. */
2317 if (c <= 127 && bit == 0x20)
2318 return (0 << 8) | 0x20;
2319
2320 /* Since c != oc, they must have at least 1 bit difference. */
2321 if (!is_powerof2(bit))
2322 return 0;
2323
2324 #if defined COMPILE_PCRE8
2325
2326 #ifdef SUPPORT_UTF
2327 if (common->utf && c > 127)
2328 {
2329 n = GET_EXTRALEN(*cc);
2330 while ((bit & 0x3f) == 0)
2331 {
2332 n--;
2333 bit >>= 6;
2334 }
2335 return (n << 8) | bit;
2336 }
2337 #endif /* SUPPORT_UTF */
2338 return (0 << 8) | bit;
2339
2340 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2341
2342 #ifdef SUPPORT_UTF
2343 if (common->utf && c > 65535)
2344 {
2345 if (bit >= (1 << 10))
2346 bit >>= 10;
2347 else
2348 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2349 }
2350 #endif /* SUPPORT_UTF */
2351 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2352
2353 #endif /* COMPILE_PCRE[8|16|32] */
2354 }
2355
2356 static void check_partial(compiler_common *common, BOOL force)
2357 {
2358 /* Checks whether a partial matching is occured. Does not modify registers. */
2359 DEFINE_COMPILER;
2360 struct sljit_jump *jump = NULL;
2361
2362 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2363
2364 if (common->mode == JIT_COMPILE)
2365 return;
2366
2367 if (!force)
2368 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2369 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2370 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2371
2372 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2373 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2374 else
2375 {
2376 if (common->partialmatchlabel != NULL)
2377 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2378 else
2379 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2380 }
2381
2382 if (jump != NULL)
2383 JUMPHERE(jump);
2384 }
2385
2386 static void check_str_end(compiler_common *common, jump_list **end_reached)
2387 {
2388 /* Does not affect registers. Usually used in a tight spot. */
2389 DEFINE_COMPILER;
2390 struct sljit_jump *jump;
2391
2392 if (common->mode == JIT_COMPILE)
2393 {
2394 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2395 return;
2396 }
2397
2398 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2399 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2400 {
2401 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2402 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2403 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2404 }
2405 else
2406 {
2407 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2408 if (common->partialmatchlabel != NULL)
2409 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2410 else
2411 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2412 }
2413 JUMPHERE(jump);
2414 }
2415
2416 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2417 {
2418 DEFINE_COMPILER;
2419 struct sljit_jump *jump;
2420
2421 if (common->mode == JIT_COMPILE)
2422 {
2423 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2424 return;
2425 }
2426
2427 /* Partial matching mode. */
2428 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2429 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2430 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2431 {
2432 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2433 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2434 }
2435 else
2436 {
2437 if (common->partialmatchlabel != NULL)
2438 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2439 else
2440 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2441 }
2442 JUMPHERE(jump);
2443 }
2444
2445 static void read_char(compiler_common *common)
2446 {
2447 /* Reads the character into TMP1, updates STR_PTR.
2448 Does not check STR_END. TMP2 Destroyed. */
2449 DEFINE_COMPILER;
2450 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2451 struct sljit_jump *jump;
2452 #endif
2453
2454 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2455 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2456 if (common->utf)
2457 {
2458 #if defined COMPILE_PCRE8
2459 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2460 #elif defined COMPILE_PCRE16
2461 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2462 #endif /* COMPILE_PCRE[8|16] */
2463 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2464 JUMPHERE(jump);
2465 }
2466 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2467 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2468 }
2469
2470 static void peek_char(compiler_common *common)
2471 {
2472 /* Reads the character into TMP1, keeps STR_PTR.
2473 Does not check STR_END. TMP2 Destroyed. */
2474 DEFINE_COMPILER;
2475 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2476 struct sljit_jump *jump;
2477 #endif
2478
2479 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2480 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2481 if (common->utf)
2482 {
2483 #if defined COMPILE_PCRE8
2484 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2485 #elif defined COMPILE_PCRE16
2486 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2487 #endif /* COMPILE_PCRE[8|16] */
2488 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2489 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2490 JUMPHERE(jump);
2491 }
2492 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2493 }
2494
2495 static void read_char8_type(compiler_common *common)
2496 {
2497 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2498 DEFINE_COMPILER;
2499 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2500 struct sljit_jump *jump;
2501 #endif
2502
2503 #ifdef SUPPORT_UTF
2504 if (common->utf)
2505 {
2506 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2507 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2508 #if defined COMPILE_PCRE8
2509 /* This can be an extra read in some situations, but hopefully
2510 it is needed in most cases. */
2511 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2512 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2513 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2514 JUMPHERE(jump);
2515 #elif defined COMPILE_PCRE16
2516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2517 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2518 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2519 JUMPHERE(jump);
2520 /* Skip low surrogate if necessary. */
2521 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2522 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2523 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2524 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2525 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2526 #elif defined COMPILE_PCRE32
2527 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2528 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2529 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2530 JUMPHERE(jump);
2531 #endif /* COMPILE_PCRE[8|16|32] */
2532 return;
2533 }
2534 #endif /* SUPPORT_UTF */
2535 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2536 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2537 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2538 /* The ctypes array contains only 256 values. */
2539 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2540 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2541 #endif
2542 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2543 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2544 JUMPHERE(jump);
2545 #endif
2546 }
2547
2548 static void skip_char_back(compiler_common *common)
2549 {
2550 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2551 DEFINE_COMPILER;
2552 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2553 #if defined COMPILE_PCRE8
2554 struct sljit_label *label;
2555
2556 if (common->utf)
2557 {
2558 label = LABEL();
2559 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2560 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2561 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2562 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2563 return;
2564 }
2565 #elif defined COMPILE_PCRE16
2566 if (common->utf)
2567 {
2568 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2569 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2570 /* Skip low surrogate if necessary. */
2571 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2572 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2573 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2574 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2575 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2576 return;
2577 }
2578 #endif /* COMPILE_PCRE[8|16] */
2579 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2580 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2581 }
2582
2583 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2584 {
2585 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2586 DEFINE_COMPILER;
2587
2588 if (nltype == NLTYPE_ANY)
2589 {
2590 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2591 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2592 }
2593 else if (nltype == NLTYPE_ANYCRLF)
2594 {
2595 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2596 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2597 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2598 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2599 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2600 }
2601 else
2602 {
2603 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2604 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2605 }
2606 }
2607
2608 #ifdef SUPPORT_UTF
2609
2610 #if defined COMPILE_PCRE8
2611 static void do_utfreadchar(compiler_common *common)
2612 {
2613 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2614 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2615 DEFINE_COMPILER;
2616 struct sljit_jump *jump;
2617
2618 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2619 /* Searching for the first zero. */
2620 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2621 jump = JUMP(SLJIT_C_NOT_ZERO);
2622 /* Two byte sequence. */
2623 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2624 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2625 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2626 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2627 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2628 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2629 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2630 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2631 JUMPHERE(jump);
2632
2633 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2634 jump = JUMP(SLJIT_C_NOT_ZERO);
2635 /* Three byte sequence. */
2636 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2637 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2638 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2639 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2640 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2641 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2642 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2643 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2644 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2645 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2646 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2647 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2648 JUMPHERE(jump);
2649
2650 /* Four byte sequence. */
2651 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2652 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2653 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2654 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2655 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2656 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2657 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2658 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2659 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2660 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2661 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2663 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2664 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2665 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2666 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2667 }
2668
2669 static void do_utfreadtype8(compiler_common *common)
2670 {
2671 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2672 of the character (>= 0xc0). Return value in TMP1. */
2673 DEFINE_COMPILER;
2674 struct sljit_jump *jump;
2675 struct sljit_jump *compare;
2676
2677 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2678
2679 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2680 jump = JUMP(SLJIT_C_NOT_ZERO);
2681 /* Two byte sequence. */
2682 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2683 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2684 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2685 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2686 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2687 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2688 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2689 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2690 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2691
2692 JUMPHERE(compare);
2693 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2694 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2695 JUMPHERE(jump);
2696
2697 /* We only have types for characters less than 256. */
2698 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2699 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2700 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2701 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2702 }
2703
2704 #elif defined COMPILE_PCRE16
2705
2706 static void do_utfreadchar(compiler_common *common)
2707 {
2708 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2709 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2710 DEFINE_COMPILER;
2711 struct sljit_jump *jump;
2712
2713 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2714 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2715 /* Do nothing, only return. */
2716 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2717
2718 JUMPHERE(jump);
2719 /* Combine two 16 bit characters. */
2720 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2721 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2722 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2723 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2724 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2725 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2726 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2727 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2728 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2729 }
2730
2731 #endif /* COMPILE_PCRE[8|16] */
2732
2733 #endif /* SUPPORT_UTF */
2734
2735 #ifdef SUPPORT_UCP
2736
2737 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2738 #define UCD_BLOCK_MASK 127
2739 #define UCD_BLOCK_SHIFT 7
2740
2741 static void do_getucd(compiler_common *common)
2742 {
2743 /* Search the UCD record for the character comes in TMP1.
2744 Returns chartype in TMP1 and UCD offset in TMP2. */
2745 DEFINE_COMPILER;
2746
2747 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2748
2749 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2750 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2751 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2752 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2753 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2754 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2755 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2756 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2757 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2758 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2759 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2760 }
2761 #endif
2762
2763 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2764 {
2765 DEFINE_COMPILER;
2766 struct sljit_label *mainloop;
2767 struct sljit_label *newlinelabel = NULL;
2768 struct sljit_jump *start;
2769 struct sljit_jump *end = NULL;
2770 struct sljit_jump *nl = NULL;
2771 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2772 struct sljit_jump *singlechar;
2773 #endif
2774 jump_list *newline = NULL;
2775 BOOL newlinecheck = FALSE;
2776 BOOL readuchar = FALSE;
2777
2778 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2779 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2780 newlinecheck = TRUE;
2781
2782 if (firstline)
2783 {
2784 /* Search for the end of the first line. */
2785 SLJIT_ASSERT(common->first_line_end != 0);
2786 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2787
2788 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2789 {
2790 mainloop = LABEL();
2791 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2792 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2793 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2794 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2795 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2796 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2797 JUMPHERE(end);
2798 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2799 }
2800 else
2801 {
2802 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2803 mainloop = LABEL();
2804 /* Continual stores does not cause data dependency. */
2805 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2806 read_char(common);
2807 check_newlinechar(common, common->nltype, &newline, TRUE);
2808 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2809 JUMPHERE(end);
2810 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2811 set_jumps(newline, LABEL());
2812 }
2813
2814 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2815 }
2816
2817 start = JUMP(SLJIT_JUMP);
2818
2819 if (newlinecheck)
2820 {
2821 newlinelabel = LABEL();
2822 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2823 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2824 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2825 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2826 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2827 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2828 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2829 #endif
2830 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2831 nl = JUMP(SLJIT_JUMP);
2832 }
2833
2834 mainloop = LABEL();
2835
2836 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2837 #ifdef SUPPORT_UTF
2838 if (common->utf) readuchar = TRUE;
2839 #endif
2840 if (newlinecheck) readuchar = TRUE;
2841
2842 if (readuchar)
2843 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2844
2845 if (newlinecheck)
2846 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2847
2848 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2849 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2850 #if defined COMPILE_PCRE8
2851 if (common->utf)
2852 {
2853 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2854 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2855 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2856 JUMPHERE(singlechar);
2857 }
2858 #elif defined COMPILE_PCRE16
2859 if (common->utf)
2860 {
2861 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2862 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2863 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2864 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2865 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2866 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2867 JUMPHERE(singlechar);
2868 }
2869 #endif /* COMPILE_PCRE[8|16] */
2870 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2871 JUMPHERE(start);
2872
2873 if (newlinecheck)
2874 {
2875 JUMPHERE(end);
2876 JUMPHERE(nl);
2877 }
2878
2879 return mainloop;
2880 }
2881
2882 #define MAX_N_CHARS 3
2883
2884 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2885 {
2886 DEFINE_COMPILER;
2887 struct sljit_label *start;
2888 struct sljit_jump *quit;
2889 pcre_uint32 chars[MAX_N_CHARS * 2];
2890 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2891 int location = 0;
2892 pcre_int32 len, c, bit, caseless;
2893 int must_stop;
2894
2895 /* We do not support alternatives now. */
2896 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2897 return FALSE;
2898
2899 while (TRUE)
2900 {
2901 caseless = 0;
2902 must_stop = 1;
2903 switch(*cc)
2904 {
2905 case OP_CHAR:
2906 must_stop = 0;
2907 cc++;
2908 break;
2909
2910 case OP_CHARI:
2911 caseless = 1;
2912 must_stop = 0;
2913 cc++;
2914 break;
2915
2916 case OP_SOD:
2917 case OP_SOM:
2918 case OP_SET_SOM:
2919 case OP_NOT_WORD_BOUNDARY:
2920 case OP_WORD_BOUNDARY:
2921 case OP_EODN:
2922 case OP_EOD:
2923 case OP_CIRC:
2924 case OP_CIRCM:
2925 case OP_DOLL:
2926 case OP_DOLLM:
2927 /* Zero width assertions. */
2928 cc++;
2929 continue;
2930
2931 case OP_PLUS:
2932 case OP_MINPLUS:
2933 case OP_POSPLUS:
2934 cc++;
2935 break;
2936
2937 case OP_EXACT:
2938 cc += 1 + IMM2_SIZE;
2939 break;
2940
2941 case OP_PLUSI:
2942 case OP_MINPLUSI:
2943 case OP_POSPLUSI:
2944 caseless = 1;
2945 cc++;
2946 break;
2947
2948 case OP_EXACTI:
2949 caseless = 1;
2950 cc += 1 + IMM2_SIZE;
2951 break;
2952
2953 default:
2954 must_stop = 2;
2955 break;
2956 }
2957
2958 if (must_stop == 2)
2959 break;
2960
2961 len = 1;
2962 #ifdef SUPPORT_UTF
2963 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2964 #endif
2965
2966 if (caseless && char_has_othercase(common, cc))
2967 {
2968 caseless = char_get_othercase_bit(common, cc);
2969 if (caseless == 0)
2970 return FALSE;
2971 #ifdef COMPILE_PCRE8
2972 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2973 #else
2974 if ((caseless & 0x100) != 0)
2975 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2976 else
2977 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2978 #endif
2979 }
2980 else
2981 caseless = 0;
2982
2983 while (len > 0 && location < MAX_N_CHARS * 2)
2984 {
2985 c = *cc;
2986 bit = 0;
2987 if (len == (caseless & 0xff))
2988 {
2989 bit = caseless >> 8;
2990 c |= bit;
2991 }
2992
2993 chars[location] = c;
2994 chars[location + 1] = bit;
2995
2996 len--;
2997 location += 2;
2998 cc++;
2999 }
3000
3001 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3002 break;
3003 }
3004
3005 /* At least two characters are required. */
3006 if (location < 2 * 2)
3007 return FALSE;
3008
3009 if (firstline)
3010 {
3011 SLJIT_ASSERT(common->first_line_end != 0);
3012 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3013 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3014 }
3015 else
3016 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3017
3018 start = LABEL();
3019 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3020
3021 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3022 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3023 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3024 if (chars[1] != 0)
3025 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3026 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3027 if (location > 2 * 2)
3028 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3029 if (chars[3] != 0)
3030 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3031 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3032 if (location > 2 * 2)
3033 {
3034 if (chars[5] != 0)
3035 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3036 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3037 }
3038 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3039
3040 JUMPHERE(quit);
3041
3042 if (firstline)
3043 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3044 else
3045 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3046 return TRUE;
3047 }
3048
3049 #undef MAX_N_CHARS
3050
3051 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3052 {
3053 DEFINE_COMPILER;
3054 struct sljit_label *start;
3055 struct sljit_jump *quit;
3056 struct sljit_jump *found;
3057 pcre_uchar oc, bit;
3058
3059 if (firstline)
3060 {
3061 SLJIT_ASSERT(common->first_line_end != 0);
3062 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3063 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3064 }
3065
3066 start = LABEL();
3067 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3068 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3069
3070 oc = first_char;
3071 if (caseless)
3072 {
3073 oc = TABLE_GET(first_char, common->fcc, first_char);
3074 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3075 if (first_char > 127 && common->utf)
3076 oc = UCD_OTHERCASE(first_char);
3077 #endif
3078 }
3079 if (first_char == oc)
3080 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3081 else
3082 {
3083 bit = first_char ^ oc;
3084 if (is_powerof2(bit))
3085 {
3086 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3087 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3088 }
3089 else
3090 {
3091 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3092 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3093 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3094 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3095 found = JUMP(SLJIT_C_NOT_ZERO);
3096 }
3097 }
3098
3099 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3100 JUMPTO(SLJIT_JUMP, start);
3101 JUMPHERE(found);
3102 JUMPHERE(quit);
3103
3104 if (firstline)
3105 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3106 }
3107
3108 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3109 {
3110 DEFINE_COMPILER;
3111 struct sljit_label *loop;
3112 struct sljit_jump *lastchar;
3113 struct sljit_jump *firstchar;
3114 struct sljit_jump *quit;
3115 struct sljit_jump *foundcr = NULL;
3116 struct sljit_jump *notfoundnl;
3117 jump_list *newline = NULL;
3118
3119 if (firstline)
3120 {
3121 SLJIT_ASSERT(common->first_line_end != 0);
3122 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3123 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3124 }
3125
3126 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3127 {
3128 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3129 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3130 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3131 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3132 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3133
3134 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3135 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3136 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3137 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3138 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3139 #endif
3140 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3141
3142 loop = LABEL();
3143 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3144 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3145 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3146 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3147 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3148 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3149
3150 JUMPHERE(quit);
3151 JUMPHERE(firstchar);
3152 JUMPHERE(lastchar);
3153
3154 if (firstline)
3155 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3156 return;
3157 }
3158
3159 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3160 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3161 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3162 skip_char_back(common);
3163
3164 loop = LABEL();
3165 read_char(common);
3166 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3167 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3168 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3169 check_newlinechar(common, common->nltype, &newline, FALSE);
3170 set_jumps(newline, loop);
3171
3172 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3173 {
3174 quit = JUMP(SLJIT_JUMP);
3175 JUMPHERE(foundcr);
3176 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3177 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3178 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3179 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3180 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3181 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3182 #endif
3183 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3184 JUMPHERE(notfoundnl);
3185 JUMPHERE(quit);
3186 }
3187 JUMPHERE(lastchar);
3188 JUMPHERE(firstchar);
3189
3190 if (firstline)
3191 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3192 }
3193
3194 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3195
/* Emits a loop which advances STR_PTR until the character it points at is
accepted by the 256 bit (32 byte) map located at address start_bits, or until
STR_END is reached. When firstline is TRUE, STR_END is replaced by the value of
the first_line_end local while the loop runs; the original value is kept in
RETURN_ADDR and restored at the end. The emitted code overwrites TMP1 and TMP2,
and in UTF mode TMP3 as well. */
3196 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3197 {
3198 DEFINE_COMPILER;
3199 struct sljit_label *start;
3200 struct sljit_jump *quit;
3201 struct sljit_jump *found = NULL;
3202 jump_list *matches = NULL;
3203 pcre_uint8 inverted_start_bits[32];
3204 int i;
3205 #ifndef COMPILE_PCRE8
3206 struct sljit_jump *jump;
3207 #endif
3208
/* start_bits is an address passed as an integer; build the complemented map
which is handed to check_class_ranges() below. */
3209 for (i = 0; i < 32; ++i)
3210 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3211
3212 if (firstline)
3213 {
3214 SLJIT_ASSERT(common->first_line_end != 0);
/* RETURN_ADDR is used as the save register here because TMP3 is needed for
the character copy in UTF mode. */
3215 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3216 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3217 }
3218
3219 start = LABEL();
3220 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3221 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3222 #ifdef SUPPORT_UTF
/* The tests below destroy TMP1; keep the code unit for the length computation. */
3223 if (common->utf)
3224 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3225 #endif
3226
/* Try the compact range encoding first; its jumps are collected in "matches"
and bound to the exit label after the loop. When it is not applicable
(FALSE), fall back to an explicit lookup in the original bit map. */
3227 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3228 {
3229 #ifndef COMPILE_PCRE8
/* Values of 255 and above are all tested as 255. */
3230 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3231 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3232 JUMPHERE(jump);
3233 #endif
/* TMP2 = 1 << (c & 7), TMP1 = byte (c >> 3) of the map; leave the loop when
the bit is set. */
3234 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3235 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3236 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3237 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3238 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3239 found = JUMP(SLJIT_C_NOT_ZERO);
3240 }
3241
/* Not a starting character: step over it (all of its code units in UTF mode). */
3242 #ifdef SUPPORT_UTF
3243 if (common->utf)
3244 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3245 #endif
3246 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3247 #ifdef SUPPORT_UTF
3248 #if defined COMPILE_PCRE8
3249 if (common->utf)
3250 {
/* Lead bytes from 0xc0 upwards: add the extra length read from utf8_table4. */
3251 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3252 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3253 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3254 }
3255 #elif defined COMPILE_PCRE16
3256 if (common->utf)
3257 {
/* Units whose top six bits equal those of 0xd800 are followed by a second
unit: skip two more bytes. */
3258 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3259 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3260 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3261 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3262 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3263 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3264 }
3265 #endif /* COMPILE_PCRE[8|16] */
3266 #endif /* SUPPORT_UTF */
3267 JUMPTO(SLJIT_JUMP, start);
/* Common exit: all ways out of the loop meet here. */
3268 if (found != NULL)
3269 JUMPHERE(found);
3270 if (matches != NULL)
3271 set_jumps(matches, LABEL());
3272 JUMPHERE(quit);
3273
3274 if (firstline)
3275 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3276 }
3277
3278 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3279 {
3280 DEFINE_COMPILER;
3281 struct sljit_label *loop;
3282 struct sljit_jump *toolong;
3283 struct sljit_jump *alreadyfound;
3284 struct sljit_jump *found;
3285 struct sljit_jump *foundoc = NULL;
3286 struct sljit_jump *notfound;
3287 pcre_uint32 oc, bit;
3288
3289 SLJIT_ASSERT(common->req_char_ptr != 0);
3290 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3291 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3292 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3293 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3294
3295 if (has_firstchar)
3296 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3297 else
3298 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3299
3300 loop = LABEL();
3301 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3302
3303 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3304 oc = req_char;
3305 if (caseless)
3306 {
3307 oc = TABLE_GET(req_char, common->fcc, req_char);
3308 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3309 if (req_char > 127 && common->utf)
3310 oc = UCD_OTHERCASE(req_char);
3311 #endif
3312 }
3313 if (req_char == oc)
3314 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3315 else
3316 {
3317 bit = req_char ^ oc;
3318 if (is_powerof2(bit))
3319 {
3320 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3321 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3322 }
3323 else
3324 {
3325 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3326 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3327 }
3328 }
3329 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3330 JUMPTO(SLJIT_JUMP, loop);
3331
3332 JUMPHERE(found);
3333 if (foundoc)
3334 JUMPHERE(foundoc);
3335 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3336 JUMPHERE(alreadyfound);
3337 JUMPHERE(toolong);
3338 return notfound;
3339 }
3340
/* Emits a fast-call helper (entered with sljit_emit_fast_enter, left with
sljit_emit_fast_return through RETURN_ADDR) which walks the records found at
STACK_TOP and copies saved values back into locals. Each record starts with
one word:
  > 0  offset of a local: the next two words are copied to that local and the
       one after it (record size: three words)
  < 0  negated offset of a local: the next word is copied to it (record size:
       two words)
  = 0  end marker: the helper returns
Offsets are relative to the base obtained with GET_LOCAL_BASE. The emitted
code overwrites TMP1, TMP2 and TMP3; STACK_TOP itself is not modified. */
3341 static void do_revertframes(compiler_common *common)
3342 {
3343 DEFINE_COMPILER;
3344 struct sljit_jump *jump;
3345 struct sljit_label *mainloop;
3346
3347 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 is the read cursor, TMP3 the base address for local offsets. */
3348 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3349 GET_LOCAL_BASE(TMP3, 0, 0);
3350
3351 /* Drop frames until we reach STACK_TOP. */
3352 mainloop = LABEL();
3353 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Signed comparison of the record header with zero. */
3354 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3355 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3356
/* Positive header: restore two consecutive locals. */
3357 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3358 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3359 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3360 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3361 JUMPTO(SLJIT_JUMP, mainloop);
3362
3363 JUMPHERE(jump);
/* No instruction has been emitted since the comparison above, so this jump
still tests its flags: negative headers continue below, zero returns. */
3364 jump = JUMP(SLJIT_C_SIG_LESS);
3365 /* End of dropping frames. */
3366 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3367
3368 JUMPHERE(jump);
/* Negative header: restore a single local. */
3369 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3370 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3371 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3372 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3373 JUMPTO(SLJIT_JUMP, mainloop);
3374 }
3375
/* Emits a fast-call helper which tests whether STR_PTR is at a word boundary.
The return address is kept in the LOCALS0 slot. The "is a word character"
value (0 or 1) of the character before STR_PTR is stored in LOCALS1 (0 when
STR_PTR is not beyond the "begin" field of jit_arguments), the same value for
the character at STR_PTR is computed in TMP2 (0 when the check_str_end() jumps
are taken), and the final XOR of the two sets the flags selected by
SLJIT_SET_E before returning. The emitted code overwrites TMP1 and TMP2. */
3376 static void check_wordboundary(compiler_common *common)
3377 {
3378 DEFINE_COMPILER;
3379 struct sljit_jump *skipread;
3380 jump_list *skipread_list = NULL;
3381 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3382 struct sljit_jump *jump;
3383 #endif
3384
/* The ctypes lookups below extract the word flag with ">> 4, & 1". */
3385 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3386
3387 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3388 /* Get type of the previous char, and put it to LOCALS1. */
3389 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3390 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3391 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
/* No previous character at the start of the subject: LOCALS1 stays 0. */
3392 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back and read the previous character again; presumably read_char()
leaves STR_PTR at its original position afterwards (confirm). */
3393 skip_char_back(common);
3394 check_start_used_ptr(common);
3395 read_char(common);
3396
3397 /* Testing char type. */
3398 #ifdef SUPPORT_UCP
3399 if (common->use_ucp)
3400 {
/* TMP2 = 1 for an underscore; otherwise the getucd helper is called
(presumably it leaves the Unicode character type in TMP1) and TMP2 becomes 1
when the type lies in ucp_Ll..ucp_Lu or in ucp_Nd..ucp_No. */
3401 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3402 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3403 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3404 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3405 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3406 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3407 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3408 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3409 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3410 JUMPHERE(jump);
3411 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3412 }
3413 else
3414 #endif
3415 {
/* Table based test; characters above 255 skip the lookup and keep the 0
already stored in LOCALS1. */
3416 #ifndef COMPILE_PCRE8
3417 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3418 #elif defined SUPPORT_UTF
3419 /* Here LOCALS1 has already been zeroed. */
3420 jump = NULL;
3421 if (common->utf)
3422 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3423 #endif /* COMPILE_PCRE8 */
3424 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3425 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3426 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3427 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3428 #ifndef COMPILE_PCRE8
3429 JUMPHERE(jump);
3430 #elif defined SUPPORT_UTF
3431 if (jump != NULL)
3432 JUMPHERE(jump);
3433 #endif /* COMPILE_PCRE8 */
3434 }
3435 JUMPHERE(skipread);
3436
/* Same classification for the character at STR_PTR; the result stays in TMP2.
TMP2 is preset to 0 for the case when the check_str_end() jumps are taken. */
3437 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3438 check_str_end(common, &skipread_list);
3439 peek_char(common);
3440
3441 /* Testing char type. This is a code duplication. */
3442 #ifdef SUPPORT_UCP
3443 if (common->use_ucp)
3444 {
3445 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3446 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3447 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3448 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3449 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3450 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3451 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3452 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3453 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3454 JUMPHERE(jump);
3455 }
3456 else
3457 #endif
3458 {
3459 #ifndef COMPILE_PCRE8
3460 /* TMP2 may be destroyed by peek_char. */
3461 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3462 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3463 #elif defined SUPPORT_UTF
3464 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3465 jump = NULL;
3466 if (common->utf)
3467 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3468 #endif
3469 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3470 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3471 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3472 #ifndef COMPILE_PCRE8
3473 JUMPHERE(jump);
3474 #elif defined SUPPORT_UTF
3475 if (jump != NULL)
3476 JUMPHERE(jump);
3477 #endif /* COMPILE_PCRE8 */
3478 }
3479 set_jumps(skipread_list, LABEL());
3480
/* The two values differ exactly when one side is a word character and the
other is not; only the flags of the XOR are produced. */
3481 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3482 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3483 }
3484
3485 /*
3486 range format:
3487
3488 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3489 ranges[1] = first bit (0 or 1)
3490 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3491 */
3492
3493 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3494 {
3495 DEFINE_COMPILER;
3496 struct sljit_jump *jump;
3497
3498 if (ranges[0] < 0)
3499 return FALSE;
3500
3501 switch(ranges[0])
3502 {
3503 case 1:
3504 if (readch)
3505 read_char(common);
3506 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3507 return TRUE;
3508
3509 case 2:
3510 if (readch)
3511 read_char(common);
3512 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3513 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3514 return TRUE;
3515
3516 case 4:
3517 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3518 {
3519 if (readch)
3520 read_char(common);
3521 if (ranges[1] != 0)
3522 {
3523 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3524 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3525 }
3526 else
3527 {
3528 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3529 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3530 JUMPHERE(jump);
3531 }
3532 return TRUE;
3533 }
3534 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3535 {
3536 if (readch)
3537 read_char(common);
3538 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3539 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3540 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3541 return TRUE;
3542 }
3543 return FALSE;
3544
3545 default:
3546 return FALSE;
3547 }
3548 }
3549
3550 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3551 {
3552 int i, bit, length;
3553 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3554
3555 bit = ctypes[0] & flag;
3556 ranges[0] = -1;
3557 ranges[1] = bit != 0 ? 1 : 0;
3558 length = 0;
3559
3560 for (i = 1; i < 256; i++)
3561 if ((ctypes[i] & flag) != bit)
3562 {
3563 if (length >= MAX_RANGE_SIZE)
3564 return;
3565 ranges[2 + length] = i;
3566 length++;
3567 bit ^= flag;
3568 }
3569
3570 if (bit != 0)
3571 {
3572 if (length >= MAX_RANGE_SIZE)
3573 return;
3574 ranges[2 + length] = 256;
3575 length++;
3576 }
3577 ranges[0] = length;
3578 }
3579
3580 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3581 {
3582 int ranges[2 + MAX_RANGE_SIZE];
3583 pcre_uint8 bit, cbit, all;
3584 int i, byte, length = 0;
3585
3586 bit = bits[0] & 0x1;
3587 ranges[1] = bit;
3588 /* Can be 0 or 255. */
3589 all = -bit;
3590
3591 for (i = 0; i < 256; )
3592 {
3593 byte = i >> 3;
3594 if ((i & 0x7) == 0 && bits[byte] == all)
3595 i += 8;
3596 else
3597 {
3598 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3599 if (cbit != bit)
3600 {
3601 if (length >= MAX_RANGE_SIZE)
3602 return FALSE;
3603 ranges[2 + length] = i;
3604 length++;
3605 bit = cbit;
3606 all = -cbit;
3607 }
3608 i++;
3609 }
3610 }
3611
3612 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3613 {
3614 if (length >= MAX_RANGE_SIZE)
3615 return FALSE;
3616 ranges[2 + length] = 256;
3617 length++;
3618 }
3619 ranges[0] = length;
3620
3621 return check_ranges(common, ranges, backtracks, FALSE);
3622 }
3623
/* Emits a fast-call helper (return address in RETURN_ADDR) which tests the
character in TMP1 against the newline characters 0x0a-0x0d and 0x85, and in
UTF mode or with 16/32 bit code units also 0x2028 and 0x2029. TMP2 is nonzero
on a match and the last operation sets the flags selected by SLJIT_SET_E from
it. TMP1 is modified. */
3624 static void check_anynewline(compiler_common *common)
3625 {
3626 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3627 DEFINE_COMPILER;
3628
3629 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3630
/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range test. */
3631 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3632 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3633 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3634 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3635 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3636 #ifdef COMPILE_PCRE8
3637 if (common->utf)
3638 {
3639 #endif
/* Merge the 0x85 result, then set the lowest bit so that 0x2028 and 0x2029
are covered by the single comparison with 0x2029. */
3640 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3641 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3642 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3643 #ifdef COMPILE_PCRE8
3644 }
3645 #endif
3646 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison and produce the final flags. */
3647 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3648 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3649 }
3650
/* Emits a fast-call helper (return address in RETURN_ADDR) which tests the
character in TMP1 against the horizontal space characters 0x09, 0x20 and 0xa0,
and in UTF mode or with 16/32 bit code units also 0x1680, 0x180e,
0x2000-0x200a, 0x202f, 0x205f and 0x3000. TMP2 is nonzero on a match and the
last operation sets the flags selected by SLJIT_SET_E from it. TMP1 is
modified on the wide character path. */
3651 static void check_hspace(compiler_common *common)
3652 {
3653 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
3654 DEFINE_COMPILER;
3655
3656 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3657
/* Each comparison result is accumulated into TMP2 with OR. */
3658 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3659 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3660 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3661 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3662 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3663 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3664 #ifdef COMPILE_PCRE8
3665 if (common->utf)
3666 {
3667 #endif
3668 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3669 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3670 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3671 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3672 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is rebased to 0x2000; the constants below are adjusted
accordingly. 0x2000..0x200a is one unsigned range test. */
3673 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3674 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3675 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3676 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3677 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3678 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3679 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3680 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3681 #ifdef COMPILE_PCRE8
3682 }
3683 #endif
3684 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison and produce the final flags. */
3685 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3686
3687 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3688 }
3689
/* Emits a fast-call helper (return address in RETURN_ADDR) which tests the
character in TMP1 against the vertical space characters 0x0a-0x0d and 0x85,
and in UTF mode or with 16/32 bit code units also 0x2028 and 0x2029. TMP2 is
nonzero on a match and the last operation sets the flags selected by
SLJIT_SET_E from it. TMP1 is modified. The emitted sequence is the same as in
check_anynewline() except for one additional SLJIT_SET_E (see below). */
3690 static void check_vspace(compiler_common *common)
3691 {
3692 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
3693 DEFINE_COMPILER;
3694
3695 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3696
/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range test. */
3697 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3698 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3699 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3700 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3701 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3702 #ifdef COMPILE_PCRE8
3703 if (common->utf)
3704 {
3705 #endif
/* NOTE(review): unlike check_anynewline(), this OR also requests
SLJIT_SET_E; the flags are set again by the SUB two lines below, so the
request looks redundant - confirm before changing. */
3706 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit lets one comparison cover 0x2028 and 0x2029. */
3707 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3708 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3709 #ifdef COMPILE_PCRE8
3710 }
3711 #endif
3712 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison and produce the final flags. */
3713 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3714
3715 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3716 }
3717
/* The comparison helpers below borrow the STR_END and STACK_TOP registers for
the two characters being compared; both are saved on entry and restored
before returning. */
3718 #define CHAR1 STR_END
3719 #define CHAR2 STACK_TOP
3720
/* Emits a fast-call helper (return address in RETURN_ADDR) which compares two
code unit sequences for exact equality. On entry TMP1 points to the first
sequence and TMP2 holds the length in bytes; STR_PTR is first moved back by
TMP2, so the second sequence presumably ends at the incoming STR_PTR (confirm
against the callers). The loop ends at the first differing unit or when TMP2
reaches zero; STR_PTR is left just after the last unit that was read. The
flags are not set explicitly after the loop, so callers presumably test TMP2
or the flags left by the loop (confirm). */
3721 static void do_casefulcmp(compiler_common *common)
3722 {
3723 DEFINE_COMPILER;
3724 struct sljit_jump *jump;
3725 struct sljit_label *label;
3726
3727 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3728 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the borrowed registers: CHAR1 in TMP3, CHAR2 in the LOCALS0 slot. */
3729 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3730 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one unit because the loads in the loop
address the unit at offset +1 (MOVU_UCHAR presumably also updates the base
register - confirm). */
3731 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3732 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3733
3734 label = LABEL();
3735 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3736 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3737 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3738 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3739 JUMPTO(SLJIT_C_NOT_ZERO, label);
3740
3741 JUMPHERE(jump);
/* Undo the initial one unit adjustment and restore the borrowed registers. */
3742 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3743 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3744 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3745 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3746 }
3747
/* The caseless helper additionally borrows the STACK_LIMIT register for the
address of the lower case table. */
3748 #define LCC_TABLE STACK_LIMIT
3749
/* Emits a fast-call helper (return address in RETURN_ADDR) which compares two
code unit sequences after mapping each unit through the common->lcc table.
Register usage on entry and the handling of STR_PTR are the same as in
do_casefulcmp(): TMP1 points to the first sequence, TMP2 holds the length in
bytes, and STR_PTR is first moved back by TMP2. With 16/32 bit code units,
values above 255 are compared without mapping. */
3750 static void do_caselesscmp(compiler_common *common)
3751 {
3752 DEFINE_COMPILER;
3753 struct sljit_jump *jump;
3754 struct sljit_label *label;
3755
3756 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3757 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3758
/* Save the borrowed registers: LCC_TABLE in TMP3, CHAR1 and CHAR2 in the
LOCALS0 and LOCALS1 slots. */
3759 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3760 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3761 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3762 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one unit because the loads in the loop
address the unit at offset +1. */
3763 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3764 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3765
3766 label = LABEL();
3767 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3768 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookups are skipped for units above 255. */
3769 #ifndef COMPILE_PCRE8
3770 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3771 #endif
3772 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3773 #ifndef COMPILE_PCRE8
3774 JUMPHERE(jump);
3775 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3776 #endif
3777 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3778 #ifndef COMPILE_PCRE8
3779 JUMPHERE(jump);
3780 #endif
3781 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3782 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3783 JUMPTO(SLJIT_C_NOT_ZERO, label);
3784
3785 JUMPHERE(jump);
/* Undo the initial one unit adjustment and restore the borrowed registers. */
3786 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3787 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3788 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3789 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3790 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3791 }
3792
/* The register aliases are private to the two comparison helpers above. */
3793 #undef LCC_TABLE
3794 #undef CHAR1
3795 #undef CHAR2
3796
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF encoded sequences using the Unicode
character database. This is an ordinary C function (SLJIT_CALL convention)
rather than generated code.

Arguments:
  src1  start of the first sequence
  args  JIT arguments: uchar_ptr is the start of the second sequence and end
        is the limit which must not be passed while reading it
  end1  end of the first sequence

Returns:
  NULL               the sequences differ
  (pcre_uchar *)1    the second sequence reached args->end before the first
                     one was consumed
  otherwise          the position in the second sequence just after the
                     matched characters */

static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
const pcre_uchar *subject = args->uchar_ptr;
const pcre_uchar *subject_end = args->end;
const ucd_record *record;
const pcre_uint32 *set;
pcre_uint32 ref_char, subject_char;

for (; src1 < end1; )
  {
  if (subject >= subject_end)
    return (pcre_uchar*)1;

  GETCHARINC(ref_char, src1);
  GETCHARINC(subject_char, subject);
  record = GET_UCD(subject_char);

  /* Identical characters and simple other case pairs match directly. */
  if (ref_char == subject_char || ref_char == subject_char + record->other_case)
    continue;

  /* Otherwise the character has to be a member of the caseless set of the
  subject character. The scan stops with a mismatch as soon as an entry
  greater than the wanted character is met. */
  set = PRIV(ucd_caseless_sets) + record->caseset;
  while (ref_char != *set)
    {
    if (ref_char < *set)
      return NULL;
    set++;
    }
  }

return subject;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
3829
/* Emits the comparison of one pattern character (all of its code units in UTF
mode) starting at cc with the subject. The subject units are addressed
relative to STR_PTR with the negative offset -context->length, and
context->length is reduced by one unit for every unit processed.

Arguments:
  common      compiler state
  caseless    TRUE when the other case of the character matches as well
  cc          pointer to the character in the pattern
  context     state shared by consecutive calls (remaining length, the
              register which receives the next load, and the units collected
              for a combined comparison)
  backtracks  list which receives the "not equal" jumps

Returns cc advanced past the processed code units.

Subject data is loaded one step ahead: a load goes into context->sourcereg,
then sourcereg is switched to the other one of TMP1/TMP2, which holds the
previously loaded data, and the comparison is emitted on that register. */
3830 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3831 compare_context* context, jump_list **backtracks)
3832 {
3833 DEFINE_COMPILER;
3834 unsigned int othercasebit = 0;
3835 pcre_uchar *othercasechar = NULL;
3836 #ifdef SUPPORT_UTF
3837 int utflength;
3838 #endif
3839
3840 if (caseless && char_has_othercase(common, cc))
3841 {
/* The value returned by char_get_othercase_bit() is decoded here into the
code unit that differs between the two cases (othercasechar) and the bit to
set in it (othercasebit). */
3842 othercasebit = char_get_othercase_bit(common, cc);
3843 SLJIT_ASSERT(othercasebit);
3844 /* Extracting bit difference info. */
3845 #if defined COMPILE_PCRE8
3846 othercasechar = cc + (othercasebit >> 8);
3847 othercasebit &= 0xff;
3848 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3849 /* Note that this code only handles characters in the BMP. If there
3850 ever are characters outside the BMP whose othercase differs in only one
3851 bit from itself (there currently are none), this code will need to be
3852 revised for COMPILE_PCRE32. */
3853 othercasechar = cc + (othercasebit >> 9);
3854 if ((othercasebit & 0x100) != 0)
3855 othercasebit = (othercasebit & 0xff) << 8;
3856 else
3857 othercasebit &= 0xff;
3858 #endif /* COMPILE_PCRE[8|16|32] */
3859 }
3860
/* A sourcereg of -1 marks the first call for this context: load the first
chunk into TMP1 and let the next load go to TMP2. */
3861 if (context->sourcereg == -1)
3862 {
3863 #if defined COMPILE_PCRE8
3864 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3865 if (context->length >= 4)
3866 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3867 else if (context->length >= 2)
3868 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3869 else
3870 #endif
3871 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3872 #elif defined COMPILE_PCRE16
3873 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3874 if (context->length >= 4)
3875 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3876 else
3877 #endif
3878 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3879 #elif defined COMPILE_PCRE32
3880 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3881 #endif /* COMPILE_PCRE[8|16|32] */
3882 context->sourcereg = TMP2;
3883 }
3884
/* In UTF mode the loop below runs once for every code unit of the character;
without SUPPORT_UTF its body is compiled as straight-line code. */
3885 #ifdef SUPPORT_UTF
3886 utflength = 1;
3887 if (common->utf && HAS_EXTRALEN(*cc))
3888 utflength += GET_EXTRALEN(*cc);
3889
3890 do
3891 {
3892 #endif
3893
3894 context->length -= IN_UCHARS(1);
3895 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3896
3897 /* Unaligned read is supported. */
/* Collect the expected unit (with the case bit already set) and its case mask
in the context; the comparison is emitted once a full chunk is gathered. */
3898 if (othercasebit != 0 && othercasechar == cc)
3899 {
3900 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3901 context->oc.asuchars[context->ucharptr] = othercasebit;
3902 }
3903 else
3904 {
3905 context->c.asuchars[context->ucharptr] = *cc;
3906 context->oc.asuchars[context->ucharptr] = 0;
3907 }
3908 context->ucharptr++;
3909
/* Flush when a full 4 byte chunk is collected, when nothing is left, or (8 bit
only) when two units are collected and a single unit remains. */
3910 #if defined COMPILE_PCRE8
3911 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3912 #else
3913 if (context->ucharptr >= 2 || context->length == 0)
3914 #endif
3915 {
/* Start the load of the next chunk before comparing the current one. */
3916 if (context->length >= 4)
3917 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3918 else if (context->length >= 2)
3919 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3920 #if defined COMPILE_PCRE8
3921 else if (context->length >= 1)
3922 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3923 #endif /* COMPILE_PCRE8 */
/* Switch to the register which holds the previously loaded chunk. */
3924 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3925
/* Compare the collected units as one 32, 16 or 8 bit value; the case mask is
ORed into the subject data first when it is not zero. */
3926 switch(context->ucharptr)
3927 {
3928 case 4 / sizeof(pcre_uchar):
3929 if (context->oc.asint != 0)
3930 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3931 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3932 break;
3933
3934 case 2 / sizeof(pcre_uchar):
3935 if (context->oc.asushort != 0)
3936 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3937 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3938 break;
3939
3940 #ifdef COMPILE_PCRE8
3941 case 1:
3942 if (context->oc.asbyte != 0)
3943 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3944 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3945 break;
3946 #endif
3947
3948 default:
3949 SLJIT_ASSERT_STOP();
3950 break;
3951 }
3952 context->ucharptr = 0;
3953 }
3954
3955 #else
3956
3957 /* Unaligned read is unsupported or in 32 bit mode. */
/* One unit per step: load the next unit, then compare the one loaded before. */
3958 if (context->length >= 1)
3959 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3960
3961 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3962
3963 if (othercasebit != 0 && othercasechar == cc)
3964 {
3965 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3966 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3967 }
3968 else
3969 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3970
3971 #endif
3972
3973 cc++;
3974 #ifdef SUPPORT_UTF
3975 utflength--;
3976 }
3977 while (utflength > 0);
3978 #endif
3979
3980 return cc;
3981 }
3982
3983 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3984
/* Changes the offset which is currently subtracted from typereg to "value":
emits one SUB (when value is larger than the recorded typeoffset) or one ADD
(when it is smaller) with the difference, and nothing when they are equal; then
records value in typeoffset. Presumably this keeps typereg equal to the
original type minus typeoffset (confirm at the point of use). The macro relies
on the locals typereg and typeoffset of the invoking function, evaluates its
argument several times, and expands to an if statement followed by an
assignment, so it must not be used as the unbraced body of another statement. */
3985 #define SET_TYPE_OFFSET(value) \
3986 if ((value) != typeoffset) \
3987 { \
3988 if ((value) > typeoffset) \
3989 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3990 else \
3991 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3992 } \
3993 typeoffset = (value);
3994
/* Changes the offset which is currently subtracted from the character in TMP1
to "value": emits one SUB (when value is larger than the recorded charoffset)
or one ADD (when it is smaller) with the difference, and nothing when they are
equal; then records value in charoffset. Presumably this keeps TMP1 equal to
the original character minus charoffset (confirm at the point of use). The
macro relies on the local charoffset of the invoking function, evaluates its
argument several times, and expands to an if statement followed by an
assignment, so it must not be used as the unbraced body of another statement. */
3995 #define SET_CHAR_OFFSET(value) \
3996 if ((value) != charoffset) \
3997 { \
3998 if ((value) > charoffset) \
3999 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
4000 else \
4001 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
4002 } \
4003 charoffset = (value);
4004
4005 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4006 {
4007 DEFINE_COMPILER;
4008 jump_list *found = NULL;
4009 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
4010 pcre_int32 c, charoffset;
4011 const pcre_uint32 *other_cases;
4012 struct sljit_jump *jump = NULL;
4013 pcre_uchar *ccbegin;
4014 int compares, invertcmp, numberofcmps;
4015 #ifdef SUPPORT_UCP
4016 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4017 BOOL charsaved = FALSE;
4018 int typereg = TMP1, scriptreg = TMP1;
4019 pcre_int32 typeoffset;
4020 #endif
4021
4022 /* Although SUPPORT_UTF must be defined, we are
4023 not necessary in utf mode even in 8 bit mode. */
4024 detect_partial_match(common, backtracks);
4025 read_char(common);
4026
4027 if ((*cc++ & XCL_MAP) != 0)
4028 {
4029 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4030 #ifndef COMPILE_PCRE8
4031 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4032 #elif defined SUPPORT_UTF
4033 if (common->utf)
4034 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4035 #endif
4036
4037 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
4038 {
4039 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4040 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4041 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4042 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4043 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4044 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4045 }
4046
4047 #ifndef COMPILE_PCRE8
4048 JUMPHERE(jump);
4049 #elif defined SUPPORT_UTF
4050 if (common->utf)
4051 JUMPHERE(jump);
4052 #endif
4053 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4054 #ifdef SUPPORT_UCP
4055 charsaved = TRUE;
4056 #endif
4057 cc += 32 / sizeof(pcre_uchar);
4058 }
4059
4060 /* Scanning the necessary info. */
4061 ccbegin = cc;
4062 compares = 0;
4063 while (*cc != XCL_END)
4064 {
4065 compares++;
4066 if (*cc == XCL_SINGLE)
4067 {
4068 cc += 2;
4069 #ifdef SUPPORT_UTF
4070 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4071 #endif
4072 #ifdef SUPPORT_UCP
4073 needschar = TRUE;
4074 #endif
4075 }
4076 else if (*cc == XCL_RANGE)
4077 {
4078 cc += 2;
4079 #ifdef SUPPORT_UTF
4080 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4081 #endif
4082 cc++;
4083 #ifdef SUPPORT_UTF
4084 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4085 #endif
4086 #ifdef SUPPORT_UCP
4087 needschar = TRUE;
4088 #endif
4089 }
4090 #ifdef SUPPORT_UCP
4091 else
4092 {
4093 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4094 cc++;
4095 switch(*cc)
4096 {
4097 case PT_ANY:
4098 break;
4099
4100 case PT_LAMP:
4101 case PT_GC:
4102 case PT_PC:
4103 case PT_ALNUM:
4104 needstype = TRUE;
4105 break;
4106
4107 case PT_SC:
4108 needsscript = TRUE;
4109 break;
4110
4111 case PT_SPACE:
4112 case PT_PXSPACE:
4113 case PT_WORD:
4114 needstype = TRUE;
4115 needschar = TRUE;
4116 break;
4117
4118 case PT_CLIST:
4119 case PT_UCNC:
4120 needschar = TRUE;
4121 break;
4122
4123 default:
4124 SLJIT_ASSERT_STOP();
4125 break;
4126 }
4127 cc += 2;
4128 }
4129 #endif
4130 }
4131
4132 #ifdef SUPPORT_UCP
4133 /* Simple register allocation. TMP1 is preferred if possible. */
4134 if (needstype || needsscript)
4135 {
4136 if (needschar && !charsaved)
4137 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4138 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4139 if (needschar)
4140 {
4141 if (needstype)
4142 {
4143 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4144 typereg = RETURN_ADDR;
4145 }
4146
4147 if (needsscript)
4148 scriptreg = TMP3;
4149 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4150 }
4151 else if (needstype && needsscript)
4152 scriptreg = TMP3;
4153 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4154
4155 if (needsscript)
4156 {
4157 if (scriptreg == TMP1)
4158 {
4159 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4160 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4161 }
4162 else
4163 {
4164 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4165 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4166 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4167 }
4168 }
4169 }
4170 #endif
4171
4172 /* Generating code. */
4173 cc = ccbegin;
4174 charoffset = 0;
4175 numberofcmps = 0;
4176 #ifdef SUPPORT_UCP
4177 typeoffset = 0;
4178 #endif
4179
4180 while (*cc != XCL_END)
4181 {
4182 compares--;
4183 invertcmp = (compares == 0 && list != backtracks);
4184 jump = NULL;
4185
4186 if (*cc == XCL_SINGLE)
4187 {
4188 cc ++;
4189 #ifdef SUPPORT_UTF
4190 if (common->utf)
4191 {
4192 GETCHARINC(c, cc);
4193 }
4194 else
4195 #endif
4196 c = *cc++;
4197
4198 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4199 {
4200 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4201 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4202 numberofcmps++;
4203 }
4204 else if (numberofcmps > 0)
4205 {
4206 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4207 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4208 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4209 numberofcmps = 0;
4210 }
4211 else
4212 {
4213 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4214 numberofcmps = 0;
4215 }
4216 }
4217 else if (*cc == XCL_RANGE)
4218 {
4219 cc ++;
4220 #ifdef SUPPORT_UTF
4221 if (common->utf)
4222 {
4223 GETCHARINC(c, cc);
4224 }
4225 else
4226 #endif
4227 c = *cc++;
4228 SET_CHAR_OFFSET(c);
4229 #ifdef SUPPORT_UTF
4230 if (common->utf)
4231 {
4232 GETCHARINC(c, cc);
4233 }
4234 else
4235 #endif
4236 c = *cc++;
4237 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4238 {
4239 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4240 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4241 numberofcmps++;
4242 }
4243 else if (numberofcmps > 0)
4244 {
4245 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4246 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4247 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4248 numberofcmps = 0;
4249 }
4250 else
4251 {
4252 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4253 numberofcmps = 0;
4254 }
4255 }
4256 #ifdef SUPPORT_UCP
4257 else
4258 {
4259 if (*cc == XCL_NOTPROP)
4260 invertcmp ^= 0x1;
4261 cc++;
4262 switch(*cc)
4263 {
4264 case PT_ANY:
4265 if (list != backtracks)
4266 {
4267 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4268 continue;
4269 }
4270 else if (cc[-1] == XCL_NOTPROP)
4271 continue;
4272 jump = JUMP(SLJIT_JUMP);
4273 break;
4274
4275 case PT_LAMP:
4276 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4277 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4278 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4279 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4280 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4281 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4282 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4283 break;
4284
4285 case PT_GC:
4286 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4287 SET_TYPE_OFFSET(c);
4288 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4289 break;
4290
4291 case PT_PC:
4292 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4293 break;
4294
4295 case PT_SC:
4296 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4297 break;
4298
4299 case PT_SPACE:
4300 case PT_PXSPACE:
4301 if (*cc == PT_SPACE)
4302 {
4303 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4304 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4305 }
4306 SET_CHAR_OFFSET(9);
4307 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4308 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4309 if (*cc == PT_SPACE)
4310 JUMPHERE(jump);
4311
4312 SET_TYPE_OFFSET(ucp_Zl);
4313 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4314 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4315 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4316 break;
4317
4318 case PT_WORD:
4319 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4320 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4321 /* Fall through. */
4322
4323 case PT_ALNUM:
4324 SET_TYPE_OFFSET(ucp_Ll);
4325 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4326 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4327 SET_TYPE_OFFSET(ucp_Nd);
4328 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4329 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4330 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4331 break;
4332
4333 case PT_CLIST:
4334 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4335
4336 /* At least three characters are required.
4337 Otherwise this case would be handled by the normal code path. */
4338 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4339 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4340
4341 /* Optimizing character pairs, if their difference is power of 2. */
4342 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4343 {
4344 if (charoffset == 0)
4345 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4346 else
4347 {
4348 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4349 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4350 }
4351 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4352 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4353 other_cases += 2;
4354 }
4355 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4356 {
4357 if (charoffset == 0)
4358 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4359 else
4360 {
4361 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4362 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4363 }
4364 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4365 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4366
4367 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4368 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4369
4370 other_cases += 3;
4371 }
4372 else
4373 {
4374 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4375 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4376 }
4377
4378 while (*other_cases != NOTACHAR)
4379 {
4380 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4381 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4382 }
4383 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4384 break;
4385
4386 case PT_UCNC:
4387 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4388 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4389 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4390 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4391 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4392 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4393
4394 SET_CHAR_OFFSET(0xa0);
4395 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4396 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4397 SET_CHAR_OFFSET(0);
4398 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4399 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4400 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4401 break;
4402 }
4403 cc += 2;
4404 }
4405 #endif
4406
4407 if (jump != NULL)
4408 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4409 }
4410
4411 if (found != NULL)
4412 set_jumps(found, LABEL());
4413 }
4414
4415 #undef SET_TYPE_OFFSET
4416 #undef SET_CHAR_OFFSET
4417
4418 #endif
4419
4420 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4421 {
4422 DEFINE_COMPILER;
4423 int length;
4424 unsigned int c, oc, bit;
4425 compare_context context;
4426 struct sljit_jump *jump[4];
4427 jump_list *end_list;
4428 #ifdef SUPPORT_UTF
4429 struct sljit_label *label;
4430 #ifdef SUPPORT_UCP
4431 pcre_uchar propdata[5];
4432 #endif
4433 #endif
4434
4435 switch(type)
4436 {
4437 case OP_SOD:
4438 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4439 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4440 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4441 return cc;
4442
4443 case OP_SOM:
4444 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4445 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4446 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4447 return cc;
4448
4449 case OP_NOT_WORD_BOUNDARY:
4450 case OP_WORD_BOUNDARY:
4451 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4452 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4453 return cc;
4454
4455 case OP_NOT_DIGIT:
4456 case OP_DIGIT:
4457 /* Digits are usually 0-9, so it is worth to optimize them. */
4458 if (common->digits[0] == -2)
4459 get_ctype_ranges(common, ctype_digit, common->digits);
4460 detect_partial_match(common, backtracks);
4461 /* Flip the starting bit in the negative case. */
4462 if (type == OP_NOT_DIGIT)
4463 common->digits[1] ^= 1;
4464 if (!check_ranges(common, common->digits, backtracks, TRUE))
4465 {
4466 read_char8_type(common);
4467 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4468 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4469 }
4470 if (type == OP_NOT_DIGIT)
4471 common->digits[1] ^= 1;
4472 return cc;
4473
4474 case OP_NOT_WHITESPACE:
4475 case OP_WHITESPACE:
4476 detect_partial_match(common, backtracks);
4477 read_char8_type(common);
4478 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4479 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4480 return cc;
4481
4482 case OP_NOT_WORDCHAR:
4483 case OP_WORDCHAR:
4484 detect_partial_match(common, backtracks);
4485 read_char8_type(common);
4486 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4487 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4488 return cc;
4489
4490 case OP_ANY:
4491 detect_partial_match(common, backtracks);
4492 read_char(common);
4493 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4494 {
4495 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4496 end_list = NULL;
4497 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4498 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4499 else
4500 check_str_end(common, &end_list);
4501
4502 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4503 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4504 set_jumps(end_list, LABEL());
4505 JUMPHERE(jump[0]);
4506 }
4507 else
4508 check_newlinechar(common, common->nltype, backtracks, TRUE);
4509 return cc;
4510
4511 case OP_ALLANY:
4512 detect_partial_match(common, backtracks);
4513 #ifdef SUPPORT_UTF
4514 if (common->utf)
4515 {
4516 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4517 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4518 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4519 #if defined COMPILE_PCRE8
4520 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4521 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4522 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4523 #elif defined COMPILE_PCRE16
4524 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4525 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4526 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4527 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4528 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4529 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4530 #endif
4531 JUMPHERE(jump[0]);
4532 #endif /* COMPILE_PCRE[8|16] */
4533 return cc;
4534 }
4535 #endif
4536 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4537 return cc;
4538
4539 case OP_ANYBYTE:
4540 detect_partial_match(common, backtracks);
4541 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4542 return cc;
4543
4544 #ifdef SUPPORT_UTF
4545 #ifdef SUPPORT_UCP
4546 case OP_NOTPROP:
4547 case OP_PROP:
4548 propdata[0] = 0;
4549 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4550 propdata[2] = cc[0];
4551 propdata[3] = cc[1];
4552 propdata[4] = XCL_END;
4553 compile_xclass_matchingpath(common, propdata, backtracks);
4554 return cc + 2;
4555 #endif
4556 #endif
4557
4558 case OP_ANYNL:
4559 detect_partial_match(common, backtracks);
4560 read_char(common);
4561 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4562 /* We don't need to handle soft partial matching case. */
4563 end_list = NULL;
4564 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4565 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4566 else
4567 check_str_end(common, &end_list);
4568 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4569 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4570 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4571 jump[2] = JUMP(SLJIT_JUMP);
4572 JUMPHERE(jump[0]);
4573 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4574 set_jumps(end_list, LABEL());
4575 JUMPHERE(jump[1]);
4576 JUMPHERE(jump[2]);
4577 return cc;
4578
4579 case OP_NOT_HSPACE:
4580 case OP_HSPACE:
4581 detect_partial_match(common, backtracks);
4582 read_char(common);
4583 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4584 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4585 return cc;
4586
4587 case OP_NOT_VSPACE:
4588 case OP_VSPACE:
4589 detect_partial_match(common, backtracks);
4590 read_char(common);
4591 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4592 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4593 return cc;
4594
4595 #ifdef SUPPORT_UCP
4596 case OP_EXTUNI:
4597 detect_partial_match(common, backtracks);
4598 read_char(common);
4599 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4600 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4601 /* Optimize register allocation: use a real register. */
4602 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4603 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4604
4605 label = LABEL();
4606 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4607 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4608 read_char(common);
4609 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4610 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4611 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4612
4613 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4614 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4615 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4616 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4617 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4618 JUMPTO(SLJIT_C_NOT_ZERO, label);
4619
4620 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4621 JUMPHERE(jump[0]);
4622 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4623
4624 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4625 {
4626 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4627 /* Since we successfully read a char above, partial matching must occure. */
4628 check_partial(common, TRUE);
4629 JUMPHERE(jump[0]);
4630 }
4631 return cc;
4632 #endif
4633
4634 case OP_EODN:
4635 /* Requires rather complex checks. */
4636 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4637 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4638 {
4639 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4640 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4641 if (common->mode == JIT_COMPILE)
4642 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4643 else
4644 {
4645 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4646 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4647 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4648 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4649 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4650 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4651 check_partial(common, TRUE);
4652 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4653 JUMPHERE(jump[1]);
4654 }
4655 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4656 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4657 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4658 }
4659 else if (common->nltype == NLTYPE_FIXED)
4660 {
4661 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4662 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4663 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4664 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4665 }
4666 else
4667 {
4668 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4669 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4670 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4671 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4672 jump[2] = JUMP(SLJIT_C_GREATER);
4673 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4674 /* Equal. */
4675 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4676 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4677 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4678
4679 JUMPHERE(jump[1]);
4680 if (common->nltype == NLTYPE_ANYCRLF)
4681 {
4682 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4683 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4684 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4685 }
4686 else
4687 {
4688 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4689 read_char(common);
4690 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4691 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4692 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4693 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4694 }
4695 JUMPHERE(jump[2]);
4696 JUMPHERE(jump[3]);
4697 }
4698 JUMPHERE(jump[0]);
4699 check_partial(common, FALSE);
4700 return cc;
4701
4702 case OP_EOD:
4703 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4704 check_partial(common, FALSE);
4705 return cc;
4706
4707 case OP_CIRC:
4708 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4709 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4710 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4711 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4712 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4713 return cc;
4714
4715 case OP_CIRCM:
4716 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4717 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4718 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4719 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4720 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4721 jump[0] = JUMP(SLJIT_JUMP);
4722 JUMPHERE(jump[1]);
4723
4724 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4725 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4726 {
4727 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4728 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4729 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4730 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4731 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4732 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4733 }
4734 else
4735 {
4736 skip_char_back(common);
4737 read_char(common);
4738 check_newlinechar(common, common->nltype, backtracks, FALSE);
4739 }
4740 JUMPHERE(jump[0]);
4741 return cc;
4742
4743 case OP_DOLL:
4744 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4745 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4746 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4747
4748 if (!common->endonly)
4749 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4750 else
4751 {
4752 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4753 check_partial(common, FALSE);
4754 }
4755 return cc;
4756
4757 case OP_DOLLM:
4758 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4759 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4760 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4761 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4762 check_partial(common, FALSE);
4763 jump[0] = JUMP(SLJIT_JUMP);
4764 JUMPHERE(jump[1]);
4765
4766 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4767 {
4768 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4769 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4770 if (common->mode == JIT_COMPILE)
4771 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4772 else
4773 {
4774 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4775 /* STR_PTR = STR_END - IN_UCHARS(1) */
4776 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4777 check_partial(common, TRUE);
4778 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4779 JUMPHERE(jump[1]);
4780 }
4781
4782 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4783 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4784 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4785 }
4786 else
4787 {
4788 peek_char(common);
4789 check_newlinechar(common, common->nltype, backtracks, FALSE);
4790 }
4791 JUMPHERE(jump[0]);
4792 return cc;
4793
4794 case OP_CHAR:
4795 case OP_CHARI:
4796 length = 1;
4797 #ifdef SUPPORT_UTF
4798 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4799 #endif
4800 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4801 {
4802 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4803 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4804
4805 context.length = IN_UCHARS(length);
4806 context.sourcereg = -1;
4807 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4808 context.ucharptr = 0;
4809 #endif
4810 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4811 }
4812 detect_partial_match(common, backtracks);
4813 read_char(common);
4814 #ifdef SUPPORT_UTF
4815 if (common->utf)
4816 {
4817 GETCHAR(c, cc);
4818 }
4819 else
4820 #endif
4821 c = *cc;
4822 if (type == OP_CHAR || !char_has_othercase(common, cc))
4823 {
4824 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4825 return cc + length;
4826 }
4827 oc = char_othercase(common, c);
4828 bit = c ^ oc;
4829 if (is_powerof2(bit))
4830 {
4831 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4832 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4833 return cc + length;
4834 }
4835 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4836 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4837 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4838 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4839 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4840 return cc + length;
4841
4842 case OP_NOT:
4843 case OP_NOTI:
4844 detect_partial_match(common, backtracks);
4845 length = 1;
4846 #ifdef SUPPORT_UTF
4847 if (common->utf)
4848 {
4849 #ifdef COMPILE_PCRE8
4850 c = *cc;
4851 if (c < 128)
4852 {
4853 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4854 if (type == OP_NOT || !char_has_othercase(common, cc))
4855 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4856 else
4857 {
4858 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4859 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4860 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4861 }
4862 /* Skip the variable-length character. */
4863 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4864 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4865 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4866 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4867 JUMPHERE(jump[0]);
4868 return cc + 1;
4869 }
4870 else
4871 #endif /* COMPILE_PCRE8 */
4872 {
4873 GETCHARLEN(c, cc, length);
4874 read_char(common);
4875 }
4876 }
4877 else
4878 #endif /* SUPPORT_UTF */
4879 {
4880 read_char(common);
4881 c = *cc;
4882 }
4883
4884 if (type == OP_NOT || !char_has_othercase(common, cc))
4885 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4886 else
4887 {
4888 oc = char_othercase(common, c);
4889 bit = c ^ oc;
4890 if (is_powerof2(bit))
4891 {
4892 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4893 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4894 }
4895 else
4896 {
4897 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4898 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4899 }
4900 }
4901 return cc + length;
4902
4903 case OP_CLASS:
4904 case OP_NCLASS:
4905 detect_partial_match(common, backtracks);
4906 read_char(common);
4907 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4908 return cc + 32 / sizeof(pcre_uchar);
4909
4910 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4911 jump[0] = NULL;
4912 #ifdef COMPILE_PCRE8
4913 /* This check only affects 8 bit mode. In other modes, we
4914 always need to compare the value with 255. */
4915 if (common->utf)
4916 #endif /* COMPILE_PCRE8 */
4917 {
4918 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4919 if (type == OP_CLASS)
4920 {
4921 add_jump(compiler, backtracks, jump[0]);
4922 jump[0] = NULL;
4923 }
4924 }
4925 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4926 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4927 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4928 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4929 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4930 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4931 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4932 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4933 if (jump[0] != NULL)
4934 JUMPHERE(jump[0]);
4935 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4936 return cc + 32 / sizeof(pcre_uchar);
4937
4938 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4939 case OP_XCLASS:
4940 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4941 return cc + GET(cc, 0) - 1;
4942 #endif
4943
4944 case OP_REVERSE:
4945 length = GET(cc, 0);
4946 if (length == 0)
4947 return cc + LINK_SIZE;
4948 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4949 #ifdef SUPPORT_UTF
4950 if (common->utf)
4951 {
4952 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4953 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4954 label = LABEL();
4955 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4956 skip_char_back(common);
4957 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4958 JUMPTO(SLJIT_C_NOT_ZERO, label);
4959 }
4960 else
4961 #endif
4962 {
4963 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4964 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4965 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4966 }
4967 check_start_used_ptr(common);
4968 return cc + LINK_SIZE;
4969 }
4970 SLJIT_ASSERT_STOP();
4971 return cc;
4972 }
4973
4974 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4975 {
4976 /* This function consumes at least one input character. */
4977 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4978 DEFINE_COMPILER;
4979 pcre_uchar *ccbegin = cc;
4980 compare_context context;
4981 int size;
4982
4983 context.length = 0;
4984 do
4985 {
4986 if (cc >= ccend)
4987 break;
4988
4989 if (*cc == OP_CHAR)
4990 {
4991 size = 1;
4992 #ifdef SUPPORT_UTF
4993 if (common->utf && HAS_EXTRALEN(cc[1]))
4994 size += GET_EXTRALEN(cc[1]);
4995 #endif
4996 }
4997 else if (*cc == OP_CHARI)
4998 {
4999 size = 1;
5000 #ifdef SUPPORT_UTF
5001 if (common->utf)
5002 {
5003 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5004 size = 0;
5005 else if (HAS_EXTRALEN(cc[1]))
5006 size += GET_EXTRALEN(cc[1]);
5007 }
5008 else
5009 #endif
5010 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5011 size = 0;
5012 }
5013 else
5014 size = 0;
5015
5016 cc += 1 + size;
5017 context.length += IN_UCHARS(size);
5018 }
5019 while (size > 0 && context.length <= 128);
5020
5021 cc = ccbegin;
5022 if (context.length > 0)
5023 {
5024 /* We have a fixed-length byte sequence. */
5025 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5026 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5027
5028 context.sourcereg = -1;
5029 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5030 context.ucharptr = 0;
5031 #endif
5032 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5033 return cc;
5034 }
5035
5036 /* A non-fixed length character will be checked if length == 0. */
5037 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5038 }
5039
5040 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5041 {
5042 DEFINE_COMPILER;
5043 int offset = GET2(cc, 1) << 1;
5044
5045 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5046 if (!common->jscript_compat)
5047 {
5048 if (backtracks == NULL)
5049 {
5050 /* OVECTOR(1) contains the "string begin - 1" constant. */
5051 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5052 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5053 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5054 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5055 return JUMP(SLJIT_C_NOT_ZERO);
5056 }
5057 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5058 }
5059 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5060 }
5061
5062 /* Forward declarations of functions which are called below before they are defined. */
5063 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5064 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5065
/* Allocates a zero filled backtrack record of 'size' bytes with
sljit_alloc_memory(), stores 'ccstart' in its cc member and makes it the
new top of the list owned by 'parent' (the previous top is kept in prev).
If the compiler is in an error state after the allocation, the enclosing
function returns 'error'. The macro relies on the variables 'compiler',
'backtrack' and 'parent' being in scope where it is expanded. */
5066 #define PUSH_BACKTRACK(size, ccstart, error) \
5067 do \
5068 { \
5069 backtrack = sljit_alloc_memory(compiler, (size)); \
5070 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5071 return error; \
5072 memset(backtrack, 0, size); \
5073 backtrack->prev = parent->top; \
5074 backtrack->cc = (ccstart); \
5075 parent->top = backtrack; \
5076 } \
5077 while (0)
5078
/* Same steps as PUSH_BACKTRACK, but for use in functions without a return
value: when the compiler is in an error state after the allocation, the
enclosing function simply returns. The macro relies on the variables
'compiler', 'backtrack' and 'parent' being in scope where it is expanded. */
5079 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5080 do \
5081 { \
5082 backtrack = sljit_alloc_memory(compiler, (size)); \
5083 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5084 return; \
5085 memset(backtrack, 0, size); \
5086 backtrack->prev = parent->top; \
5087 backtrack->cc = (ccstart); \
5088 parent->top = backtrack; \
5089 } \
5090 while (0)
5091
/* Casts the local variable 'backtrack' to a pointer to the given backtrack type. */
5092 #define BACKTRACK_AS(type) ((type *)backtrack)
5093
/* Emits the code which matches a back reference (OP_REF or OP_REFI) once.

withchecks: when set, a reference whose start slot equals OVECTOR(1) is
  sent to 'backtracks' (unless jscript_compat is set), and a jump is
  created for the case when the start and end slots of the reference are
  equal (an empty capture).
emptyfail: decides what happens to that empty-capture jump: it is either
  added to 'backtracks' (TRUE) or resolved at the end of the emitted code
  (FALSE).

Returns the address of the item which follows the reference. */
5094 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5095 {
5096 DEFINE_COMPILER;
5097 int offset = GET2(cc, 1) << 1;
5098 struct sljit_jump *jump = NULL;
5099 struct sljit_jump *partial;
5100 struct sljit_jump *nopartial;
5101
5102 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5103 /* OVECTOR(1) contains the "string begin - 1" constant. */
5104 if (withchecks && !common->jscript_compat)
5105 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5106
5107 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5108 if (common->utf && *cc == OP_REFI)
5109 {
/* Caseless comparison in UTF mode is done by calling do_utf_caselesscmp().
The assert documents the register assignment which the call below relies on. */
5110 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5111 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5112 if (withchecks)
5113 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5114
5115 /* Needed to save important temporary registers. */
5116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5117 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5118 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5119 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5120 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are treated as failures; any other value becomes
the new STR_PTR. Outside JIT_COMPILE mode the value 1 additionally runs
check_partial() before backtracking. */
5121 if (common->mode == JIT_COMPILE)
5122 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5123 else
5124 {
5125 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5126 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5127 check_partial(common, FALSE);
5128 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5129 JUMPHERE(nopartial);
5130 }
5131 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5132 }
5133 else
5134 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5135 {
/* TMP2 = end slot - start slot of the reference; the zero flag is set for an empty capture. */
5136 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5137 if (withchecks)
5138 jump = JUMP(SLJIT_C_ZERO);
5139
/* STR_PTR is advanced first; running past STR_END is a plain failure in
JIT_COMPILE mode, and is handled by the partial matching code below in
the other modes. */
5140 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5141 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5142 if (common->mode == JIT_COMPILE)
5143 add_jump(compiler, backtracks, partial);
5144
/* The comparison itself is a fast call to the shared caseful / caseless
compare routine; a non-zero TMP2 afterwards means failure. */
5145 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5146 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5147
5148 if (common->mode != JIT_COMPILE)
5149 {
5150 nopartial = JUMP(SLJIT_JUMP);
5151 JUMPHERE(partial);
5152 /* TMP2 -= STR_END - STR_PTR */
5153 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5154 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* The comparison is skipped when no characters are left to compare. */
5155 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5156 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5157 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5158 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5159 JUMPHERE(partial);
5160 check_partial(common, FALSE);
5161 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5162 JUMPHERE(nopartial);
5163 }
5164 }
5165
/* Empty capture: either fail, or continue with the code after the reference. */
5166 if (jump != NULL)
5167 {
5168 if (emptyfail)
5169 add_jump(compiler, backtracks, jump);
5170 else
5171 JUMPHERE(jump);
5172 }
5173 return cc + 1 + IMM2_SIZE;
5174 }
5175
/* Emits the matching path of a back reference which is followed by a repeat
operator (OP_CRSTAR ... OP_CRMINRANGE). An iterator_backtrack record is
pushed onto 'parent'. Returns the address which follows the repeat operator
and its arguments, or NULL when PUSH_BACKTRACK detects a compiler error. */
5176 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5177 {
5178 DEFINE_COMPILER;
5179 backtrack_common *backtrack;
5180 pcre_uchar type;
5181 struct sljit_label *label;
5182 struct sljit_jump *zerolength;
5183 struct sljit_jump *jump = NULL;
5184 pcre_uchar *ccbegin = cc;
5185 int min = 0, max = 0;
5186 BOOL minimize;
5187
5188 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5189
/* The repeat opcode follows the reference; its lowest bit is used as the
"minimizing" flag. In the code below max == 0 is treated as "no upper limit". */
5190 type = cc[1 + IMM2_SIZE];
5191 minimize = (type & 0x1) != 0;
5192 switch(type)
5193 {
5194 case OP_CRSTAR:
5195 case OP_CRMINSTAR:
5196 min = 0;
5197 max = 0;
5198 cc += 1 + IMM2_SIZE + 1;
5199 break;
5200 case OP_CRPLUS:
5201 case OP_CRMINPLUS:
5202 min = 1;
5203 max = 0;
5204 cc += 1 + IMM2_SIZE + 1;
5205 break;
5206 case OP_CRQUERY:
5207 case OP_CRMINQUERY:
5208 min = 0;
5209 max = 1;
5210 cc += 1 + IMM2_SIZE + 1;
5211 break;
5212 case OP_CRRANGE:
5213 case OP_CRMINRANGE:
5214 min = GET2(cc, 1 + IMM2_SIZE + 1);
5215 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5216 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5217 break;
5218 default:
5219 SLJIT_ASSERT_STOP();
5220 break;
5221 }
5222
/* Non-minimizing case: match as many times as allowed, pushing STR_PTR
onto the stack after the iterations. */
5223 if (!minimize)
5224 {
5225 if (min == 0)
5226 {
5227 allocate_stack(common, 2);
5228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5229 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5230 /* Temporary release of STR_PTR. */
5231 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5232 zerolength = compile_ref_checks(common, ccbegin, NULL);
5233 /* Restore if not zero length. */
5234 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5235 }
5236 else
5237 {
5238 allocate_stack(common, 1);
5239 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5240 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5241 }
5242
/* POSSESSIVE0 serves as the iteration counter when counting is needed. */
5243 if (min > 1 || max > 1)
5244 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5245
5246 label = LABEL();
5247 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5248
5249 if (min > 1 || max > 1)
5250 {
5251 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5252 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5253 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum the loop is repeated without saving STR_PTR. */
5254 if (min > 1)
5255 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5256 if (max > 1)
5257 {
/* Stop looping once the counter has reached max; otherwise save STR_PTR and repeat. */
5258 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5259 allocate_stack(common, 1);
5260 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5261 JUMPTO(SLJIT_JUMP, label);
5262 JUMPHERE(jump);
5263 }
5264 }
5265
5266 if (max == 0)
5267 {
5268 /* Includes min > 1 case as well. */
5269 allocate_stack(common, 1);
5270 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5271 JUMPTO(SLJIT_JUMP, label);
5272 }
5273
5274 JUMPHERE(zerolength);
5275 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5276
5277 decrease_call_count(common);
5278 return cc;
5279 }
5280
/* Minimizing case: two stack slots are used. STACK(0) receives STR_PTR
after an iteration, and STACK(1) is the iteration counter (it is not
initialized for OP_CRMINSTAR, where it is never read). */
5281 allocate_stack(common, 2);
5282 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5283 if (type != OP_CRMINSTAR)
5284 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5285
5286 if (min == 0)
5287 {
/* With min == 0 the first attempt skips the reference entirely. */
5288 zerolength = compile_ref_checks(common, ccbegin, NULL);
5289 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5290 jump = JUMP(SLJIT_JUMP);
5291 }
5292 else
5293 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5294
/* The label stored in matchingpath starts one more iteration of the reference. */
5295 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5296 if (max > 0)
5297 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5298
5299 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5300 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5301
5302 if (min > 1)
5303 {
5304 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5305 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5306 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5307 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5308 }
5309 else if (max > 0)
5310 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5311
5312 if (jump != NULL)
5313 JUMPHERE(jump);
5314 JUMPHERE(zerolength);
5315
5316 decrease_call_count(common);
5317 return cc;
5318 }
5319
5320 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5321 {
5322 DEFINE_COMPILER;
5323 backtrack_common *backtrack;
5324 recurse_entry *entry = common->entries;
5325 recurse_entry *prev = NULL;
5326 sljit_sw start = GET(cc, 1);
5327 pcre_uchar *start_cc;
5328 BOOL needs_control_head;
5329
5330 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5331
5332 /* Inlining simple patterns. */
5333 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5334 {
5335 start_cc = common->start + start;
5336 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5337 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5338 return cc + 1 + LINK_SIZE;
5339 }
5340
5341 while (entry != NULL)
5342 {
5343 if (entry->start == start)
5344 break;
5345 prev = entry;
5346 entry = entry->next;
5347 }
5348
5349 if (entry == NULL)
5350 {
5351 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5352 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5353 return NULL;
5354 entry->next = NULL;
5355 entry->entry = NULL;
5356 entry->calls = NULL;
5357 entry->start = start;
5358
5359 if (prev != NULL)
5360 prev->next = entry;
5361 else
5362 common->entries = entry;
5363 }
5364
5365 if (common->has_set_som && common->mark_ptr != 0)
5366 {
5367 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5368 allocate_stack(common, 2);
5369 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5370 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5371 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5372 }
5373 else if (common->has_set_som || common->mark_ptr != 0)
5374 {
5375 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5376 allocate_stack(common, 1);
5377 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5378 }
5379
5380 if (entry->entry == NULL)
5381 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5382 else
5383 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5384 /* Leave if the match is failed. */
5385 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5386 return cc + 1 + LINK_SIZE;
5387 }
5388
5389 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5390 {
5391 const pcre_uchar *begin = arguments->begin;
5392 int *offset_vector = arguments->offsets;
5393 int offset_count = arguments->offset_count;
5394 int i;
5395
5396 if (PUBL(callout) == NULL)
5397 return 0;
5398
5399 callout_block->version = 2;
5400 callout_block->callout_data = arguments->callout_data;
5401
5402 /* Offsets in subject. */
5403 callout_block->subject_length = arguments->end - arguments->begin;
5404 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5405 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5406 #if defined COMPILE_PCRE8
5407 callout_block->subject = (PCRE_SPTR)begin;
5408 #elif defined COMPILE_PCRE16
5409 callout_block->subject = (PCRE_SPTR16)begin;
5410 #elif defined COMPILE_PCRE32
5411 callout_block->subject = (PCRE_SPTR32)begin;
5412 #endif
5413
5414 /* Convert and copy the JIT offset vector to the offset_vector array. */
5415 callout_block->capture_top = 0;
5416 callout_block->offset_vector = offset_vector;
5417 for (i = 2; i < offset_count; i += 2)
5418 {
5419 offset_vector[i] = jit_ovector[i] - begin;
5420 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5421 if (jit_ovector[i] >= begin)
5422 callout_block->capture_top = i;
5423 }
5424
5425 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5426 if (offset_count > 0)
5427 offset_vector[0] = -1;
5428 if (offset_count > 1)
5429 offset_vector[1] = -1;
5430 return (*PUBL(callout))(callout_block);
5431 }
5432
5433 /* Size of the callout block, rounded up to a multiple of 8 bytes. */
5434 #define CALLOUT_ARG_SIZE \
5435 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5436
/* Offset of member 'arg' of the callout block, relative to the address
which is CALLOUT_ARG_SIZE bytes above the start of the block (the result
is therefore negative). */
5437 #define CALLOUT_ARG_OFFSET(arg) \
5438 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5439
/* Emits the code of a callout: a callout block is built on the JIT stack,
do_callout() is called, and its return value is checked. A positive value
makes the match backtrack, a negative value leaves through the forced quit
path, and zero continues with the following item. Returns the address of
that following item, or NULL when PUSH_BACKTRACK detects a compiler error. */
5440 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5441 {
5442 DEFINE_COMPILER;
5443 backtrack_common *backtrack;
5444
5445 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5446
/* Reserve the space of the callout block; its members are addressed with
CALLOUT_ARG_OFFSET relative to STACK_TOP. */
5447 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5448
5449 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5450 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5451 SLJIT_ASSERT(common->capture_last_ptr != 0);
5452 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5453 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5454
5455 /* These pointer sized fields temporarily store internal variables
(STR_PTR and OVECTOR(0)); do_callout() converts them to current_position
and start_match before it overwrites the fields. */
5456 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5457 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5458 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5459
/* The mark member is set from jit_arguments.mark_ptr when marks are in use, and to 0 otherwise. */
5460 if (common->mark_ptr != 0)
5461 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5462 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5463 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5464 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5465
5466 /* Needed to save important temporary registers. */
5467 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* SLJIT_SCRATCH_REG2 = start address of the callout block,
SLJIT_SCRATCH_REG3 = address of the JIT offset vector (presumably passed
as the second and third arguments of do_callout). */
5468 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5469 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5470 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5471 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5472 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5473 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5474
5475 /* Check return value. */
5476 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5477 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
/* Negative value: jump to the forced quit label if it exists already, otherwise record the jump. */
5478 if (common->forced_quit_label == NULL)
5479 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5480 else
5481 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5482 return cc + 2 + 2 * LINK_SIZE;
5483 }
5484
/* The callout helper macros are used only by compile_callout_matchingpath. */
5485 #undef CALLOUT_ARG_SIZE
5486 #undef CALLOUT_ARG_OFFSET
5487
/* Emits the matching path of an assertion (OP_ASSERT ... OP_ASSERTBACK_NOT),
which may be preceded by OP_BRAZERO or OP_BRAMINZERO (never when
'conditional' is set). Every alternative is compiled with its own accept
list and its own temporary backtrack record. The local_exit, then_trap,
quit and accept members of 'common' are saved on entry and restored before
each return.

conditional: when set, the failure jumps are collected in
  backtrack->condfailed instead of backtrack->common.topbacktracks.

Returns the address which follows the item terminating the last
alternative, or NULL if the compiler reports an error. */
5488 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5489 {
5490 DEFINE_COMPILER;
5491 int framesize;
5492 int extrasize;
5493 BOOL needs_control_head;
5494 int private_data_ptr;
5495 backtrack_common altbacktrack;
5496 pcre_uchar *ccbegin;
5497 pcre_uchar opcode;
5498 pcre_uchar bra = OP_BRA;
5499 jump_list *tmp = NULL;
5500 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5501 jump_list **found;
5502 /* Saving previous accept variables. */
5503 BOOL save_local_exit = common->local_exit;
5504 then_trap_backtrack *save_then_trap = common->then_trap;
5505 struct sljit_label *save_quit_label = common->quit_label;
5506 struct sljit_label *save_accept_label = common->accept_label;
5507 jump_list *save_quit = common->quit;
5508 jump_list *save_accept = common->accept;
5509 struct sljit_jump *jump;
5510 struct sljit_jump *brajump = NULL;
5511
5512 /* Assert captures then. */
5513 common->then_trap = NULL;
5514
5515 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5516 {
5517 SLJIT_ASSERT(!conditional);
5518 bra = *cc;
5519 cc++;
5520 }
5521 private_data_ptr = PRIVATE_DATA(cc);
5522 SLJIT_ASSERT(private_data_ptr != 0);
5523 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5524 backtrack->framesize = framesize;
5525 backtrack->private_data_ptr = private_data_ptr;
5526 opcode = *cc;
5527 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* 'found' receives the jump taken when an alternative matches. For a
positive assertion this is the local list 'tmp', which is resolved at
"Assert is successful" below; for a negative assertion a match means
failure, so the jump goes to 'target'. */
5528 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5529 ccbegin = cc;
5530 cc += GET(cc, 1);
5531
5532 if (bra == OP_BRAMINZERO)
5533 {
5534 /* This is a braminzero backtrack path. */
5535 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5536 free_stack(common, 1);
5537 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5538 }
5539
/* Negative framesize: no frame is initialized, only STR_PTR (and the
control head when needed) is pushed onto the stack. */
5540 if (framesize < 0)
5541 {
5542 extrasize = needs_control_head ? 2 : 1;
5543 if (framesize == no_frame)
5544 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5545 allocate_stack(common, extrasize);
5546 if (needs_control_head)
5547 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5548 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5549 if (needs_control_head)
5550 {
5551 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5552 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5553 }
5554 }
5555 else
5556 {
/* A frame is needed: STR_PTR, the control head (when needed) and the
previous value of the private data slot are stored in front of the frame
created by init_frame(). */
5557 extrasize = needs_control_head ? 3 : 2;
5558 allocate_stack(common, framesize + extrasize);
5559 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5560 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5561 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5562 if (needs_control_head)
5563 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5564 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5565 if (needs_control_head)
5566 {
5567 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5568 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5569 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5570 }
5571 else
5572 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5573 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
5574 }
5575
5576 memset(&altbacktrack, 0, sizeof(backtrack_common));
5577 common->local_exit = TRUE;
5578 common->quit_label = NULL;
5579 common->quit = NULL;
/* One iteration for each alternative of the assertion. */
5580 while (1)
5581 {
5582 common->accept_label = NULL;
5583 common->accept = NULL;
5584 altbacktrack.top = NULL;
5585 altbacktrack.topbacktracks = NULL;
5586
/* Every alternative after the first one starts from the saved STR_PTR. */
5587 if (*ccbegin == OP_ALT)
5588 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5589
5590 altbacktrack.cc = ccbegin;
5591 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5592 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5593 {
5594 common->local_exit = save_local_exit;
5595 common->then_trap = save_then_trap;
5596 common->quit_label = save_quit_label;
5597 common->accept_label = save_accept_label;
5598 common->quit = save_quit;
5599 common->accept = save_accept;
5600 return NULL;
5601 }
/* The accept jumps collected while compiling the alternative continue here. */
5602 common->accept_label = LABEL();
5603 if (common->accept != NULL)
5604 set_jumps(common->accept, common->accept_label);
5605
5606 /* Reset stack. */
5607 if (framesize < 0)
5608 {
5609 if (framesize == no_frame)
5610 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5611 else
5612 free_stack(common, extrasize);
5613 if (needs_control_head)
5614 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5615 }
5616 else
5617 {
5618 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5619 {
5620 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5621 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5622 if (needs_control_head)
5623 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5624 }
5625 else
5626 {
5627 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5628 if (needs_control_head)
5629 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5630 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5631 }
5632 }
5633
5634 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5635 {
5636 /* We know that STR_PTR was stored on the top of the stack. */
5637 if (conditional)
5638 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5639 else if (bra == OP_BRAZERO)
5640 {
5641 if (framesize < 0)
5642 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5643 else
5644 {
5645 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5646 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5647 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5648 }
5649 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5650 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5651 }
5652 else if (framesize >= 0)
5653 {
5654 /* For OP_BRA and OP_BRAMINZERO. */
5655 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5656 }
5657 }
/* The alternative has matched. */
5658 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5659
/* The backtracking path of the alternative follows; when it is exhausted,
execution continues with the next alternative or after the loop. */
5660 compile_backtrackingpath(common, altbacktrack.top);
5661 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5662 {
5663 common->local_exit = save_local_exit;
5664 common->then_trap = save_then_trap;
5665 common->quit_label = save_quit_label;
5666 common->accept_label = save_accept_label;
5667 common->quit = save_quit;
5668 common->accept = save_accept;
5669 return NULL;
5670 }
5671 set_jumps(altbacktrack.topbacktracks, LABEL());
5672
5673 if (*cc != OP_ALT)
5674 break;
5675
5676 ccbegin = cc;
5677 cc += GET(cc, 1);
5678 }
5679
5680 /* None of them matched. */
5681 if (common->quit != NULL)
5682 {
/* Jumps collected in the local quit list arrive here and have the stack
restored; the normal fall-through path jumps over this code. */
5683 jump = JUMP(SLJIT_JUMP);
5684 set_jumps(common->quit, LABEL());
5685 SLJIT_ASSERT(framesize != no_stack);
5686 if (framesize < 0)
5687 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5688 else
5689 {
5690 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5691 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5692 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5693 }
5694 JUMPHERE(jump);
5695 }
5696
/* Restore the control head saved in STACK(1). */
5697 if (needs_control_head)
5698 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5699
5700 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5701 {
5702 /* Assert is failed. */
5703 if (conditional || bra == OP_BRAZERO)
5704 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5705
5706 if (framesize < 0)
5707 {
5708 /* The topmost item should be 0. */
5709 if (bra == OP_BRAZERO)
5710 {
5711 if (extrasize == 2)
5712 free_stack(common, 1);
5713 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5714 }
5715 else
5716 free_stack(common, extrasize);
5717 }
5718 else
5719 {
5720 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5721 /* The topmost item should be 0. */
5722 if (bra == OP_BRAZERO)
5723 {
5724 free_stack(common, framesize + extrasize - 1);
5725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5726 }
5727 else
5728 free_stack(common, framesize + extrasize);
5729 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5730 }
/* With OP_BRAZERO a failed assertion still continues on the matching path
(the jump is bound to it below); otherwise the jump goes to 'target'. */
5731 jump = JUMP(SLJIT_JUMP);
5732 if (bra != OP_BRAZERO)
5733 add_jump(compiler, target, jump);
5734
5735 /* Assert is successful. */
5736 set_jumps(tmp, LABEL());
5737 if (framesize < 0)
5738 {
5739 /* We know that STR_PTR was stored on the top of the stack. */
5740 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5741 /* Keep the STR_PTR on the top of the stack. */
5742 if (bra == OP_BRAZERO)
5743 {
5744 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5745 if (extrasize == 2)
5746 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5747 }
5748 else if (bra == OP_BRAMINZERO)
5749 {
5750 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5751 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5752 }
5753 }
5754 else
5755 {
5756 if (bra == OP_BRA)
5757 {
5758 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5759 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5760 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5761 }
5762 else
5763 {
5764 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5765 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5766 if (extrasize == 2)
5767 {
5768 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5769 if (bra == OP_BRAMINZERO)
5770 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5771 }
5772 else
5773 {
5774 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5775 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5776 }
5777 }
5778 }
5779
5780 if (bra == OP_BRAZERO)
5781 {
5782 backtrack->matchingpath = LABEL();
5783 SET_LABEL(jump, backtrack->matchingpath);
5784 }
5785 else if (bra == OP_BRAMINZERO)
5786 {
5787 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5788 JUMPHERE(brajump);
5789 if (framesize >= 0)
5790 {
5791 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5792 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5793 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5794 }
5795 set_jumps(backtrack->common.topbacktracks, LABEL());
5796 }
5797 }
5798 else
5799 {
5800 /* AssertNot is successful. */
5801 if (framesize < 0)
5802 {
5803 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5804 if (bra != OP_BRA)
5805 {
5806 if (extrasize == 2)
5807 free_stack(common, 1);
5808 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5809 }
5810 else
5811 free_stack(common, extrasize);
5812 }
5813 else
5814 {
5815 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5816 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5817 /* The topmost item should be 0. */
5818 if (bra != OP_BRA)
5819 {
5820 free_stack(common, framesize + extrasize - 1);
5821 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5822 }
5823 else
5824 free_stack(common, framesize + extrasize);
5825 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5826 }
5827
5828 if (bra == OP_BRAZERO)
5829 backtrack->matchingpath = LABEL();
5830 else if (bra == OP_BRAMINZERO)
5831 {
5832 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5833 JUMPHERE(brajump);
5834 }
5835
/* With a zero-repeat prefix the failure jumps are resolved here, so the
list is empty when the function returns. */
5836 if (bra != OP_BRA)
5837 {
5838 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5839 set_jumps(backtrack->common.topbacktracks, LABEL());
5840 backtrack->common.topbacktracks = NULL;
5841 }
5842 }
5843
/* Restore the state saved on entry. */
5844 common->local_exit = save_local_exit;
5845 common->then_trap = save_then_trap;
5846 common->quit_label = save_quit_label;
5847 common->accept_label = save_accept_label;
5848 common->quit = save_quit;
5849 common->accept = save_accept;
5850 return cc + 1 + LINK_SIZE;
5851 }
5852
5853 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5854 {
5855 int condition = FALSE;
5856 pcre_uchar *slotA = name_table;
5857 pcre_uchar *slotB;
5858 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5859 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5860 sljit_sw no_capture;
5861 int i;
5862
5863 locals += refno & 0xff;
5864 refno >>= 8;
5865 no_capture = locals[1];
5866
5867 for (i = 0; i < name_count; i++)
5868 {
5869 if (GET2(slotA, 0) == refno) break;
5870 slotA += name_entry_size;
5871 }
5872
5873 if (i < name_count)
5874 {
5875 /* Found a name for the number - there can be only one; duplicate names
5876 for different numbers are allowed, but not vice versa. First scan down
5877 for duplicates. */
5878
5879 slotB = slotA;
5880 while (slotB > name_table)
5881 {
5882 slotB -= name_entry_size;
5883 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5884 {
5885 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5886 if (condition) break;
5887 }
5888 else break;
5889 }
5890
5891 /* Scan up for duplicates */
5892 if (!condition)
5893 {
5894 slotB = slotA;
5895 for (i++; i < name_count; i++)
5896 {
5897 slotB += name_entry_size;
5898 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5899 {
5900 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5901 if (condition) break;
5902 }
5903 else break;
5904 }
5905 }
5906 }
5907 return condition;
5908 }
5909
5910 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5911 {
5912 int condition = FALSE;
5913 pcre_uchar *slotA = name_table;
5914 pcre_uchar *slotB;
5915 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5916 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5917 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5918 sljit_uw i;
5919
5920 for (i = 0; i < name_count; i++)
5921 {
5922 if (GET2(slotA, 0) == recno) break;
5923 slotA += name_entry_size;
5924 }
5925
5926 if (i < name_count)
5927 {
5928 /* Found a name for the number - there can be only one; duplicate
5929 names for different numbers are allowed, but not vice versa. First
5930 scan down for duplicates. */
5931
5932 slotB = slotA;
5933 while (slotB > name_table)
5934 {
5935 slotB -= name_entry_size;
5936 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5937 {
5938 condition = GET2(slotB, 0) == group_num;
5939 if (condition) break;
5940 }
5941 else break;
5942 }
5943
5944 /* Scan up for duplicates */
5945 if (!condition)
5946 {
5947 slotB = slotA;
5948 for (i++; i < name_count; i++)
5949 {
5950 slotB += name_entry_size;
5951 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5952 {
5953 condition = GET2(slotB, 0) == group_num;
5954 if (condition) break;
5955 }
5956 else break;
5957 }
5958 }
5959 }
5960 return condition;
5961 }
5962
5963 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
5964 {
5965 DEFINE_COMPILER;
5966 int stacksize;
5967
5968 if (framesize < 0)
5969 {
5970 if (framesize == no_frame)
5971 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5972 else
5973 {
5974 stacksize = needs_control_head ? 1 : 0;
5975 if (ket != OP_KET || has_alternatives)
5976 stacksize++;
5977 free_stack(common, stacksize);
5978 }
5979
5980 if (needs_control_head)
5981 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
5982
5983 /* TMP2 which is set here used by OP_KETRMAX below. */
5984 if (ket == OP_KETRMAX)
5985 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5986 else if (ket == OP_KETRMIN)
5987 {
5988 /* Move the STR_PTR to the private_data_ptr. */
5989 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5990 }
5991 }
5992 else
5993 {
5994 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
5995 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
5996 if (needs_control_head)
5997 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
5998
5999 if (ket == OP_KETRMAX)
6000 {
6001 /* TMP2 which is set here used by OP_KETRMAX below. */
6002 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6003 }
6004 }
6005 if (needs_control_head)
6006 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6007 }
6008
6009 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6010 {
6011 DEFINE_COMPILER;
6012
6013 if (common->capture_last_ptr != 0)
6014 {
6015 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6016 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6017 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6018 stacksize++;
6019 }
6020 if (common->optimized_cbracket[offset >> 1] == 0)
6021 {
6022 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6023 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6024 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6025 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6026 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6027 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6028 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6029 stacksize += 2;
6030 }
6031 return stacksize;
6032 }
6033
6034 /*
6035 Handling bracketed expressions is probably the most complex part.
6036
6037 Stack layout naming characters:
6038 S - Push the current STR_PTR
6039 0 - Push a 0 (NULL)
6040 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6041 before the next alternative. Not pushed if there are no alternatives.
6042 M - Any values pushed by the current alternative. Can be empty, or anything.
6043 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6044 L - Push the previous local (pointed by localptr) to the stack
6045 () - optional values stored on the stack
6046 ()* - optional, can be stored multiple times
6047
6048 The following list shows the regular expression templates, their PCRE byte codes
6049 and stack layout supported by pcre-sljit.
6050
6051 (?:) OP_BRA | OP_KET A M
6052 () OP_CBRA | OP_KET C M
6053 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6054 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6055 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6056 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6057 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6058 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6059 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6060 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6061 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6062 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6063 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6064 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6065 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6066 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6067 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6068 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6069 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6070 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6071 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6072 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6073
6074
6075 Stack layout naming characters:
6076 A - Push the alternative index (starting from 0) on the stack.
6077 Not pushed if there are no alternatives.
6078 M - Any values pushed by the current alternative. Can be empty, or anything.
6079
6080 The next list shows the possible content of a bracket:
6081 (|) OP_*BRA | OP_ALT ... M A
6082 (?()|) OP_*COND | OP_ALT M A
6083 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6084 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6085 Or nothing, if trace is unnecessary
6086 */
6087
6088 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6089 {
6090 DEFINE_COMPILER;
6091 backtrack_common *backtrack;
6092 pcre_uchar opcode;
6093 int private_data_ptr = 0;
6094 int offset = 0;
6095 int stacksize;
6096 pcre_uchar *ccbegin;
6097 pcre_uchar *matchingpath;
6098 pcre_uchar bra = OP_BRA;
6099 pcre_uchar ket;
6100 assert_backtrack *assert;
6101 BOOL has_alternatives;
6102 BOOL needs_control_head = FALSE;
6103 struct sljit_jump *jump;
6104 struct sljit_jump *skip;
6105 struct sljit_label *rmaxlabel = NULL;
6106 struct sljit_jump *braminzerojump = NULL;
6107
6108 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6109
6110 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6111 {
6112 bra = *cc;
6113 cc++;
6114 opcode = *cc;
6115 }
6116
6117 opcode = *cc;
6118 ccbegin = cc;
6119 matchingpath = ccbegin + 1 + LINK_SIZE;
6120
6121 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6122 {
6123 /* Drop this bracket_backtrack. */
6124 parent->top = backtrack->prev;
6125 return bracketend(cc);
6126 }
6127
6128 ket = *(bracketend(cc) - 1 - LINK_SIZE);
6129 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6130 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6131 cc += GET(cc, 1);
6132
6133 has_alternatives = *cc == OP_ALT;
6134 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6135 {
6136 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
6137 if (*matchingpath == OP_NRREF)
6138 {
6139 stacksize = GET2(matchingpath, 1);
6140 if (common->currententry == NULL || stacksize == RREF_ANY)
6141 has_alternatives = FALSE;
6142 else if (common->currententry->start == 0)
6143 has_alternatives = stacksize != 0;
6144 else
6145 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6146 }
6147 }
6148
6149 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6150 opcode = OP_SCOND;
6151 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6152 opcode = OP_ONCE;
6153
6154 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6155 {
6156 /* Capturing brackets has a pre-allocated space. */
6157 offset = GET2(ccbegin, 1 + LINK_SIZE);
6158 if (common->optimized_cbracket[offset] == 0)
6159 {
6160 private_data_ptr = OVECTOR_PRIV(offset);
6161 offset <<= 1;
6162 }
6163 else
6164 {
6165 offset <<= 1;
6166 private_data_ptr = OVECTOR(offset);
6167 }
6168 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6169 matchingpath += IMM2_SIZE;
6170 }
6171 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6172 {
6173 /* Other brackets simply allocate the next entry. */
6174 private_data_ptr = PRIVATE_DATA(ccbegin);
6175 SLJIT_ASSERT(private_data_ptr != 0);
6176 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6177 if (opcode == OP_ONCE)
6178 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6179 }
6180
6181 /* Instructions before the first alternative. */
6182 stacksize = 0;
6183 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6184 stacksize++;
6185 if (bra == OP_BRAZERO)
6186 stacksize++;
6187
6188 if (stacksize > 0)
6189 allocate_stack(common, stacksize);
6190
6191 stacksize = 0;
6192 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6193 {
6194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6195 stacksize++;
6196 }
6197
6198 if (bra == OP_BRAZERO)
6199 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6200
6201 if (bra == OP_BRAMINZERO)
6202 {
6203 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6204 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6205 if (ket != OP_KETRMIN)
6206 {
6207 free_stack(common, 1);
6208 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6209 }
6210 else
6211 {
6212 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6213 {
6214 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6215 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6216 /* Nothing stored during the first run. */
6217 skip = JUMP(SLJIT_JUMP);
6218 JUMPHERE(jump);
6219 /* Checking zero-length iteration. */
6220 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6221 {
6222 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6223 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6224 }
6225 else
6226 {
6227 /* Except when the whole stack frame must be saved. */
6228 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6229 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6230 }
6231 JUMPHERE(skip);
6232 }
6233 else
6234 {
6235 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6236 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6237 JUMPHERE(jump);
6238 }
6239 }
6240 }
6241
6242 if (ket == OP_KETRMIN)
6243 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6244
6245 if (ket == OP_KETRMAX)
6246 {
6247 rmaxlabel = LABEL();
6248 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
6249 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
6250 }
6251
6252 /* Handling capturing brackets and alternatives. */
6253 if (opcode == OP_ONCE)
6254 {
6255 stacksize = 0;
6256 if (needs_control_head)
6257 {
6258 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6259 stacksize++;
6260 }
6261
6262 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6263 {
6264 /* Neither capturing brackets nor recursions are found in the block. */
6265 if (ket == OP_KETRMIN)
6266 {
6267 stacksize += 2;
6268 if (!needs_control_head)
6269 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6270 }
6271 else
6272 {
6273 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6274 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6275 if (ket == OP_KETRMAX || has_alternatives)
6276 stacksize++;
6277 }
6278
6279 if (stacksize > 0)
6280 allocate_stack(common, stacksize);
6281
6282 stacksize = 0;
6283 if (needs_control_head)
6284 {
6285 stacksize++;
6286 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6287 }
6288
6289 if (ket == OP_KETRMIN)
6290 {
6291 if (needs_control_head)
6292 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6293 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6294 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6295 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6296 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6297 }
6298 else if (ket == OP_KETRMAX || has_alternatives)
6299 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6300 }
6301 else
6302 {
6303 if (ket != OP_KET || has_alternatives)
6304 stacksize++;
6305
6306 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6307 allocate_stack(common, stacksize);
6308
6309 if (needs_control_head)
6310 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6311
6312 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6313 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6314
6315 stacksize = needs_control_head ? 1 : 0;
6316 if (ket != OP_KET || has_alternatives)
6317 {
6318 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6319 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6320 stacksize++;
6321 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6322 }
6323 else
6324 {
6325 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6326 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6327 }
6328 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6329 }
6330 }
6331 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6332 {
6333 /* Saving the previous values. */
6334 if (common->optimized_cbracket[offset >> 1] != 0)
6335 {
6336 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6337 allocate_stack(common, 2);
6338 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6339 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6340 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6341 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6342 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6343 }
6344 else
6345 {
6346 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6347 allocate_stack(common, 1);
6348 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6349 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6350 }
6351 }
6352 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6353 {
6354 /* Saving the previous value. */
6355 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6356 allocate_stack(common, 1);
6357 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6358 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6359 }
6360 else if (has_alternatives)
6361 {
6362 /* Pushing the starting string pointer. */
6363 allocate_stack(common, 1);
6364 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6365 }
6366
6367 /* Generating code for the first alternative. */
6368 if (opcode == OP_COND || opcode == OP_SCOND)
6369 {
6370 if (*matchingpath == OP_CREF)
6371 {
6372 SLJIT_ASSERT(has_alternatives);
6373 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6374 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6375 matchingpath += 1 + IMM2_SIZE;
6376 }
6377 else if (*matchingpath == OP_NCREF)
6378 {
6379 SLJIT_ASSERT(has_alternatives);
6380 stacksize = GET2(matchingpath, 1);
6381 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6382
6383 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6384 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6385 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6386 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6387 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6388 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6389 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6390 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6391 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6392
6393 JUMPHERE(jump);
6394 matchingpath += 1 + IMM2_SIZE;
6395 }
6396 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6397 {
6398 /* Never has other case. */
6399 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6400
6401 stacksize = GET2(matchingpath, 1);
6402 if (common->currententry == NULL)
6403 stacksize = 0;
6404 else if (stacksize == RREF_ANY)
6405 stacksize = 1;
6406 else if (common->currententry->start == 0)
6407 stacksize = stacksize == 0;
6408 else
6409 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6410
6411 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6412 {
6413 SLJIT_ASSERT(!has_alternatives);
6414 if (stacksize != 0)
6415 matchingpath += 1 + IMM2_SIZE;
6416 else
6417 {
6418 if (*cc == OP_ALT)
6419 {
6420 matchingpath = cc + 1 + LINK_SIZE;
6421 cc += GET(cc, 1);
6422 }
6423 else
6424 matchingpath = cc;
6425 }
6426 }
6427 else
6428 {
6429 SLJIT_ASSERT(has_alternatives);
6430
6431 stacksize = GET2(matchingpath, 1);
6432 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6433 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6434 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6435 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6436 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6437 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6438 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6439 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6440 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6441 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6442 matchingpath += 1 + IMM2_SIZE;
6443 }
6444 }
6445 else
6446 {
6447 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6448 /* Similar code as PUSH_BACKTRACK macro. */
6449 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6450 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6451 return NULL;
6452 memset(assert, 0, sizeof(assert_backtrack));
6453 assert->common.cc = matchingpath;
6454 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6455 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6456 }
6457 }
6458
6459 compile_matchingpath(common, matchingpath, cc, backtrack);
6460 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6461 return NULL;
6462
6463 if (opcode == OP_ONCE)
6464 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6465
6466 stacksize = 0;
6467 if (ket != OP_KET || bra != OP_BRA)
6468 stacksize++;
6469 if (offset != 0)
6470 {
6471 if (common->capture_last_ptr != 0)
6472 stacksize++;
6473 if (common->optimized_cbracket[offset >> 1] == 0)
6474 stacksize += 2;
6475 }
6476 if (has_alternatives && opcode != OP_ONCE)
6477 stacksize++;
6478
6479 if (stacksize > 0)
6480 allocate_stack(common, stacksize);
6481
6482 stacksize = 0;
6483 if (ket != OP_KET || bra != OP_BRA)
6484 {
6485 if (ket != OP_KET)
6486 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6487 else
6488 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6489 stacksize++;
6490 }
6491
6492 if (offset != 0)
6493 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6494
6495 if (has_alternatives)
6496 {
6497 if (opcode != OP_ONCE)
6498 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6499 if (ket != OP_KETRMAX)
6500 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6501 }
6502
6503 /* Must be after the matchingpath label. */
6504 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6505 {
6506 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6508 }
6509
6510 if (ket == OP_KETRMAX)
6511 {
6512 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6513 {
6514 if (has_alternatives)
6515 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6516 /* Checking zero-length iteration. */
6517 if (opcode != OP_ONCE)
6518 {
6519 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6520 /* Drop STR_PTR for greedy plus quantifier. */
6521 if (bra != OP_BRAZERO)
6522 free_stack(common, 1);
6523 }
6524 else
6525 /* TMP2 must contain the starting STR_PTR. */
6526 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6527 }
6528 else
6529 JUMPTO(SLJIT_JUMP, rmaxlabel);
6530 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6531 }
6532
6533 if (bra == OP_BRAZERO)
6534 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6535
6536 if (bra == OP_BRAMINZERO)
6537 {
6538 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6539 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6540 if (braminzerojump != NULL)
6541 {
6542 JUMPHERE(braminzerojump);
6543 /* We need to release the end pointer to perform the
6544 backtrack for the zero-length iteration. When
6545 framesize is < 0, OP_ONCE will do the release itself. */
6546 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6547 {
6548 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6549 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6550 }
6551 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6552 free_stack(common, 1);
6553 }
6554 /* Continue to the normal backtrack. */
6555 }
6556
6557 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6558 decrease_call_count(common);
6559
6560 /* Skip the other alternatives. */
6561 while (*cc == OP_ALT)
6562 cc += GET(cc, 1);
6563 cc += 1 + LINK_SIZE;
6564
6565 /* Temporarily encoding the needs_control_head in framesize. */
6566 if (opcode == OP_ONCE)
6567 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6568 return cc;
6569 }
6570
6571 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6572 {
6573 DEFINE_COMPILER;
6574 backtrack_common *backtrack;
6575 pcre_uchar opcode;
6576 int private_data_ptr;
6577 int cbraprivptr = 0;
6578 BOOL needs_control_head;
6579 int framesize;
6580 int stacksize;
6581 int offset = 0;
6582 BOOL zero = FALSE;
6583 pcre_uchar *ccbegin = NULL;
6584 int stack; /* Also contains the offset of control head. */
6585 struct sljit_label *loop = NULL;
6586 struct jump_list *emptymatch = NULL;
6587
6588 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6589 if (*cc == OP_BRAPOSZERO)
6590 {
6591 zero = TRUE;
6592 cc++;
6593 }
6594
6595 opcode = *cc;
6596 private_data_ptr = PRIVATE_DATA(cc);
6597 SLJIT_ASSERT(private_data_ptr != 0);
6598 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6599 switch(opcode)
6600 {
6601 case OP_BRAPOS:
6602 case OP_SBRAPOS:
6603 ccbegin = cc + 1 + LINK_SIZE;
6604 break;
6605
6606 case OP_CBRAPOS:
6607 case OP_SCBRAPOS:
6608 offset = GET2(cc, 1 + LINK_SIZE);
6609 /* This case cannot be optimized in the same was as
6610 normal capturing brackets. */
6611 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6612 cbraprivptr = OVECTOR_PRIV(offset);
6613 offset <<= 1;
6614 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6615 break;
6616
6617 default:
6618 SLJIT_ASSERT_STOP();
6619 break;
6620 }
6621
6622 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6623 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6624 if (framesize < 0)
6625 {
6626 if (offset != 0)
6627 {
6628 stacksize = 2;
6629 if (common->capture_last_ptr != 0)
6630 stacksize++;
6631 }
6632 else
6633 stacksize = 1;
6634
6635 if (needs_control_head)
6636 stacksize++;
6637 if (!zero)
6638 stacksize++;
6639
6640 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6641 allocate_stack(common, stacksize);
6642 if (framesize == no_frame)
6643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6644
6645 stack = 0;
6646 if (offset != 0)
6647 {
6648 stack = 2;
6649 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6650 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6651 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6652 if (common->capture_last_ptr != 0)
6653 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6654 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6655 if (needs_control_head)
6656 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6657 if (common->capture_last_ptr != 0)
6658 {
6659 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6660 stack = 3;
6661 }
6662 }
6663 else
6664 {
6665 if (needs_control_head)
6666 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6667 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6668 stack = 1;
6669 }
6670
6671 if (needs_control_head)
6672 stack++;
6673 if (!zero)
6674 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
6675 if (needs_control_head)
6676 {
6677 stack--;
6678 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6679 }
6680 }
6681 else
6682 {
6683 stacksize = framesize + 1;
6684 if (!zero)
6685 stacksize++;
6686 if (needs_control_head)
6687 stacksize++;
6688 if (offset == 0)
6689 stacksize++;
6690 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6691
6692 allocate_stack(common, stacksize);
6693 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6694 if (needs_control_head)
6695 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6696 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6697
6698 stack = 0;
6699 if (!zero)
6700 {
6701 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6702 stack = 1;
6703 }
6704 if (needs_control_head)
6705 {
6706 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6707 stack++;
6708 }
6709 if (offset == 0)
6710 {
6711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6712 stack++;
6713 }
6714 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6715 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
6716 stack -= 1 + (offset == 0);
6717 }
6718
6719 if (offset != 0)
6720 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6721
6722 loop = LABEL();
6723 while (*cc != OP_KETRPOS)
6724 {
6725 backtrack->top = NULL;
6726 backtrack->topbacktracks = NULL;
6727 cc += GET(cc, 1);
6728
6729 compile_matchingpath(common, ccbegin, cc, backtrack);
6730 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6731 return NULL;
6732
6733 if (framesize < 0)
6734 {
6735 if (framesize == no_frame)
6736 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6737
6738 if (offset != 0)
6739 {
6740 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6741 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6742 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6743 if (common->capture_last_ptr != 0)
6744 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6745 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);