/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1300 - (show annotations)
Mon Mar 25 12:34:26 2013 UTC (6 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 300382 byte(s)
Error occurred while calculating annotation data.
Control verbs are handled in the same way in JIT and interpreter.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined to a non-zero value. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block for a compiled matcher: the jit_function type declared
later in this file takes a pointer to this structure. Members are ordered
pointers first, then ints, then pcre_uint8 members. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 int real_offset_count;
172 int offset_count;
173 int call_limit;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Holds one executable pointer and one size per compile mode (both arrays
have JIT_NUMBER_OF_COMPILE_MODES slots), together with a callback, its
userdata pointer and a top_bracket value. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
186 } executable_functions;
187
/* Node of a singly linked list of sljit jumps. */
188 typedef struct jump_list {
189 struct sljit_jump *jump;
190 struct jump_list *next;
191 } jump_list;
192
/* Node of a singly linked list; each node pairs a jump (start) with a
label (quit). */
193 typedef struct stub_list {
194 struct sljit_jump *start;
195 struct sljit_label *quit;
196 struct stub_list *next;
197 } stub_list;
198
/* Negative sentinel values. The comment on get_framesize in this file says
it returns a frame_types value (always < 0) when no frame is needed. */
199 enum frame_types {
200 no_frame = -1,
201 no_stack = -2
202 };
203
/* Type tags for control entries (mark and then-trap). */
204 enum control_types {
205 type_mark = 0,
206 type_then_trap = 1
207 };
208
/* Pointer type for a function that takes a jit_arguments block and returns
an int, using the SLJIT_CALL calling convention. */
209 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
210
211 /* The following structure is the key data type for the recursive
212 code generator. It is allocated by compile_matchingpath, and contains
213 the arguments for compile_backtrackingpath. Must be the first member
214 of its descendants. */
215 typedef struct backtrack_common {
216 /* Concatenation stack. */
217 struct backtrack_common *prev;
218 jump_list *nextbacktracks;
219 /* Internal stack (for component operators). */
220 struct backtrack_common *top;
221 jump_list *topbacktracks;
222 /* Opcode pointer. */
223 pcre_uchar *cc;
224 } backtrack_common;
225
/* Backtrack record that extends backtrack_common (embedded as the first
member) with a condfailed jump list, a frame size, a private data offset
and a matching path label. */
226 typedef struct assert_backtrack {
227 backtrack_common common;
228 jump_list *condfailed;
229 /* Less than 0 if a frame is not needed. */
230 int framesize;
231 /* Points to our private memory word on the stack. */
232 int private_data_ptr;
233 /* For iterators. */
234 struct sljit_label *matchingpath;
235 } assert_backtrack;
236
/* Backtrack record for brackets; extends backtrack_common (first member).
The union u holds one of three values depending on the opcode, as noted on
each member. */
237 typedef struct bracket_backtrack {
238 backtrack_common common;
239 /* Where to continue if an alternative is successfully matched. */
240 struct sljit_label *alternative_matchingpath;
241 /* For rmin and rmax iterators. */
242 struct sljit_label *recursive_matchingpath;
243 /* For greedy ? operator. */
244 struct sljit_label *zero_matchingpath;
245 /* Contains the branches of a failed condition. */
246 union {
247 /* Both for OP_COND, OP_SCOND. */
248 jump_list *condfailed;
249 assert_backtrack *assert;
250 /* For OP_ONCE. Less than 0 if not needed. */
251 int framesize;
252 } u;
253 /* Points to our private memory word on the stack. */
254 int private_data_ptr;
255 } bracket_backtrack;
256
/* Backtrack record extending backtrack_common (first member) with a
private data offset, a frame size and an allocated stack size. */
257 typedef struct bracketpos_backtrack {
258 backtrack_common common;
259 /* Points to our private memory word on the stack. */
260 int private_data_ptr;
261 /* Reverting stack is needed. */
262 int framesize;
263 /* Allocated stack size. */
264 int stacksize;
265 } bracketpos_backtrack;
266
/* Backtrack record extending backtrack_common (first member) with a single
matching path label. */
267 typedef struct braminzero_backtrack {
268 backtrack_common common;
269 struct sljit_label *matchingpath;
270 } braminzero_backtrack;
271
/* Backtrack record extending backtrack_common (first member) with the
label of the next iteration. */
272 typedef struct iterator_backtrack {
273 backtrack_common common;
274 /* Next iteration. */
275 struct sljit_label *matchingpath;
276 } iterator_backtrack;
277
/* Node of a singly linked list of recursion entries. */
278 typedef struct recurse_entry {
279 struct recurse_entry *next;
280 /* Contains the function entry. */
281 struct sljit_label *entry;
282 /* Collects the calls until the function is created. */
283 jump_list *calls;
284 /* Points to the starting opcode. */
285 sljit_sw start;
286 } recurse_entry;
287
/* Backtrack record extending backtrack_common (first member) with an
inlined_pattern flag. */
288 typedef struct recurse_backtrack {
289 backtrack_common common;
290 BOOL inlined_pattern;
291 } recurse_backtrack;
292
/* Extra opcode value for then traps, set to OP_TABLE_LENGTH. */
293 #define OP_THEN_TRAP OP_TABLE_LENGTH
294
/* Backtrack record for a then trap; extends backtrack_common (first
member). */
295 typedef struct then_trap_backtrack {
296 backtrack_common common;
297 /* If then_trap is not NULL, this structure contains the real
298 then_trap for the backtracking path. */
299 struct then_trap_backtrack *then_trap;
300 /* Points to the starting opcode. */
301 sljit_sw start;
302 /* Exit point for the then opcodes of this alternative. */
303 jump_list *quit;
304 /* Frame size of the current alternative. */
305 int framesize;
306 } then_trap_backtrack;
307
/* Used below to size the digits[] array of compiler_common
(2 + MAX_RANGE_SIZE entries). */
308 #define MAX_RANGE_SIZE 6
309
/* Shared state of one compilation. The helper functions in this file
receive it as their `common` argument, and several macros (OVECTOR_START,
OVECTOR_PRIV, PRIVATE_DATA, DEFINE_COMPILER) read its members through a
variable named `common`. */
310 typedef struct compiler_common {
311 /* The sljit generic compiler. */
312 struct sljit_compiler *compiler;
313 /* First byte code. */
314 pcre_uchar *start;
315 /* Maps private data offset to each opcode. */
316 int *private_data_ptrs;
317 /* Tells whether the capturing bracket is optimized. */
318 pcre_uint8 *optimized_cbracket;
319 /* Tells whether the starting offset is a target of then. */
320 pcre_uint8 *then_offsets;
321 /* Current position where a THEN must jump. */
322 then_trap_backtrack *then_trap;
323 /* Starting offset of private data for capturing brackets. */
324 int cbra_ptr;
325 /* Output vector starting point. Must be divisible by 2. */
326 int ovector_start;
327 /* Last known position of the requested byte. */
328 int req_char_ptr;
329 /* Head of the last recursion. */
330 int recursive_head_ptr;
331 /* First inspected character for partial matching. */
332 int start_used_ptr;
333 /* Starting pointer for partial soft matches. */
334 int hit_start;
335 /* End pointer of the first line. */
336 int first_line_end;
337 /* Points to the marked string. */
338 int mark_ptr;
339 /* Recursive control verb management chain. */
340 int control_head_ptr;
341 /* Points to the last matched capture block index. */
342 int capture_last_ptr;
343 /* Points to the starting position of the current match. */
344 int start_ptr;
345
346 /* Flipped and lower case tables. */
347 const pcre_uint8 *fcc;
348 sljit_sw lcc;
349 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
350 int mode;
351 /* \K is found in the pattern. */
352 BOOL has_set_som;
353 /* (*SKIP:arg) is found in the pattern. */
354 BOOL has_skip_arg;
355 /* (*THEN) is found in the pattern. */
356 BOOL has_then;
357 /* Needs to know the start position anytime. */
358 BOOL needs_start_ptr;
359 /* Currently in recurse. */
360 BOOL local_exit;
361 /* Currently in assert. */
362 int then_local_exit;
363 /* Newline control. */
364 int nltype;
365 int newline;
366 int bsr_nltype;
367 /* Dollar endonly. */
368 int endonly;
369 /* Tables. */
370 sljit_sw ctypes;
371 int digits[2 + MAX_RANGE_SIZE];
372 /* Named capturing brackets. */
373 sljit_uw name_table;
374 sljit_sw name_count;
375 sljit_sw name_entry_size;
376
377 /* Labels and jump lists. */
378 struct sljit_label *partialmatchlabel;
379 struct sljit_label *quit_label;
380 struct sljit_label *forced_quit_label;
381 struct sljit_label *accept_label;
382 stub_list *stubs;
383 recurse_entry *entries;
384 recurse_entry *currententry;
385 jump_list *partialmatch;
386 jump_list *quit;
387 jump_list *then_quit;
388 jump_list *forced_quit;
389 jump_list *accept;
390 jump_list *calllimit;
391 jump_list *stackalloc;
392 jump_list *revertframes;
393 jump_list *wordboundary;
394 jump_list *anynewline;
395 jump_list *hspace;
396 jump_list *vspace;
397 jump_list *casefulcmp;
398 jump_list *caselesscmp;
399 jump_list *reset_match;
400 BOOL jscript_compat;
/* The members below exist only in builds with the matching SUPPORT_ and
COMPILE_PCRE macros defined. */
401 #ifdef SUPPORT_UTF
402 BOOL utf;
403 #ifdef SUPPORT_UCP
404 BOOL use_ucp;
405 #endif
406 #ifndef COMPILE_PCRE32
407 jump_list *utfreadchar;
408 #endif
409 #ifdef COMPILE_PCRE8
410 jump_list *utfreadtype8;
411 #endif
412 #endif /* SUPPORT_UTF */
413 #ifdef SUPPORT_UCP
414 jump_list *getucd;
415 #endif
416 } compiler_common;
417
418 /* For byte_sequence_compare. */
419
/* Only length and sourcereg exist in every build. The remaining members
are compiled in when SLJIT_UNALIGNED is defined to a non-zero value. The two
unions c and oc have identical layouts: the same storage viewed as an int,
an unsigned short, or an array of code units whose element type and count
depend on the COMPILE_PCRE8/16/32 build. */
420 typedef struct compare_context {
421 int length;
422 int sourcereg;
423 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
424 int ucharptr;
425 union {
426 sljit_si asint;
427 sljit_uh asushort;
428 #if defined COMPILE_PCRE8
429 sljit_ub asbyte;
430 sljit_ub asuchars[4];
431 #elif defined COMPILE_PCRE16
432 sljit_uh asuchars[2];
433 #elif defined COMPILE_PCRE32
434 sljit_ui asuchars[1];
435 #endif
436 } c;
437 union {
438 sljit_si asint;
439 sljit_uh asushort;
440 #if defined COMPILE_PCRE8
441 sljit_ub asbyte;
442 sljit_ub asuchars[4];
443 #elif defined COMPILE_PCRE16
444 sljit_uh asuchars[2];
445 #elif defined COMPILE_PCRE32
446 sljit_ui asuchars[1];
447 #endif
448 } oc;
449 #endif
450 } compare_context;
451
452 /* Undefine sljit macros. CMP is defined again further down in this file
as a shortcut for sljit_emit_cmp. */
453 #undef CMP
454
455 /* Used for accessing the elements of the stack. Yields a negative byte
offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw),
and so on. */
456 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
457
/* Symbolic names for the sljit registers used in this file. */
458 #define TMP1 SLJIT_SCRATCH_REG1
459 #define TMP2 SLJIT_SCRATCH_REG3
460 #define TMP3 SLJIT_TEMPORARY_EREG2
461 #define STR_PTR SLJIT_SAVED_REG1
462 #define STR_END SLJIT_SAVED_REG2
463 #define STACK_TOP SLJIT_SCRATCH_REG2
464 #define STACK_LIMIT SLJIT_SAVED_REG3
465 #define ARGUMENTS SLJIT_SAVED_EREG1
466 #define CALL_COUNT SLJIT_SAVED_EREG2
467 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
468
469 /* Local space layout. All offsets are in bytes, in units of
sizeof(sljit_sw). */
470 /* These two locals can be used by the current opcode. */
471 #define LOCALS0 (0 * sizeof(sljit_sw))
472 #define LOCALS1 (1 * sizeof(sljit_sw))
473 /* Two local variables for possessive quantifiers (char1 cannot use them). */
474 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
475 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
476 /* Max limit of recursions. */
477 #define CALL_LIMIT (4 * sizeof(sljit_sw))
478 /* The output vector is stored on the stack, and contains pointers
479 to characters. The vector data is divided into two groups: the first
480 group contains the start / end character pointers, and the second is
481 the start pointers when the end of the capturing group has not yet been reached. */
/* The four macros below expand to expressions that read a variable named
`common`, so they can only be used where one is in scope. */
482 #define OVECTOR_START (common->ovector_start)
483 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
484 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))
/* Looks up the private data offset recorded for the opcode at cc; the
table is indexed by the opcode's distance from common->start. */
485 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
486
/* Select the sljit move opcodes (MOV and MOVU variants) for the build's
code unit width: UB for the 8-bit library, UH for 16-bit, UI for 32-bit.
The build fails if none of the COMPILE_PCRE macros is defined. */
487 #if defined COMPILE_PCRE8
488 #define MOV_UCHAR SLJIT_MOV_UB
489 #define MOVU_UCHAR SLJIT_MOVU_UB
490 #elif defined COMPILE_PCRE16
491 #define MOV_UCHAR SLJIT_MOV_UH
492 #define MOVU_UCHAR SLJIT_MOVU_UH
493 #elif defined COMPILE_PCRE32
494 #define MOV_UCHAR SLJIT_MOV_UI
495 #define MOVU_UCHAR SLJIT_MOVU_UI
496 #else
497 #error Unsupported compiling mode
498 #endif
499
500 /* Shortcuts. Every macro except DEFINE_COMPILER expands to an sljit call
that passes a variable named `compiler`; DEFINE_COMPILER declares that
variable from common->compiler. */
501 #define DEFINE_COMPILER \
502 struct sljit_compiler *compiler = common->compiler
503 #define OP1(op, dst, dstw, src, srcw) \
504 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
505 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
506 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
507 #define LABEL() \
508 sljit_emit_label(compiler)
509 #define JUMP(type) \
510 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
511 #define JUMPTO(type, label) \
512 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds the jump to it. */
513 #define JUMPHERE(jump) \
514 sljit_set_label((jump), sljit_emit_label(compiler))
515 #define SET_LABEL(jump, label) \
516 sljit_set_label((jump), (label))
517 #define CMP(type, src1, src1w, src2, src2w) \
518 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
519 #define CMPTO(type, src1, src1w, src2, src2w, label) \
520 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
521 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
522 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
523 #define GET_LOCAL_BASE(dst, dstw, offset) \
524 sljit_get_local_base(compiler, (dst), (dstw), (offset))
525
526 static pcre_uchar* bracketend(pcre_uchar* cc)
527 {
528 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
529 do cc += GET(cc, 1); while (*cc == OP_ALT);
530 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
531 cc += 1 + LINK_SIZE;
532 return cc;
533 }
534
535 /* Functions which might need modification for all new supported opcodes:
536 next_opcode
537 get_private_data_length
538 set_private_data_ptrs
539 get_framesize
540 init_frame
541 get_private_data_copy_length
542 copy_private_data
543 compile_matchingpath
544 compile_backtrackingpath
545 */
546
/* Returns a pointer to the opcode that follows the one at cc, or NULL when
the opcode cannot be handled: OP_ANYBYTE when SUPPORT_UTF is defined and
common->utf is set, and any opcode not listed in the switch (which also
triggers SLJIT_ASSERT_STOP). common is only read in SUPPORT_UTF builds. */
547 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
548 {
549 SLJIT_UNUSED_ARG(common);
550 switch(*cc)
551 {
/* Opcodes whose full length is given by the OP_lengths table. */
552 case OP_SOD:
553 case OP_SOM:
554 case OP_SET_SOM:
555 case OP_NOT_WORD_BOUNDARY:
556 case OP_WORD_BOUNDARY:
557 case OP_NOT_DIGIT:
558 case OP_DIGIT:
559 case OP_NOT_WHITESPACE:
560 case OP_WHITESPACE:
561 case OP_NOT_WORDCHAR:
562 case OP_WORDCHAR:
563 case OP_ANY:
564 case OP_ALLANY:
565 case OP_NOTPROP:
566 case OP_PROP:
567 case OP_ANYNL:
568 case OP_NOT_HSPACE:
569 case OP_HSPACE:
570 case OP_NOT_VSPACE:
571 case OP_VSPACE:
572 case OP_EXTUNI:
573 case OP_EODN:
574 case OP_EOD:
575 case OP_CIRC:
576 case OP_CIRCM:
577 case OP_DOLL:
578 case OP_DOLLM:
579 case OP_CRSTAR:
580 case OP_CRMINSTAR:
581 case OP_CRPLUS:
582 case OP_CRMINPLUS:
583 case OP_CRQUERY:
584 case OP_CRMINQUERY:
585 case OP_CRRANGE:
586 case OP_CRMINRANGE:
587 case OP_CLASS:
588 case OP_NCLASS:
589 case OP_REF:
590 case OP_REFI:
591 case OP_RECURSE:
592 case OP_CALLOUT:
593 case OP_ALT:
594 case OP_KET:
595 case OP_KETRMAX:
596 case OP_KETRMIN:
597 case OP_KETRPOS:
598 case OP_REVERSE:
599 case OP_ASSERT:
600 case OP_ASSERT_NOT:
601 case OP_ASSERTBACK:
602 case OP_ASSERTBACK_NOT:
603 case OP_ONCE:
604 case OP_ONCE_NC:
605 case OP_BRA:
606 case OP_BRAPOS:
607 case OP_CBRA:
608 case OP_CBRAPOS:
609 case OP_COND:
610 case OP_SBRA:
611 case OP_SBRAPOS:
612 case OP_SCBRA:
613 case OP_SCBRAPOS:
614 case OP_SCOND:
615 case OP_CREF:
616 case OP_NCREF:
617 case OP_RREF:
618 case OP_NRREF:
619 case OP_DEF:
620 case OP_BRAZERO:
621 case OP_BRAMINZERO:
622 case OP_BRAPOSZERO:
623 case OP_PRUNE:
624 case OP_SKIP:
625 case OP_THEN:
626 case OP_COMMIT:
627 case OP_FAIL:
628 case OP_ACCEPT:
629 case OP_ASSERT_ACCEPT:
630 case OP_CLOSE:
631 case OP_SKIPZERO:
632 return cc + PRIV(OP_lengths)[*cc];
633
/* Opcodes that end in a character: the table length is used, and in UTF
mode the extra code units reported for the last unit are skipped as well. */
634 case OP_CHAR:
635 case OP_CHARI:
636 case OP_NOT:
637 case OP_NOTI:
638 case OP_STAR:
639 case OP_MINSTAR:
640 case OP_PLUS:
641 case OP_MINPLUS:
642 case OP_QUERY:
643 case OP_MINQUERY:
644 case OP_UPTO:
645 case OP_MINUPTO:
646 case OP_EXACT:
647 case OP_POSSTAR:
648 case OP_POSPLUS:
649 case OP_POSQUERY:
650 case OP_POSUPTO:
651 case OP_STARI:
652 case OP_MINSTARI:
653 case OP_PLUSI:
654 case OP_MINPLUSI:
655 case OP_QUERYI:
656 case OP_MINQUERYI:
657 case OP_UPTOI:
658 case OP_MINUPTOI:
659 case OP_EXACTI:
660 case OP_POSSTARI:
661 case OP_POSPLUSI:
662 case OP_POSQUERYI:
663 case OP_POSUPTOI:
664 case OP_NOTSTAR:
665 case OP_NOTMINSTAR:
666 case OP_NOTPLUS:
667 case OP_NOTMINPLUS:
668 case OP_NOTQUERY:
669 case OP_NOTMINQUERY:
670 case OP_NOTUPTO:
671 case OP_NOTMINUPTO:
672 case OP_NOTEXACT:
673 case OP_NOTPOSSTAR:
674 case OP_NOTPOSPLUS:
675 case OP_NOTPOSQUERY:
676 case OP_NOTPOSUPTO:
677 case OP_NOTSTARI:
678 case OP_NOTMINSTARI:
679 case OP_NOTPLUSI:
680 case OP_NOTMINPLUSI:
681 case OP_NOTQUERYI:
682 case OP_NOTMINQUERYI:
683 case OP_NOTUPTOI:
684 case OP_NOTMINUPTOI:
685 case OP_NOTEXACTI:
686 case OP_NOTPOSSTARI:
687 case OP_NOTPOSPLUSI:
688 case OP_NOTPOSQUERYI:
689 case OP_NOTPOSUPTOI:
690 cc += PRIV(OP_lengths)[*cc];
691 #ifdef SUPPORT_UTF
692 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
693 #endif
694 return cc;
695
696 /* Special cases. The result is one code unit short of the table length.
NOTE(review): presumably so that the character type opcode that ends the
item is visited as an opcode of its own; confirm against the callers. */
697 case OP_TYPESTAR:
698 case OP_TYPEMINSTAR:
699 case OP_TYPEPLUS:
700 case OP_TYPEMINPLUS:
701 case OP_TYPEQUERY:
702 case OP_TYPEMINQUERY:
703 case OP_TYPEUPTO:
704 case OP_TYPEMINUPTO:
705 case OP_TYPEEXACT:
706 case OP_TYPEPOSSTAR:
707 case OP_TYPEPOSPLUS:
708 case OP_TYPEPOSQUERY:
709 case OP_TYPEPOSUPTO:
710 return cc + PRIV(OP_lengths)[*cc] - 1;
711
/* Rejected (NULL) in UTF mode, otherwise a single code unit. */
712 case OP_ANYBYTE:
713 #ifdef SUPPORT_UTF
714 if (common->utf) return NULL;
715 #endif
716 return cc + 1;
717
/* The total length of an extended class is stored in its link field. */
718 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
719 case OP_XCLASS:
720 return cc + GET(cc, 1);
721 #endif
722
/* Verbs with an argument: cc[1] is added to a fixed overhead of 3 units. */
723 case OP_MARK:
724 case OP_PRUNE_ARG:
725 case OP_SKIP_ARG:
726 case OP_THEN_ARG:
727 return cc + 1 + 2 + cc[1];
728
729 default:
730 /* All opcodes are supported now! */
731 SLJIT_ASSERT_STOP();
732 return NULL;
733 }
734 }
735
/* Groups of case labels shared by get_private_data_length and
set_private_data_ptrs. In both functions the _1 groups reserve one
sljit_sw of private data (space = 1) and the _2A / _2B groups reserve two.
The A and B variants differ in how many code units those functions skip
after the opcode: the B variants add IMM2_SIZE. */
736 #define CASE_ITERATOR_PRIVATE_DATA_1 \
737 case OP_MINSTAR: \
738 case OP_MINPLUS: \
739 case OP_QUERY: \
740 case OP_MINQUERY: \
741 case OP_MINSTARI: \
742 case OP_MINPLUSI: \
743 case OP_QUERYI: \
744 case OP_MINQUERYI: \
745 case OP_NOTMINSTAR: \
746 case OP_NOTMINPLUS: \
747 case OP_NOTQUERY: \
748 case OP_NOTMINQUERY: \
749 case OP_NOTMINSTARI: \
750 case OP_NOTMINPLUSI: \
751 case OP_NOTQUERYI: \
752 case OP_NOTMINQUERYI:
753
754 #define CASE_ITERATOR_PRIVATE_DATA_2A \
755 case OP_STAR: \
756 case OP_PLUS: \
757 case OP_STARI: \
758 case OP_PLUSI: \
759 case OP_NOTSTAR: \
760 case OP_NOTPLUS: \
761 case OP_NOTSTARI: \
762 case OP_NOTPLUSI:
763
764 #define CASE_ITERATOR_PRIVATE_DATA_2B \
765 case OP_UPTO: \
766 case OP_MINUPTO: \
767 case OP_UPTOI: \
768 case OP_MINUPTOI: \
769 case OP_NOTUPTO: \
770 case OP_NOTMINUPTO: \
771 case OP_NOTUPTOI: \
772 case OP_NOTMINUPTOI:
773
/* The same three groups for the character type (OP_TYPE...) iterators. */
774 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
775 case OP_TYPEMINSTAR: \
776 case OP_TYPEMINPLUS: \
777 case OP_TYPEQUERY: \
778 case OP_TYPEMINQUERY:
779
780 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
781 case OP_TYPESTAR: \
782 case OP_TYPEPLUS:
783
784 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
785 case OP_TYPEUPTO: \
786 case OP_TYPEMINUPTO:
787
788 static int get_class_iterator_size(pcre_uchar *cc)
789 {
790 switch(*cc)
791 {
792 case OP_CRSTAR:
793 case OP_CRPLUS:
794 return 2;
795
796 case OP_CRMINSTAR:
797 case OP_CRMINPLUS:
798 case OP_CRQUERY:
799 case OP_CRMINQUERY:
800 return 1;
801
802 case OP_CRRANGE:
803 case OP_CRMINRANGE:
804 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
805 return 0;
806 return 2;
807
808 default:
809 return 0;
810 }
811 }
812
/* Scans the byte code from cc up to ccend and returns the number of bytes
of private data the opcodes need (always a multiple of sizeof(sljit_sw)).
Returns -1 when the pattern cannot be handled: a condition whose first
item is OP_CALLOUT, or an opcode for which next_opcode returns NULL.

Side effects on common while scanning:
  - sets has_set_som, has_then, has_skip_arg and needs_start_ptr;
  - sets control_head_ptr to 1 for OP_THEN, OP_THEN_ARG and OP_SKIP_ARG;
  - clears optimized_cbracket[] entries for referenced and possessive
    capturing brackets;
  - on first use, assigns recursive_head_ptr, capture_last_ptr and
    mark_ptr from ovector_start and advances ovector_start by one word.

Per opcode, `space` is the number of words to reserve, `size` is the
number of code units to skip afterwards (a negative value means the item
ends in a character, so UTF extra units are skipped too) and `bracketlen`
is the header length of an opening bracket. */
813 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
814 {
815 int private_data_length = 0;
816 pcre_uchar *alternative;
817 pcre_uchar *name;
818 pcre_uchar *end = NULL;
819 int space, size, i;
820 pcre_uint32 bracketlen;
821
822 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
823 while (cc < ccend)
824 {
825 space = 0;
826 size = 0;
827 bracketlen = 0;
828 switch(*cc)
829 {
830 case OP_SET_SOM:
831 common->has_set_som = TRUE;
832 cc += 1;
833 break;
834
835 case OP_REF:
836 case OP_REFI:
837 common->optimized_cbracket[GET2(cc, 1)] = 0;
838 cc += 1 + IMM2_SIZE;
839 break;
840
/* Brackets that always get one private word. */
841 case OP_ASSERT:
842 case OP_ASSERT_NOT:
843 case OP_ASSERTBACK:
844 case OP_ASSERTBACK_NOT:
845 case OP_ONCE:
846 case OP_ONCE_NC:
847 case OP_BRAPOS:
848 case OP_SBRA:
849 case OP_SBRAPOS:
850 private_data_length += sizeof(sljit_sw);
851 bracketlen = 1 + LINK_SIZE;
852 break;
853
854 case OP_CBRAPOS:
855 case OP_SCBRAPOS:
856 private_data_length += sizeof(sljit_sw);
857 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
858 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
859 break;
860
861 case OP_COND:
862 case OP_SCOND:
863 /* Only AUTO_CALLOUT can insert this opcode. We do
864 not intend to support this case. */
865 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
866 return -1;
867
868 if (*cc == OP_COND)
869 {
870 /* Might be a hidden SCOND. */
871 alternative = cc + GET(cc, 1);
872 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
873 private_data_length += sizeof(sljit_sw);
874 }
875 else
876 private_data_length += sizeof(sljit_sw);
877 bracketlen = 1 + LINK_SIZE;
878 break;
879
880 case OP_CREF:
881 i = GET2(cc, 1);
882 common->optimized_cbracket[i] = 0;
883 cc += 1 + IMM2_SIZE;
884 break;
885
886 case OP_NCREF:
/* bracketlen temporarily holds the group number being looked up; it is
reset to 0 before leaving this case. */
887 bracketlen = GET2(cc, 1);
888 name = (pcre_uchar *)common->name_table;
889 alternative = name;
/* First pass: find the name table entry with this group number. */
890 for (i = 0; i < common->name_count; i++)
891 {
892 if (GET2(name, 0) == bracketlen) break;
893 name += common->name_entry_size;
894 }
895 SLJIT_ASSERT(i != common->name_count);
896
/* Second pass: clear the optimized flag of every group that has the same
name as the entry found above. */
897 for (i = 0; i < common->name_count; i++)
898 {
899 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
900 common->optimized_cbracket[GET2(alternative, 0)] = 0;
901 alternative += common->name_entry_size;
902 }
903 bracketlen = 0;
904 cc += 1 + IMM2_SIZE;
905 break;
906
907 case OP_BRA:
908 bracketlen = 1 + LINK_SIZE;
909 break;
910
911 case OP_CBRA:
912 case OP_SCBRA:
913 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
914 break;
915
916 CASE_ITERATOR_PRIVATE_DATA_1
917 space = 1;
918 size = -2;
919 break;
920
921 CASE_ITERATOR_PRIVATE_DATA_2A
922 space = 2;
923 size = -2;
924 break;
925
926 CASE_ITERATOR_PRIVATE_DATA_2B
927 space = 2;
928 size = -(2 + IMM2_SIZE);
929 break;
930
931 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
932 space = 1;
933 size = 1;
934 break;
935
/* For the two groups below no space is reserved when the repeated type is
OP_ANYNL or OP_EXTUNI. */
936 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
937 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
938 space = 2;
939 size = 1;
940 break;
941
942 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
943 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
944 space = 2;
945 size = 1 + IMM2_SIZE;
946 break;
947
/* Classes: the space depends on the repeat opcode that follows the class
data. */
948 case OP_CLASS:
949 case OP_NCLASS:
950 size += 1 + 32 / sizeof(pcre_uchar);
951 space = get_class_iterator_size(cc + size);
952 break;
953
954 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
955 case OP_XCLASS:
956 size = GET(cc, 1);
957 space = get_class_iterator_size(cc + size);
958 break;
959 #endif
960
961 case OP_RECURSE:
962 /* Set its value only once. */
963 if (common->recursive_head_ptr == 0)
964 {
965 common->recursive_head_ptr = common->ovector_start;
966 common->ovector_start += sizeof(sljit_sw);
967 }
968 cc += 1 + LINK_SIZE;
969 break;
970
971 case OP_CALLOUT:
972 if (common->capture_last_ptr == 0)
973 {
974 common->capture_last_ptr = common->ovector_start;
975 common->ovector_start += sizeof(sljit_sw);
976 }
977 cc += 2 + 2 * LINK_SIZE;
978 break;
979
980 case OP_THEN_ARG:
981 common->has_then = TRUE;
982 common->control_head_ptr = 1;
983 /* Fall through. */
984
985 case OP_PRUNE_ARG:
986 common->needs_start_ptr = TRUE;
987 /* Fall through. */
988
989 case OP_MARK:
990 if (common->mark_ptr == 0)
991 {
992 common->mark_ptr = common->ovector_start;
993 common->ovector_start += sizeof(sljit_sw);
994 }
995 cc += 1 + 2 + cc[1];
996 break;
997
998 case OP_THEN:
999 common->has_then = TRUE;
1000 common->control_head_ptr = 1;
1001 /* Fall through. */
1002
1003 case OP_PRUNE:
1004 case OP_SKIP:
1005 common->needs_start_ptr = TRUE;
1006 cc += 1;
1007 break;
1008
1009 case OP_SKIP_ARG:
1010 common->control_head_ptr = 1;
1011 common->has_skip_arg = TRUE;
1012 cc += 1 + 2 + cc[1];
1013 break;
1014
1015 default:
1016 cc = next_opcode(common, cc);
1017 if (cc == NULL)
1018 return -1;
1019 break;
1020 }
1021
/* Iterators located before `end` get no private data. `end` is only
non-NULL while scanning inside a bracket whose closing opcode is not a
plain OP_KET (see below). */
1022 if (space > 0 && cc >= end)
1023 private_data_length += sizeof(sljit_sw) * space;
1024
1025 if (size != 0)
1026 {
1027 if (size < 0)
1028 {
1029 cc += -size;
1030 #ifdef SUPPORT_UTF
1031 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1032 #endif
1033 }
1034 else
1035 cc += size;
1036 }
1037
1038 if (bracketlen != 0)
1039 {
/* On entering a bracket outside the current `end` range, remember where
the bracket ends unless it closes with a plain OP_KET. */
1040 if (cc >= end)
1041 {
1042 end = bracketend(cc);
1043 if (end[-1 - LINK_SIZE] == OP_KET)
1044 end = NULL;
1045 }
1046 cc += bracketlen;
1047 }
1048 }
1049 return private_data_length;
1050 }
1051
/* Walks the byte code from common->start up to ccend and records, in
common->private_data_ptrs[], the private data offset of every opcode that
needs one. Offsets are handed out sequentially starting at
private_data_ptr, one sljit_sw per word. The opcode cases and the
space / size / bracketlen bookkeeping mirror get_private_data_length, so
any change to which opcodes reserve space must be made in both functions.
Unlike that function, an unsupported opcode is only caught by an
assertion here. */
1052 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
1053 {
1054 pcre_uchar *cc = common->start;
1055 pcre_uchar *alternative;
1056 pcre_uchar *end = NULL;
1057 int space, size, bracketlen;
1058
1059 while (cc < ccend)
1060 {
1061 space = 0;
1062 size = 0;
1063 bracketlen = 0;
1064 switch(*cc)
1065 {
/* Brackets that always get one private word. */
1066 case OP_ASSERT:
1067 case OP_ASSERT_NOT:
1068 case OP_ASSERTBACK:
1069 case OP_ASSERTBACK_NOT:
1070 case OP_ONCE:
1071 case OP_ONCE_NC:
1072 case OP_BRAPOS:
1073 case OP_SBRA:
1074 case OP_SBRAPOS:
1075 case OP_SCOND:
1076 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1077 private_data_ptr += sizeof(sljit_sw);
1078 bracketlen = 1 + LINK_SIZE;
1079 break;
1080
1081 case OP_CBRAPOS:
1082 case OP_SCBRAPOS:
1083 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1084 private_data_ptr += sizeof(sljit_sw);
1085 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1086 break;
1087
1088 case OP_COND:
1089 /* Might be a hidden SCOND. */
1090 alternative = cc + GET(cc, 1);
1091 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1092 {
1093 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1094 private_data_ptr += sizeof(sljit_sw);
1095 }
1096 bracketlen = 1 + LINK_SIZE;
1097 break;
1098
1099 case OP_BRA:
1100 bracketlen = 1 + LINK_SIZE;
1101 break;
1102
1103 case OP_CBRA:
1104 case OP_SCBRA:
1105 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1106 break;
1107
1108 CASE_ITERATOR_PRIVATE_DATA_1
1109 space = 1;
1110 size = -2;
1111 break;
1112
1113 CASE_ITERATOR_PRIVATE_DATA_2A
1114 space = 2;
1115 size = -2;
1116 break;
1117
1118 CASE_ITERATOR_PRIVATE_DATA_2B
1119 space = 2;
1120 size = -(2 + IMM2_SIZE);
1121 break;
1122
1123 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1124 space = 1;
1125 size = 1;
1126 break;
1127
/* For the two groups below no space is reserved when the repeated type is
OP_ANYNL or OP_EXTUNI. */
1128 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1129 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1130 space = 2;
1131 size = 1;
1132 break;
1133
1134 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1135 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1136 space = 2;
1137 size = 1 + IMM2_SIZE;
1138 break;
1139
/* Classes: the space depends on the repeat opcode that follows the class
data. */
1140 case OP_CLASS:
1141 case OP_NCLASS:
1142 size += 1 + 32 / sizeof(pcre_uchar);
1143 space = get_class_iterator_size(cc + size);
1144 break;
1145
1146 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1147 case OP_XCLASS:
1148 size = GET(cc, 1);
1149 space = get_class_iterator_size(cc + size);
1150 break;
1151 #endif
1152
1153 default:
1154 cc = next_opcode(common, cc);
1155 SLJIT_ASSERT(cc != NULL);
1156 break;
1157 }
1158
/* The offset is stored at the index of the iterator opcode itself: cc has
not been advanced yet for the cases that set space. Iterators located
before `end` are skipped. */
1159 if (space > 0 && cc >= end)
1160 {
1161 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1162 private_data_ptr += sizeof(sljit_sw) * space;
1163 }
1164
1165 if (size != 0)
1166 {
1167 if (size < 0)
1168 {
1169 cc += -size;
1170 #ifdef SUPPORT_UTF
1171 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1172 #endif
1173 }
1174 else
1175 cc += size;
1176 }
1177
1178 if (bracketlen > 0)
1179 {
/* On entering a bracket outside the current `end` range, remember where
the bracket ends unless it closes with a plain OP_KET. */
1180 if (cc >= end)
1181 {
1182 end = bracketend(cc);
1183 if (end[-1 - LINK_SIZE] == OP_KET)
1184 end = NULL;
1185 }
1186 cc += bracketlen;
1187 }
1188 }
1189 }
1190
1191 /* Returns with a frame_types (always < 0) if no need for frame. */
1192 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1193 {
1194 int length = 0;
1195 int possessive = 0;
1196 BOOL stack_restore = FALSE;
1197 BOOL setsom_found = recursive;
1198 BOOL setmark_found = recursive;
1199 /* The last capture is a local variable even for recursions. */
1200 BOOL capture_last_found = FALSE;
1201
1202 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1203 SLJIT_ASSERT(common->control_head_ptr != 0);
1204 *needs_control_head = TRUE;
1205 #else
1206 *needs_control_head = FALSE;
1207 #endif
1208
1209 if (ccend == NULL)
1210 {
1211 ccend = bracketend(cc) - (1 + LINK_SIZE);
1212 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1213 {
1214 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1215 /* This is correct regardless of common->capture_last_ptr. */
1216 capture_last_found = TRUE;
1217 }
1218 cc = next_opcode(common, cc);
1219 }
1220
1221 SLJIT_ASSERT(cc != NULL);
1222 while (cc < ccend)
1223 switch(*cc)
1224 {
1225 case OP_SET_SOM:
1226 SLJIT_ASSERT(common->has_set_som);
1227 stack_restore = TRUE;
1228 if (!setsom_found)
1229 {
1230 length += 2;
1231 setsom_found = TRUE;
1232 }
1233 cc += 1;
1234 break;
1235
1236 case OP_MARK:
1237 case OP_PRUNE_ARG:
1238 case OP_THEN_ARG:
1239 SLJIT_ASSERT(common->mark_ptr != 0);
1240 stack_restore = TRUE;
1241 if (!setmark_found)
1242 {
1243 length += 2;
1244 setmark_found = TRUE;
1245 }
1246 if (common->control_head_ptr != 0)
1247 *needs_control_head = TRUE;
1248 cc += 1 + 2 + cc[1];
1249 break;
1250
1251 case OP_RECURSE:
1252 stack_restore = TRUE;
1253 if (common->has_set_som && !setsom_found)
1254 {
1255 length += 2;
1256 setsom_found = TRUE;
1257 }
1258 if (common->mark_ptr != 0 && !setmark_found)
1259 {
1260 length += 2;
1261 setmark_found = TRUE;
1262 }
1263 if (common->capture_last_ptr != 0 && !capture_last_found)
1264 {
1265 length += 2;
1266 capture_last_found = TRUE;
1267 }
1268 cc += 1 + LINK_SIZE;
1269 break;
1270
1271 case OP_CBRA:
1272 case OP_CBRAPOS:
1273 case OP_SCBRA:
1274 case OP_SCBRAPOS:
1275 stack_restore = TRUE;
1276 if (common->capture_last_ptr != 0 && !capture_last_found)
1277 {
1278 length += 2;
1279 capture_last_found = TRUE;
1280 }
1281 length += 3;
1282 cc += 1 + LINK_SIZE + IMM2_SIZE;
1283 break;
1284
1285 default:
1286 stack_restore = TRUE;
1287 /* Fall through. */
1288
1289 case OP_NOT_WORD_BOUNDARY:
1290 case OP_WORD_BOUNDARY:
1291 case OP_NOT_DIGIT:
1292 case OP_DIGIT:
1293 case OP_NOT_WHITESPACE:
1294 case OP_WHITESPACE:
1295 case OP_NOT_WORDCHAR:
1296 case OP_WORDCHAR:
1297 case OP_ANY:
1298 case OP_ALLANY:
1299 case OP_ANYBYTE:
1300 case OP_NOTPROP:
1301 case OP_PROP:
1302 case OP_ANYNL:
1303 case OP_NOT_HSPACE:
1304 case OP_HSPACE:
1305 case OP_NOT_VSPACE:
1306 case OP_VSPACE:
1307 case OP_EXTUNI:
1308 case OP_EODN:
1309 case OP_EOD:
1310 case OP_CIRC:
1311 case OP_CIRCM:
1312 case OP_DOLL:
1313 case OP_DOLLM:
1314 case OP_CHAR:
1315 case OP_CHARI:
1316 case OP_NOT:
1317 case OP_NOTI:
1318
1319 case OP_EXACT:
1320 case OP_POSSTAR:
1321 case OP_POSPLUS:
1322 case OP_POSQUERY:
1323 case OP_POSUPTO:
1324
1325 case OP_EXACTI:
1326 case OP_POSSTARI:
1327 case OP_POSPLUSI:
1328 case OP_POSQUERYI:
1329 case OP_POSUPTOI:
1330
1331 case OP_NOTEXACT:
1332 case OP_NOTPOSSTAR:
1333 case OP_NOTPOSPLUS:
1334 case OP_NOTPOSQUERY:
1335 case OP_NOTPOSUPTO:
1336
1337 case OP_NOTEXACTI:
1338 case OP_NOTPOSSTARI:
1339 case OP_NOTPOSPLUSI:
1340 case OP_NOTPOSQUERYI:
1341 case OP_NOTPOSUPTOI:
1342
1343 case OP_TYPEEXACT:
1344 case OP_TYPEPOSSTAR:
1345 case OP_TYPEPOSPLUS:
1346 case OP_TYPEPOSQUERY:
1347 case OP_TYPEPOSUPTO:
1348
1349 case OP_CLASS:
1350 case OP_NCLASS:
1351 case OP_XCLASS:
1352
1353 cc = next_opcode(common, cc);
1354 SLJIT_ASSERT(cc != NULL);
1355 break;
1356 }
1357
1358 /* Possessive quantifiers can use a special case. */
1359 if (SLJIT_UNLIKELY(possessive == length))
1360 return stack_restore ? no_frame : no_stack;
1361
1362 if (length > 0)
1363 return length + 1;
1364 return stack_restore ? no_frame : no_stack;
1365 }
1366
1367 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1368 {
1369 DEFINE_COMPILER;
1370 BOOL setsom_found = recursive;
1371 BOOL setmark_found = recursive;
1372 /* The last capture is a local variable even for recursions. */
1373 BOOL capture_last_found = FALSE;
1374 int offset;
1375
1376 /* >= 1 + shortest item size (2) */
1377 SLJIT_UNUSED_ARG(stacktop);
1378 SLJIT_ASSERT(stackpos >= stacktop + 2);
1379
1380 stackpos = STACK(stackpos);
1381 if (ccend == NULL)
1382 {
1383 ccend = bracketend(cc) - (1 + LINK_SIZE);
1384 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1385 cc = next_opcode(common, cc);
1386 }
1387
1388 SLJIT_ASSERT(cc != NULL);
1389 while (cc < ccend)
1390 switch(*cc)
1391 {
1392 case OP_SET_SOM:
1393 SLJIT_ASSERT(common->has_set_som);
1394 if (!setsom_found)
1395 {
1396 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1397 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1398 stackpos += (int)sizeof(sljit_sw);
1399 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1400 stackpos += (int)sizeof(sljit_sw);
1401 setsom_found = TRUE;
1402 }
1403 cc += 1;
1404 break;
1405
1406 case OP_MARK:
1407 case OP_PRUNE_ARG:
1408 case OP_THEN_ARG:
1409 SLJIT_ASSERT(common->mark_ptr != 0);
1410 if (!setmark_found)
1411 {
1412 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1413 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1414 stackpos += (int)sizeof(sljit_sw);
1415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1416 stackpos += (int)sizeof(sljit_sw);
1417 setmark_found = TRUE;
1418 }
1419 cc += 1 + 2 + cc[1];
1420 break;
1421
1422 case OP_RECURSE:
1423 if (common->has_set_som && !setsom_found)
1424 {
1425 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1426 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1427 stackpos += (int)sizeof(sljit_sw);
1428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1429 stackpos += (int)sizeof(sljit_sw);
1430 setsom_found = TRUE;
1431 }
1432 if (common->mark_ptr != 0 && !setmark_found)
1433 {
1434 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1435 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1436 stackpos += (int)sizeof(sljit_sw);
1437 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1438 stackpos += (int)sizeof(sljit_sw);
1439 setmark_found = TRUE;
1440 }
1441 if (common->capture_last_ptr != 0 && !capture_last_found)
1442 {
1443 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1444 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1445 stackpos += (int)sizeof(sljit_sw);
1446 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1447 stackpos += (int)sizeof(sljit_sw);
1448 capture_last_found = TRUE;
1449 }
1450 cc += 1 + LINK_SIZE;
1451 break;
1452
1453 case OP_CBRA:
1454 case OP_CBRAPOS:
1455 case OP_SCBRA:
1456 case OP_SCBRAPOS:
1457 if (common->capture_last_ptr != 0 && !capture_last_found)
1458 {
1459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1461 stackpos += (int)sizeof(sljit_sw);
1462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1463 stackpos += (int)sizeof(sljit_sw);
1464 capture_last_found = TRUE;
1465 }
1466 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1468 stackpos += (int)sizeof(sljit_sw);
1469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1470 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1472 stackpos += (int)sizeof(sljit_sw);
1473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1474 stackpos += (int)sizeof(sljit_sw);
1475
1476 cc += 1 + LINK_SIZE + IMM2_SIZE;
1477 break;
1478
1479 default:
1480 cc = next_opcode(common, cc);
1481 SLJIT_ASSERT(cc != NULL);
1482 break;
1483 }
1484
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1486 SLJIT_ASSERT(stackpos == STACK(stacktop));
1487 }
1488
1489 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1490 {
1491 int private_data_length = needs_control_head ? 3 : 2;
1492 int size;
1493 pcre_uchar *alternative;
1494 /* Calculate the sum of the private machine words. */
1495 while (cc < ccend)
1496 {
1497 size = 0;
1498 switch(*cc)
1499 {
1500 case OP_ASSERT:
1501 case OP_ASSERT_NOT:
1502 case OP_ASSERTBACK:
1503 case OP_ASSERTBACK_NOT:
1504 case OP_ONCE:
1505 case OP_ONCE_NC:
1506 case OP_BRAPOS:
1507 case OP_SBRA:
1508 case OP_SBRAPOS:
1509 case OP_SCOND:
1510 private_data_length++;
1511 cc += 1 + LINK_SIZE;
1512 break;
1513
1514 case OP_CBRA:
1515 case OP_SCBRA:
1516 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1517 private_data_length++;
1518 cc += 1 + LINK_SIZE + IMM2_SIZE;
1519 break;
1520
1521 case OP_CBRAPOS:
1522 case OP_SCBRAPOS:
1523 private_data_length += 2;
1524 cc += 1 + LINK_SIZE + IMM2_SIZE;
1525 break;
1526
1527 case OP_COND:
1528 /* Might be a hidden SCOND. */
1529 alternative = cc + GET(cc, 1);
1530 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1531 private_data_length++;
1532 cc += 1 + LINK_SIZE;
1533 break;
1534
1535 CASE_ITERATOR_PRIVATE_DATA_1
1536 if (PRIVATE_DATA(cc))
1537 private_data_length++;
1538 cc += 2;
1539 #ifdef SUPPORT_UTF
1540 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1541 #endif
1542 break;
1543
1544 CASE_ITERATOR_PRIVATE_DATA_2A
1545 if (PRIVATE_DATA(cc))
1546 private_data_length += 2;
1547 cc += 2;
1548 #ifdef SUPPORT_UTF
1549 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1550 #endif
1551 break;
1552
1553 CASE_ITERATOR_PRIVATE_DATA_2B
1554 if (PRIVATE_DATA(cc))
1555 private_data_length += 2;
1556 cc += 2 + IMM2_SIZE;
1557 #ifdef SUPPORT_UTF
1558 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1559 #endif
1560 break;
1561
1562 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1563 if (PRIVATE_DATA(cc))
1564 private_data_length++;
1565 cc += 1;
1566 break;
1567
1568 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1569 if (PRIVATE_DATA(cc))
1570 private_data_length += 2;
1571 cc += 1;
1572 break;
1573
1574 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1575 if (PRIVATE_DATA(cc))
1576 private_data_length += 2;
1577 cc += 1 + IMM2_SIZE;
1578 break;
1579
1580 case OP_CLASS:
1581 case OP_NCLASS:
1582 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1583 case OP_XCLASS:
1584 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1585 #else
1586 size = 1 + 32 / (int)sizeof(pcre_uchar);
1587 #endif
1588 if (PRIVATE_DATA(cc))
1589 private_data_length += get_class_iterator_size(cc + size);
1590 cc += size;
1591 break;
1592
1593 default:
1594 cc = next_opcode(common, cc);
1595 SLJIT_ASSERT(cc != NULL);
1596 break;
1597 }
1598 }
1599 SLJIT_ASSERT(cc == ccend);
1600 return private_data_length;
1601 }
1602
1603 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1604 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1605 {
1606 DEFINE_COMPILER;
1607 int srcw[2];
1608 int count, size;
1609 BOOL tmp1next = TRUE;
1610 BOOL tmp1empty = TRUE;
1611 BOOL tmp2empty = TRUE;
1612 pcre_uchar *alternative;
1613 enum {
1614 start,
1615 loop,
1616 end
1617 } status;
1618
1619 status = save ? start : loop;
1620 stackptr = STACK(stackptr - 2);
1621 stacktop = STACK(stacktop - 1);
1622
1623 if (!save)
1624 {
1625 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1626 if (stackptr < stacktop)
1627 {
1628 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1629 stackptr += sizeof(sljit_sw);
1630 tmp1empty = FALSE;
1631 }
1632 if (stackptr < stacktop)
1633 {
1634 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1635 stackptr += sizeof(sljit_sw);
1636 tmp2empty = FALSE;
1637 }
1638 /* The tmp1next must be TRUE in either way. */
1639 }
1640
1641 do
1642 {
1643 count = 0;
1644 switch(status)
1645 {
1646 case start:
1647 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1648 count = 1;
1649 srcw[0] = common->recursive_head_ptr;
1650 if (needs_control_head)
1651 {
1652 SLJIT_ASSERT(common->control_head_ptr != 0);
1653 count = 2;
1654 srcw[1] = common->control_head_ptr;
1655 }
1656 status = loop;
1657 break;
1658
1659 case loop:
1660 if (cc >= ccend)
1661 {
1662 status = end;
1663 break;
1664 }
1665
1666 switch(*cc)
1667 {
1668 case OP_ASSERT:
1669 case OP_ASSERT_NOT:
1670 case OP_ASSERTBACK:
1671 case OP_ASSERTBACK_NOT:
1672 case OP_ONCE:
1673 case OP_ONCE_NC:
1674 case OP_BRAPOS:
1675 case OP_SBRA:
1676 case OP_SBRAPOS:
1677 case OP_SCOND:
1678 count = 1;
1679 srcw[0] = PRIVATE_DATA(cc);
1680 SLJIT_ASSERT(srcw[0] != 0);
1681 cc += 1 + LINK_SIZE;
1682 break;
1683
1684 case OP_CBRA:
1685 case OP_SCBRA:
1686 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1687 {
1688 count = 1;
1689 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1690 }
1691 cc += 1 + LINK_SIZE + IMM2_SIZE;
1692 break;
1693
1694 case OP_CBRAPOS:
1695 case OP_SCBRAPOS:
1696 count = 2;
1697 srcw[0] = PRIVATE_DATA(cc);
1698 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1699 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1700 cc += 1 + LINK_SIZE + IMM2_SIZE;
1701 break;
1702
1703 case OP_COND:
1704 /* Might be a hidden SCOND. */
1705 alternative = cc + GET(cc, 1);
1706 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1707 {
1708 count = 1;
1709 srcw[0] = PRIVATE_DATA(cc);
1710 SLJIT_ASSERT(srcw[0] != 0);
1711 }
1712 cc += 1 + LINK_SIZE;
1713 break;
1714
1715 CASE_ITERATOR_PRIVATE_DATA_1
1716 if (PRIVATE_DATA(cc))
1717 {
1718 count = 1;
1719 srcw[0] = PRIVATE_DATA(cc);
1720 }
1721 cc += 2;
1722 #ifdef SUPPORT_UTF
1723 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1724 #endif
1725 break;
1726
1727 CASE_ITERATOR_PRIVATE_DATA_2A
1728 if (PRIVATE_DATA(cc))
1729 {
1730 count = 2;
1731 srcw[0] = PRIVATE_DATA(cc);
1732 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1733 }
1734 cc += 2;
1735 #ifdef SUPPORT_UTF
1736 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1737 #endif
1738 break;
1739
1740 CASE_ITERATOR_PRIVATE_DATA_2B
1741 if (PRIVATE_DATA(cc))
1742 {
1743 count = 2;
1744 srcw[0] = PRIVATE_DATA(cc);
1745 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1746 }
1747 cc += 2 + IMM2_SIZE;
1748 #ifdef SUPPORT_UTF
1749 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1750 #endif
1751 break;
1752
1753 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1754 if (PRIVATE_DATA(cc))
1755 {
1756 count = 1;
1757 srcw[0] = PRIVATE_DATA(cc);
1758 }
1759 cc += 1;
1760 break;
1761
1762 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1763 if (PRIVATE_DATA(cc))
1764 {
1765 count = 2;
1766 srcw[0] = PRIVATE_DATA(cc);
1767 srcw[1] = srcw[0] + sizeof(sljit_sw);
1768 }
1769 cc += 1;
1770 break;
1771
1772 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1773 if (PRIVATE_DATA(cc))
1774 {
1775 count = 2;
1776 srcw[0] = PRIVATE_DATA(cc);
1777 srcw[1] = srcw[0] + sizeof(sljit_sw);
1778 }
1779 cc += 1 + IMM2_SIZE;
1780 break;
1781
1782 case OP_CLASS:
1783 case OP_NCLASS:
1784 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1785 case OP_XCLASS:
1786 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1787 #else
1788 size = 1 + 32 / (int)sizeof(pcre_uchar);
1789 #endif
1790 if (PRIVATE_DATA(cc))
1791 switch(get_class_iterator_size(cc + size))
1792 {
1793 case 1:
1794 count = 1;
1795 srcw[0] = PRIVATE_DATA(cc);
1796 break;
1797
1798 case 2:
1799 count = 2;
1800 srcw[0] = PRIVATE_DATA(cc);
1801 srcw[1] = srcw[0] + sizeof(sljit_sw);
1802 break;
1803
1804 default:
1805 SLJIT_ASSERT_STOP();
1806 break;
1807 }
1808 cc += size;
1809 break;
1810
1811 default:
1812 cc = next_opcode(common, cc);
1813 SLJIT_ASSERT(cc != NULL);
1814 break;
1815 }
1816 break;
1817
1818 case end:
1819 SLJIT_ASSERT_STOP();
1820 break;
1821 }
1822
1823 while (count > 0)
1824 {
1825 count--;
1826 if (save)
1827 {
1828 if (tmp1next)
1829 {
1830 if (!tmp1empty)
1831 {
1832 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1833 stackptr += sizeof(sljit_sw);
1834 }
1835 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1836 tmp1empty = FALSE;
1837 tmp1next = FALSE;
1838 }
1839 else
1840 {
1841 if (!tmp2empty)
1842 {
1843 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1844 stackptr += sizeof(sljit_sw);
1845 }
1846 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1847 tmp2empty = FALSE;
1848 tmp1next = TRUE;
1849 }
1850 }
1851 else
1852 {
1853 if (tmp1next)
1854 {
1855 SLJIT_ASSERT(!tmp1empty);
1856 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1857 tmp1empty = stackptr >= stacktop;
1858 if (!tmp1empty)
1859 {
1860 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1861 stackptr += sizeof(sljit_sw);
1862 }
1863 tmp1next = FALSE;
1864 }
1865 else
1866 {
1867 SLJIT_ASSERT(!tmp2empty);
1868 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1869 tmp2empty = stackptr >= stacktop;
1870 if (!tmp2empty)
1871 {
1872 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1873 stackptr += sizeof(sljit_sw);
1874 }
1875 tmp1next = TRUE;
1876 }
1877 }
1878 }
1879 }
1880 while (status != end);
1881
1882 if (save)
1883 {
1884 if (tmp1next)
1885 {
1886 if (!tmp1empty)
1887 {
1888 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1889 stackptr += sizeof(sljit_sw);
1890 }
1891 if (!tmp2empty)
1892 {
1893 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1894 stackptr += sizeof(sljit_sw);
1895 }
1896 }
1897 else
1898 {
1899 if (!tmp2empty)
1900 {
1901 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1902 stackptr += sizeof(sljit_sw);
1903 }
1904 if (!tmp1empty)
1905 {
1906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1907 stackptr += sizeof(sljit_sw);
1908 }
1909 }
1910 }
1911 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1912 }
1913
1914 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1915 {
1916 pcre_uchar *end = bracketend(cc);
1917 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1918
1919 /* Assert captures then. */
1920 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1921 current_offset = NULL;
1922 /* Conditional block does not. */
1923 if (*cc == OP_COND || *cc == OP_SCOND)
1924 has_alternatives = FALSE;
1925
1926 cc = next_opcode(common, cc);
1927 if (has_alternatives)
1928 current_offset = common->then_offsets + (cc - common->start);
1929
1930 while (cc < end)
1931 {
1932 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1933 cc = set_then_offsets(common, cc, current_offset);
1934 else
1935 {
1936 if (*cc == OP_ALT && has_alternatives)
1937 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1938 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1939 *current_offset = 1;
1940 cc = next_opcode(common, cc);
1941 }
1942 }
1943
1944 return end;
1945 }
1946
1947 #undef CASE_ITERATOR_PRIVATE_DATA_1
1948 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1949 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1950 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1951 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1952 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1953
1954 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1955 {
1956 return (value & (value - 1)) == 0;
1957 }
1958
1959 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1960 {
1961 while (list)
1962 {
1963 /* sljit_set_label is clever enough to do nothing
1964 if either the jump or the label is NULL. */
1965 SET_LABEL(list->jump, label);
1966 list = list->next;
1967 }
1968 }
1969
1970 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1971 {
1972 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1973 if (list_item)
1974 {
1975 list_item->next = *list;
1976 list_item->jump = jump;
1977 *list = list_item;
1978 }
1979 }
1980
1981 static void add_stub(compiler_common *common, struct sljit_jump *start)
1982 {
1983 DEFINE_COMPILER;
1984 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1985
1986 if (list_item)
1987 {
1988 list_item->start = start;
1989 list_item->quit = LABEL();
1990 list_item->next = common->stubs;
1991 common->stubs = list_item;
1992 }
1993 }
1994
1995 static void flush_stubs(compiler_common *common)
1996 {
1997 DEFINE_COMPILER;
1998 stub_list* list_item = common->stubs;
1999
2000 while (list_item)
2001 {
2002 JUMPHERE(list_item->start);
2003 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2004 JUMPTO(SLJIT_JUMP, list_item->quit);
2005 list_item = list_item->next;
2006 }
2007 common->stubs = NULL;
2008 }
2009
2010 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
2011 {
2012 DEFINE_COMPILER;
2013
2014 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
2015 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2016 }
2017
2018 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2019 {
2020 /* May destroy all locals and registers except TMP2. */
2021 DEFINE_COMPILER;
2022
2023 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2024 #ifdef DESTROY_REGISTERS
2025 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2026 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2027 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2028 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2029 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2030 #endif
2031 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2032 }
2033
2034 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2035 {
2036 DEFINE_COMPILER;
2037 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2038 }
2039
2040 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2041 {
2042 DEFINE_COMPILER;
2043 struct sljit_label *loop;
2044 int i;
2045
2046 /* At this point we can freely use all temporary registers. */
2047 SLJIT_ASSERT(length > 1);
2048 /* TMP1 returns with begin - 1. */
2049 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2050 if (length < 8)
2051 {
2052 for (i = 1; i < length; i++)
2053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2054 }
2055 else
2056 {
2057 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2058 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2059 loop = LABEL();
2060 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2061 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2062 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2063 }
2064 }
2065
2066 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2067 {
2068 DEFINE_COMPILER;
2069 struct sljit_label *loop;
2070 int i;
2071
2072 SLJIT_ASSERT(length > 1);
2073 /* OVECTOR(1) contains the "string begin - 1" constant. */
2074 if (length > 2)
2075 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2076 if (length < 8)
2077 {
2078 for (i = 2; i < length; i++)
2079 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2080 }
2081 else
2082 {
2083 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2084 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2085 loop = LABEL();
2086 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2087 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2088 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2089 }
2090
2091 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2092 if (common->mark_ptr != 0)
2093 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2094 if (common->control_head_ptr != 0)
2095 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2096 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2097 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2098 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2099 }
2100
2101 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2102 {
2103 while (current != NULL)
2104 {
2105 switch (current[-2])
2106 {
2107 case type_then_trap:
2108 break;
2109
2110 case type_mark:
2111 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2112 return current[-4];
2113 break;
2114
2115 default:
2116 SLJIT_ASSERT_STOP();
2117 break;
2118 }
2119 current = (sljit_sw*)current[-1];
2120 }
2121 return -1;
2122 }
2123
2124 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2125 {
2126 DEFINE_COMPILER;
2127 struct sljit_label *loop;
2128 struct sljit_jump *early_quit;
2129
2130 /* At this point we can freely use all registers. */
2131 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2132 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2133
2134 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2135 if (common->mark_ptr != 0)
2136 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2137 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2138 if (common->mark_ptr != 0)
2139 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2140 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2141 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2142 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2143 /* Unlikely, but possible */
2144 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2145 loop = LABEL();
2146 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2147 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2148 /* Copy the integer value to the output buffer */
2149 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2150 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2151 #endif
2152 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2153 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2154 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2155 JUMPHERE(early_quit);
2156
2157 /* Calculate the return value, which is the maximum ovector value. */
2158 if (topbracket > 1)
2159 {
2160 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2161 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2162
2163 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2164 loop = LABEL();
2165 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2166 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2167 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2168 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2169 }
2170 else
2171 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2172 }
2173
2174 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2175 {
2176 DEFINE_COMPILER;
2177 struct sljit_jump *jump;
2178
2179 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2180 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2181 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2182
2183 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2184 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2185 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2186 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2187
2188 /* Store match begin and end. */
2189 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2190 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2191
2192 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2193 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2194 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2195 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2196 #endif
2197 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2198 JUMPHERE(jump);
2199
2200 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2201 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2202 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2203 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2204 #endif
2205 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2206
2207 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2208 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2209 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2210 #endif
2211 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2212
2213 JUMPTO(SLJIT_JUMP, quit);
2214 }
2215
2216 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2217 {
2218 /* May destroy TMP1. */
2219 DEFINE_COMPILER;
2220 struct sljit_jump *jump;
2221
2222 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2223 {
2224 /* The value of -1 must be kept for start_used_ptr! */
2225 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2226 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2227 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2228 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2229 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2230 JUMPHERE(jump);
2231 }
2232 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2233 {
2234 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2235 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2236 JUMPHERE(jump);
2237 }
2238 }
2239
2240 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2241 {
2242 /* Detects if the character has an othercase. */
2243 unsigned int c;
2244
2245 #ifdef SUPPORT_UTF
2246 if (common->utf)
2247 {
2248 GETCHAR(c, cc);
2249 if (c > 127)
2250 {
2251 #ifdef SUPPORT_UCP
2252 return c != UCD_OTHERCASE(c);
2253 #else
2254 return FALSE;
2255 #endif
2256 }
2257 #ifndef COMPILE_PCRE8
2258 return common->fcc[c] != c;
2259 #endif
2260 }
2261 else
2262 #endif
2263 c = *cc;
2264 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2265 }
2266
2267 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2268 {
2269 /* Returns with the othercase. */
2270 #ifdef SUPPORT_UTF
2271 if (common->utf && c > 127)
2272 {
2273 #ifdef SUPPORT_UCP
2274 return UCD_OTHERCASE(c);
2275 #else
2276 return c;
2277 #endif
2278 }
2279 #endif
2280 return TABLE_GET(c, common->fcc, c);
2281 }
2282
2283 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2284 {
2285 /* Detects if the character and its othercase has only 1 bit difference. */
2286 unsigned int c, oc, bit;
2287 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2288 int n;
2289 #endif
2290
2291 #ifdef SUPPORT_UTF
2292 if (common->utf)
2293 {
2294 GETCHAR(c, cc);
2295 if (c <= 127)
2296 oc = common->fcc[c];
2297 else
2298 {
2299 #ifdef SUPPORT_UCP
2300 oc = UCD_OTHERCASE(c);
2301 #else
2302 oc = c;
2303 #endif
2304 }
2305 }
2306 else
2307 {
2308 c = *cc;
2309 oc = TABLE_GET(c, common->fcc, c);
2310 }
2311 #else
2312 c = *cc;
2313 oc = TABLE_GET(c, common->fcc, c);
2314 #endif
2315
2316 SLJIT_ASSERT(c != oc);
2317
2318 bit = c ^ oc;
2319 /* Optimized for English alphabet. */
2320 if (c <= 127 && bit == 0x20)
2321 return (0 << 8) | 0x20;
2322
2323 /* Since c != oc, they must have at least 1 bit difference. */
2324 if (!is_powerof2(bit))
2325 return 0;
2326
2327 #if defined COMPILE_PCRE8
2328
2329 #ifdef SUPPORT_UTF
2330 if (common->utf && c > 127)
2331 {
2332 n = GET_EXTRALEN(*cc);
2333 while ((bit & 0x3f) == 0)
2334 {
2335 n--;
2336 bit >>= 6;
2337 }
2338 return (n << 8) | bit;
2339 }
2340 #endif /* SUPPORT_UTF */
2341 return (0 << 8) | bit;
2342
2343 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2344
2345 #ifdef SUPPORT_UTF
2346 if (common->utf && c > 65535)
2347 {
2348 if (bit >= (1 << 10))
2349 bit >>= 10;
2350 else
2351 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2352 }
2353 #endif /* SUPPORT_UTF */
2354 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2355
2356 #endif /* COMPILE_PCRE[8|16|32] */
2357 }
2358
2359 static void check_partial(compiler_common *common, BOOL force)
2360 {
2361 /* Checks whether a partial matching is occured. Does not modify registers. */
2362 DEFINE_COMPILER;
2363 struct sljit_jump *jump = NULL;
2364
2365 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2366
2367 if (common->mode == JIT_COMPILE)
2368 return;
2369
2370 if (!force)
2371 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2372 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2373 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2374
2375 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2376 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2377 else
2378 {
2379 if (common->partialmatchlabel != NULL)
2380 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2381 else
2382 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2383 }
2384
2385 if (jump != NULL)
2386 JUMPHERE(jump);
2387 }
2388
2389 static void check_str_end(compiler_common *common, jump_list **end_reached)
2390 {
2391 /* Does not affect registers. Usually used in a tight spot. */
2392 DEFINE_COMPILER;
2393 struct sljit_jump *jump;
2394
2395 if (common->mode == JIT_COMPILE)
2396 {
2397 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2398 return;
2399 }
2400
2401 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2402 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2403 {
2404 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2405 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2406 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2407 }
2408 else
2409 {
2410 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2411 if (common->partialmatchlabel != NULL)
2412 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2413 else
2414 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2415 }
2416 JUMPHERE(jump);
2417 }
2418
2419 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2420 {
2421 DEFINE_COMPILER;
2422 struct sljit_jump *jump;
2423
2424 if (common->mode == JIT_COMPILE)
2425 {
2426 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2427 return;
2428 }
2429
2430 /* Partial matching mode. */
2431 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2432 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2433 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2434 {
2435 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2436 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2437 }
2438 else
2439 {
2440 if (common->partialmatchlabel != NULL)
2441 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2442 else
2443 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2444 }
2445 JUMPHERE(jump);
2446 }
2447
2448 static void read_char(compiler_common *common)
2449 {
2450 /* Reads the character into TMP1, updates STR_PTR.
2451 Does not check STR_END. TMP2 Destroyed. */
2452 DEFINE_COMPILER;
2453 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2454 struct sljit_jump *jump;
2455 #endif
2456
2457 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2458 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2459 if (common->utf)
2460 {
2461 #if defined COMPILE_PCRE8
2462 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2463 #elif defined COMPILE_PCRE16
2464 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2465 #endif /* COMPILE_PCRE[8|16] */
2466 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2467 JUMPHERE(jump);
2468 }
2469 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2470 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2471 }
2472
2473 static void peek_char(compiler_common *common)
2474 {
2475 /* Reads the character into TMP1, keeps STR_PTR.
2476 Does not check STR_END. TMP2 Destroyed. */
2477 DEFINE_COMPILER;
2478 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2479 struct sljit_jump *jump;
2480 #endif
2481
2482 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2483 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2484 if (common->utf)
2485 {
2486 #if defined COMPILE_PCRE8
2487 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2488 #elif defined COMPILE_PCRE16
2489 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2490 #endif /* COMPILE_PCRE[8|16] */
2491 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2492 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2493 JUMPHERE(jump);
2494 }
2495 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2496 }
2497
2498 static void read_char8_type(compiler_common *common)
2499 {
2500 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2501 DEFINE_COMPILER;
2502 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2503 struct sljit_jump *jump;
2504 #endif
2505
2506 #ifdef SUPPORT_UTF
2507 if (common->utf)
2508 {
2509 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2510 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2511 #if defined COMPILE_PCRE8
2512 /* This can be an extra read in some situations, but hopefully
2513 it is needed in most cases. */
2514 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2515 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2516 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2517 JUMPHERE(jump);
2518 #elif defined COMPILE_PCRE16
2519 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2520 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2521 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2522 JUMPHERE(jump);
2523 /* Skip low surrogate if necessary. */
2524 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2525 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2526 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2527 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2528 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2529 #elif defined COMPILE_PCRE32
2530 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2531 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2532 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2533 JUMPHERE(jump);
2534 #endif /* COMPILE_PCRE[8|16|32] */
2535 return;
2536 }
2537 #endif /* SUPPORT_UTF */
2538 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2539 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2540 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2541 /* The ctypes array contains only 256 values. */
2542 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2543 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2544 #endif
2545 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2546 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2547 JUMPHERE(jump);
2548 #endif
2549 }
2550
2551 static void skip_char_back(compiler_common *common)
2552 {
2553 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2554 DEFINE_COMPILER;
2555 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2556 #if defined COMPILE_PCRE8
2557 struct sljit_label *label;
2558
2559 if (common->utf)
2560 {
2561 label = LABEL();
2562 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2563 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2564 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2565 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2566 return;
2567 }
2568 #elif defined COMPILE_PCRE16
2569 if (common->utf)
2570 {
2571 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2572 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2573 /* Skip low surrogate if necessary. */
2574 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2575 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2576 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2577 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2578 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2579 return;
2580 }
2581 #endif /* COMPILE_PCRE[8|16] */
2582 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2583 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2584 }
2585
2586 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2587 {
2588 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2589 DEFINE_COMPILER;
2590
2591 if (nltype == NLTYPE_ANY)
2592 {
2593 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2594 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2595 }
2596 else if (nltype == NLTYPE_ANYCRLF)
2597 {
2598 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2599 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2600 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2601 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2602 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2603 }
2604 else
2605 {
2606 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2607 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2608 }
2609 }
2610
2611 #ifdef SUPPORT_UTF
2612
2613 #if defined COMPILE_PCRE8
2614 static void do_utfreadchar(compiler_common *common)
2615 {
2616 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2617 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2618 DEFINE_COMPILER;
2619 struct sljit_jump *jump;
2620
2621 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2622 /* Searching for the first zero. */
2623 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2624 jump = JUMP(SLJIT_C_NOT_ZERO);
2625 /* Two byte sequence. */
2626 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2627 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2628 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2629 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2630 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2631 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2632 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2633 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2634 JUMPHERE(jump);
2635
2636 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2637 jump = JUMP(SLJIT_C_NOT_ZERO);
2638 /* Three byte sequence. */
2639 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2640 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2641 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2642 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2643 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2644 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2645 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2646 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2647 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2648 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2649 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2650 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2651 JUMPHERE(jump);
2652
2653 /* Four byte sequence. */
2654 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2655 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2656 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2657 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2658 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2659 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2660 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2661 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2662 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2663 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2664 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2665 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2666 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2667 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2668 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2669 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2670 }
2671
2672 static void do_utfreadtype8(compiler_common *common)
2673 {
2674 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2675 of the character (>= 0xc0). Return value in TMP1. */
2676 DEFINE_COMPILER;
2677 struct sljit_jump *jump;
2678 struct sljit_jump *compare;
2679
2680 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2681
2682 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2683 jump = JUMP(SLJIT_C_NOT_ZERO);
2684 /* Two byte sequence. */
2685 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2686 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2687 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2688 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2689 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2690 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2691 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2692 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2693 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2694
2695 JUMPHERE(compare);
2696 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2697 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2698 JUMPHERE(jump);
2699
2700 /* We only have types for characters less than 256. */
2701 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2702 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2703 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2704 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2705 }
2706
2707 #elif defined COMPILE_PCRE16
2708
2709 static void do_utfreadchar(compiler_common *common)
2710 {
2711 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2712 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2713 DEFINE_COMPILER;
2714 struct sljit_jump *jump;
2715
2716 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2717 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2718 /* Do nothing, only return. */
2719 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2720
2721 JUMPHERE(jump);
2722 /* Combine two 16 bit characters. */
2723 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2724 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2725 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2726 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2727 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2728 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2729 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2730 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2731 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2732 }
2733
2734 #endif /* COMPILE_PCRE[8|16] */
2735
2736 #endif /* SUPPORT_UTF */
2737
2738 #ifdef SUPPORT_UCP
2739
2740 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2741 #define UCD_BLOCK_MASK 127
2742 #define UCD_BLOCK_SHIFT 7
2743
2744 static void do_getucd(compiler_common *common)
2745 {
2746 /* Search the UCD record for the character comes in TMP1.
2747 Returns chartype in TMP1 and UCD offset in TMP2. */
2748 DEFINE_COMPILER;
2749
2750 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2751
2752 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2753 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2754 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2755 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2756 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2757 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2758 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2759 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2760 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2761 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2762 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2763 }
2764 #endif
2765
2766 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2767 {
2768 DEFINE_COMPILER;
2769 struct sljit_label *mainloop;
2770 struct sljit_label *newlinelabel = NULL;
2771 struct sljit_jump *start;
2772 struct sljit_jump *end = NULL;
2773 struct sljit_jump *nl = NULL;
2774 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2775 struct sljit_jump *singlechar;
2776 #endif
2777 jump_list *newline = NULL;
2778 BOOL newlinecheck = FALSE;
2779 BOOL readuchar = FALSE;
2780
2781 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2782 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2783 newlinecheck = TRUE;
2784
2785 if (firstline)
2786 {
2787 /* Search for the end of the first line. */
2788 SLJIT_ASSERT(common->first_line_end != 0);
2789 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2790
2791 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2792 {
2793 mainloop = LABEL();
2794 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2795 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2796 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2797 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2798 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2799 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2800 JUMPHERE(end);
2801 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2802 }
2803 else
2804 {
2805 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2806 mainloop = LABEL();
2807 /* Continual stores does not cause data dependency. */
2808 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2809 read_char(common);
2810 check_newlinechar(common, common->nltype, &newline, TRUE);
2811 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2812 JUMPHERE(end);
2813 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2814 set_jumps(newline, LABEL());
2815 }
2816
2817 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2818 }
2819
2820 start = JUMP(SLJIT_JUMP);
2821
2822 if (newlinecheck)
2823 {
2824 newlinelabel = LABEL();
2825 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2826 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2827 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2828 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2829 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2830 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2831 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2832 #endif
2833 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2834 nl = JUMP(SLJIT_JUMP);
2835 }
2836
2837 mainloop = LABEL();
2838
2839 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2840 #ifdef SUPPORT_UTF
2841 if (common->utf) readuchar = TRUE;
2842 #endif
2843 if (newlinecheck) readuchar = TRUE;
2844
2845 if (readuchar)
2846 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2847
2848 if (newlinecheck)
2849 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2850
2851 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2852 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2853 #if defined COMPILE_PCRE8
2854 if (common->utf)
2855 {
2856 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2857 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2858 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2859 JUMPHERE(singlechar);
2860 }
2861 #elif defined COMPILE_PCRE16
2862 if (common->utf)
2863 {
2864 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2865 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2866 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2867 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2868 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2869 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2870 JUMPHERE(singlechar);
2871 }
2872 #endif /* COMPILE_PCRE[8|16] */
2873 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2874 JUMPHERE(start);
2875
2876 if (newlinecheck)
2877 {
2878 JUMPHERE(end);
2879 JUMPHERE(nl);
2880 }
2881
2882 return mainloop;
2883 }
2884
2885 #define MAX_N_CHARS 3
2886
2887 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2888 {
2889 DEFINE_COMPILER;
2890 struct sljit_label *start;
2891 struct sljit_jump *quit;
2892 pcre_uint32 chars[MAX_N_CHARS * 2];
2893 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2894 int location = 0;
2895 pcre_int32 len, c, bit, caseless;
2896 int must_stop;
2897
2898 /* We do not support alternatives now. */
2899 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2900 return FALSE;
2901
2902 while (TRUE)
2903 {
2904 caseless = 0;
2905 must_stop = 1;
2906 switch(*cc)
2907 {
2908 case OP_CHAR:
2909 must_stop = 0;
2910 cc++;
2911 break;
2912
2913 case OP_CHARI:
2914 caseless = 1;
2915 must_stop = 0;
2916 cc++;
2917 break;
2918
2919 case OP_SOD:
2920 case OP_SOM:
2921 case OP_SET_SOM:
2922 case OP_NOT_WORD_BOUNDARY:
2923 case OP_WORD_BOUNDARY:
2924 case OP_EODN:
2925 case OP_EOD:
2926 case OP_CIRC:
2927 case OP_CIRCM:
2928 case OP_DOLL:
2929 case OP_DOLLM:
2930 /* Zero width assertions. */
2931 cc++;
2932 continue;
2933
2934 case OP_PLUS:
2935 case OP_MINPLUS:
2936 case OP_POSPLUS:
2937 cc++;
2938 break;
2939
2940 case OP_EXACT:
2941 cc += 1 + IMM2_SIZE;
2942 break;
2943
2944 case OP_PLUSI:
2945 case OP_MINPLUSI:
2946 case OP_POSPLUSI:
2947 caseless = 1;
2948 cc++;
2949 break;
2950
2951 case OP_EXACTI:
2952 caseless = 1;
2953 cc += 1 + IMM2_SIZE;
2954 break;
2955
2956 default:
2957 must_stop = 2;
2958 break;
2959 }
2960
2961 if (must_stop == 2)
2962 break;
2963
2964 len = 1;
2965 #ifdef SUPPORT_UTF
2966 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2967 #endif
2968
2969 if (caseless && char_has_othercase(common, cc))
2970 {
2971 caseless = char_get_othercase_bit(common, cc);
2972 if (caseless == 0)
2973 return FALSE;
2974 #ifdef COMPILE_PCRE8
2975 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2976 #else
2977 if ((caseless & 0x100) != 0)
2978 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2979 else
2980 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2981 #endif
2982 }
2983 else
2984 caseless = 0;
2985
2986 while (len > 0 && location < MAX_N_CHARS * 2)
2987 {
2988 c = *cc;
2989 bit = 0;
2990 if (len == (caseless & 0xff))
2991 {
2992 bit = caseless >> 8;
2993 c |= bit;
2994 }
2995
2996 chars[location] = c;
2997 chars[location + 1] = bit;
2998
2999 len--;
3000 location += 2;
3001 cc++;
3002 }
3003
3004 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3005 break;
3006 }
3007
3008 /* At least two characters are required. */
3009 if (location < 2 * 2)
3010 return FALSE;
3011
3012 if (firstline)
3013 {
3014 SLJIT_ASSERT(common->first_line_end != 0);
3015 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3016 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3017 }
3018 else
3019 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3020
3021 start = LABEL();
3022 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3023
3024 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3025 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3026 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3027 if (chars[1] != 0)
3028 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3029 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3030 if (location > 2 * 2)
3031 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3032 if (chars[3] != 0)
3033 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3034 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3035 if (location > 2 * 2)
3036 {
3037 if (chars[5] != 0)
3038 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3039 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3040 }
3041 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3042
3043 JUMPHERE(quit);
3044
3045 if (firstline)
3046 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3047 else
3048 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3049 return TRUE;
3050 }
3051
3052 #undef MAX_N_CHARS
3053
3054 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3055 {
3056 DEFINE_COMPILER;
3057 struct sljit_label *start;
3058 struct sljit_jump *quit;
3059 struct sljit_jump *found;
3060 pcre_uchar oc, bit;
3061
3062 if (firstline)
3063 {
3064 SLJIT_ASSERT(common->first_line_end != 0);
3065 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3066 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3067 }
3068
3069 start = LABEL();
3070 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3071 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3072
3073 oc = first_char;
3074 if (caseless)
3075 {
3076 oc = TABLE_GET(first_char, common->fcc, first_char);
3077 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3078 if (first_char > 127 && common->utf)
3079 oc = UCD_OTHERCASE(first_char);
3080 #endif
3081 }
3082 if (first_char == oc)
3083 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3084 else
3085 {
3086 bit = first_char ^ oc;
3087 if (is_powerof2(bit))
3088 {
3089 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3090 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3091 }
3092 else
3093 {
3094 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3095 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3096 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3097 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3098 found = JUMP(SLJIT_C_NOT_ZERO);
3099 }
3100 }
3101
3102 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3103 JUMPTO(SLJIT_JUMP, start);
3104 JUMPHERE(found);
3105 JUMPHERE(quit);
3106
3107 if (firstline)
3108 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3109 }
3110
3111 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3112 {
3113 DEFINE_COMPILER;
3114 struct sljit_label *loop;
3115 struct sljit_jump *lastchar;
3116 struct sljit_jump *firstchar;
3117 struct sljit_jump *quit;
3118 struct sljit_jump *foundcr = NULL;
3119 struct sljit_jump *notfoundnl;
3120 jump_list *newline = NULL;
3121
3122 if (firstline)
3123 {
3124 SLJIT_ASSERT(common->first_line_end != 0);
3125 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3126 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3127 }
3128
3129 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3130 {
3131 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3132 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3133 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3134 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3135 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3136
3137 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3138 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3139 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3140 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3141 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3142 #endif
3143 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3144
3145 loop = LABEL();
3146 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3147 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3148 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3149 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3150 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3151 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3152
3153 JUMPHERE(quit);
3154 JUMPHERE(firstchar);
3155 JUMPHERE(lastchar);
3156
3157 if (firstline)
3158 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3159 return;
3160 }
3161
3162 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3163 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3164 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3165 skip_char_back(common);
3166
3167 loop = LABEL();
3168 read_char(common);
3169 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3170 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3171 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3172 check_newlinechar(common, common->nltype, &newline, FALSE);
3173 set_jumps(newline, loop);
3174
3175 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3176 {
3177 quit = JUMP(SLJIT_JUMP);
3178 JUMPHERE(foundcr);
3179 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3180 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3181 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3182 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3183 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3184 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3185 #endif
3186 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3187 JUMPHERE(notfoundnl);
3188 JUMPHERE(quit);
3189 }
3190 JUMPHERE(lastchar);
3191 JUMPHERE(firstchar);
3192
3193 if (firstline)
3194 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3195 }
3196
3197 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3198
3199 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3200 {
3201 DEFINE_COMPILER;
3202 struct sljit_label *start;
3203 struct sljit_jump *quit;
3204 struct sljit_jump *found = NULL;
3205 jump_list *matches = NULL;
3206 pcre_uint8 inverted_start_bits[32];
3207 int i;
3208 #ifndef COMPILE_PCRE8
3209 struct sljit_jump *jump;
3210 #endif
3211
3212 for (i = 0; i < 32; ++i)
3213 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3214
3215 if (firstline)
3216 {
3217 SLJIT_ASSERT(common->first_line_end != 0);
3218 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3219 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3220 }
3221
3222 start = LABEL();
3223 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3224 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3225 #ifdef SUPPORT_UTF
3226 if (common->utf)
3227 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3228 #endif
3229
3230 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3231 {
3232 #ifndef COMPILE_PCRE8
3233 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3234 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3235 JUMPHERE(jump);
3236 #endif
3237 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3238 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3239 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3240 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3241 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3242 found = JUMP(SLJIT_C_NOT_ZERO);
3243 }
3244
3245 #ifdef SUPPORT_UTF
3246 if (common->utf)
3247 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3248 #endif
3249 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3250 #ifdef SUPPORT_UTF
3251 #if defined COMPILE_PCRE8
3252 if (common->utf)
3253 {
3254 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3255 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3256 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3257 }
3258 #elif defined COMPILE_PCRE16
3259 if (common->utf)
3260 {
3261 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3262 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3263 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3264 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3265 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3266 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3267 }
3268 #endif /* COMPILE_PCRE[8|16] */
3269 #endif /* SUPPORT_UTF */
3270 JUMPTO(SLJIT_JUMP, start);
3271 if (found != NULL)
3272 JUMPHERE(found);
3273 if (matches != NULL)
3274 set_jumps(matches, LABEL());
3275 JUMPHERE(quit);
3276
3277 if (firstline)
3278 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3279 }
3280
3281 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3282 {
3283 DEFINE_COMPILER;
3284 struct sljit_label *loop;
3285 struct sljit_jump *toolong;
3286 struct sljit_jump *alreadyfound;
3287 struct sljit_jump *found;
3288 struct sljit_jump *foundoc = NULL;
3289 struct sljit_jump *notfound;
3290 pcre_uint32 oc, bit;
3291
3292 SLJIT_ASSERT(common->req_char_ptr != 0);
3293 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3294 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3295 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3296 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3297
3298 if (has_firstchar)
3299 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3300 else
3301 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3302
3303 loop = LABEL();
3304 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3305
3306 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3307 oc = req_char;
3308 if (caseless)
3309 {
3310 oc = TABLE_GET(req_char, common->fcc, req_char);
3311 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3312 if (req_char > 127 && common->utf)
3313 oc = UCD_OTHERCASE(req_char);
3314 #endif
3315 }
3316 if (req_char == oc)
3317 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3318 else
3319 {
3320 bit = req_char ^ oc;
3321 if (is_powerof2(bit))
3322 {
3323 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3324 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3325 }
3326 else
3327 {
3328 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3329 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3330 }
3331 }
3332 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3333 JUMPTO(SLJIT_JUMP, loop);
3334
3335 JUMPHERE(found);
3336 if (foundoc)
3337 JUMPHERE(foundoc);
3338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3339 JUMPHERE(alreadyfound);
3340 JUMPHERE(toolong);
3341 return notfound;
3342 }
3343
3344 static void do_revertframes(compiler_common *common)
3345 {
3346 DEFINE_COMPILER;
3347 struct sljit_jump *jump;
3348 struct sljit_label *mainloop;
3349
3350 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3351 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3352 GET_LOCAL_BASE(TMP3, 0, 0);
3353
3354 /* Drop frames until we reach STACK_TOP. */
3355 mainloop = LABEL();
3356 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3357 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3358 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3359
3360 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3361 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3362 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3363 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3364 JUMPTO(SLJIT_JUMP, mainloop);
3365
3366 JUMPHERE(jump);
3367 jump = JUMP(SLJIT_C_SIG_LESS);
3368 /* End of dropping frames. */
3369 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3370
3371 JUMPHERE(jump);
3372 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3373 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3374 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3375 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3376 JUMPTO(SLJIT_JUMP, mainloop);
3377 }
3378
3379 static void check_wordboundary(compiler_common *common)
3380 {
3381 DEFINE_COMPILER;
3382 struct sljit_jump *skipread;
3383 jump_list *skipread_list = NULL;
3384 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3385 struct sljit_jump *jump;
3386 #endif
3387
3388 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3389
3390 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3391 /* Get type of the previous char, and put it to LOCALS1. */
3392 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3393 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3394 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3395 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3396 skip_char_back(common);
3397 check_start_used_ptr(common);
3398 read_char(common);
3399
3400 /* Testing char type. */
3401 #ifdef SUPPORT_UCP
3402 if (common->use_ucp)
3403 {
3404 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3405 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3406 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3407 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3408 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3409 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3410 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3411 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3412 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3413 JUMPHERE(jump);
3414 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3415 }
3416 else
3417 #endif
3418 {
3419 #ifndef COMPILE_PCRE8
3420 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3421 #elif defined SUPPORT_UTF
3422 /* Here LOCALS1 has already been zeroed. */
3423 jump = NULL;
3424 if (common->utf)
3425 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3426 #endif /* COMPILE_PCRE8 */
3427 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3428 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3429 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3430 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3431 #ifndef COMPILE_PCRE8
3432 JUMPHERE(jump);
3433 #elif defined SUPPORT_UTF
3434 if (jump != NULL)
3435 JUMPHERE(jump);
3436 #endif /* COMPILE_PCRE8 */
3437 }
3438 JUMPHERE(skipread);
3439
3440 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3441 check_str_end(common, &skipread_list);
3442 peek_char(common);
3443
3444 /* Testing char type. This is a code duplication. */
3445 #ifdef SUPPORT_UCP
3446 if (common->use_ucp)
3447 {
3448 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3449 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3450 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3451 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3452 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3453 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3454 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3455 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3456 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3457 JUMPHERE(jump);
3458 }
3459 else
3460 #endif
3461 {
3462 #ifndef COMPILE_PCRE8
3463 /* TMP2 may be destroyed by peek_char. */
3464 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3465 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3466 #elif defined SUPPORT_UTF
3467 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3468 jump = NULL;
3469 if (common->utf)
3470 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3471 #endif
3472 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3473 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3474 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3475 #ifndef COMPILE_PCRE8
3476 JUMPHERE(jump);
3477 #elif defined SUPPORT_UTF
3478 if (jump != NULL)
3479 JUMPHERE(jump);
3480 #endif /* COMPILE_PCRE8 */
3481 }
3482 set_jumps(skipread_list, LABEL());
3483
3484 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3485 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3486 }
3487
/*
range format:

ranges[0] = number of bit-change positions stored (at most MAX_RANGE_SIZE; -1 means invalid range).
ranges[1] = value of the first bit (0 or 1).
ranges[2] .. ranges[1 + ranges[0]] = positions of the bit changes (a position is recorded when the
  current bit is not equal to the previous one).
*/
3495
3496 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3497 {
3498 DEFINE_COMPILER;
3499 struct sljit_jump *jump;
3500
3501 if (ranges[0] < 0)
3502 return FALSE;
3503
3504 switch(ranges[0])
3505 {
3506 case 1:
3507 if (readch)
3508 read_char(common);
3509 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3510 return TRUE;
3511
3512 case 2:
3513 if (readch)
3514 read_char(common);
3515 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3516 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3517 return TRUE;
3518
3519 case 4:
3520 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3521 {
3522 if (readch)
3523 read_char(common);
3524 if (ranges[1] != 0)
3525 {
3526 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3527 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3528 }
3529 else
3530 {
3531 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3532 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3533 JUMPHERE(jump);
3534 }
3535 return TRUE;
3536 }
3537 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3538 {
3539 if (readch)
3540 read_char(common);
3541 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3542 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3543 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3544 return TRUE;
3545 }
3546 return FALSE;
3547
3548 default:
3549 return FALSE;
3550 }
3551 }
3552
3553 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3554 {
3555 int i, bit, length;
3556 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3557
3558 bit = ctypes[0] & flag;
3559 ranges[0] = -1;
3560 ranges[1] = bit != 0 ? 1 : 0;
3561 length = 0;
3562
3563 for (i = 1; i < 256; i++)
3564 if ((ctypes[i] & flag) != bit)
3565 {
3566 if (length >= MAX_RANGE_SIZE)
3567 return;
3568 ranges[2 + length] = i;
3569 length++;
3570 bit ^= flag;
3571 }
3572
3573 if (bit != 0)
3574 {
3575 if (length >= MAX_RANGE_SIZE)
3576 return;
3577 ranges[2 + length] = 256;
3578 length++;
3579 }
3580 ranges[0] = length;
3581 }
3582
3583 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3584 {
3585 int ranges[2 + MAX_RANGE_SIZE];
3586 pcre_uint8 bit, cbit, all;
3587 int i, byte, length = 0;
3588
3589 bit = bits[0] & 0x1;
3590 ranges[1] = bit;
3591 /* Can be 0 or 255. */
3592 all = -bit;
3593
3594 for (i = 0; i < 256; )
3595 {
3596 byte = i >> 3;
3597 if ((i & 0x7) == 0 && bits[byte] == all)
3598 i += 8;
3599 else
3600 {
3601 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3602 if (cbit != bit)
3603 {
3604 if (length >= MAX_RANGE_SIZE)
3605 return FALSE;
3606 ranges[2 + length] = i;
3607 length++;
3608 bit = cbit;
3609 all = -cbit;
3610 }
3611 i++;
3612 }
3613 }
3614
3615 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3616 {
3617 if (length >= MAX_RANGE_SIZE)
3618 return FALSE;
3619 ranges[2 + length] = 256;
3620 length++;
3621 }
3622 ranges[0] = length;
3623
3624 return check_ranges(common, ranges, backtracks, FALSE);
3625 }
3626
3627 static void check_anynewline(compiler_common *common)
3628 {
3629 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3630 DEFINE_COMPILER;
3631
3632 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3633
3634 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3635 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3636 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3637 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3638 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3639 #ifdef COMPILE_PCRE8
3640 if (common->utf)
3641 {
3642 #endif
3643 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3644 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3645 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3646 #ifdef COMPILE_PCRE8
3647 }
3648 #endif
3649 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3650 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3651 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3652 }
3653
3654 static void check_hspace(compiler_common *common)
3655 {
3656 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3657 DEFINE_COMPILER;
3658
3659 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3660
3661 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3662 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3663 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3664 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3665 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3666 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3667 #ifdef COMPILE_PCRE8
3668 if (common->utf)
3669 {
3670 #endif
3671 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3672 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3673 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3674 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3675 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3676 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3677 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3678 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3679 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3680 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3681 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3682 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3683 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3684 #ifdef COMPILE_PCRE8
3685 }
3686 #endif
3687 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3688 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3689
3690 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3691 }
3692
3693 static void check_vspace(compiler_common *common)
3694 {
3695 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3696 DEFINE_COMPILER;
3697
3698 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3699
3700 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3701 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3702 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3703 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3704 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3705 #ifdef COMPILE_PCRE8
3706 if (common->utf)
3707 {
3708 #endif
3709 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3710 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3711 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3712 #ifdef COMPILE_PCRE8
3713 }
3714 #endif
3715 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3716 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3717
3718 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3719 }
3720
3721 #define CHAR1 STR_END
3722 #define CHAR2 STACK_TOP
3723
3724 static void do_casefulcmp(compiler_common *common)
3725 {
3726 DEFINE_COMPILER;
3727 struct sljit_jump *jump;
3728 struct sljit_label *label;
3729
3730 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3731 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3732 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3733 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3734 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3735 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3736
3737 label = LABEL();
3738 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3739 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3740 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3741 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3742 JUMPTO(SLJIT_C_NOT_ZERO, label);
3743
3744 JUMPHERE(jump);
3745 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3746 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3747 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3748 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3749 }
3750
3751 #define LCC_TABLE STACK_LIMIT
3752
3753 static void do_caselesscmp(compiler_common *common)
3754 {
3755 DEFINE_COMPILER;
3756 struct sljit_jump *jump;
3757 struct sljit_label *label;
3758
3759 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3760 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3761
3762 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3763 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3764 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3765 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3766 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3767 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3768
3769 label = LABEL();
3770 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3771 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3772 #ifndef COMPILE_PCRE8
3773 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3774 #endif
3775 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3776 #ifndef COMPILE_PCRE8
3777 JUMPHERE(jump);
3778 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3779 #endif
3780 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3781 #ifndef COMPILE_PCRE8
3782 JUMPHERE(jump);
3783 #endif
3784 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3785 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3786 JUMPTO(SLJIT_C_NOT_ZERO, label);
3787
3788 JUMPHERE(jump);
3789 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3790 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3791 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3792 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3793 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3794 }
3795
3796 #undef LCC_TABLE
3797 #undef CHAR1
3798 #undef CHAR2
3799
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two character sequences using the Unicode tables.
This is an ordinary C function, not emitted code.

src1 .. end1 is the first sequence; the second one starts at
args->uchar_ptr and must not run past args->end. A pair of characters
matches when they are equal, when the first equals the second plus its
other_case delta, or when the first is listed in the second's caseless set.

Returns the position reached in the second sequence when all of the first
sequence matched, NULL on a mismatch, and (pcre_uchar*)1 when the second
sequence ended too early. */
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
pcre_uint32 ch1, ch2;
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *rec;
const pcre_uint32 *set;

while (src1 < end1)
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(ch1, src1);
  GETCHARINC(ch2, src2);
  if (ch1 == ch2)
    continue;

  rec = GET_UCD(ch2);
  if (ch1 == ch2 + rec->other_case)
    continue;

  /* The set is scanned in ascending order; passing a larger entry without
  a hit means ch1 is not a member. */
  set = PRIV(ucd_caseless_sets) + rec->caseset;
  while (ch1 != *set)
    {
    if (ch1 < *set)
      return NULL;
    set++;
    }
  }

return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
3832
3833 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3834 compare_context* context, jump_list **backtracks)
3835 {
3836 DEFINE_COMPILER;
3837 unsigned int othercasebit = 0;
3838 pcre_uchar *othercasechar = NULL;
3839 #ifdef SUPPORT_UTF
3840 int utflength;
3841 #endif
3842
3843 if (caseless && char_has_othercase(common, cc))
3844 {
3845 othercasebit = char_get_othercase_bit(common, cc);
3846 SLJIT_ASSERT(othercasebit);
3847 /* Extracting bit difference info. */
3848 #if defined COMPILE_PCRE8
3849 othercasechar = cc + (othercasebit >> 8);
3850 othercasebit &= 0xff;
3851 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3852 /* Note that this code only handles characters in the BMP. If there
3853 ever are characters outside the BMP whose othercase differs in only one
3854 bit from itself (there currently are none), this code will need to be
3855 revised for COMPILE_PCRE32. */
3856 othercasechar = cc + (othercasebit >> 9);
3857 if ((othercasebit & 0x100) != 0)
3858 othercasebit = (othercasebit & 0xff) << 8;
3859 else
3860 othercasebit &= 0xff;
3861 #endif /* COMPILE_PCRE[8|16|32] */
3862 }
3863
3864 if (context->sourcereg == -1)
3865 {
3866 #if defined COMPILE_PCRE8
3867 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3868 if (context->length >= 4)
3869 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3870 else if (context->length >= 2)
3871 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3872 else
3873 #endif
3874 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3875 #elif defined COMPILE_PCRE16
3876 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3877 if (context->length >= 4)
3878 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3879 else
3880 #endif
3881 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3882 #elif defined COMPILE_PCRE32
3883 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3884 #endif /* COMPILE_PCRE[8|16|32] */
3885 context->sourcereg = TMP2;
3886 }
3887
3888 #ifdef SUPPORT_UTF
3889 utflength = 1;
3890 if (common->utf && HAS_EXTRALEN(*cc))
3891 utflength += GET_EXTRALEN(*cc);
3892
3893 do
3894 {
3895 #endif
3896
3897 context->length -= IN_UCHARS(1);
3898 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3899
3900 /* Unaligned read is supported. */
3901 if (othercasebit != 0 && othercasechar == cc)
3902 {
3903 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3904 context->oc.asuchars[context->ucharptr] = othercasebit;
3905 }
3906 else
3907 {
3908 context->c.asuchars[context->ucharptr] = *cc;
3909 context->oc.asuchars[context->ucharptr] = 0;
3910 }
3911 context->ucharptr++;
3912
3913 #if defined COMPILE_PCRE8
3914 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3915 #else
3916 if (context->ucharptr >= 2 || context->length == 0)
3917 #endif
3918 {
3919 if (context->length >= 4)
3920 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3921 else if (context->length >= 2)
3922 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3923 #if defined COMPILE_PCRE8
3924 else if (context->length >= 1)
3925 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3926 #endif /* COMPILE_PCRE8 */
3927 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3928
3929 switch(context->ucharptr)
3930 {
3931 case 4 / sizeof(pcre_uchar):
3932 if (context->oc.asint != 0)
3933 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3934 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3935 break;
3936
3937 case 2 / sizeof(pcre_uchar):
3938 if (context->oc.asushort != 0)
3939 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3940 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3941 break;
3942
3943 #ifdef COMPILE_PCRE8
3944 case 1:
3945 if (context->oc.asbyte != 0)
3946 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3947 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3948 break;
3949 #endif
3950
3951 default:
3952 SLJIT_ASSERT_STOP();
3953 break;
3954 }
3955 context->ucharptr = 0;
3956 }
3957
3958 #else
3959
3960 /* Unaligned read is unsupported or in 32 bit mode. */
3961 if (context->length >= 1)
3962 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3963
3964 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3965
3966 if (othercasebit != 0 && othercasechar == cc)
3967 {
3968 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3969 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3970 }
3971 else
3972 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3973
3974 #endif
3975
3976 cc++;
3977 #ifdef SUPPORT_UTF
3978 utflength--;
3979 }
3980 while (utflength > 0);
3981 #endif
3982
3983 return cc;
3984 }
3985
3986 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3987
/* Rebases typereg so that it holds (type - value): emits one SUB or ADD with
the difference to the currently tracked typeoffset (nothing when they are
already equal) and records value as the new typeoffset. Expects typereg and
typeoffset in the enclosing scope. Wrapped in do/while so that it expands to
a single statement; value is evaluated more than once. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)
3997
/* Rebases TMP1 so that it holds (character - value): emits one SUB or ADD
with the difference to the currently tracked charoffset (nothing when they
are already equal) and records value as the new charoffset. Expects
charoffset in the enclosing scope. Wrapped in do/while so that it expands to
a single statement; value is evaluated more than once. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4007
/* Emits the matching-path code for one extended character class (XCLASS).

   common     - compiler state; supplies the sljit compiler, the utf flag and
                the shared getucd fast-call list.
   cc         - points to the class flag byte (tested for XCL_NOT and XCL_MAP),
                optionally followed by a 32 byte bitmap, then by the item list
                (XCL_SINGLE, XCL_RANGE and, with SUPPORT_UCP, XCL_PROP and
                XCL_NOTPROP) terminated by XCL_END.
   backtracks - jump list which receives the jumps taken when the subject
                character is rejected.

   The item list is walked twice: the first pass counts the items and records
   which character properties are required, the second pass emits the compare
   instructions. For a non-negated class every item except the last one jumps
   to the local "found" list when it matches, and the last item jumps to
   backtracks when it does not match (inverted compare). For a negated class
   (XCL_NOT) every matching item jumps to backtracks. */
4008 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4009 {
4010 DEFINE_COMPILER;
4011 jump_list *found = NULL;
4012 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
4013 pcre_int32 c, charoffset;
4014 const pcre_uint32 *other_cases;
4015 struct sljit_jump *jump = NULL;
4016 pcre_uchar *ccbegin;
4017 int compares, invertcmp, numberofcmps;
4018 #ifdef SUPPORT_UCP
4019 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4020 BOOL charsaved = FALSE;
4021 int typereg = TMP1, scriptreg = TMP1;
4022 pcre_int32 typeoffset;
4023 #endif
4024
4025 /* Although SUPPORT_UTF must be defined, we are
4026 not necessarily in utf mode even in 8 bit mode. */
4027 detect_partial_match(common, backtracks);
4028 read_char(common);
4029
4030 if ((*cc++ & XCL_MAP) != 0)
4031 {
/* The character is kept in TMP3 because the bitmap test below overwrites TMP1. */
4032 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
/* Characters above 255 are not covered by the bitmap: skip the bitmap test for them. */
4033 #ifndef COMPILE_PCRE8
4034 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4035 #elif defined SUPPORT_UTF
4036 if (common->utf)
4037 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4038 #endif
4039
4040 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
4041 {
/* Generic bitmap lookup: the byte index is (char >> 3), the bit index is (char & 7). */
4042 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4043 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4044 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4045 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4046 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4047 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4048 }
4049
4050 #ifndef COMPILE_PCRE8
4051 JUMPHERE(jump);
4052 #elif defined SUPPORT_UTF
4053 if (common->utf)
4054 JUMPHERE(jump);
4055 #endif
4056 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4057 #ifdef SUPPORT_UCP
4058 charsaved = TRUE;
4059 #endif
/* Step over the 32 byte bitmap. */
4060 cc += 32 / sizeof(pcre_uchar);
4061 }
4062
4063 /* Scanning the necessary info. */
4064 ccbegin = cc;
4065 compares = 0;
4066 while (*cc != XCL_END)
4067 {
4068 compares++;
4069 if (*cc == XCL_SINGLE)
4070 {
4071 cc += 2;
4072 #ifdef SUPPORT_UTF
4073 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4074 #endif
4075 #ifdef SUPPORT_UCP
4076 needschar = TRUE;
4077 #endif
4078 }
4079 else if (*cc == XCL_RANGE)
4080 {
4081 cc += 2;
4082 #ifdef SUPPORT_UTF
4083 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4084 #endif
4085 cc++;
4086 #ifdef SUPPORT_UTF
4087 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4088 #endif
4089 #ifdef SUPPORT_UCP
4090 needschar = TRUE;
4091 #endif
4092 }
4093 #ifdef SUPPORT_UCP
4094 else
4095 {
4096 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4097 cc++;
4098 switch(*cc)
4099 {
4100 case PT_ANY:
4101 break;
4102
4103 case PT_LAMP:
4104 case PT_GC:
4105 case PT_PC:
4106 case PT_ALNUM:
4107 needstype = TRUE;
4108 break;
4109
4110 case PT_SC:
4111 needsscript = TRUE;
4112 break;
4113
4114 case PT_SPACE:
4115 case PT_PXSPACE:
4116 case PT_WORD:
4117 needstype = TRUE;
4118 needschar = TRUE;
4119 break;
4120
4121 case PT_CLIST:
4122 case PT_UCNC:
4123 needschar = TRUE;
4124 break;
4125
4126 default:
4127 SLJIT_ASSERT_STOP();
4128 break;
4129 }
/* A property item is three units long: item code, property type, property value. */
4130 cc += 2;
4131 }
4132 #endif
4133 }
4134
4135 #ifdef SUPPORT_UCP
4136 /* Simple register allocation. TMP1 is preferred if possible. */
4137 if (needstype || needsscript)
4138 {
4139 if (needschar && !charsaved)
4140 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4141 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4142 if (needschar)
4143 {
4144 if (needstype)
4145 {
4146 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4147 typereg = RETURN_ADDR;
4148 }
4149
4150 if (needsscript)
4151 scriptreg = TMP3;
4152 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4153 }
4154 else if (needstype && needsscript)
4155 scriptreg = TMP3;
4156 /* In all other cases only one of them was specified, and that can go to TMP1. */
4157
4158 if (needsscript)
4159 {
4160 if (scriptreg == TMP1)
4161 {
4162 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4163 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4164 }
4165 else
4166 {
4167 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4168 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4169 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4170 }
4171 }
4172 }
4173 #endif
4174
4175 /* Generating code. */
4176 cc = ccbegin;
4177 charoffset = 0;
4178 numberofcmps = 0;
4179 #ifdef SUPPORT_UCP
4180 typeoffset = 0;
4181 #endif
4182
4183 while (*cc != XCL_END)
4184 {
4185 compares--;
/* Only the last item of a non-negated class uses an inverted compare (it jumps to backtracks on mismatch). */
4186 invertcmp = (compares == 0 && list != backtracks);
4187 jump = NULL;
4188
4189 if (*cc == XCL_SINGLE)
4190 {
4191 cc ++;
4192 #ifdef SUPPORT_UTF
4193 if (common->utf)
4194 {
4195 GETCHARINC(c, cc);
4196 }
4197 else
4198 #endif
4199 c = *cc++;
4200
/* Up to three consecutive compares are accumulated in TMP2 before a single conditional jump is emitted. */
4201 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4202 {
4203 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4204 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4205 numberofcmps++;
4206 }
4207 else if (numberofcmps > 0)
4208 {
4209 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4210 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4211 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4212 numberofcmps = 0;
4213 }
4214 else
4215 {
4216 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4217 numberofcmps = 0;
4218 }
4219 }
4220 else if (*cc == XCL_RANGE)
4221 {
4222 cc ++;
4223 #ifdef SUPPORT_UTF
4224 if (common->utf)
4225 {
4226 GETCHARINC(c, cc);
4227 }
4228 else
4229 #endif
4230 c = *cc++;
/* Rebase TMP1 to the range start, so that the range test becomes one unsigned compare with the range end. */
4231 SET_CHAR_OFFSET(c);
4232 #ifdef SUPPORT_UTF
4233 if (common->utf)
4234 {
4235 GETCHARINC(c, cc);
4236 }
4237 else
4238 #endif
4239 c = *cc++;
4240 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4241 {
4242 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4243 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4244 numberofcmps++;
4245 }
4246 else if (numberofcmps > 0)
4247 {
4248 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4249 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4250 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4251 numberofcmps = 0;
4252 }
4253 else
4254 {
4255 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4256 numberofcmps = 0;
4257 }
4258 }
4259 #ifdef SUPPORT_UCP
4260 else
4261 {
4262 if (*cc == XCL_NOTPROP)
4263 invertcmp ^= 0x1;
4264 cc++;
4265 switch(*cc)
4266 {
4267 case PT_ANY:
/* Items which need no code are skipped with "continue". The rest of the
item (property type and value) must be stepped over first, because the
"cc += 2" after the switch is bypassed by the continue; otherwise the
property type would be read as the next item code. */
4268 if (list != backtracks)
4269 {
4270 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4271 { cc += 2; continue; }
4272 }
4273 else if (cc[-1] == XCL_NOTPROP)
4274 { cc += 2; continue; }
4275 jump = JUMP(SLJIT_JUMP);
4276 break;
4277
4278 case PT_LAMP:
4279 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4280 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4281 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4282 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4283 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4284 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4285 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4286 break;
4287
4288 case PT_GC:
4289 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4290 SET_TYPE_OFFSET(c);
4291 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4292 break;
4293
4294 case PT_PC:
4295 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4296 break;
4297
4298 case PT_SC:
4299 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4300 break;
4301
4302 case PT_SPACE:
4303 case PT_PXSPACE:
4304 if (*cc == PT_SPACE)
4305 {
/* For PT_SPACE character 11 skips the 9..13 range test below, leaving TMP2 as zero. */
4306 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4307 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4308 }
4309 SET_CHAR_OFFSET(9);
4310 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4311 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4312 if (*cc == PT_SPACE)
4313 JUMPHERE(jump);
4314
4315 SET_TYPE_OFFSET(ucp_Zl);
4316 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4317 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4318 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4319 break;
4320
4321 case PT_WORD:
4322 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4323 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4324 /* Fall through. */
4325
4326 case PT_ALNUM:
4327 SET_TYPE_OFFSET(ucp_Ll);
4328 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4329 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4330 SET_TYPE_OFFSET(ucp_Nd);
4331 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4332 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4333 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4334 break;
4335
4336 case PT_CLIST:
4337 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4338
4339 /* At least three characters are required.
4340 Otherwise this case would be handled by the normal code path. */
4341 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4342 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4343
4344 /* Optimizing character pairs, if their difference is power of 2. */
4345 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4346 {
4347 if (charoffset == 0)
4348 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4349 else
4350 {
4351 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4352 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4353 }
4354 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4355 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4356 other_cases += 2;
4357 }
4358 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4359 {
4360 if (charoffset == 0)
4361 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4362 else
4363 {
/* The mask must be the bit which distinguishes other_cases[1] from
other_cases[2], the same as in the charoffset == 0 path above, since
the result is compared with other_cases[2]. */
4364 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4365 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4366 }
4367 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4368 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4369
4370 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4371 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4372
4373 other_cases += 3;
4374 }
4375 else
4376 {
4377 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4378 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4379 }
4380
/* The remaining characters of the set are compared one by one; flags are set only by the last compare. */
4381 while (*other_cases != NOTACHAR)
4382 {
4383 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4384 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4385 }
4386 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4387 break;
4388
4389 case PT_UCNC:
4390 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4391 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4392 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4393 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4394 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4395 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4396
4397 SET_CHAR_OFFSET(0xa0);
4398 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4399 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4400 SET_CHAR_OFFSET(0);
4401 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4402 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4403 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4404 break;
4405 }
4406 cc += 2;
4407 }
4408 #endif
4409
/* Every item except the last one jumps to "list"; the last one always jumps to backtracks. */
4410 if (jump != NULL)
4411 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4412 }
4413
4414 if (found != NULL)
4415 set_jumps(found, LABEL());
4416 }
4417
4418 #undef SET_TYPE_OFFSET
4419 #undef SET_CHAR_OFFSET
4420
4421 #endif
4422
/* Emits the matching-path code for one opcode which is handled here as a
   single item (anchors, character types, single characters, classes, OP_REVERSE).

   common     - compiler state (sljit compiler, utf flag, newline settings, mode).
   type       - the opcode to compile.
   cc         - points to the data which follows the opcode.
   backtracks - jump list which receives the jumps taken when the match fails.

   Returns the address which follows the data consumed by this opcode.
   An opcode which is not listed in the switch reaches SLJIT_ASSERT_STOP(). */
4423 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4424 {
4425 DEFINE_COMPILER;
4426 int length;
4427 unsigned int c, oc, bit;
4428 compare_context context;
4429 struct sljit_jump *jump[4];
4430 jump_list *end_list;
4431 #ifdef SUPPORT_UTF
4432 struct sljit_label *label;
4433 #ifdef SUPPORT_UCP
4434 pcre_uchar propdata[5];
4435 #endif
4436 #endif
4437
4438 switch(type)
4439 {
/* Backtrack unless STR_PTR is equal to the "begin" field of the arguments. */
4440 case OP_SOD:
4441 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4442 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4443 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4444 return cc;
4445
/* Backtrack unless STR_PTR is equal to the "str" field of the arguments. */
4446 case OP_SOM:
4447 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4448 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4449 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4450 return cc;
4451
/* The shared wordboundary routine is called; the flags it leaves decide the backtrack. */
4452 case OP_NOT_WORD_BOUNDARY:
4453 case OP_WORD_BOUNDARY:
4454 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4455 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4456 return cc;
4457
4458 case OP_NOT_DIGIT:
4459 case OP_DIGIT:
4460 /* Digits are usually 0-9, so it is worthwhile to optimize them. */
/* The value -2 in digits[0] marks a range table which has not been computed yet. */
4461 if (common->digits[0] == -2)
4462 get_ctype_ranges(common, ctype_digit, common->digits);
4463 detect_partial_match(common, backtracks);
4464 /* Flip the starting bit in the negative case. */
4465 if (type == OP_NOT_DIGIT)
4466 common->digits[1] ^= 1;
4467 if (!check_ranges(common, common->digits, backtracks, TRUE))
4468 {
4469 read_char8_type(common);
4470 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4471 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4472 }
/* Restore the starting bit, since the digits table is stored in the shared compiler state. */
4473 if (type == OP_NOT_DIGIT)
4474 common->digits[1] ^= 1;
4475 return cc;
4476
4477 case OP_NOT_WHITESPACE:
4478 case OP_WHITESPACE:
4479 detect_partial_match(common, backtracks);
4480 read_char8_type(common);
4481 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4482 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4483 return cc;
4484
4485 case OP_NOT_WORDCHAR:
4486 case OP_WORDCHAR:
4487 detect_partial_match(common, backtracks);
4488 read_char8_type(common);
4489 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4490 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4491 return cc;
4492
4493 case OP_ANY:
4494 detect_partial_match(common, backtracks);
4495 read_char(common);
/* A fixed newline above 255 holds two characters: the first one is in bits 8-15, the second one in bits 0-7. */
4496 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4497 {
4498 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4499 end_list = NULL;
4500 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4501 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4502 else
4503 check_str_end(common, &end_list);
4504
4505 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4506 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4507 set_jumps(end_list, LABEL());
4508 JUMPHERE(jump[0]);
4509 }
4510 else
4511 check_newlinechar(common, common->nltype, backtracks, TRUE);
4512 return cc;
4513
4514 case OP_ALLANY:
4515 detect_partial_match(common, backtracks);
4516 #ifdef SUPPORT_UTF
4517 if (common->utf)
4518 {
/* The character value is not needed, only STR_PTR is moved past the whole character. */
4519 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4520 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4521 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4522 #if defined COMPILE_PCRE8
4523 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4524 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4525 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4526 #elif defined COMPILE_PCRE16
4527 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4528 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4529 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4530 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4531 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4532 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4533 #endif
4534 JUMPHERE(jump[0]);
4535 #endif /* COMPILE_PCRE[8|16] */
4536 return cc;
4537 }
4538 #endif
4539 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4540 return cc;
4541
4542 case OP_ANYBYTE:
4543 detect_partial_match(common, backtracks);
4544 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4545 return cc;
4546
4547 #ifdef SUPPORT_UTF
4548 #ifdef SUPPORT_UCP
/* A single property is compiled as a temporary extended class: flag byte 0
(no XCL_NOT, no XCL_MAP), one property item and the XCL_END terminator. */
4549 case OP_NOTPROP:
4550 case OP_PROP:
4551 propdata[0] = 0;
4552 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4553 propdata[2] = cc[0];
4554 propdata[3] = cc[1];
4555 propdata[4] = XCL_END;
4556 compile_xclass_matchingpath(common, propdata, backtracks);
4557 return cc + 2;
4558 #endif
4559 #endif
4560
4561 case OP_ANYNL:
4562 detect_partial_match(common, backtracks);
4563 read_char(common);
4564 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4565 /* We don't need to handle soft partial matching case. */
4566 end_list = NULL;
4567 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4568 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4569 else
4570 check_str_end(common, &end_list);
/* After a CR, a following NL is consumed as well. */
4571 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4572 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4573 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4574 jump[2] = JUMP(SLJIT_JUMP);
4575 JUMPHERE(jump[0]);
4576 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4577 set_jumps(end_list, LABEL());
4578 JUMPHERE(jump[1]);
4579 JUMPHERE(jump[2]);
4580 return cc;
4581
4582 case OP_NOT_HSPACE:
4583 case OP_HSPACE:
4584 detect_partial_match(common, backtracks);
4585 read_char(common);
4586 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4587 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4588 return cc;
4589
4590 case OP_NOT_VSPACE:
4591 case OP_VSPACE:
4592 detect_partial_match(common, backtracks);
4593 read_char(common);
4594 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4595 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4596 return cc;
4597
4598 #ifdef SUPPORT_UCP
4599 case OP_EXTUNI:
4600 detect_partial_match(common, backtracks);
4601 read_char(common);
4602 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4603 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4604 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved to LOCALS0 and holds the gbprop value of the previous character inside the loop. */
4605 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4606 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4607
4608 label = LABEL();
4609 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* TMP3 remembers the position before the next character, so it can be restored when the loop exits. */
4610 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4611 read_char(common);
4612 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4613 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4614 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4615
/* The ucp_gbtable entry of the previous gbprop is tested against the bit selected by the current gbprop. */
4616 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4617 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4618 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4619 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4620 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4621 JUMPTO(SLJIT_C_NOT_ZERO, label);
4622
4623 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4624 JUMPHERE(jump[0]);
4625 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4626
4627 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4628 {
4629 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4630 /* Since we successfully read a char above, partial matching must occur. */
4631 check_partial(common, TRUE);
4632 JUMPHERE(jump[0]);
4633 }
4634 return cc;
4635 #endif
4636
4637 case OP_EODN:
4638 /* Requires rather complex checks. */
/* jump[0] is taken when STR_PTR has already reached STR_END. */
4639 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4640 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4641 {
4642 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4643 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4644 if (common->mode == JIT_COMPILE)
4645 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4646 else
4647 {
4648 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4649 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4650 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4651 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4652 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4653 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4654 check_partial(common, TRUE);
4655 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4656 JUMPHERE(jump[1]);
4657 }
4658 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4659 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4660 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4661 }
4662 else if (common->nltype == NLTYPE_FIXED)
4663 {
4664 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4665 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4666 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4667 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4668 }
4669 else
4670 {
4671 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4672 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4673 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4674 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4675 jump[2] = JUMP(SLJIT_C_GREATER);
4676 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4677 /* Equal. */
4678 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4679 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4680 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4681
4682 JUMPHERE(jump[1]);
4683 if (common->nltype == NLTYPE_ANYCRLF)
4684 {
4685 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4686 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4687 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4688 }
4689 else
4690 {
/* STR_PTR is saved in LOCALS1 while the last character is read, and restored afterwards. */
4691 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4692 read_char(common);
4693 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4694 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4695 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4696 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4697 }
4698 JUMPHERE(jump[2]);
4699 JUMPHERE(jump[3]);
4700 }
4701 JUMPHERE(jump[0]);
4702 check_partial(common, FALSE);
4703 return cc;
4704
4705 case OP_EOD:
4706 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4707 check_partial(common, FALSE);
4708 return cc;
4709
/* Backtrack when STR_PTR is after "begin", or when the "notbol" argument is not zero. */
4710 case OP_CIRC:
4711 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4712 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4713 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4714 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4715 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4716 return cc;
4717
4718 case OP_CIRCM:
4719 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4720 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4721 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4722 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4723 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4724 jump[0] = JUMP(SLJIT_JUMP);
4725 JUMPHERE(jump[1]);
4726
/* Not at "begin": the preceding character(s) must form a newline, and STR_PTR must be before STR_END. */
4727 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4728 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4729 {
4730 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4731 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4732 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4733 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4734 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4735 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4736 }
4737 else
4738 {
4739 skip_char_back(common);
4740 read_char(common);
4741 check_newlinechar(common, common->nltype, backtracks, FALSE);
4742 }
4743 JUMPHERE(jump[0]);
4744 return cc;
4745
4746 case OP_DOLL:
4747 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4748 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4749 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4750
/* Without the endonly setting the OP_EODN code is reused; otherwise only the end of the subject is accepted. */
4751 if (!common->endonly)
4752 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4753 else
4754 {
4755 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4756 check_partial(common, FALSE);
4757 }
4758 return cc;
4759
4760 case OP_DOLLM:
4761 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4762 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4763 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4764 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4765 check_partial(common, FALSE);
4766 jump[0] = JUMP(SLJIT_JUMP);
4767 JUMPHERE(jump[1]);
4768
4769 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4770 {
4771 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4772 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4773 if (common->mode == JIT_COMPILE)
4774 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4775 else
4776 {
4777 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4778 /* STR_PTR = STR_END - IN_UCHARS(1) */
4779 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4780 check_partial(common, TRUE);
4781 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4782 JUMPHERE(jump[1]);
4783 }
4784
4785 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4786 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4787 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4788 }
4789 else
4790 {
4791 peek_char(common);
4792 check_newlinechar(common, common->nltype, backtracks, FALSE);
4793 }
4794 JUMPHERE(jump[0]);
4795 return cc;
4796
4797 case OP_CHAR:
4798 case OP_CHARI:
4799 length = 1;
4800 #ifdef SUPPORT_UTF
4801 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4802 #endif
/* In JIT_COMPILE mode the character is compared as a fixed length unit sequence,
unless it is caseless with an other case which has no othercase bit. */
4803 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4804 {
4805 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4806 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4807
4808 context.length = IN_UCHARS(length);
4809 context.sourcereg = -1;
4810 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4811 context.ucharptr = 0;
4812 #endif
4813 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4814 }
4815 detect_partial_match(common, backtracks);
4816 read_char(common);
4817 #ifdef SUPPORT_UTF
4818 if (common->utf)
4819 {
4820 GETCHAR(c, cc);
4821 }
4822 else
4823 #endif
4824 c = *cc;
4825 if (type == OP_CHAR || !char_has_othercase(common, cc))
4826 {
4827 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4828 return cc + length;
4829 }
4830 oc = char_othercase(common, c);
4831 bit = c ^ oc;
/* When the two cases differ in one bit only, setting that bit allows a single compare. */
4832 if (is_powerof2(bit))
4833 {
4834 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4835 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4836 return cc + length;
4837 }
4838 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4839 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4840 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4841 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4842 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4843 return cc + length;
4844
4845 case OP_NOT:
4846 case OP_NOTI:
4847 detect_partial_match(common, backtracks);
4848 length = 1;
4849 #ifdef SUPPORT_UTF
4850 if (common->utf)
4851 {
4852 #ifdef COMPILE_PCRE8
4853 c = *cc;
4854 if (c < 128)
4855 {
/* Only the first byte of the subject character is compared, then the whole character is skipped. */
4856 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4857 if (type == OP_NOT || !char_has_othercase(common, cc))
4858 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4859 else
4860 {
4861 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4862 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4863 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4864 }
4865 /* Skip the variable-length character. */
4866 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4867 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4868 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4869 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4870 JUMPHERE(jump[0]);
4871 return cc + 1;
4872 }
4873 else
4874 #endif /* COMPILE_PCRE8 */
4875 {
4876 GETCHARLEN(c, cc, length);
4877 read_char(common);
4878 }
4879 }
4880 else
4881 #endif /* SUPPORT_UTF */
4882 {
4883 read_char(common);
4884 c = *cc;
4885 }
4886
4887 if (type == OP_NOT || !char_has_othercase(common, cc))
4888 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4889 else
4890 {
4891 oc = char_othercase(common, c);
4892 bit = c ^ oc;
4893 if (is_powerof2(bit))
4894 {
4895 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4896 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4897 }
4898 else
4899 {
4900 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4901 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4902 }
4903 }
4904 return cc + length;
4905
4906 case OP_CLASS:
4907 case OP_NCLASS:
4908 detect_partial_match(common, backtracks);
4909 read_char(common);
/* check_class_ranges returns non-zero when it has handled the class itself; the 32 byte bitmap is then only skipped. */
4910 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4911 return cc + 32 / sizeof(pcre_uchar);
4912
4913 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4914 jump[0] = NULL;
4915 #ifdef COMPILE_PCRE8
4916 /* This check only affects 8 bit mode. In other modes, we
4917 always need to compare the value with 255. */
4918 if (common->utf)
4919 #endif /* COMPILE_PCRE8 */
4920 {
/* Characters above 255 backtrack for OP_CLASS, and skip the bitmap test (accepted) for OP_NCLASS. */
4921 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4922 if (type == OP_CLASS)
4923 {
4924 add_jump(compiler, backtracks, jump[0]);
4925 jump[0] = NULL;
4926 }
4927 }
4928 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Bitmap lookup: the byte index is (char >> 3), the bit index is (char & 7). */
4929 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4930 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4931 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4932 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4933 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4934 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4935 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4936 if (jump[0] != NULL)
4937 JUMPHERE(jump[0]);
4938 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4939 return cc + 32 / sizeof(pcre_uchar);
4940
4941 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4942 case OP_XCLASS:
4943 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4944 return cc + GET(cc, 0) - 1;
4945 #endif
4946
/* Moves STR_PTR back by "length" characters; backtracks when that would pass the "begin" argument. */
4947 case OP_REVERSE:
4948 length = GET(cc, 0);
4949 if (length == 0)
4950 return cc + LINK_SIZE;
4951 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4952 #ifdef SUPPORT_UTF
4953 if (common->utf)
4954 {
/* In utf mode the characters are stepped back one by one, TMP2 is the remaining count. */
4955 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4956 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4957 label = LABEL();
4958 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4959 skip_char_back(common);
4960 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4961 JUMPTO(SLJIT_C_NOT_ZERO, label);
4962 }
4963 else
4964 #endif
4965 {
4966 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4967 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4968 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4969 }
4970 check_start_used_ptr(common);
4971 return cc + LINK_SIZE;
4972 }
4973 SLJIT_ASSERT_STOP();
4974 return cc;
4975 }
4976
4977 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4978 {
4979 /* This function consumes at least one input character. */
4980 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4981 DEFINE_COMPILER;
4982 pcre_uchar *ccbegin = cc;
4983 compare_context context;
4984 int size;
4985
4986 context.length = 0;
4987 do
4988 {
4989 if (cc >= ccend)
4990 break;
4991
4992 if (*cc == OP_CHAR)
4993 {
4994 size = 1;
4995 #ifdef SUPPORT_UTF
4996 if (common->utf && HAS_EXTRALEN(cc[1]))
4997 size += GET_EXTRALEN(cc[1]);
4998 #endif
4999 }
5000 else if (*cc == OP_CHARI)
5001 {
5002 size = 1;
5003 #ifdef SUPPORT_UTF
5004 if (common->utf)
5005 {
5006 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5007 size = 0;
5008 else if (HAS_EXTRALEN(cc[1]))
5009 size += GET_EXTRALEN(cc[1]);
5010 }
5011 else
5012 #endif
5013 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5014 size = 0;
5015 }
5016 else
5017 size = 0;
5018
5019 cc += 1 + size;
5020 context.length += IN_UCHARS(size);
5021 }
5022 while (size > 0 && context.length <= 128);
5023
5024 cc = ccbegin;
5025 if (context.length > 0)
5026 {
5027 /* We have a fixed-length byte sequence. */
5028 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5029 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5030
5031 context.sourcereg = -1;
5032 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5033 context.ucharptr = 0;
5034 #endif
5035 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5036 return cc;
5037 }
5038
5039 /* A non-fixed length character will be checked if length == 0. */
5040 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5041 }
5042
5043 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5044 {
5045 DEFINE_COMPILER;
5046 int offset = GET2(cc, 1) << 1;
5047
5048 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5049 if (!common->jscript_compat)
5050 {
5051 if (backtracks == NULL)
5052 {
5053 /* OVECTOR(1) contains the "string begin - 1" constant. */
5054 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5055 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5056 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5057 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5058 return JUMP(SLJIT_C_NOT_ZERO);
5059 }
5060 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5061 }
5062 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5063 }
5064
5065 /* Forward definitions. */
5066 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5067 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5068
/* Allocates a backtrack record of (size) bytes from the compiler's memory
pool, clears it, stores (ccstart) in it and pushes it in front of parent->top.
The names `compiler`, `backtrack` and `parent` must be in scope where the
macro is expanded. If the compiler is in an error state after the allocation,
the enclosing function returns `error` before the record is touched. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for use inside functions returning void. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the in-scope `backtrack` pointer as a pointer to the given record
type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5096
/* Emits the code that compares the subject at STR_PTR with the text captured
by the group referenced by the OP_REF / OP_REFI item at cc.

withchecks: when TRUE, a start slot equal to OVECTOR(1) is sent to backtracks
  (unless jscript_compat is set) and a jump for the "start == end" case is
  created.
emptyfail:  decides what happens with that "start == end" jump: TRUE adds it
  to backtracks, FALSE makes it land right after the emitted comparison.

Returns cc + 1 + IMM2_SIZE. */
5097 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5098 {
5099 DEFINE_COMPILER;
5100 int offset = GET2(cc, 1) << 1;
/* Jump taken when the referenced capture has start == end (only created when withchecks is set). */
5101 struct sljit_jump *jump = NULL;
5102 struct sljit_jump *partial;
5103 struct sljit_jump *nopartial;
5104
5105 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5106 /* OVECTOR(1) contains the "string begin - 1" constant. */
5107 if (withchecks && !common->jscript_compat)
5108 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5109
5110 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5111 if (common->utf && *cc == OP_REFI)
5112 {
/* Caseless reference in UTF mode: the comparison is delegated to the
do_utf_caselesscmp() helper through a three-argument call. */
5113 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5114 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5115 if (withchecks)
5116 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5117
5118 /* Needed to save important temporary registers. */
5119 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5120 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5121 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5122 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5123 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Helper result: 0 and 1 are special values, anything else becomes the new
STR_PTR below. In JIT_COMPILE mode both special values backtrack; in the other
modes 0 backtracks directly and 1 goes through check_partial() first. */
5124 if (common->mode == JIT_COMPILE)
5125 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5126 else
5127 {
5128 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5129 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5130 check_partial(common, FALSE);
5131 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5132 JUMPHERE(nopartial);
5133 }
5134 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5135 }
5136 else
5137 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5138 {
/* TMP2 = end slot - start slot, i.e. the length of the captured text. */
5139 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5140 if (withchecks)
5141 jump = JUMP(SLJIT_C_ZERO);
5142
/* Advance STR_PTR by that length; `partial` is taken when this passes STR_END. */
5143 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5144 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5145 if (common->mode == JIT_COMPILE)
5146 add_jump(compiler, backtracks, partial);
5147
/* Call the shared caseful/caseless compare routine; a non-zero TMP2 afterwards is treated as a mismatch. */
5148 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5149 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5150
5151 if (common->mode != JIT_COMPILE)
5152 {
/* Not JIT_COMPILE mode: when the reference would run past STR_END, compare
only the part that fits (skipped when nothing fits), then go through
check_partial() and backtrack. */
5153 nopartial = JUMP(SLJIT_JUMP);
5154 JUMPHERE(partial);
5155 /* TMP2 -= STR_END - STR_PTR */
5156 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5157 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5158 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5159 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5160 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5161 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5162 JUMPHERE(partial);
5163 check_partial(common, FALSE);
5164 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5165 JUMPHERE(nopartial);
5166 }
5167 }
5168
/* Resolve the "start == end" jump according to emptyfail. */
5169 if (jump != NULL)
5170 {
5171 if (emptyfail)
5172 add_jump(compiler, backtracks, jump);
5173 else
5174 JUMPHERE(jump);
5175 }
5176 return cc + 1 + IMM2_SIZE;
5177 }
5178
/* Emits the matching path for a back reference that is followed by a repeat
opcode (OP_CRSTAR .. OP_CRMINRANGE). A new iterator_backtrack record is pushed
on parent; its matchingpath label is set here.

min / max are decoded from the repeat opcode below; STAR and PLUS use max == 0,
and the code treats max == 0 as "no upper limit".

Returns the address after the repeat opcode (and its operands), or NULL when
the backtrack record cannot be allocated. */
5179 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5180 {
5181 DEFINE_COMPILER;
5182 backtrack_common *backtrack;
5183 pcre_uchar type;
5184 struct sljit_label *label;
5185 struct sljit_jump *zerolength;
5186 struct sljit_jump *jump = NULL;
5187 pcre_uchar *ccbegin = cc;
5188 int min = 0, max = 0;
5189 BOOL minimize;
5190
5191 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5192
/* The repeat opcode follows the reference item. */
5193 type = cc[1 + IMM2_SIZE];
/* NOTE(review): relies on the minimizing variants of the repeat opcodes having the low bit set - confirm against the opcode table. */
5194 minimize = (type & 0x1) != 0;
5195 switch(type)
5196 {
5197 case OP_CRSTAR:
5198 case OP_CRMINSTAR:
5199 min = 0;
5200 max = 0;
5201 cc += 1 + IMM2_SIZE + 1;
5202 break;
5203 case OP_CRPLUS:
5204 case OP_CRMINPLUS:
5205 min = 1;
5206 max = 0;
5207 cc += 1 + IMM2_SIZE + 1;
5208 break;
5209 case OP_CRQUERY:
5210 case OP_CRMINQUERY:
5211 min = 0;
5212 max = 1;
5213 cc += 1 + IMM2_SIZE + 1;
5214 break;
5215 case OP_CRRANGE:
5216 case OP_CRMINRANGE:
5217 min = GET2(cc, 1 + IMM2_SIZE + 1);
5218 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5219 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5220 break;
5221 default:
5222 SLJIT_ASSERT_STOP();
5223 break;
5224 }
5225
/* Non-minimizing repeat: match as many times as allowed, pushing STR_PTR on
the stack after iterations so the backtracking path can step back. */
5226 if (!minimize)
5227 {
5228 if (min == 0)
5229 {
5230 allocate_stack(common, 2);
5231 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5232 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5233 /* Temporary release of STR_PTR. */
5234 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5235 zerolength = compile_ref_checks(common, ccbegin, NULL);
5236 /* Restore if not zero length. */
5237 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5238 }
5239 else
5240 {
5241 allocate_stack(common, 1);
5242 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5243 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5244 }
5245
/* An iteration counter is kept in the POSSESSIVE0 local only when a limit above 1 has to be checked. */
5246 if (min > 1 || max > 1)
5247 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5248
/* Loop head: one iteration of the reference match. */
5249 label = LABEL();
5250 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5251
5252 if (min > 1 || max > 1)
5253 {
5254 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5255 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5256 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: loop again without saving a backtrack position. */
5257 if (min > 1)
5258 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5259 if (max > 1)
5260 {
/* Leave the loop once the counter reaches max; otherwise save STR_PTR and iterate. */
5261 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5262 allocate_stack(common, 1);
5263 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5264 JUMPTO(SLJIT_JUMP, label);
5265 JUMPHERE(jump);
5266 }
5267 }
5268
5269 if (max == 0)
5270 {
5271 /* Includes min > 1 case as well. */
5272 allocate_stack(common, 1);
5273 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5274 JUMPTO(SLJIT_JUMP, label);
5275 }
5276
5277 JUMPHERE(zerolength);
5278 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5279
5280 decrease_call_count(common);
5281 return cc;
5282 }
5283
/* Minimizing repeat: two stack slots are used, STACK(0) for a subject
position and STACK(1) as the iteration counter. */
5284 allocate_stack(common, 2);
5285 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5286 if (type != OP_CRMINSTAR)
5287 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5288
5289 if (min == 0)
5290 {
/* Zero iterations are acceptable: store STR_PTR and skip the first match attempt. */
5291 zerolength = compile_ref_checks(common, ccbegin, NULL);
5292 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5293 jump = JUMP(SLJIT_JUMP);
5294 }
5295 else
5296 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5297
/* Entry point for one more iteration. */
5298 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5299 if (max > 0)
5300 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5301
5302 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5303 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5304
5305 if (min > 1)
5306 {
/* Keep iterating until the counter in STACK(1) reaches min. */
5307 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5308 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5309 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5310 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5311 }
5312 else if (max > 0)
5313 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5314
5315 if (jump != NULL)
5316 JUMPHERE(jump);
5317 JUMPHERE(zerolength);
5318
5319 decrease_call_count(common);
5320 return cc;
5321 }
5322
5323 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5324 {
5325 DEFINE_COMPILER;
5326 backtrack_common *backtrack;
5327 recurse_entry *entry = common->entries;
5328 recurse_entry *prev = NULL;
5329 sljit_sw start = GET(cc, 1);
5330 pcre_uchar *start_cc;
5331 BOOL needs_control_head;
5332
5333 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5334
5335 /* Inlining simple patterns. */
5336 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5337 {
5338 start_cc = common->start + start;
5339 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5340 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5341 return cc + 1 + LINK_SIZE;
5342 }
5343
5344 while (entry != NULL)
5345 {
5346 if (entry->start == start)
5347 break;
5348 prev = entry;
5349 entry = entry->next;
5350 }
5351
5352 if (entry == NULL)
5353 {
5354 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5355 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5356 return NULL;
5357 entry->next = NULL;
5358 entry->entry = NULL;
5359 entry->calls = NULL;
5360 entry->start = start;
5361
5362 if (prev != NULL)
5363 prev->next = entry;
5364 else
5365 common->entries = entry;
5366 }
5367
5368 if (common->has_set_som && common->mark_ptr != 0)
5369 {
5370 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5371 allocate_stack(common, 2);
5372 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5373 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5374 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5375 }
5376 else if (common->has_set_som || common->mark_ptr != 0)
5377 {
5378 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5379 allocate_stack(common, 1);
5380 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5381 }
5382
5383 if (entry->entry == NULL)
5384 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5385 else
5386 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5387 /* Leave if the match is failed. */
5388 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5389 return cc + 1 + LINK_SIZE;
5390 }
5391
5392 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5393 {
5394 const pcre_uchar *begin = arguments->begin;
5395 int *offset_vector = arguments->offsets;
5396 int offset_count = arguments->offset_count;
5397 int i;
5398
5399 if (PUBL(callout) == NULL)
5400 return 0;
5401
5402 callout_block->version = 2;
5403 callout_block->callout_data = arguments->callout_data;
5404
5405 /* Offsets in subject. */
5406 callout_block->subject_length = arguments->end - arguments->begin;
5407 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5408 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5409 #if defined COMPILE_PCRE8
5410 callout_block->subject = (PCRE_SPTR)begin;
5411 #elif defined COMPILE_PCRE16
5412 callout_block->subject = (PCRE_SPTR16)begin;
5413 #elif defined COMPILE_PCRE32
5414 callout_block->subject = (PCRE_SPTR32)begin;
5415 #endif
5416
5417 /* Convert and copy the JIT offset vector to the offset_vector array. */
5418 callout_block->capture_top = 0;
5419 callout_block->offset_vector = offset_vector;
5420 for (i = 2; i < offset_count; i += 2)
5421 {
5422 offset_vector[i] = jit_ovector[i] - begin;
5423 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5424 if (jit_ovector[i] >= begin)
5425 callout_block->capture_top = i;
5426 }
5427
5428 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5429 if (offset_count > 0)
5430 offset_vector[0] = -1;
5431 if (offset_count > 1)
5432 offset_vector[1] = -1;
5433 return (*PUBL(callout))(callout_block);
5434 }
5435
/* Size of the callout_block area reserved on the JIT stack. */
5436 /* Aligning to 8 byte. */
5437 #define CALLOUT_ARG_SIZE \
5438 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5439
/* Offset of a callout_block field relative to STACK_TOP: the block occupies
the CALLOUT_ARG_SIZE bytes below STACK_TOP, hence the negative base. */
5440 #define CALLOUT_ARG_OFFSET(arg) \
5441 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5442
/* Emits the matching path for the callout item at cc: reserves room for a
callout_block on the JIT stack, fills in the fields known at this point,
calls do_callout() and dispatches on its return value (positive: backtrack,
negative: forced quit, zero: continue with the following code).

Returns cc + 2 + 2 * LINK_SIZE, or NULL when the backtrack record cannot be
allocated. */
5443 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5444 {
5445 DEFINE_COMPILER;
5446 backtrack_common *backtrack;
5447
5448 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5449
5450 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5451
5452 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5453 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5454 SLJIT_ASSERT(common->capture_last_ptr != 0);
/* cc[1] is stored as the callout number. */
5455 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5456 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5457
5458 /* These pointer sized fields temporarily store internal variables:
offset_vector carries STR_PTR and subject carries OVECTOR(0); do_callout()
converts them to offsets and then sets the real field values. */
5459 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5462
/* The mark field gets jit_arguments.mark_ptr when the pattern has a mark local, otherwise the immediate 0. */
5463 if (common->mark_ptr != 0)
5464 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5465 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5466 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5468
5469 /* Needed to save important temporary registers. */
5470 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* Second call argument: address of the callout_block; third: base of the JIT offset vector. */
5471 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5472 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5473 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* do_callout() returns an int: move it as a 32 bit signed value before testing its sign. */
5474 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5475 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5476 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5477
5478 /* Check return value. */
5479 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5480 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
/* Negative result: jump to the forced quit label if it already exists, otherwise queue the jump. */
5481 if (common->forced_quit_label == NULL)
5482 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5483 else
5484 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5485 return cc + 2 + 2 * LINK_SIZE;
5486 }
5487
5488 #undef CALLOUT_ARG_SIZE
5489 #undef CALLOUT_ARG_OFFSET
5490
/* Emits the matching path of an assertion (opcode in the range OP_ASSERT ..
OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO / OP_BRAMINZERO.

backtrack:   assertion record; framesize, private_data_ptr and (for the
             BRAZERO / BRAMINZERO forms) matchingpath are filled in or used.
conditional: TRUE selects backtrack->condfailed as the failure target instead
             of backtrack->common.topbacktracks; it must not be combined with
             a BRAZERO / BRAMINZERO prefix (asserted below).

The accept / then related fields of common are saved on entry and restored on
every exit path. Returns the address after the closing item of the assertion
(cc + 1 + LINK_SIZE), or NULL if the compiler enters an error state. */
5491 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5492 {
5493 DEFINE_COMPILER;
5494 int framesize;
5495 int extrasize;
5496 BOOL needs_control_head;
5497 int private_data_ptr;
5498 backtrack_common altbacktrack;
5499 pcre_uchar *ccbegin;
5500 pcre_uchar opcode;
5501 pcre_uchar bra = OP_BRA;
/* Collects the "alternative matched" jumps of positive assertions. */
5502 jump_list *tmp = NULL;
5503 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5504 jump_list **found;
5505 /* Saving previous accept variables. */
5506 int save_then_local_exit = common->then_local_exit;
5507 then_trap_backtrack *save_then_trap = common->then_trap;
5508 struct sljit_label *save_accept_label = common->accept_label;
5509 jump_list *save_then_quit = common->then_quit;
5510 jump_list *save_accept = common->accept;
5511 struct sljit_jump *jump;
5512 struct sljit_jump *brajump = NULL;
5513
5514 /* Assert captures then. */
5515 common->then_trap = NULL;
5516
5517 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5518 {
5519 SLJIT_ASSERT(!conditional);
5520 bra = *cc;
5521 cc++;
5522 }
5523 private_data_ptr = PRIVATE_DATA(cc);
5524 SLJIT_ASSERT(private_data_ptr != 0);
5525 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5526 backtrack->framesize = framesize;
5527 backtrack->private_data_ptr = private_data_ptr;
5528 opcode = *cc;
5529 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* When an alternative matches: a positive assertion continues at the local
success code (tmp), a negative assertion has failed and jumps to target. */
5530 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5531 ccbegin = cc;
5532 cc += GET(cc, 1);
5533
5534 if (bra == OP_BRAMINZERO)
5535 {
5536 /* This is a braminzero backtrack path. */
5537 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5538 free_stack(common, 1);
5539 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5540 }
5541
/* Save STR_PTR (and the control head if needed) on the stack; with a
non-negative framesize a frame is created as well and private_data_ptr is
redirected to it, its previous value being kept on the stack. */
5542 if (framesize < 0)
5543 {
5544 extrasize = needs_control_head ? 2 : 1;
5545 if (framesize == no_frame)
5546 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5547 allocate_stack(common, extrasize);
5548 if (needs_control_head)
5549 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5550 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5551 if (needs_control_head)
5552 {
5553 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5554 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5555 }
5556 }
5557 else
5558 {
5559 extrasize = needs_control_head ? 3 : 2;
5560 allocate_stack(common, framesize + extrasize);
5561 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5562 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5563 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5564 if (needs_control_head)
5565 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5566 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5567 if (needs_control_head)
5568 {
5569 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5570 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5572 }
5573 else
5574 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5575 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
5576 }
5577
5578 memset(&altbacktrack, 0, sizeof(backtrack_common));
5579 common->then_local_exit = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? 1 : -1;
5580 common->then_quit = NULL;
/* One pass of this loop per alternative of the assertion. */
5581 while (1)
5582 {
5583 common->accept_label = NULL;
5584 common->accept = NULL;
5585 altbacktrack.top = NULL;
5586 altbacktrack.topbacktracks = NULL;
5587
/* Every alternative after the first starts again from the saved STR_PTR. */
5588 if (*ccbegin == OP_ALT)
5589 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5590
5591 altbacktrack.cc = ccbegin;
5592 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5593 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5594 {
5595 common->then_local_exit = save_then_local_exit;
5596 common->then_trap = save_then_trap;
5597 common->accept_label = save_accept_label;
5598 common->then_quit = save_then_quit;
5599 common->accept = save_accept;
5600 return NULL;
5601 }
/* Accept jumps collected inside this alternative land here, at the end of its matching path. */
5602 common->accept_label = LABEL();
5603 if (common->accept != NULL)
5604 set_jumps(common->accept, common->accept_label);
5605
5606 /* Reset stack. */
5607 if (framesize < 0)
5608 {
5609 if (framesize == no_frame)
5610 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5611 else
5612 free_stack(common, extrasize);
5613 if (needs_control_head)
5614 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5615 }
5616 else
5617 {
5618 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5619 {
5620 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5621 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5622 if (needs_control_head)
5623 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5624 }
5625 else
5626 {
5627 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5628 if (needs_control_head)
5629 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5630 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5631 }
5632 }
5633
/* Negative assertion whose alternative matched: restore STR_PTR and the
private data as required by the enclosing form before jumping to `found`. */
5634 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5635 {
5636 /* We know that STR_PTR was stored on the top of the stack. */
5637 if (conditional)
5638 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5639 else if (bra == OP_BRAZERO)
5640 {
5641 if (framesize < 0)
5642 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5643 else
5644 {
5645 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5646 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5647 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5648 }
5649 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5650 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5651 }
5652 else if (framesize >= 0)
5653 {
5654 /* For OP_BRA and OP_BRAMINZERO. */
5655 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5656 }
5657 }
5658 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5659
/* Backtracking code of this alternative; its failures fall through to the next alternative (or to the code after the loop). */
5660 compile_backtrackingpath(common, altbacktrack.top);
5661 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5662 {
5663 common->then_local_exit = save_then_local_exit;
5664 common->then_trap = save_then_trap;
5665 common->accept_label = save_accept_label;
5666 common->then_quit = save_then_quit;
5667 common->accept = save_accept;
5668 return NULL;
5669 }
5670 set_jumps(altbacktrack.topbacktracks, LABEL());
5671
5672 if (*cc != OP_ALT)
5673 break;
5674
5675 ccbegin = cc;
5676 cc += GET(cc, 1);
5677 }
5678
5679 /* None of them matched. */
5680 if (common->then_quit != NULL)
5681 {
/* Landing code for the then_quit jumps: recompute STACK_TOP from
private_data_ptr (reverting frames if there are any); the normal fall-through
path jumps over it. */
5682 jump = JUMP(SLJIT_JUMP);
5683 set_jumps(common->then_quit, LABEL());
5684 SLJIT_ASSERT(framesize != no_stack);
5685 if (framesize < 0)
5686 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5687 else
5688 {
5689 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5690 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5691 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5692 }
5693 JUMPHERE(jump);
5694 }
5695
/* Put back the control head value that was saved in STACK(1) on entry. */
5696 if (needs_control_head)
5697 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5698
5699 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5700 {
5701 /* Assert is failed. */
5702 if (conditional || bra == OP_BRAZERO)
5703 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5704
5705 if (framesize < 0)
5706 {
5707 /* The topmost item should be 0. */
5708 if (bra == OP_BRAZERO)
5709 {
5710 if (extrasize == 2)
5711 free_stack(common, 1);
5712 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5713 }
5714 else
5715 free_stack(common, extrasize);
5716 }
5717 else
5718 {
5719 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5720 /* The topmost item should be 0. */
5721 if (bra == OP_BRAZERO)
5722 {
5723 free_stack(common, framesize + extrasize - 1);
5724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5725 }
5726 else
5727 free_stack(common, framesize + extrasize);
5728 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5729 }
/* Failure exit: goes to target, except for BRAZERO where it is bound to the matching path label further below. */
5730 jump = JUMP(SLJIT_JUMP);
5731 if (bra != OP_BRAZERO)
5732 add_jump(compiler, target, jump);
5733
5734 /* Assert is successful. */
5735 set_jumps(tmp, LABEL());
5736 if (framesize < 0)
5737 {
5738 /* We know that STR_PTR was stored on the top of the stack. */
5739 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5740 /* Keep the STR_PTR on the top of the stack. */
5741 if (bra == OP_BRAZERO)
5742 {
5743 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5744 if (extrasize == 2)
5745 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5746 }
5747 else if (bra == OP_BRAMINZERO)
5748 {
5749 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5750 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5751 }
5752 }
5753 else
5754 {
5755 if (bra == OP_BRA)
5756 {
5757 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5758 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5759 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5760 }
5761 else
5762 {
5763 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5764 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5765 if (extrasize == 2)
5766 {
5767 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5768 if (bra == OP_BRAMINZERO)
5769 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5770 }
5771 else
5772 {
5773 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5774 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5775 }
5776 }
5777 }
5778
5779 if (bra == OP_BRAZERO)
5780 {
5781 backtrack->matchingpath = LABEL();
5782 SET_LABEL(jump, backtrack->matchingpath);
5783 }
5784 else if (bra == OP_BRAMINZERO)
5785 {
5786 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5787 JUMPHERE(brajump);
5788 if (framesize >= 0)
5789 {
5790 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5791 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5792 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5793 }
5794 set_jumps(backtrack->common.topbacktracks, LABEL());
5795 }
5796 }
5797 else
5798 {
5799 /* AssertNot is successful. */
5800 if (framesize < 0)
5801 {
5802 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5803 if (bra != OP_BRA)
5804 {
5805 if (extrasize == 2)
5806 free_stack(common, 1);
5807 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5808 }
5809 else
5810 free_stack(common, extrasize);
5811 }
5812 else
5813 {
5814 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5815 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5816 /* The topmost item should be 0. */
5817 if (bra != OP_BRA)
5818 {
5819 free_stack(common, framesize + extrasize - 1);
5820 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5821 }
5822 else
5823 free_stack(common, framesize + extrasize);
5824 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5825 }
5826
5827 if (bra == OP_BRAZERO)
5828 backtrack->matchingpath = LABEL();
5829 else if (bra == OP_BRAMINZERO)
5830 {
5831 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5832 JUMPHERE(brajump);
5833 }
5834
/* With a BRAZERO / BRAMINZERO prefix the assertion's own failures are resolved here, so the list is cleared afterwards. */
5835 if (bra != OP_BRA)
5836 {
5837 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5838 set_jumps(backtrack->common.topbacktracks, LABEL());
5839 backtrack->common.topbacktracks = NULL;
5840 }
5841 }
5842
/* Restore the accept / then state saved on entry. */
5843 common->then_local_exit = save_then_local_exit;
5844 common->then_trap = save_then_trap;
5845 common->accept_label = save_accept_label;
5846 common->then_quit = save_then_quit;
5847 common->accept = save_accept;
5848 return cc + 1 + LINK_SIZE;
5849 }
5850
5851 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5852 {
5853 int condition = FALSE;
5854 pcre_uchar *slotA = name_table;
5855 pcre_uchar *slotB;
5856 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5857 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5858 sljit_sw no_capture;
5859 int i;
5860
5861 locals += refno & 0xff;
5862 refno >>= 8;
5863 no_capture = locals[1];
5864
5865 for (i = 0; i < name_count; i++)
5866 {
5867 if (GET2(slotA, 0) == refno) break;
5868 slotA += name_entry_size;
5869 }
5870
5871 if (i < name_count)
5872 {
5873 /* Found a name for the number - there can be only one; duplicate names
5874 for different numbers are allowed, but not vice versa. First scan down
5875 for duplicates. */
5876
5877 slotB = slotA;
5878 while (slotB > name_table)
5879 {
5880 slotB -= name_entry_size;
5881 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5882 {
5883 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5884 if (condition) break;
5885 }
5886 else break;
5887 }
5888
5889 /* Scan up for duplicates */
5890 if (!condition)
5891 {
5892 slotB = slotA;
5893 for (i++; i < name_count; i++)
5894 {
5895 slotB += name_entry_size;
5896 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5897 {
5898 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5899 if (condition) break;
5900 }
5901 else break;
5902 }
5903 }
5904 }
5905 return condition;
5906 }
5907
5908 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5909 {
5910 int condition = FALSE;
5911 pcre_uchar *slotA = name_table;
5912 pcre_uchar *slotB;
5913 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5914 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5915 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5916 sljit_uw i;
5917
5918 for (i = 0; i < name_count; i++)
5919 {
5920 if (GET2(slotA, 0) == recno) break;
5921 slotA += name_entry_size;
5922 }
5923
5924 if (i < name_count)
5925 {
5926 /* Found a name for the number - there can be only one; duplicate
5927 names for different numbers are allowed, but not vice versa. First
5928 scan down for duplicates. */
5929
5930 slotB = slotA;
5931 while (slotB > name_table)
5932 {
5933 slotB -= name_entry_size;
5934 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5935 {
5936 condition = GET2(slotB, 0) == group_num;
5937 if (condition) break;
5938 }
5939 else break;
5940 }
5941
5942 /* Scan up for duplicates */
5943 if (!condition)
5944 {
5945 slotB = slotA;
5946 for (i++; i < name_count; i++)
5947 {
5948 slotB += name_entry_size;
5949 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5950 {
5951 condition = GET2(slotB, 0) == group_num;
5952 if (condition) break;
5953 }
5954 else break;
5955 }
5956 }
5957 }
5958 return condition;
5959 }
5960
5961 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
5962 {
5963 DEFINE_COMPILER;
5964 int stacksize;
5965
5966 if (framesize < 0)
5967 {
5968 if (framesize == no_frame)
5969 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5970 else
5971 {
5972 stacksize = needs_control_head ? 1 : 0;
5973 if (ket != OP_KET || has_alternatives)
5974 stacksize++;
5975 free_stack(common, stacksize);
5976 }
5977
5978 if (needs_control_head)
5979 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
5980
5981 /* TMP2 which is set here used by OP_KETRMAX below. */
5982 if (ket == OP_KETRMAX)
5983 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5984 else if (ket == OP_KETRMIN)
5985 {
5986 /* Move the STR_PTR to the private_data_ptr. */
5987 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5988 }
5989 }
5990 else
5991 {
5992 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
5993 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
5994 if (needs_control_head)
5995 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
5996
5997 if (ket == OP_KETRMAX)
5998 {
5999 /* TMP2 which is set here used by OP_KETRMAX below. */
6000 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6001 }
6002 }
6003 if (needs_control_head)
6004 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6005 }
6006
6007 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6008 {
6009 DEFINE_COMPILER;
6010
6011 if (common->capture_last_ptr != 0)
6012 {
6013 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6014 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6015 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6016 stacksize++;
6017 }
6018 if (common->optimized_cbracket[offset >> 1] == 0)
6019 {
6020 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6021 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6022 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6023 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6024 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6025 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6026 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6027 stacksize += 2;
6028 }
6029 return stacksize;
6030 }
6031
6032 /*
6033 Handling bracketed expressions is probably the most complex part.
6034
6035 Stack layout naming characters:
6036 S - Push the current STR_PTR
6037 0 - Push a 0 (NULL)
6038 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6039 before the next alternative. Not pushed if there are no alternatives.
6040 M - Any values pushed by the current alternative. Can be empty, or anything.
6041 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6042 L - Push the previous local (pointed by localptr) to the stack
6043 () - optional values stored on the stack
6044 ()* - optional, can be stored multiple times
6045
6046 The following list shows the regular expression templates, their PCRE byte codes
6047 and stack layout supported by pcre-sljit.
6048
6049 (?:) OP_BRA | OP_KET A M
6050 () OP_CBRA | OP_KET C M
6051 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6052 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6053 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6054 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6055 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6056 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6057 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6058 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6059 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6060 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6061 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6062 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6063 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6064 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6065 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6066 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6067 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6068 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6069 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6070 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6071
6072
6073 Stack layout naming characters:
6074 A - Push the alternative index (starting from 0) on the stack.
6075 Not pushed if there are no alternatives.
6076 M - Any values pushed by the current alternative. Can be empty, or anything.
6077
6078 The next list shows the possible content of a bracket:
6079 (|) OP_*BRA | OP_ALT ... M A
6080 (?()|) OP_*COND | OP_ALT M A
6081 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6082 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6083 Or nothing, if trace is unnecessary
6084 */
6085
/* Emits the matching path of a bracketed group.

   common - state of the JIT compiler.
   cc     - points to the opening opcode of the group, or to the OP_BRAZERO /
            OP_BRAMINZERO that precedes it.
   parent - backtrack record the new bracket_backtrack is attached to; it is
            also read as a braminzero_backtrack when the group is prefixed
            with OP_BRAMINZERO.

   Returns the position reached after skipping every OP_ALT of the group and
   one more 1 + LINK_SIZE sized item (presumably the closing ket), or NULL
   when the compiler reports an error. A conditional group whose first item
   is OP_DEF emits no code: its backtrack record is dropped and
   bracketend(cc) is returned.

   stacksize is used both as a slot counter and, in the conditional-group
   branches, as a scratch variable for group / reference numbers. */
6086 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6087 {
6088 DEFINE_COMPILER;
6089 backtrack_common *backtrack;
6090 pcre_uchar opcode;
6091 int private_data_ptr = 0;
6092 int offset = 0;
6093 int stacksize;
6094 pcre_uchar *ccbegin;
6095 pcre_uchar *matchingpath;
6096 pcre_uchar bra = OP_BRA;
6097 pcre_uchar ket;
6098 assert_backtrack *assert;
6099 BOOL has_alternatives;
6100 BOOL needs_control_head = FALSE;
6101 struct sljit_jump *jump;
6102 struct sljit_jump *skip;
6103 struct sljit_label *rmaxlabel = NULL;
6104 struct sljit_jump *braminzerojump = NULL;
6105
6106 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6107
/* A leading OP_BRAZERO / OP_BRAMINZERO is remembered in bra and skipped;
   otherwise bra keeps its OP_BRA initial value. */
6108 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6109 {
6110 bra = *cc;
6111 cc++;
/* NOTE(review): redundant store - opcode is assigned unconditionally right
   after this block. */
6112 opcode = *cc;
6113 }
6114
6115 opcode = *cc;
6116 ccbegin = cc;
6117 matchingpath = ccbegin + 1 + LINK_SIZE;
6118
6119 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6120 {
6121 /* Drop this bracket_backtrack. */
6122 parent->top = backtrack->prev;
6123 return bracketend(cc);
6124 }
6125
/* ket is the opcode found 1 + LINK_SIZE code units before the end of the
   whole bracket. */
6126 ket = *(bracketend(cc) - 1 - LINK_SIZE);
6127 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6128 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Advance cc by the link value stored after the opening opcode; it then
   points to the item that ends the first alternative. */
6129 cc += GET(cc, 1);
6130
6131 has_alternatives = *cc == OP_ALT;
/* For conditional groups has_alternatives is recomputed from the kind of the
   condition. In the OP_NRREF case stacksize is only a scratch variable for
   the reference number. */
6132 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6133 {
6134 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
6135 if (*matchingpath == OP_NRREF)
6136 {
6137 stacksize = GET2(matchingpath, 1);
6138 if (common->currententry == NULL || stacksize == RREF_ANY)
6139 has_alternatives = FALSE;
6140 else if (common->currententry->start == 0)
6141 has_alternatives = stacksize != 0;
6142 else
6143 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6144 }
6145 }
6146
/* From here on an OP_COND closed by a repeating ket is treated as OP_SCOND,
   and OP_ONCE_NC is treated as OP_ONCE. */
6147 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6148 opcode = OP_SCOND;
6149 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6150 opcode = OP_ONCE;
6151
6152 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6153 {
6154 /* Capturing brackets has a pre-allocated space. */
6155 offset = GET2(ccbegin, 1 + LINK_SIZE);
/* After this branch offset holds twice the group number; private_data_ptr
   is either the OVECTOR_PRIV slot or, for brackets flagged in
   optimized_cbracket, the OVECTOR slot itself. */
6156 if (common->optimized_cbracket[offset] == 0)
6157 {
6158 private_data_ptr = OVECTOR_PRIV(offset);
6159 offset <<= 1;
6160 }
6161 else
6162 {
6163 offset <<= 1;
6164 private_data_ptr = OVECTOR(offset);
6165 }
6166 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6167 matchingpath += IMM2_SIZE;
6168 }
6169 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6170 {
6171 /* Other brackets simply allocate the next entry. */
6172 private_data_ptr = PRIVATE_DATA(ccbegin);
6173 SLJIT_ASSERT(private_data_ptr != 0);
6174 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6175 if (opcode == OP_ONCE)
6176 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6177 }
6178
6179 /* Instructions before the first alternative. */
/* The slots are first counted, then allocated in one step, then filled. */
6180 stacksize = 0;
6181 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6182 stacksize++;
6183 if (bra == OP_BRAZERO)
6184 stacksize++;
6185
6186 if (stacksize > 0)
6187 allocate_stack(common, stacksize);
6188
6189 stacksize = 0;
6190 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6191 {
6192 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6193 stacksize++;
6194 }
6195
6196 if (bra == OP_BRAZERO)
6197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6198
6199 if (bra == OP_BRAMINZERO)
6200 {
6201 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6202 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6203 if (ket != OP_KETRMIN)
6204 {
6205 free_stack(common, 1);
6206 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6207 }
6208 else
6209 {
6210 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6211 {
6212 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6213 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6214 /* Nothing stored during the first run. */
6215 skip = JUMP(SLJIT_JUMP);
6216 JUMPHERE(jump);
6217 /* Checking zero-length iteration. */
6218 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6219 {
6220 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6221 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6222 }
6223 else
6224 {
6225 /* Except when the whole stack frame must be saved. */
6226 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6227 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6228 }
6229 JUMPHERE(skip);
6230 }
6231 else
6232 {
6233 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6234 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6235 JUMPHERE(jump);
6236 }
6237 }
6238 }
6239
/* Labels the repeat code jumps back to. For OP_KETRMAX the label is kept in
   rmaxlabel and used by the loop-back emitted near the end of this function. */
6240 if (ket == OP_KETRMIN)
6241 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6242
6243 if (ket == OP_KETRMAX)
6244 {
6245 rmaxlabel = LABEL();
6246 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
6247 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
6248 }
6249
6250 /* Handling capturing brackets and alternatives. */
6251 if (opcode == OP_ONCE)
6252 {
6253 stacksize = 0;
6254 if (needs_control_head)
6255 {
/* The current control head is kept in TMP2 until it is stored on the stack. */
6256 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6257 stacksize++;
6258 }
6259
6260 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6261 {
6262 /* Neither capturing brackets nor recursions are found in the block. */
6263 if (ket == OP_KETRMIN)
6264 {
6265 stacksize += 2;
6266 if (!needs_control_head)
6267 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6268 }
6269 else
6270 {
6271 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6272 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6273 if (ket == OP_KETRMAX || has_alternatives)
6274 stacksize++;
6275 }
6276
6277 if (stacksize > 0)
6278 allocate_stack(common, stacksize);
6279
6280 stacksize = 0;
6281 if (needs_control_head)
6282 {
6283 stacksize++;
6284 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6285 }
6286
6287 if (ket == OP_KETRMIN)
6288 {
/* TMP2 is free again once the control head has been stored above, so the
   private slot is loaded only now in that case. */
6289 if (needs_control_head)
6290 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6291 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6292 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6293 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6294 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6295 }
6296 else if (ket == OP_KETRMAX || has_alternatives)
6297 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6298 }
6299 else
6300 {
/* A frame is required: allocate the frame together with the extra slots. */
6301 if (ket != OP_KET || has_alternatives)
6302 stacksize++;
6303
6304 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6305 allocate_stack(common, stacksize);
6306
6307 if (needs_control_head)
6308 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6309
6310 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6311 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6312
6313 stacksize = needs_control_head ? 1 : 0;
6314 if (ket != OP_KET || has_alternatives)
6315 {
6316 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6317 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6318 stacksize++;
6319 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6320 }
6321 else
6322 {
6323 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6324 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6325 }
6326 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6327 }
6328 }
6329 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6330 {
6331 /* Saving the previous values. */
6332 if (common->optimized_cbracket[offset >> 1] != 0)
6333 {
6334 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6335 allocate_stack(common, 2);
6336 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6337 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6339 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6340 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6341 }
6342 else
6343 {
6344 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6345 allocate_stack(common, 1);
6346 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6347 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6348 }
6349 }
6350 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6351 {
6352 /* Saving the previous value. */
6353 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6354 allocate_stack(common, 1);
6355 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6356 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6357 }
6358 else if (has_alternatives)
6359 {
6360 /* Pushing the starting string pointer. */
6361 allocate_stack(common, 1);
6362 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6363 }
6364
6365 /* Generating code for the first alternative. */
6366 if (opcode == OP_COND || opcode == OP_SCOND)
6367 {
6368 if (*matchingpath == OP_CREF)
6369 {
/* The condition fails when the ovector slot of the referenced group equals
   OVECTOR(1). */
6370 SLJIT_ASSERT(has_alternatives);
6371 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6372 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6373 matchingpath += 1 + IMM2_SIZE;
6374 }
6375 else if (*matchingpath == OP_NCREF)
6376 {
6377 SLJIT_ASSERT(has_alternatives);
/* stacksize temporarily holds the referenced group number. The helper is
   called only when the slot of that group equals OVECTOR(1). */
6378 stacksize = GET2(matchingpath, 1);
6379 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6380
/* STACK_TOP is saved in the POSSESSIVE1 slot and reloaded after the call.
   The first argument packs the group number above the low 8 bits, which
   carry ovector_start / sizeof(sljit_sw). */
6381 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6382 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6383 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6384 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6385 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6386 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6387 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6388 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6389 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6390
6391 JUMPHERE(jump);
6392 matchingpath += 1 + IMM2_SIZE;
6393 }
6394 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6395 {
6396 /* Never has other case. */
6397 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6398
/* stacksize is turned into a flag here: non-zero when the recursion test is
   already known to succeed while compiling. */
6399 stacksize = GET2(matchingpath, 1);
6400 if (common->currententry == NULL)
6401 stacksize = 0;
6402 else if (stacksize == RREF_ANY)
6403 stacksize = 1;
6404 else if (common->currententry->start == 0)
6405 stacksize = stacksize == 0;
6406 else
6407 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6408
6409 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6410 {
/* Decided at compile time: either the first branch is compiled, or
   compilation continues with the second alternative (if any). */
6411 SLJIT_ASSERT(!has_alternatives);
6412 if (stacksize != 0)
6413 matchingpath += 1 + IMM2_SIZE;
6414 else
6415 {
6416 if (*cc == OP_ALT)
6417 {
6418 matchingpath = cc + 1 + LINK_SIZE;
6419 cc += GET(cc, 1);
6420 }
6421 else
6422 matchingpath = cc;
6423 }
6424 }
6425 else
6426 {
6427 SLJIT_ASSERT(has_alternatives);
6428
/* Runtime check through do_searchgroups; the group number of the current
   entry is passed in the POSSESSIVE0 slot. */
6429 stacksize = GET2(matchingpath, 1);
6430 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6431 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6432 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6433 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6434 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6435 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6436 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6437 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6438 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6439 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6440 matchingpath += 1 + IMM2_SIZE;
6441 }
6442 }
6443 else
6444 {
/* The condition is an assertion: it gets its own assert_backtrack record
   and is compiled by compile_assert_matchingpath. */
6445 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6446 /* Similar code as PUSH_BACKTRACK macro. */
6447 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6448 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6449 return NULL;
6450 memset(assert, 0, sizeof(assert_backtrack));
6451 assert->common.cc = matchingpath;
6452 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6453 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6454 }
6455 }
6456
/* Compile the items between matchingpath and cc (the selected alternative). */
6457 compile_matchingpath(common, matchingpath, cc, backtrack);
6458 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6459 return NULL;
6460
6461 if (opcode == OP_ONCE)
6462 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6463
/* Slots pushed after the group body: counted first, allocated in one step,
   then filled in the same order. */
6464 stacksize = 0;
6465 if (ket != OP_KET || bra != OP_BRA)
6466 stacksize++;
6467 if (offset != 0)
6468 {
6469 if (common->capture_last_ptr != 0)
6470 stacksize++;
6471 if (common->optimized_cbracket[offset >> 1] == 0)
6472 stacksize += 2;
6473 }
6474 if (has_alternatives && opcode != OP_ONCE)
6475 stacksize++;
6476
6477 if (stacksize > 0)
6478 allocate_stack(common, stacksize);
6479
6480 stacksize = 0;
6481 if (ket != OP_KET || bra != OP_BRA)
6482 {
6483 if (ket != OP_KET)
6484 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6485 else
6486 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6487 stacksize++;
6488 }
6489
6490 if (offset != 0)
6491 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6492
6493 if (has_alternatives)
6494 {
/* A zero is stored in the last slot for the first alternative. */
6495 if (opcode != OP_ONCE)
6496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6497 if (ket != OP_KETRMAX)
6498 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6499 }
6500
6501 /* Must be after the matchingpath label. */
6502 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6503 {
6504 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6505 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6506 }
6507
6508 if (ket == OP_KETRMAX)
6509 {
6510 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6511 {
6512 if (has_alternatives)
6513 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6514 /* Checking zero-length iteration. */
6515 if (opcode != OP_ONCE)
6516 {
6517 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6518 /* Drop STR_PTR for greedy plus quantifier. */
6519 if (bra != OP_BRAZERO)
6520 free_stack(common, 1);
6521 }
6522 else
6523 /* TMP2 must contain the starting STR_PTR. */
6524 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6525 }
6526 else
6527 JUMPTO(SLJIT_JUMP, rmaxlabel);
6528 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6529 }
6530
6531 if (bra == OP_BRAZERO)
6532 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6533
6534 if (bra == OP_BRAMINZERO)
6535 {
6536 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6537 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6538 if (braminzerojump != NULL)
6539 {
6540 JUMPHERE(braminzerojump);
6541 /* We need to release the end pointer to perform the
6542 backtrack for the zero-length iteration. When
6543 framesize is < 0, OP_ONCE will do the release itself. */
6544 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6545 {
6546 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6547 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6548 }
6549 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6550 free_stack(common, 1);
6551 }
6552 /* Continue to the normal backtrack. */
6553 }
6554
6555 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6556 decrease_call_count(common);
6557
6558 /* Skip the other alternatives. */
6559 while (*cc == OP_ALT)
6560 cc += GET(cc, 1);
6561 cc += 1 + LINK_SIZE;
6562
6563 /* Temporarily encoding the needs_control_head in framesize. */
/* The low bit carries needs_control_head, the remaining bits the framesize. */
6564 if (opcode == OP_ONCE)
6565 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6566 return cc;
6567 }
6568
6569 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6570 {
6571 DEFINE_COMPILER;
6572 backtrack_common *backtrack;
6573 pcre_uchar opcode;
6574 int private_data_ptr;
6575 int cbraprivptr = 0;
6576 BOOL needs_control_head;
6577 int framesize;
6578 int stacksize;
6579 int offset = 0;
6580 BOOL zero = FALSE;
6581 pcre_uchar *ccbegin = NULL;
6582 int stack; /* Also contains the offset of control head. */
6583 struct sljit_label *loop = NULL;
6584 struct jump_list *emptymatch = NULL;
6585
6586 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6587 if (*cc == OP_BRAPOSZERO)
6588 {
6589 zero = TRUE;
6590 cc++;
6591 }
6592
6593 opcode = *cc;
6594 private_data_ptr = PRIVATE_DATA(cc);
6595 SLJIT_ASSERT(private_data_ptr != 0);
6596 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6597 switch(opcode)
6598 {
6599 case OP_BRAPOS:
6600 case OP_SBRAPOS:
6601 ccbegin = cc + 1 + LINK_SIZE;
6602 break;
6603
6604 case OP_CBRAPOS:
6605 case OP_SCBRAPOS:
6606 offset = GET2(cc, 1 + LINK_SIZE);
6607 /* This case cannot be optimized in the same was as
6608 normal capturing brackets. */
6609 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6610 cbraprivptr = OVECTOR_PRIV(offset);
6611 offset <<= 1;
6612 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6613 break;
6614
6615 default:
6616 SLJIT_ASSERT_STOP();
6617 break;
6618 }
6619
6620 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6621 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6622 if (framesize < 0)
6623 {
6624 if (offset != 0)
6625 {
6626 stacksize = 2;
6627 if (common->capture_last_ptr != 0)
6628 stacksize++;
6629 }
6630 else
6631 stacksize = 1;
6632
6633 if (needs_control_head)
6634 stacksize++;
6635 if (!zero)
6636 stacksize++;
6637
6638 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6639 allocate_stack(common, stacksize);
6640 if (framesize == no_frame)
6641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6642
6643 stack = 0;
6644 if (offset != 0)
6645 {
6646 stack = 2;
6647 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6648 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6649 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6650 if (common->capture_last_ptr != 0)
6651 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6652 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6653 if (needs_control_head)
6654 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6655 if (common->capture_last_ptr != 0)
6656 {
6657 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6658 stack = 3;
6659 }
6660 }
6661 else
6662 {
6663 if (needs_control_head)
6664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6665 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6666 stack = 1;
6667 }
6668
6669 if (needs_control_head)
6670 stack++;
6671 if (!zero)
6672 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
6673 if (needs_control_head)
6674 {
6675 stack--;
6676 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6677 }
6678 }
6679 else
6680 {
6681 stacksize = framesize + 1;
6682 if (!zero)
6683 stacksize++;
6684 if (needs_control_head)
6685 stacksize++;
6686 if (offset == 0)
6687 stacksize++;
6688 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6689
6690 allocate_stack(common, stacksize);
6691 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6692 if (needs_control_head)
6693 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6694 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6695
6696 stack = 0;
6697 if (!zero)
6698 {
6699 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6700 stack = 1;
6701 }
6702 if (needs_control_head)
6703 {
6704 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6705 stack++;
6706 }
6707 if (offset == 0)
6708 {
6709 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6710 stack++;
6711 }
6712 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6713 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
6714 stack -= 1 + (offset == 0);
6715 }
6716
6717 if (offset != 0)
6718 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6719
6720 loop = LABEL();
6721 while (*cc != OP_KETRPOS)
6722 {
6723 backtrack->top = NULL;
6724 backtrack->topbacktracks = NULL;
6725 cc += GET(cc, 1);
6726
6727 compile_matchingpath(common, ccbegin, cc, backtrack);
6728 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6729 return NULL;
6730
6731 if (framesize < 0)
6732 {
6733 if (framesize == no_frame)
6734 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6735
6736 if (offset != 0)
6737 {
6738 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6739 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6740 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6741 if (common->capture_last_ptr != 0)
6742 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6743 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6744 }
6745