/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1310 - (show annotations)
Sat Apr 6 06:51:09 2013 UTC (6 years, 5 months ago) by zherczeg
File MIME type: text/plain
File size: 306318 byte(s)
Fix compiler warnings in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block whose address is the single parameter of a jit_function.
Pointer members are declared ahead of the integer and byte sized members;
keep that grouping when adding fields. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 int real_offset_count;
172 int offset_count;
173 int call_limit;
/* NOTE(review): the four byte flags below presumably mirror the PCRE_NOTBOL,
PCRE_NOTEOL, PCRE_NOTEMPTY and PCRE_NOTEMPTY_ATSTART exec options - confirm
against the code that fills this structure. */
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Bookkeeping for generated code. Both arrays have one element per compile
mode (JIT_NUMBER_OF_COMPILE_MODES): executable_funcs holds a code pointer and
executable_sizes the matching size. NOTE(review): callback/userdata are
presumably the user supplied JIT stack callback and its argument - confirm. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
186 } executable_functions;
187
/* Node of a singly linked list of sljit jumps. NOTE(review): presumably the
jumps are collected here until their common target label exists - confirm. */
188 typedef struct jump_list {
189 struct sljit_jump *jump;
190 struct jump_list *next;
191 } jump_list;
192
/* Node of a singly linked list that pairs a jump (start) with a label (quit).
NOTE(review): presumably 'start' enters an out-of-line stub and 'quit' is
where the stub resumes - confirm against the code that emits the stubs. */
193 typedef struct stub_list {
194 struct sljit_jump *start;
195 struct sljit_label *quit;
196 struct stub_list *next;
197 } stub_list;
198
/* Negative sentinel values. NOTE(review): presumably stored where a frame
size is expected (the framesize members of the backtrack structures are
documented as "less than 0" when no frame is needed) - confirm. */
199 enum frame_types {
200 no_frame = -1,
201 no_stack = -2
202 };
203
/* Type tags. NOTE(review): presumably they label entries of the control verb
chain referenced by compiler_common.control_head_ptr - confirm. */
204 enum control_types {
205 type_mark = 0,
206 type_then_trap = 1
207 };
208
/* Function pointer type: takes the address of a jit_arguments block and
returns an int, using the SLJIT_CALL calling convention. */
209 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
210
211 /* The following structure is the key data type for the recursive
212 code generator. It is allocated by compile_matchingpath, and contains
213 the arguments for compile_backtrackingpath. Must be the first member
214 of its descendants. */
215 typedef struct backtrack_common {
216 /* Concatenation stack. */
217 struct backtrack_common *prev;
218 jump_list *nextbacktracks;
219 /* Internal stack (for component operators). */
220 struct backtrack_common *top;
221 jump_list *topbacktracks;
222 /* Opcode pointer. */
223 pcre_uchar *cc;
224 } backtrack_common;
225
/* Backtrack record for assertions; bracket_backtrack can point to one through
its u.assert member. 'common' is the first member, as backtrack_common
requires of its descendants. */
226 typedef struct assert_backtrack {
227 backtrack_common common;
228 jump_list *condfailed;
229 /* Less than 0 if a frame is not needed. */
230 int framesize;
231 /* Points to our private memory word on the stack. */
232 int private_data_ptr;
233 /* For iterators. */
234 struct sljit_label *matchingpath;
235 } assert_backtrack;
236
/* Backtrack record for bracket groups. The union 'u' holds one of three
things depending on the opcode, as noted on its members. */
237 typedef struct bracket_backtrack {
238 backtrack_common common;
239 /* Where to continue if an alternative is successfully matched. */
240 struct sljit_label *alternative_matchingpath;
241 /* For rmin and rmax iterators. */
242 struct sljit_label *recursive_matchingpath;
243 /* For greedy ? operator. */
244 struct sljit_label *zero_matchingpath;
245 /* Contains the branches of a failed condition. */
246 union {
247 /* Both for OP_COND, OP_SCOND. */
248 jump_list *condfailed;
249 assert_backtrack *assert;
250 /* For OP_ONCE. Less than 0 if not needed. */
251 int framesize;
252 } u;
253 /* Points to our private memory word on the stack. */
254 int private_data_ptr;
255 } bracket_backtrack;
256
/* Backtrack record. NOTE(review): judging by the name this serves the
possessive bracket opcodes (OP_BRAPOS and relatives) - confirm. */
257 typedef struct bracketpos_backtrack {
258 backtrack_common common;
259 /* Points to our private memory word on the stack. */
260 int private_data_ptr;
261 /* Reverting stack is needed. */
262 int framesize;
263 /* Allocated stack size. */
264 int stacksize;
265 } bracketpos_backtrack;
266
/* Backtrack record that only adds a matching path label to the common part.
NOTE(review): judging by the name it belongs to OP_BRAMINZERO - confirm. */
267 typedef struct braminzero_backtrack {
268 backtrack_common common;
269 struct sljit_label *matchingpath;
270 } braminzero_backtrack;
271
/* Backtrack record for iterators: the common part plus the label at which
the next iteration starts. */
272 typedef struct iterator_backtrack {
273 backtrack_common common;
274 /* Next iteration. */
275 struct sljit_label *matchingpath;
276 } iterator_backtrack;
277
/* Node of a singly linked list (see compiler_common.entries) with one record
per function generated for a recursion target. */
278 typedef struct recurse_entry {
279 struct recurse_entry *next;
280 /* Contains the function entry. */
281 struct sljit_label *entry;
282 /* Collects the calls until the function is created. */
283 jump_list *calls;
284 /* Points to the starting opcode. */
285 sljit_sw start;
286 } recurse_entry;
287
/* Backtrack record for a recursion. NOTE(review): inlined_pattern presumably
tells whether the recursed pattern was generated in place rather than called
as a function - confirm. */
288 typedef struct recurse_backtrack {
289 backtrack_common common;
290 BOOL inlined_pattern;
291 } recurse_backtrack;
292
/* Pseudo opcode, given the value OP_TABLE_LENGTH. NOTE(review): presumably
that is one past the last real opcode, so it cannot clash with one - confirm. */
293 #define OP_THEN_TRAP OP_TABLE_LENGTH
294
/* Backtrack record used for the OP_THEN_TRAP pseudo opcode. */
295 typedef struct then_trap_backtrack {
296 backtrack_common common;
297 /* If then_trap is not NULL, this structure contains the real
298 then_trap for the backtracking path. */
299 struct then_trap_backtrack *then_trap;
300 /* Points to the starting opcode. */
301 sljit_sw start;
302 /* Exit point for the then opcodes of this alternative. */
303 jump_list *quit;
304 /* Frame size of the current alternative. */
305 int framesize;
306 } then_trap_backtrack;
307
/* Sizes the 'digits' array of compiler_common (2 + MAX_RANGE_SIZE ints). */
308 #define MAX_RANGE_SIZE 6
309
/* State shared by the whole code generator for one compilation. The members
whose names end in _ptr and are declared as int hold offsets of local
variables; check_opcode_types reserves several of them by advancing
ovector_start, and a value of 0 means "not reserved" there. */
310 typedef struct compiler_common {
311 /* The sljit generic compiler. */
312 struct sljit_compiler *compiler;
313 /* First byte code. */
314 pcre_uchar *start;
315 /* Maps private data offset to each opcode. */
316 sljit_si *private_data_ptrs;
317 /* Tells whether the capturing bracket is optimized. */
318 pcre_uint8 *optimized_cbracket;
319 /* Tells whether the starting offset is a target of then. */
320 pcre_uint8 *then_offsets;
321 /* Current position where a THEN must jump. */
322 then_trap_backtrack *then_trap;
323 /* Starting offset of private data for capturing brackets. */
324 int cbra_ptr;
325 /* Output vector starting point. Must be divisible by 2. */
326 int ovector_start;
327 /* Last known position of the requested byte. */
328 int req_char_ptr;
329 /* Head of the last recursion. */
330 int recursive_head_ptr;
331 /* First inspected character for partial matching. */
332 int start_used_ptr;
333 /* Starting pointer for partial soft matches. */
334 int hit_start;
335 /* End pointer of the first line. */
336 int first_line_end;
337 /* Points to the marked string. */
338 int mark_ptr;
339 /* Recursive control verb management chain. */
340 int control_head_ptr;
341 /* Points to the last matched capture block index. */
342 int capture_last_ptr;
343 /* Points to the starting position of the current match. */
344 int start_ptr;
345
346 /* Flipped and lower case tables. */
347 const pcre_uint8 *fcc;
348 sljit_sw lcc;
349 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
350 int mode;
351 /* \K is found in the pattern. */
352 BOOL has_set_som;
353 /* (*SKIP:arg) is found in the pattern. */
354 BOOL has_skip_arg;
355 /* (*THEN) is found in the pattern. */
356 BOOL has_then;
357 /* Needs to know the start position anytime. */
358 BOOL needs_start_ptr;
359 /* Currently in recurse or negative assert. */
360 BOOL local_exit;
361 /* Currently in a positive assert. */
362 BOOL positive_assert;
363 /* Newline control. */
364 int nltype;
365 int newline;
366 int bsr_nltype;
367 /* Dollar endonly. */
368 int endonly;
369 /* Tables. */
370 sljit_sw ctypes;
371 int digits[2 + MAX_RANGE_SIZE];
372 /* Named capturing brackets. */
373 sljit_uw name_table;
374 sljit_sw name_count;
375 sljit_sw name_entry_size;
376
377 /* Labels and jump lists. */
378 struct sljit_label *partialmatchlabel;
379 struct sljit_label *quit_label;
380 struct sljit_label *forced_quit_label;
381 struct sljit_label *accept_label;
382 stub_list *stubs;
383 recurse_entry *entries;
384 recurse_entry *currententry;
385 jump_list *partialmatch;
386 jump_list *quit;
387 jump_list *positive_assert_quit;
388 jump_list *forced_quit;
389 jump_list *accept;
390 jump_list *calllimit;
391 jump_list *stackalloc;
392 jump_list *revertframes;
393 jump_list *wordboundary;
394 jump_list *anynewline;
395 jump_list *hspace;
396 jump_list *vspace;
397 jump_list *casefulcmp;
398 jump_list *caselesscmp;
399 jump_list *reset_match;
400 BOOL jscript_compat;
401 #ifdef SUPPORT_UTF
402 BOOL utf;
403 #ifdef SUPPORT_UCP
404 BOOL use_ucp;
405 #endif
406 #ifndef COMPILE_PCRE32
407 jump_list *utfreadchar;
408 #endif
409 #ifdef COMPILE_PCRE8
410 jump_list *utfreadtype8;
411 #endif
412 #endif /* SUPPORT_UTF */
413 #ifdef SUPPORT_UCP
414 jump_list *getucd;
415 #endif
416 } compiler_common;
417
418 /* For byte_sequence_compare. The members after sourcereg exist only when
SLJIT_UNALIGNED is set. The unions c and oc overlay the same bytes as an
int, a 16 bit value and an array of character units whose element type and
count depend on the compiled character width (4 x 8 bit, 2 x 16 bit or
1 x 32 bit). NOTE(review): presumably 'c' holds the characters to compare and
'oc' their other-case variants - confirm against byte_sequence_compare. */
419
420 typedef struct compare_context {
421 int length;
422 int sourcereg;
423 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
424 int ucharptr;
425 union {
426 sljit_si asint;
427 sljit_uh asushort;
428 #if defined COMPILE_PCRE8
429 sljit_ub asbyte;
430 sljit_ub asuchars[4];
431 #elif defined COMPILE_PCRE16
432 sljit_uh asuchars[2];
433 #elif defined COMPILE_PCRE32
434 sljit_ui asuchars[1];
435 #endif
436 } c;
437 union {
438 sljit_si asint;
439 sljit_uh asushort;
440 #if defined COMPILE_PCRE8
441 sljit_ub asbyte;
442 sljit_ub asuchars[4];
443 #elif defined COMPILE_PCRE16
444 sljit_uh asuchars[2];
445 #elif defined COMPILE_PCRE32
446 sljit_ui asuchars[1];
447 #endif
448 } oc;
449 #endif
450 } compare_context;
451
452 /* Undefine sljit macros. CMP is defined again below as a shortcut for
sljit_emit_cmp. */
453 #undef CMP
454
455 /* Used for accessing the elements of the stack. The result is a negative
byte offset: STACK(0) is -1 word, STACK(1) is -2 words, and so on, where a
word is sizeof(sljit_sw) bytes. */
456 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
457
/* Names used by the code generator for the sljit register identifiers. */
458 #define TMP1 SLJIT_SCRATCH_REG1
459 #define TMP2 SLJIT_SCRATCH_REG3
460 #define TMP3 SLJIT_TEMPORARY_EREG2
461 #define STR_PTR SLJIT_SAVED_REG1
462 #define STR_END SLJIT_SAVED_REG2
463 #define STACK_TOP SLJIT_SCRATCH_REG2
464 #define STACK_LIMIT SLJIT_SAVED_REG3
465 #define ARGUMENTS SLJIT_SAVED_EREG1
466 #define CALL_COUNT SLJIT_SAVED_EREG2
467 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
468
469 /* Local space layout. Each value below is a byte offset, counted in
sljit_sw sized words. */
470 /* These two locals can be used by the current opcode. */
471 #define LOCALS0 (0 * sizeof(sljit_sw))
472 #define LOCALS1 (1 * sizeof(sljit_sw))
473 /* Two local variables for possessive quantifiers (char1 cannot use them). */
474 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
475 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
476 /* Max limit of recursions. */
477 #define CALL_LIMIT (4 * sizeof(sljit_sw))
478 /* The output vector is stored on the stack, and contains pointers
479 to characters. The vector data is divided into two groups: the first
480 group contains the start / end character pointers, and the second is
481 the start pointers when the end of the capturing group has not yet been reached.
These macros, like PRIVATE_DATA, expect a variable named 'common' to be in scope. */
482 #define OVECTOR_START (common->ovector_start)
483 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
484 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))
485 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
486
/* Move operations sized to one character unit of the compiled library width
(8, 16 or 32 bit). */
487 #if defined COMPILE_PCRE8
488 #define MOV_UCHAR SLJIT_MOV_UB
489 #define MOVU_UCHAR SLJIT_MOVU_UB
490 #elif defined COMPILE_PCRE16
491 #define MOV_UCHAR SLJIT_MOV_UH
492 #define MOVU_UCHAR SLJIT_MOVU_UH
493 #elif defined COMPILE_PCRE32
494 #define MOV_UCHAR SLJIT_MOV_UI
495 #define MOVU_UCHAR SLJIT_MOVU_UI
496 #else
497 #error Unsupported compiling mode
498 #endif
499
500 /* Shortcuts. Every macro after DEFINE_COMPILER passes a variable named
'compiler' to sljit; DEFINE_COMPILER declares that variable from
common->compiler, so it has to appear among the declarations of any function
that uses the other shortcuts. */
501 #define DEFINE_COMPILER \
502 struct sljit_compiler *compiler = common->compiler
503 #define OP1(op, dst, dstw, src, srcw) \
504 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
505 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
506 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
507 #define LABEL() \
508 sljit_emit_label(compiler)
509 #define JUMP(type) \
510 sljit_emit_jump(compiler, (type))
511 #define JUMPTO(type, label) \
512 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
513 #define JUMPHERE(jump) \
514 sljit_set_label((jump), sljit_emit_label(compiler))
515 #define SET_LABEL(jump, label) \
516 sljit_set_label((jump), (label))
517 #define CMP(type, src1, src1w, src2, src2w) \
518 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
519 #define CMPTO(type, src1, src1w, src2, src2w, label) \
520 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
521 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
522 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
523 #define GET_LOCAL_BASE(dst, dstw, offset) \
524 sljit_get_local_base(compiler, (dst), (dstw), (offset))
525
526 static pcre_uchar* bracketend(pcre_uchar* cc)
527 {
528 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
529 do cc += GET(cc, 1); while (*cc == OP_ALT);
530 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
531 cc += 1 + LINK_SIZE;
532 return cc;
533 }
534
535 /* Functions which might need modification for all new supported opcodes:
536 next_opcode
537 check_opcode_types
538 set_private_data_ptrs
539 get_framesize
540 init_frame
541 get_private_data_copy_length
542 copy_private_data
543 compile_matchingpath
544 compile_backtrackingpath
545 */
546
547 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
548 {
549 SLJIT_UNUSED_ARG(common);
550 switch(*cc)
551 {
552 case OP_SOD:
553 case OP_SOM:
554 case OP_SET_SOM:
555 case OP_NOT_WORD_BOUNDARY:
556 case OP_WORD_BOUNDARY:
557 case OP_NOT_DIGIT:
558 case OP_DIGIT:
559 case OP_NOT_WHITESPACE:
560 case OP_WHITESPACE:
561 case OP_NOT_WORDCHAR:
562 case OP_WORDCHAR:
563 case OP_ANY:
564 case OP_ALLANY:
565 case OP_NOTPROP:
566 case OP_PROP:
567 case OP_ANYNL:
568 case OP_NOT_HSPACE:
569 case OP_HSPACE:
570 case OP_NOT_VSPACE:
571 case OP_VSPACE:
572 case OP_EXTUNI:
573 case OP_EODN:
574 case OP_EOD:
575 case OP_CIRC:
576 case OP_CIRCM:
577 case OP_DOLL:
578 case OP_DOLLM:
579 case OP_CRSTAR:
580 case OP_CRMINSTAR:
581 case OP_CRPLUS:
582 case OP_CRMINPLUS:
583 case OP_CRQUERY:
584 case OP_CRMINQUERY:
585 case OP_CRRANGE:
586 case OP_CRMINRANGE:
587 case OP_CLASS:
588 case OP_NCLASS:
589 case OP_REF:
590 case OP_REFI:
591 case OP_RECURSE:
592 case OP_CALLOUT:
593 case OP_ALT:
594 case OP_KET:
595 case OP_KETRMAX:
596 case OP_KETRMIN:
597 case OP_KETRPOS:
598 case OP_REVERSE:
599 case OP_ASSERT:
600 case OP_ASSERT_NOT:
601 case OP_ASSERTBACK:
602 case OP_ASSERTBACK_NOT:
603 case OP_ONCE:
604 case OP_ONCE_NC:
605 case OP_BRA:
606 case OP_BRAPOS:
607 case OP_CBRA:
608 case OP_CBRAPOS:
609 case OP_COND:
610 case OP_SBRA:
611 case OP_SBRAPOS:
612 case OP_SCBRA:
613 case OP_SCBRAPOS:
614 case OP_SCOND:
615 case OP_CREF:
616 case OP_NCREF:
617 case OP_RREF:
618 case OP_NRREF:
619 case OP_DEF:
620 case OP_BRAZERO:
621 case OP_BRAMINZERO:
622 case OP_BRAPOSZERO:
623 case OP_PRUNE:
624 case OP_SKIP:
625 case OP_THEN:
626 case OP_COMMIT:
627 case OP_FAIL:
628 case OP_ACCEPT:
629 case OP_ASSERT_ACCEPT:
630 case OP_CLOSE:
631 case OP_SKIPZERO:
632 return cc + PRIV(OP_lengths)[*cc];
633
634 case OP_CHAR:
635 case OP_CHARI:
636 case OP_NOT:
637 case OP_NOTI:
638 case OP_STAR:
639 case OP_MINSTAR:
640 case OP_PLUS:
641 case OP_MINPLUS:
642 case OP_QUERY:
643 case OP_MINQUERY:
644 case OP_UPTO:
645 case OP_MINUPTO:
646 case OP_EXACT:
647 case OP_POSSTAR:
648 case OP_POSPLUS:
649 case OP_POSQUERY:
650 case OP_POSUPTO:
651 case OP_STARI:
652 case OP_MINSTARI:
653 case OP_PLUSI:
654 case OP_MINPLUSI:
655 case OP_QUERYI:
656 case OP_MINQUERYI:
657 case OP_UPTOI:
658 case OP_MINUPTOI:
659 case OP_EXACTI:
660 case OP_POSSTARI:
661 case OP_POSPLUSI:
662 case OP_POSQUERYI:
663 case OP_POSUPTOI:
664 case OP_NOTSTAR:
665 case OP_NOTMINSTAR:
666 case OP_NOTPLUS:
667 case OP_NOTMINPLUS:
668 case OP_NOTQUERY:
669 case OP_NOTMINQUERY:
670 case OP_NOTUPTO:
671 case OP_NOTMINUPTO:
672 case OP_NOTEXACT:
673 case OP_NOTPOSSTAR:
674 case OP_NOTPOSPLUS:
675 case OP_NOTPOSQUERY:
676 case OP_NOTPOSUPTO:
677 case OP_NOTSTARI:
678 case OP_NOTMINSTARI:
679 case OP_NOTPLUSI:
680 case OP_NOTMINPLUSI:
681 case OP_NOTQUERYI:
682 case OP_NOTMINQUERYI:
683 case OP_NOTUPTOI:
684 case OP_NOTMINUPTOI:
685 case OP_NOTEXACTI:
686 case OP_NOTPOSSTARI:
687 case OP_NOTPOSPLUSI:
688 case OP_NOTPOSQUERYI:
689 case OP_NOTPOSUPTOI:
690 cc += PRIV(OP_lengths)[*cc];
691 #ifdef SUPPORT_UTF
692 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
693 #endif
694 return cc;
695
696 /* Special cases. */
697 case OP_TYPESTAR:
698 case OP_TYPEMINSTAR:
699 case OP_TYPEPLUS:
700 case OP_TYPEMINPLUS:
701 case OP_TYPEQUERY:
702 case OP_TYPEMINQUERY:
703 case OP_TYPEUPTO:
704 case OP_TYPEMINUPTO:
705 case OP_TYPEEXACT:
706 case OP_TYPEPOSSTAR:
707 case OP_TYPEPOSPLUS:
708 case OP_TYPEPOSQUERY:
709 case OP_TYPEPOSUPTO:
710 return cc + PRIV(OP_lengths)[*cc] - 1;
711
712 case OP_ANYBYTE:
713 #ifdef SUPPORT_UTF
714 if (common->utf) return NULL;
715 #endif
716 return cc + 1;
717
718 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
719 case OP_XCLASS:
720 return cc + GET(cc, 1);
721 #endif
722
723 case OP_MARK:
724 case OP_PRUNE_ARG:
725 case OP_SKIP_ARG:
726 case OP_THEN_ARG:
727 return cc + 1 + 2 + cc[1];
728
729 default:
730 /* All opcodes are supported now! */
731 SLJIT_ASSERT_STOP();
732 return NULL;
733 }
734 }
735
736 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
737 {
738 pcre_uchar *name;
739 pcre_uchar *name2;
740 unsigned int cbra_index;
741 int i;
742
743 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
744 while (cc < ccend)
745 {
746 switch(*cc)
747 {
748 case OP_SET_SOM:
749 common->has_set_som = TRUE;
750 cc += 1;
751 break;
752
753 case OP_REF:
754 case OP_REFI:
755 common->optimized_cbracket[GET2(cc, 1)] = 0;
756 cc += 1 + IMM2_SIZE;
757 break;
758
759 case OP_CBRAPOS:
760 case OP_SCBRAPOS:
761 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
762 cc += 1 + LINK_SIZE + IMM2_SIZE;
763 break;
764
765 case OP_COND:
766 case OP_SCOND:
767 /* Only AUTO_CALLOUT can insert this opcode. We do
768 not intend to support this case. */
769 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
770 return FALSE;
771 cc += 1 + LINK_SIZE;
772 break;
773
774 case OP_CREF:
775 i = GET2(cc, 1);
776 common->optimized_cbracket[i] = 0;
777 cc += 1 + IMM2_SIZE;
778 break;
779
780 case OP_NCREF:
781 cbra_index = GET2(cc, 1);
782 name = (pcre_uchar *)common->name_table;
783 name2 = name;
784 for (i = 0; i < common->name_count; i++)
785 {
786 if (GET2(name, 0) == cbra_index) break;
787 name += common->name_entry_size;
788 }
789 SLJIT_ASSERT(i != common->name_count);
790
791 for (i = 0; i < common->name_count; i++)
792 {
793 if (STRCMP_UC_UC(name2 + IMM2_SIZE, name + IMM2_SIZE) == 0)
794 common->optimized_cbracket[GET2(name2, 0)] = 0;
795 name2 += common->name_entry_size;
796 }
797 cc += 1 + IMM2_SIZE;
798 break;
799
800 case OP_RECURSE:
801 /* Set its value only once. */
802 if (common->recursive_head_ptr == 0)
803 {
804 common->recursive_head_ptr = common->ovector_start;
805 common->ovector_start += sizeof(sljit_sw);
806 }
807 cc += 1 + LINK_SIZE;
808 break;
809
810 case OP_CALLOUT:
811 if (common->capture_last_ptr == 0)
812 {
813 common->capture_last_ptr = common->ovector_start;
814 common->ovector_start += sizeof(sljit_sw);
815 }
816 cc += 2 + 2 * LINK_SIZE;
817 break;
818
819 case OP_THEN_ARG:
820 common->has_then = TRUE;
821 common->control_head_ptr = 1;
822 /* Fall through. */
823
824 case OP_PRUNE_ARG:
825 common->needs_start_ptr = TRUE;
826 /* Fall through. */
827
828 case OP_MARK:
829 if (common->mark_ptr == 0)
830 {
831 common->mark_ptr = common->ovector_start;
832 common->ovector_start += sizeof(sljit_sw);
833 }
834 cc += 1 + 2 + cc[1];
835 break;
836
837 case OP_THEN:
838 common->has_then = TRUE;
839 common->control_head_ptr = 1;
840 /* Fall through. */
841
842 case OP_PRUNE:
843 case OP_SKIP:
844 common->needs_start_ptr = TRUE;
845 cc += 1;
846 break;
847
848 case OP_SKIP_ARG:
849 common->control_head_ptr = 1;
850 common->has_skip_arg = TRUE;
851 cc += 1 + 2 + cc[1];
852 break;
853
854 default:
855 cc = next_opcode(common, cc);
856 if (cc == NULL)
857 return FALSE;
858 break;
859 }
860 }
861 return TRUE;
862 }
863
864 static int get_class_iterator_size(pcre_uchar *cc)
865 {
866 switch(*cc)
867 {
868 case OP_CRSTAR:
869 case OP_CRPLUS:
870 return 2;
871
872 case OP_CRMINSTAR:
873 case OP_CRMINPLUS:
874 case OP_CRQUERY:
875 case OP_CRMINQUERY:
876 return 1;
877
878 case OP_CRRANGE:
879 case OP_CRMINRANGE:
880 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
881 return 0;
882 return 2;
883
884 default:
885 return 0;
886 }
887 }
888
889 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
890 {
891 pcre_uchar *end = bracketend(begin);
892 pcre_uchar *next;
893 pcre_uchar *next_end;
894 pcre_uchar *max_end;
895 pcre_uchar type;
896 sljit_sw length = end - begin;
897 int min, max, i;
898
899 /* Detect fixed iterations first. */
900 if (end[-(1 + LINK_SIZE)] != OP_KET)
901 return FALSE;
902
903 /* Already detected repeat. */
904 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
905 return TRUE;
906
907 next = end;
908 min = 1;
909 while (1)
910 {
911 if (*next != *begin)
912 break;
913 next_end = bracketend(next);
914 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
915 break;
916 next = next_end;
917 min++;
918 }
919
920 if (min == 2)
921 return FALSE;
922
923 max = 0;
924 max_end = next;
925 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
926 {
927 type = *next;
928 while (1)
929 {
930 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
931 break;
932 next_end = bracketend(next + 2 + LINK_SIZE);
933 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
934 break;
935 next = next_end;
936 max++;
937 }
938
939 if (next[0] == type && next[1] == *begin && max >= 1)
940 {
941 next_end = bracketend(next + 1);
942 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
943 {
944 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
945 if (*next_end != OP_KET)
946 break;
947
948 if (i == max)
949 {
950 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
951 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
952 /* +2 the original and the last. */
953 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
954 if (min == 1)
955 return TRUE;
956 min--;
957 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
958 }
959 }
960 }
961 }
962
963 if (min >= 3)
964 {
965 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
966 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
967 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
968 return TRUE;
969 }
970
971 return FALSE;
972 }
973
/* Groups of case labels for the single character and character type repeat
opcodes. Each macro expands to a run of "case OP_xxx:" labels and is meant to
be placed inside a switch, directly ahead of the statements shared by the
group. In set_private_data_ptrs the _1 groups are given one private data
slot and the _2A / _2B groups two; the _2B groups are the UPTO forms, whose
opcodes carry an extra IMM2_SIZE count. */
974 #define CASE_ITERATOR_PRIVATE_DATA_1 \
975 case OP_MINSTAR: \
976 case OP_MINPLUS: \
977 case OP_QUERY: \
978 case OP_MINQUERY: \
979 case OP_MINSTARI: \
980 case OP_MINPLUSI: \
981 case OP_QUERYI: \
982 case OP_MINQUERYI: \
983 case OP_NOTMINSTAR: \
984 case OP_NOTMINPLUS: \
985 case OP_NOTQUERY: \
986 case OP_NOTMINQUERY: \
987 case OP_NOTMINSTARI: \
988 case OP_NOTMINPLUSI: \
989 case OP_NOTQUERYI: \
990 case OP_NOTMINQUERYI:
991
992 #define CASE_ITERATOR_PRIVATE_DATA_2A \
993 case OP_STAR: \
994 case OP_PLUS: \
995 case OP_STARI: \
996 case OP_PLUSI: \
997 case OP_NOTSTAR: \
998 case OP_NOTPLUS: \
999 case OP_NOTSTARI: \
1000 case OP_NOTPLUSI:
1001
1002 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1003 case OP_UPTO: \
1004 case OP_MINUPTO: \
1005 case OP_UPTOI: \
1006 case OP_MINUPTOI: \
1007 case OP_NOTUPTO: \
1008 case OP_NOTMINUPTO: \
1009 case OP_NOTUPTOI: \
1010 case OP_NOTMINUPTOI:
1011
1012 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1013 case OP_TYPEMINSTAR: \
1014 case OP_TYPEMINPLUS: \
1015 case OP_TYPEQUERY: \
1016 case OP_TYPEMINQUERY:
1017
1018 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1019 case OP_TYPESTAR: \
1020 case OP_TYPEPLUS:
1021
1022 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1023 case OP_TYPEUPTO: \
1024 case OP_TYPEMINUPTO:
1025
1026 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1027 {
1028 pcre_uchar *cc = common->start;
1029 pcre_uchar *alternative;
1030 pcre_uchar *end = NULL;
1031 int private_data_ptr = *private_data_start;
1032 int space, size, bracketlen;
1033
1034 while (cc < ccend)
1035 {
1036 space = 0;
1037 size = 0;
1038 bracketlen = 0;
1039 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1040 return;
1041
1042 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1043 if (detect_repeat(common, cc))
1044 {
1045 /* These brackets are converted to repeats, so no global
1046 based single character repeat is allowed. */
1047 if (cc >= end)
1048 end = bracketend(cc);
1049 }
1050
1051 switch(*cc)
1052 {
1053 case OP_KET:
1054 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1055 {
1056 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1057 private_data_ptr += sizeof(sljit_sw);
1058 cc += common->private_data_ptrs[cc + 1 - common->start];
1059 }
1060 cc += 1 + LINK_SIZE;
1061 break;
1062
1063 case OP_ASSERT:
1064 case OP_ASSERT_NOT:
1065 case OP_ASSERTBACK:
1066 case OP_ASSERTBACK_NOT:
1067 case OP_ONCE:
1068 case OP_ONCE_NC:
1069 case OP_BRAPOS:
1070 case OP_SBRA:
1071 case OP_SBRAPOS:
1072 case OP_SCOND:
1073 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1074 private_data_ptr += sizeof(sljit_sw);
1075 bracketlen = 1 + LINK_SIZE;
1076 break;
1077
1078 case OP_CBRAPOS:
1079 case OP_SCBRAPOS:
1080 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1081 private_data_ptr += sizeof(sljit_sw);
1082 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1083 break;
1084
1085 case OP_COND:
1086 /* Might be a hidden SCOND. */
1087 alternative = cc + GET(cc, 1);
1088 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1089 {
1090 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1091 private_data_ptr += sizeof(sljit_sw);
1092 }
1093 bracketlen = 1 + LINK_SIZE;
1094 break;
1095
1096 case OP_BRA:
1097 bracketlen = 1 + LINK_SIZE;
1098 break;
1099
1100 case OP_CBRA:
1101 case OP_SCBRA:
1102 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1103 break;
1104
1105 CASE_ITERATOR_PRIVATE_DATA_1
1106 space = 1;
1107 size = -2;
1108 break;
1109
1110 CASE_ITERATOR_PRIVATE_DATA_2A
1111 space = 2;
1112 size = -2;
1113 break;
1114
1115 CASE_ITERATOR_PRIVATE_DATA_2B
1116 space = 2;
1117 size = -(2 + IMM2_SIZE);
1118 break;
1119
1120 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1121 space = 1;
1122 size = 1;
1123 break;
1124
1125 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1126 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1127 space = 2;
1128 size = 1;
1129 break;
1130
1131 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1132 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1133 space = 2;
1134 size = 1 + IMM2_SIZE;
1135 break;
1136
1137 case OP_CLASS:
1138 case OP_NCLASS:
1139 size += 1 + 32 / sizeof(pcre_uchar);
1140 space = get_class_iterator_size(cc + size);
1141 break;
1142
1143 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1144 case OP_XCLASS:
1145 size = GET(cc, 1);
1146 space = get_class_iterator_size(cc + size);
1147 break;
1148 #endif
1149
1150 default:
1151 cc = next_opcode(common, cc);
1152 SLJIT_ASSERT(cc != NULL);
1153 break;
1154 }
1155
1156 /* Character iterators, which are not inside a repeated bracket,
1157 gets a private slot instead of allocating it on the stack. */
1158 if (space > 0 && cc >= end)
1159 {
1160 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1161 private_data_ptr += sizeof(sljit_sw) * space;
1162 }
1163
1164 if (size != 0)
1165 {
1166 if (size < 0)
1167 {
1168 cc += -size;
1169 #ifdef SUPPORT_UTF
1170 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1171 #endif
1172 }
1173 else
1174 cc += size;
1175 }
1176
1177 if (bracketlen > 0)
1178 {
1179 if (cc >= end)
1180 {
1181 end = bracketend(cc);
1182 if (end[-1 - LINK_SIZE] == OP_KET)
1183 end = NULL;
1184 }
1185 cc += bracketlen;
1186 }
1187 }
1188 *private_data_start = private_data_ptr;
1189 }
1190
1191 /* Returns with a frame_types (always < 0) if no need for frame. */
1192 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1193 {
1194 int length = 0;
1195 int possessive = 0;
1196 BOOL stack_restore = FALSE;
1197 BOOL setsom_found = recursive;
1198 BOOL setmark_found = recursive;
1199 /* The last capture is a local variable even for recursions. */
1200 BOOL capture_last_found = FALSE;
1201
1202 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1203 SLJIT_ASSERT(common->control_head_ptr != 0);
1204 *needs_control_head = TRUE;
1205 #else
1206 *needs_control_head = FALSE;
1207 #endif
1208
1209 if (ccend == NULL)
1210 {
1211 ccend = bracketend(cc) - (1 + LINK_SIZE);
1212 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1213 {
1214 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1215 /* This is correct regardless of common->capture_last_ptr. */
1216 capture_last_found = TRUE;
1217 }
1218 cc = next_opcode(common, cc);
1219 }
1220
1221 SLJIT_ASSERT(cc != NULL);
1222 while (cc < ccend)
1223 switch(*cc)
1224 {
1225 case OP_SET_SOM:
1226 SLJIT_ASSERT(common->has_set_som);
1227 stack_restore = TRUE;
1228 if (!setsom_found)
1229 {
1230 length += 2;
1231 setsom_found = TRUE;
1232 }
1233 cc += 1;
1234 break;
1235
1236 case OP_MARK:
1237 case OP_PRUNE_ARG:
1238 case OP_THEN_ARG:
1239 SLJIT_ASSERT(common->mark_ptr != 0);
1240 stack_restore = TRUE;
1241 if (!setmark_found)
1242 {
1243 length += 2;
1244 setmark_found = TRUE;
1245 }
1246 if (common->control_head_ptr != 0)
1247 *needs_control_head = TRUE;
1248 cc += 1 + 2 + cc[1];
1249 break;
1250
1251 case OP_RECURSE:
1252 stack_restore = TRUE;
1253 if (common->has_set_som && !setsom_found)
1254 {
1255 length += 2;
1256 setsom_found = TRUE;
1257 }
1258 if (common->mark_ptr != 0 && !setmark_found)
1259 {
1260 length += 2;
1261 setmark_found = TRUE;
1262 }
1263 if (common->capture_last_ptr != 0 && !capture_last_found)
1264 {
1265 length += 2;
1266 capture_last_found = TRUE;
1267 }
1268 cc += 1 + LINK_SIZE;
1269 break;
1270
1271 case OP_CBRA:
1272 case OP_CBRAPOS:
1273 case OP_SCBRA:
1274 case OP_SCBRAPOS:
1275 stack_restore = TRUE;
1276 if (common->capture_last_ptr != 0 && !capture_last_found)
1277 {
1278 length += 2;
1279 capture_last_found = TRUE;
1280 }
1281 length += 3;
1282 cc += 1 + LINK_SIZE + IMM2_SIZE;
1283 break;
1284
1285 default:
1286 stack_restore = TRUE;
1287 /* Fall through. */
1288
1289 case OP_NOT_WORD_BOUNDARY:
1290 case OP_WORD_BOUNDARY:
1291 case OP_NOT_DIGIT:
1292 case OP_DIGIT:
1293 case OP_NOT_WHITESPACE:
1294 case OP_WHITESPACE:
1295 case OP_NOT_WORDCHAR:
1296 case OP_WORDCHAR:
1297 case OP_ANY:
1298 case OP_ALLANY:
1299 case OP_ANYBYTE:
1300 case OP_NOTPROP:
1301 case OP_PROP:
1302 case OP_ANYNL:
1303 case OP_NOT_HSPACE:
1304 case OP_HSPACE:
1305 case OP_NOT_VSPACE:
1306 case OP_VSPACE:
1307 case OP_EXTUNI:
1308 case OP_EODN:
1309 case OP_EOD:
1310 case OP_CIRC:
1311 case OP_CIRCM:
1312 case OP_DOLL:
1313 case OP_DOLLM:
1314 case OP_CHAR:
1315 case OP_CHARI:
1316 case OP_NOT:
1317 case OP_NOTI:
1318
1319 case OP_EXACT:
1320 case OP_POSSTAR:
1321 case OP_POSPLUS:
1322 case OP_POSQUERY:
1323 case OP_POSUPTO:
1324
1325 case OP_EXACTI:
1326 case OP_POSSTARI:
1327 case OP_POSPLUSI:
1328 case OP_POSQUERYI:
1329 case OP_POSUPTOI:
1330
1331 case OP_NOTEXACT:
1332 case OP_NOTPOSSTAR:
1333 case OP_NOTPOSPLUS:
1334 case OP_NOTPOSQUERY:
1335 case OP_NOTPOSUPTO:
1336
1337 case OP_NOTEXACTI:
1338 case OP_NOTPOSSTARI:
1339 case OP_NOTPOSPLUSI:
1340 case OP_NOTPOSQUERYI:
1341 case OP_NOTPOSUPTOI:
1342
1343 case OP_TYPEEXACT:
1344 case OP_TYPEPOSSTAR:
1345 case OP_TYPEPOSPLUS:
1346 case OP_TYPEPOSQUERY:
1347 case OP_TYPEPOSUPTO:
1348
1349 case OP_CLASS:
1350 case OP_NCLASS:
1351 case OP_XCLASS:
1352
1353 cc = next_opcode(common, cc);
1354 SLJIT_ASSERT(cc != NULL);
1355 break;
1356 }
1357
1358 /* Possessive quantifiers can use a special case. */
1359 if (SLJIT_UNLIKELY(possessive == length))
1360 return stack_restore ? no_frame : no_stack;
1361
1362 if (length > 0)
1363 return length + 1;
1364 return stack_restore ? no_frame : no_stack;
1365 }
1366
1367 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1368 {
1369 DEFINE_COMPILER;
1370 BOOL setsom_found = recursive;
1371 BOOL setmark_found = recursive;
1372 /* The last capture is a local variable even for recursions. */
1373 BOOL capture_last_found = FALSE;
1374 int offset;
1375
1376 /* >= 1 + shortest item size (2) */
1377 SLJIT_UNUSED_ARG(stacktop);
1378 SLJIT_ASSERT(stackpos >= stacktop + 2);
1379
1380 stackpos = STACK(stackpos);
1381 if (ccend == NULL)
1382 {
1383 ccend = bracketend(cc) - (1 + LINK_SIZE);
1384 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1385 cc = next_opcode(common, cc);
1386 }
1387
1388 SLJIT_ASSERT(cc != NULL);
1389 while (cc < ccend)
1390 switch(*cc)
1391 {
1392 case OP_SET_SOM:
1393 SLJIT_ASSERT(common->has_set_som);
1394 if (!setsom_found)
1395 {
1396 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1397 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1398 stackpos += (int)sizeof(sljit_sw);
1399 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1400 stackpos += (int)sizeof(sljit_sw);
1401 setsom_found = TRUE;
1402 }
1403 cc += 1;
1404 break;
1405
1406 case OP_MARK:
1407 case OP_PRUNE_ARG:
1408 case OP_THEN_ARG:
1409 SLJIT_ASSERT(common->mark_ptr != 0);
1410 if (!setmark_found)
1411 {
1412 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1413 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1414 stackpos += (int)sizeof(sljit_sw);
1415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1416 stackpos += (int)sizeof(sljit_sw);
1417 setmark_found = TRUE;
1418 }
1419 cc += 1 + 2 + cc[1];
1420 break;
1421
1422 case OP_RECURSE:
1423 if (common->has_set_som && !setsom_found)
1424 {
1425 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1426 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1427 stackpos += (int)sizeof(sljit_sw);
1428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1429 stackpos += (int)sizeof(sljit_sw);
1430 setsom_found = TRUE;
1431 }
1432 if (common->mark_ptr != 0 && !setmark_found)
1433 {
1434 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1435 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1436 stackpos += (int)sizeof(sljit_sw);
1437 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1438 stackpos += (int)sizeof(sljit_sw);
1439 setmark_found = TRUE;
1440 }
1441 if (common->capture_last_ptr != 0 && !capture_last_found)
1442 {
1443 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1444 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1445 stackpos += (int)sizeof(sljit_sw);
1446 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1447 stackpos += (int)sizeof(sljit_sw);
1448 capture_last_found = TRUE;
1449 }
1450 cc += 1 + LINK_SIZE;
1451 break;
1452
1453 case OP_CBRA:
1454 case OP_CBRAPOS:
1455 case OP_SCBRA:
1456 case OP_SCBRAPOS:
1457 if (common->capture_last_ptr != 0 && !capture_last_found)
1458 {
1459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1461 stackpos += (int)sizeof(sljit_sw);
1462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1463 stackpos += (int)sizeof(sljit_sw);
1464 capture_last_found = TRUE;
1465 }
1466 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1468 stackpos += (int)sizeof(sljit_sw);
1469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1470 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1472 stackpos += (int)sizeof(sljit_sw);
1473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1474 stackpos += (int)sizeof(sljit_sw);
1475
1476 cc += 1 + LINK_SIZE + IMM2_SIZE;
1477 break;
1478
1479 default:
1480 cc = next_opcode(common, cc);
1481 SLJIT_ASSERT(cc != NULL);
1482 break;
1483 }
1484
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1486 SLJIT_ASSERT(stackpos == STACK(stacktop));
1487 }
1488
1489 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1490 {
1491 int private_data_length = needs_control_head ? 3 : 2;
1492 int size;
1493 pcre_uchar *alternative;
1494 /* Calculate the sum of the private machine words. */
1495 while (cc < ccend)
1496 {
1497 size = 0;
1498 switch(*cc)
1499 {
1500 case OP_KET:
1501 if (PRIVATE_DATA(cc) != 0)
1502 private_data_length++;
1503 cc += 1 + LINK_SIZE;
1504 break;
1505
1506 case OP_ASSERT:
1507 case OP_ASSERT_NOT:
1508 case OP_ASSERTBACK:
1509 case OP_ASSERTBACK_NOT:
1510 case OP_ONCE:
1511 case OP_ONCE_NC:
1512 case OP_BRAPOS:
1513 case OP_SBRA:
1514 case OP_SBRAPOS:
1515 case OP_SCOND:
1516 private_data_length++;
1517 cc += 1 + LINK_SIZE;
1518 break;
1519
1520 case OP_CBRA:
1521 case OP_SCBRA:
1522 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1523 private_data_length++;
1524 cc += 1 + LINK_SIZE + IMM2_SIZE;
1525 break;
1526
1527 case OP_CBRAPOS:
1528 case OP_SCBRAPOS:
1529 private_data_length += 2;
1530 cc += 1 + LINK_SIZE + IMM2_SIZE;
1531 break;
1532
1533 case OP_COND:
1534 /* Might be a hidden SCOND. */
1535 alternative = cc + GET(cc, 1);
1536 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1537 private_data_length++;
1538 cc += 1 + LINK_SIZE;
1539 break;
1540
1541 CASE_ITERATOR_PRIVATE_DATA_1
1542 if (PRIVATE_DATA(cc))
1543 private_data_length++;
1544 cc += 2;
1545 #ifdef SUPPORT_UTF
1546 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1547 #endif
1548 break;
1549
1550 CASE_ITERATOR_PRIVATE_DATA_2A
1551 if (PRIVATE_DATA(cc))
1552 private_data_length += 2;
1553 cc += 2;
1554 #ifdef SUPPORT_UTF
1555 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1556 #endif
1557 break;
1558
1559 CASE_ITERATOR_PRIVATE_DATA_2B
1560 if (PRIVATE_DATA(cc))
1561 private_data_length += 2;
1562 cc += 2 + IMM2_SIZE;
1563 #ifdef SUPPORT_UTF
1564 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1565 #endif
1566 break;
1567
1568 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1569 if (PRIVATE_DATA(cc))
1570 private_data_length++;
1571 cc += 1;
1572 break;
1573
1574 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1575 if (PRIVATE_DATA(cc))
1576 private_data_length += 2;
1577 cc += 1;
1578 break;
1579
1580 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1581 if (PRIVATE_DATA(cc))
1582 private_data_length += 2;
1583 cc += 1 + IMM2_SIZE;
1584 break;
1585
1586 case OP_CLASS:
1587 case OP_NCLASS:
1588 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1589 case OP_XCLASS:
1590 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1591 #else
1592 size = 1 + 32 / (int)sizeof(pcre_uchar);
1593 #endif
1594 if (PRIVATE_DATA(cc))
1595 private_data_length += get_class_iterator_size(cc + size);
1596 cc += size;
1597 break;
1598
1599 default:
1600 cc = next_opcode(common, cc);
1601 SLJIT_ASSERT(cc != NULL);
1602 break;
1603 }
1604 }
1605 SLJIT_ASSERT(cc == ccend);
1606 return private_data_length;
1607 }
1608
1609 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1610 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1611 {
1612 DEFINE_COMPILER;
1613 int srcw[2];
1614 int count, size;
1615 BOOL tmp1next = TRUE;
1616 BOOL tmp1empty = TRUE;
1617 BOOL tmp2empty = TRUE;
1618 pcre_uchar *alternative;
1619 enum {
1620 start,
1621 loop,
1622 end
1623 } status;
1624
1625 status = save ? start : loop;
1626 stackptr = STACK(stackptr - 2);
1627 stacktop = STACK(stacktop - 1);
1628
1629 if (!save)
1630 {
1631 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1632 if (stackptr < stacktop)
1633 {
1634 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1635 stackptr += sizeof(sljit_sw);
1636 tmp1empty = FALSE;
1637 }
1638 if (stackptr < stacktop)
1639 {
1640 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1641 stackptr += sizeof(sljit_sw);
1642 tmp2empty = FALSE;
1643 }
1644 /* The tmp1next must be TRUE in either way. */
1645 }
1646
1647 do
1648 {
1649 count = 0;
1650 switch(status)
1651 {
1652 case start:
1653 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1654 count = 1;
1655 srcw[0] = common->recursive_head_ptr;
1656 if (needs_control_head)
1657 {
1658 SLJIT_ASSERT(common->control_head_ptr != 0);
1659 count = 2;
1660 srcw[1] = common->control_head_ptr;
1661 }
1662 status = loop;
1663 break;
1664
1665 case loop:
1666 if (cc >= ccend)
1667 {
1668 status = end;
1669 break;
1670 }
1671
1672 switch(*cc)
1673 {
1674 case OP_KET:
1675 if (PRIVATE_DATA(cc) != 0)
1676 {
1677 count = 1;
1678 srcw[0] = PRIVATE_DATA(cc);
1679 }
1680 cc += 1 + LINK_SIZE;
1681 break;
1682
1683 case OP_ASSERT:
1684 case OP_ASSERT_NOT:
1685 case OP_ASSERTBACK:
1686 case OP_ASSERTBACK_NOT:
1687 case OP_ONCE:
1688 case OP_ONCE_NC:
1689 case OP_BRAPOS:
1690 case OP_SBRA:
1691 case OP_SBRAPOS:
1692 case OP_SCOND:
1693 count = 1;
1694 srcw[0] = PRIVATE_DATA(cc);
1695 SLJIT_ASSERT(srcw[0] != 0);
1696 cc += 1 + LINK_SIZE;
1697 break;
1698
1699 case OP_CBRA:
1700 case OP_SCBRA:
1701 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1702 {
1703 count = 1;
1704 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1705 }
1706 cc += 1 + LINK_SIZE + IMM2_SIZE;
1707 break;
1708
1709 case OP_CBRAPOS:
1710 case OP_SCBRAPOS:
1711 count = 2;
1712 srcw[0] = PRIVATE_DATA(cc);
1713 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1714 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1715 cc += 1 + LINK_SIZE + IMM2_SIZE;
1716 break;
1717
1718 case OP_COND:
1719 /* Might be a hidden SCOND. */
1720 alternative = cc + GET(cc, 1);
1721 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1722 {
1723 count = 1;
1724 srcw[0] = PRIVATE_DATA(cc);
1725 SLJIT_ASSERT(srcw[0] != 0);
1726 }
1727 cc += 1 + LINK_SIZE;
1728 break;
1729
1730 CASE_ITERATOR_PRIVATE_DATA_1
1731 if (PRIVATE_DATA(cc))
1732 {
1733 count = 1;
1734 srcw[0] = PRIVATE_DATA(cc);
1735 }
1736 cc += 2;
1737 #ifdef SUPPORT_UTF
1738 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1739 #endif
1740 break;
1741
1742 CASE_ITERATOR_PRIVATE_DATA_2A
1743 if (PRIVATE_DATA(cc))
1744 {
1745 count = 2;
1746 srcw[0] = PRIVATE_DATA(cc);
1747 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1748 }
1749 cc += 2;
1750 #ifdef SUPPORT_UTF
1751 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1752 #endif
1753 break;
1754
1755 CASE_ITERATOR_PRIVATE_DATA_2B
1756 if (PRIVATE_DATA(cc))
1757 {
1758 count = 2;
1759 srcw[0] = PRIVATE_DATA(cc);
1760 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1761 }
1762 cc += 2 + IMM2_SIZE;
1763 #ifdef SUPPORT_UTF
1764 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1765 #endif
1766 break;
1767
1768 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1769 if (PRIVATE_DATA(cc))
1770 {
1771 count = 1;
1772 srcw[0] = PRIVATE_DATA(cc);
1773 }
1774 cc += 1;
1775 break;
1776
1777 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1778 if (PRIVATE_DATA(cc))
1779 {
1780 count = 2;
1781 srcw[0] = PRIVATE_DATA(cc);
1782 srcw[1] = srcw[0] + sizeof(sljit_sw);
1783 }
1784 cc += 1;
1785 break;
1786
1787 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1788 if (PRIVATE_DATA(cc))
1789 {
1790 count = 2;
1791 srcw[0] = PRIVATE_DATA(cc);
1792 srcw[1] = srcw[0] + sizeof(sljit_sw);
1793 }
1794 cc += 1 + IMM2_SIZE;
1795 break;
1796
1797 case OP_CLASS:
1798 case OP_NCLASS:
1799 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1800 case OP_XCLASS:
1801 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1802 #else
1803 size = 1 + 32 / (int)sizeof(pcre_uchar);
1804 #endif
1805 if (PRIVATE_DATA(cc))
1806 switch(get_class_iterator_size(cc + size))
1807 {
1808 case 1:
1809 count = 1;
1810 srcw[0] = PRIVATE_DATA(cc);
1811 break;
1812
1813 case 2:
1814 count = 2;
1815 srcw[0] = PRIVATE_DATA(cc);
1816 srcw[1] = srcw[0] + sizeof(sljit_sw);
1817 break;
1818
1819 default:
1820 SLJIT_ASSERT_STOP();
1821 break;
1822 }
1823 cc += size;
1824 break;
1825
1826 default:
1827 cc = next_opcode(common, cc);
1828 SLJIT_ASSERT(cc != NULL);
1829 break;
1830 }
1831 break;
1832
1833 case end:
1834 SLJIT_ASSERT_STOP();
1835 break;
1836 }
1837
1838 while (count > 0)
1839 {
1840 count--;
1841 if (save)
1842 {
1843 if (tmp1next)
1844 {
1845 if (!tmp1empty)
1846 {
1847 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1848 stackptr += sizeof(sljit_sw);
1849 }
1850 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1851 tmp1empty = FALSE;
1852 tmp1next = FALSE;
1853 }
1854 else
1855 {
1856 if (!tmp2empty)
1857 {
1858 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1859 stackptr += sizeof(sljit_sw);
1860 }
1861 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1862 tmp2empty = FALSE;
1863 tmp1next = TRUE;
1864 }
1865 }
1866 else
1867 {
1868 if (tmp1next)
1869 {
1870 SLJIT_ASSERT(!tmp1empty);
1871 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1872 tmp1empty = stackptr >= stacktop;
1873 if (!tmp1empty)
1874 {
1875 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1876 stackptr += sizeof(sljit_sw);
1877 }
1878 tmp1next = FALSE;
1879 }
1880 else
1881 {
1882 SLJIT_ASSERT(!tmp2empty);
1883 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1884 tmp2empty = stackptr >= stacktop;
1885 if (!tmp2empty)
1886 {
1887 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1888 stackptr += sizeof(sljit_sw);
1889 }
1890 tmp1next = TRUE;
1891 }
1892 }
1893 }
1894 }
1895 while (status != end);
1896
1897 if (save)
1898 {
1899 if (tmp1next)
1900 {
1901 if (!tmp1empty)
1902 {
1903 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1904 stackptr += sizeof(sljit_sw);
1905 }
1906 if (!tmp2empty)
1907 {
1908 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1909 stackptr += sizeof(sljit_sw);
1910 }
1911 }
1912 else
1913 {
1914 if (!tmp2empty)
1915 {
1916 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1917 stackptr += sizeof(sljit_sw);
1918 }
1919 if (!tmp1empty)
1920 {
1921 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1922 stackptr += sizeof(sljit_sw);
1923 }
1924 }
1925 }
1926 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1927 }
1928
1929 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1930 {
1931 pcre_uchar *end = bracketend(cc);
1932 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1933
1934 /* Assert captures then. */
1935 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1936 current_offset = NULL;
1937 /* Conditional block does not. */
1938 if (*cc == OP_COND || *cc == OP_SCOND)
1939 has_alternatives = FALSE;
1940
1941 cc = next_opcode(common, cc);
1942 if (has_alternatives)
1943 current_offset = common->then_offsets + (cc - common->start);
1944
1945 while (cc < end)
1946 {
1947 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1948 cc = set_then_offsets(common, cc, current_offset);
1949 else
1950 {
1951 if (*cc == OP_ALT && has_alternatives)
1952 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1953 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1954 *current_offset = 1;
1955 cc = next_opcode(common, cc);
1956 }
1957 }
1958
1959 return end;
1960 }
1961
1962 #undef CASE_ITERATOR_PRIVATE_DATA_1
1963 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1964 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1965 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1966 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1967 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1968
1969 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1970 {
1971 return (value & (value - 1)) == 0;
1972 }
1973
1974 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1975 {
1976 while (list)
1977 {
1978 /* sljit_set_label is clever enough to do nothing
1979 if either the jump or the label is NULL. */
1980 SET_LABEL(list->jump, label);
1981 list = list->next;
1982 }
1983 }
1984
1985 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1986 {
1987 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1988 if (list_item)
1989 {
1990 list_item->next = *list;
1991 list_item->jump = jump;
1992 *list = list_item;
1993 }
1994 }
1995
1996 static void add_stub(compiler_common *common, struct sljit_jump *start)
1997 {
1998 DEFINE_COMPILER;
1999 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2000
2001 if (list_item)
2002 {
2003 list_item->start = start;
2004 list_item->quit = LABEL();
2005 list_item->next = common->stubs;
2006 common->stubs = list_item;
2007 }
2008 }
2009
2010 static void flush_stubs(compiler_common *common)
2011 {
2012 DEFINE_COMPILER;
2013 stub_list* list_item = common->stubs;
2014
2015 while (list_item)
2016 {
2017 JUMPHERE(list_item->start);
2018 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2019 JUMPTO(SLJIT_JUMP, list_item->quit);
2020 list_item = list_item->next;
2021 }
2022 common->stubs = NULL;
2023 }
2024
2025 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
2026 {
2027 DEFINE_COMPILER;
2028
2029 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
2030 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2031 }
2032
2033 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2034 {
2035 /* May destroy all locals and registers except TMP2. */
2036 DEFINE_COMPILER;
2037
2038 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2039 #ifdef DESTROY_REGISTERS
2040 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2041 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2042 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2043 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2044 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2045 #endif
2046 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2047 }
2048
2049 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2050 {
2051 DEFINE_COMPILER;
2052 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2053 }
2054
2055 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2056 {
2057 DEFINE_COMPILER;
2058 struct sljit_label *loop;
2059 int i;
2060
2061 /* At this point we can freely use all temporary registers. */
2062 SLJIT_ASSERT(length > 1);
2063 /* TMP1 returns with begin - 1. */
2064 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2065 if (length < 8)
2066 {
2067 for (i = 1; i < length; i++)
2068 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2069 }
2070 else
2071 {
2072 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2073 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2074 loop = LABEL();
2075 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2076 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2077 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2078 }
2079 }
2080
2081 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2082 {
2083 DEFINE_COMPILER;
2084 struct sljit_label *loop;
2085 int i;
2086
2087 SLJIT_ASSERT(length > 1);
2088 /* OVECTOR(1) contains the "string begin - 1" constant. */
2089 if (length > 2)
2090 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2091 if (length < 8)
2092 {
2093 for (i = 2; i < length; i++)
2094 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2095 }
2096 else
2097 {
2098 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2099 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2100 loop = LABEL();
2101 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2102 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2103 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2104 }
2105
2106 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2107 if (common->mark_ptr != 0)
2108 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2109 if (common->control_head_ptr != 0)
2110 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2111 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2112 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2113 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2114 }
2115
2116 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2117 {
2118 while (current != NULL)
2119 {
2120 switch (current[-2])
2121 {
2122 case type_then_trap:
2123 break;
2124
2125 case type_mark:
2126 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2127 return current[-4];
2128 break;
2129
2130 default:
2131 SLJIT_ASSERT_STOP();
2132 break;
2133 }
2134 current = (sljit_sw*)current[-1];
2135 }
2136 return -1;
2137 }
2138
2139 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2140 {
2141 DEFINE_COMPILER;
2142 struct sljit_label *loop;
2143 struct sljit_jump *early_quit;
2144
2145 /* At this point we can freely use all registers. */
2146 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2148
2149 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2150 if (common->mark_ptr != 0)
2151 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2152 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2153 if (common->mark_ptr != 0)
2154 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2155 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2156 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2157 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2158 /* Unlikely, but possible */
2159 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2160 loop = LABEL();
2161 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2162 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2163 /* Copy the integer value to the output buffer */
2164 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2165 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2166 #endif
2167 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2168 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2169 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2170 JUMPHERE(early_quit);
2171
2172 /* Calculate the return value, which is the maximum ovector value. */
2173 if (topbracket > 1)
2174 {
2175 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2176 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2177
2178 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2179 loop = LABEL();
2180 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2181 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2182 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2183 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2184 }
2185 else
2186 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2187 }
2188
2189 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2190 {
2191 DEFINE_COMPILER;
2192 struct sljit_jump *jump;
2193
2194 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2195 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2196 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2197
2198 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2199 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2200 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2201 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2202
2203 /* Store match begin and end. */
2204 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2205 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2206
2207 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2208 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2209 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2210 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2211 #endif
2212 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2213 JUMPHERE(jump);
2214
2215 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2216 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2217 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2218 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2219 #endif
2220 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2221
2222 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2223 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2224 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2225 #endif
2226 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2227
2228 JUMPTO(SLJIT_JUMP, quit);
2229 }
2230
2231 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2232 {
2233 /* May destroy TMP1. */
2234 DEFINE_COMPILER;
2235 struct sljit_jump *jump;
2236
2237 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2238 {
2239 /* The value of -1 must be kept for start_used_ptr! */
2240 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2241 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2242 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2243 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2244 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2245 JUMPHERE(jump);
2246 }
2247 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2248 {
2249 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2250 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2251 JUMPHERE(jump);
2252 }
2253 }
2254
2255 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2256 {
2257 /* Detects if the character has an othercase. */
2258 unsigned int c;
2259
2260 #ifdef SUPPORT_UTF
2261 if (common->utf)
2262 {
2263 GETCHAR(c, cc);
2264 if (c > 127)
2265 {
2266 #ifdef SUPPORT_UCP
2267 return c != UCD_OTHERCASE(c);
2268 #else
2269 return FALSE;
2270 #endif
2271 }
2272 #ifndef COMPILE_PCRE8
2273 return common->fcc[c] != c;
2274 #endif
2275 }
2276 else
2277 #endif
2278 c = *cc;
2279 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2280 }
2281
2282 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2283 {
2284 /* Returns with the othercase. */
2285 #ifdef SUPPORT_UTF
2286 if (common->utf && c > 127)
2287 {
2288 #ifdef SUPPORT_UCP
2289 return UCD_OTHERCASE(c);
2290 #else
2291 return c;
2292 #endif
2293 }
2294 #endif
2295 return TABLE_GET(c, common->fcc, c);
2296 }
2297
2298 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2299 {
2300 /* Detects if the character and its othercase has only 1 bit difference. */
2301 unsigned int c, oc, bit;
2302 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2303 int n;
2304 #endif
2305
2306 #ifdef SUPPORT_UTF
2307 if (common->utf)
2308 {
2309 GETCHAR(c, cc);
2310 if (c <= 127)
2311 oc = common->fcc[c];
2312 else
2313 {
2314 #ifdef SUPPORT_UCP
2315 oc = UCD_OTHERCASE(c);
2316 #else
2317 oc = c;
2318 #endif
2319 }
2320 }
2321 else
2322 {
2323 c = *cc;
2324 oc = TABLE_GET(c, common->fcc, c);
2325 }
2326 #else
2327 c = *cc;
2328 oc = TABLE_GET(c, common->fcc, c);
2329 #endif
2330
2331 SLJIT_ASSERT(c != oc);
2332
2333 bit = c ^ oc;
2334 /* Optimized for English alphabet. */
2335 if (c <= 127 && bit == 0x20)
2336 return (0 << 8) | 0x20;
2337
2338 /* Since c != oc, they must have at least 1 bit difference. */
2339 if (!is_powerof2(bit))
2340 return 0;
2341
2342 #if defined COMPILE_PCRE8
2343
2344 #ifdef SUPPORT_UTF
2345 if (common->utf && c > 127)
2346 {
2347 n = GET_EXTRALEN(*cc);
2348 while ((bit & 0x3f) == 0)
2349 {
2350 n--;
2351 bit >>= 6;
2352 }
2353 return (n << 8) | bit;
2354 }
2355 #endif /* SUPPORT_UTF */
2356 return (0 << 8) | bit;
2357
2358 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2359
2360 #ifdef SUPPORT_UTF
2361 if (common->utf && c > 65535)
2362 {
2363 if (bit >= (1 << 10))
2364 bit >>= 10;
2365 else
2366 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2367 }
2368 #endif /* SUPPORT_UTF */
2369 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2370
2371 #endif /* COMPILE_PCRE[8|16|32] */
2372 }
2373
2374 static void check_partial(compiler_common *common, BOOL force)
2375 {
2376 /* Checks whether a partial matching is occured. Does not modify registers. */
2377 DEFINE_COMPILER;
2378 struct sljit_jump *jump = NULL;
2379
2380 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2381
2382 if (common->mode == JIT_COMPILE)
2383 return;
2384
2385 if (!force)
2386 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2387 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2388 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2389
2390 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2391 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2392 else
2393 {
2394 if (common->partialmatchlabel != NULL)
2395 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2396 else
2397 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2398 }
2399
2400 if (jump != NULL)
2401 JUMPHERE(jump);
2402 }
2403
2404 static void check_str_end(compiler_common *common, jump_list **end_reached)
2405 {
2406 /* Does not affect registers. Usually used in a tight spot. */
2407 DEFINE_COMPILER;
2408 struct sljit_jump *jump;
2409
2410 if (common->mode == JIT_COMPILE)
2411 {
2412 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2413 return;
2414 }
2415
2416 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2417 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2418 {
2419 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2420 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2421 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2422 }
2423 else
2424 {
2425 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2426 if (common->partialmatchlabel != NULL)
2427 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2428 else
2429 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2430 }
2431 JUMPHERE(jump);
2432 }
2433
2434 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2435 {
2436 DEFINE_COMPILER;
2437 struct sljit_jump *jump;
2438
2439 if (common->mode == JIT_COMPILE)
2440 {
2441 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2442 return;
2443 }
2444
2445 /* Partial matching mode. */
2446 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2447 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2448 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2449 {
2450 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2451 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2452 }
2453 else
2454 {
2455 if (common->partialmatchlabel != NULL)
2456 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2457 else
2458 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2459 }
2460 JUMPHERE(jump);
2461 }
2462
2463 static void read_char(compiler_common *common)
2464 {
2465 /* Reads the character into TMP1, updates STR_PTR.
2466 Does not check STR_END. TMP2 Destroyed. */
2467 DEFINE_COMPILER;
2468 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2469 struct sljit_jump *jump;
2470 #endif
2471
2472 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2473 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2474 if (common->utf)
2475 {
2476 #if defined COMPILE_PCRE8
2477 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2478 #elif defined COMPILE_PCRE16
2479 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2480 #endif /* COMPILE_PCRE[8|16] */
2481 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2482 JUMPHERE(jump);
2483 }
2484 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2485 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2486 }
2487
2488 static void peek_char(compiler_common *common)
2489 {
2490 /* Reads the character into TMP1, keeps STR_PTR.
2491 Does not check STR_END. TMP2 Destroyed. */
2492 DEFINE_COMPILER;
2493 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2494 struct sljit_jump *jump;
2495 #endif
2496
2497 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2498 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2499 if (common->utf)
2500 {
2501 #if defined COMPILE_PCRE8
2502 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2503 #elif defined COMPILE_PCRE16
2504 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2505 #endif /* COMPILE_PCRE[8|16] */
2506 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2507 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2508 JUMPHERE(jump);
2509 }
2510 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2511 }
2512
2513 static void read_char8_type(compiler_common *common)
2514 {
2515 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2516 DEFINE_COMPILER;
2517 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2518 struct sljit_jump *jump;
2519 #endif
2520
2521 #ifdef SUPPORT_UTF
2522 if (common->utf)
2523 {
2524 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2525 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2526 #if defined COMPILE_PCRE8
2527 /* This can be an extra read in some situations, but hopefully
2528 it is needed in most cases. */
2529 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2530 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2531 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2532 JUMPHERE(jump);
2533 #elif defined COMPILE_PCRE16
2534 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2535 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2536 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2537 JUMPHERE(jump);
2538 /* Skip low surrogate if necessary. */
2539 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2540 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2541 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2542 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2543 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2544 #elif defined COMPILE_PCRE32
2545 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2546 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2547 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2548 JUMPHERE(jump);
2549 #endif /* COMPILE_PCRE[8|16|32] */
2550 return;
2551 }
2552 #endif /* SUPPORT_UTF */
2553 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2554 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2555 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2556 /* The ctypes array contains only 256 values. */
2557 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2558 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2559 #endif
2560 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2561 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2562 JUMPHERE(jump);
2563 #endif
2564 }
2565
2566 static void skip_char_back(compiler_common *common)
2567 {
2568 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2569 DEFINE_COMPILER;
2570 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2571 #if defined COMPILE_PCRE8
2572 struct sljit_label *label;
2573
2574 if (common->utf)
2575 {
2576 label = LABEL();
2577 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2578 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2579 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2580 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2581 return;
2582 }
2583 #elif defined COMPILE_PCRE16
2584 if (common->utf)
2585 {
2586 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2587 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2588 /* Skip low surrogate if necessary. */
2589 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2590 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2591 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2592 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2593 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2594 return;
2595 }
2596 #endif /* COMPILE_PCRE[8|16] */
2597 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2598 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2599 }
2600
2601 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2602 {
2603 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2604 DEFINE_COMPILER;
2605
2606 if (nltype == NLTYPE_ANY)
2607 {
2608 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2609 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2610 }
2611 else if (nltype == NLTYPE_ANYCRLF)
2612 {
2613 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2614 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2615 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2616 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2617 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2618 }
2619 else
2620 {
2621 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2622 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2623 }
2624 }
2625
2626 #ifdef SUPPORT_UTF
2627
2628 #if defined COMPILE_PCRE8
2629 static void do_utfreadchar(compiler_common *common)
2630 {
2631 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2632 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2633 DEFINE_COMPILER;
2634 struct sljit_jump *jump;
2635
2636 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2637 /* Searching for the first zero. */
2638 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2639 jump = JUMP(SLJIT_C_NOT_ZERO);
2640 /* Two byte sequence. */
2641 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2642 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2643 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2644 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2645 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2646 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2647 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2648 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2649 JUMPHERE(jump);
2650
2651 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2652 jump = JUMP(SLJIT_C_NOT_ZERO);
2653 /* Three byte sequence. */
2654 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2655 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2656 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2657 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2658 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2659 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2660 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2661 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2662 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2663 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2664 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2665 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2666 JUMPHERE(jump);
2667
2668 /* Four byte sequence. */
2669 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2670 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2671 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2672 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2673 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2674 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2675 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2676 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2677 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2678 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2679 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2680 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2681 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2682 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2683 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2684 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2685 }
2686
2687 static void do_utfreadtype8(compiler_common *common)
2688 {
2689 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2690 of the character (>= 0xc0). Return value in TMP1. */
2691 DEFINE_COMPILER;
2692 struct sljit_jump *jump;
2693 struct sljit_jump *compare;
2694
2695 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2696
2697 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2698 jump = JUMP(SLJIT_C_NOT_ZERO);
2699 /* Two byte sequence. */
2700 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2701 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2702 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2703 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2704 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2705 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2706 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2707 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2708 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2709
2710 JUMPHERE(compare);
2711 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2712 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2713 JUMPHERE(jump);
2714
2715 /* We only have types for characters less than 256. */
2716 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2717 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2718 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2719 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2720 }
2721
2722 #elif defined COMPILE_PCRE16
2723
2724 static void do_utfreadchar(compiler_common *common)
2725 {
2726 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2727 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2728 DEFINE_COMPILER;
2729 struct sljit_jump *jump;
2730
2731 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2732 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2733 /* Do nothing, only return. */
2734 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2735
2736 JUMPHERE(jump);
2737 /* Combine two 16 bit characters. */
2738 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2739 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2740 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2741 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2742 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2743 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2744 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2745 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2746 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2747 }
2748
2749 #endif /* COMPILE_PCRE[8|16] */
2750
2751 #endif /* SUPPORT_UTF */
2752
2753 #ifdef SUPPORT_UCP
2754
2755 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2756 #define UCD_BLOCK_MASK 127
2757 #define UCD_BLOCK_SHIFT 7
2758
2759 static void do_getucd(compiler_common *common)
2760 {
2761 /* Search the UCD record for the character comes in TMP1.
2762 Returns chartype in TMP1 and UCD offset in TMP2. */
2763 DEFINE_COMPILER;
2764
2765 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2766
2767 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2768 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2769 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2770 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2771 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2772 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2773 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2774 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2775 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2776 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2777 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2778 }
2779 #endif
2780
2781 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2782 {
2783 DEFINE_COMPILER;
2784 struct sljit_label *mainloop;
2785 struct sljit_label *newlinelabel = NULL;
2786 struct sljit_jump *start;
2787 struct sljit_jump *end = NULL;
2788 struct sljit_jump *nl = NULL;
2789 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2790 struct sljit_jump *singlechar;
2791 #endif
2792 jump_list *newline = NULL;
2793 BOOL newlinecheck = FALSE;
2794 BOOL readuchar = FALSE;
2795
2796 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2797 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2798 newlinecheck = TRUE;
2799
2800 if (firstline)
2801 {
2802 /* Search for the end of the first line. */
2803 SLJIT_ASSERT(common->first_line_end != 0);
2804 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2805
2806 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2807 {
2808 mainloop = LABEL();
2809 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2810 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2811 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2812 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2813 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2814 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2815 JUMPHERE(end);
2816 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2817 }
2818 else
2819 {
2820 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2821 mainloop = LABEL();
2822 /* Continual stores does not cause data dependency. */
2823 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2824 read_char(common);
2825 check_newlinechar(common, common->nltype, &newline, TRUE);
2826 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2827 JUMPHERE(end);
2828 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2829 set_jumps(newline, LABEL());
2830 }
2831
2832 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2833 }
2834
2835 start = JUMP(SLJIT_JUMP);
2836
2837 if (newlinecheck)
2838 {
2839 newlinelabel = LABEL();
2840 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2841 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2842 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2843 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2844 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2845 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2846 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2847 #endif
2848 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2849 nl = JUMP(SLJIT_JUMP);
2850 }
2851
2852 mainloop = LABEL();
2853
2854 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2855 #ifdef SUPPORT_UTF
2856 if (common->utf) readuchar = TRUE;
2857 #endif
2858 if (newlinecheck) readuchar = TRUE;
2859
2860 if (readuchar)
2861 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2862
2863 if (newlinecheck)
2864 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2865
2866 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2867 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2868 #if defined COMPILE_PCRE8
2869 if (common->utf)
2870 {
2871 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2872 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2873 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2874 JUMPHERE(singlechar);
2875 }
2876 #elif defined COMPILE_PCRE16
2877 if (common->utf)
2878 {
2879 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2880 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2881 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2882 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2883 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2884 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2885 JUMPHERE(singlechar);
2886 }
2887 #endif /* COMPILE_PCRE[8|16] */
2888 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2889 JUMPHERE(start);
2890
2891 if (newlinecheck)
2892 {
2893 JUMPHERE(end);
2894 JUMPHERE(nl);
2895 }
2896
2897 return mainloop;
2898 }
2899
2900 #define MAX_N_CHARS 3
2901
2902 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2903 {
2904 DEFINE_COMPILER;
2905 struct sljit_label *start;
2906 struct sljit_jump *quit;
2907 pcre_uint32 chars[MAX_N_CHARS * 2];
2908 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2909 int location = 0;
2910 pcre_int32 len, c, bit, caseless;
2911 int must_stop;
2912
2913 /* We do not support alternatives now. */
2914 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2915 return FALSE;
2916
2917 while (TRUE)
2918 {
2919 caseless = 0;
2920 must_stop = 1;
2921 switch(*cc)
2922 {
2923 case OP_CHAR:
2924 must_stop = 0;
2925 cc++;
2926 break;
2927
2928 case OP_CHARI:
2929 caseless = 1;
2930 must_stop = 0;
2931 cc++;
2932 break;
2933
2934 case OP_SOD:
2935 case OP_SOM:
2936 case OP_SET_SOM:
2937 case OP_NOT_WORD_BOUNDARY:
2938 case OP_WORD_BOUNDARY:
2939 case OP_EODN:
2940 case OP_EOD:
2941 case OP_CIRC:
2942 case OP_CIRCM:
2943 case OP_DOLL:
2944 case OP_DOLLM:
2945 /* Zero width assertions. */
2946 cc++;
2947 continue;
2948
2949 case OP_PLUS:
2950 case OP_MINPLUS:
2951 case OP_POSPLUS:
2952 cc++;
2953 break;
2954
2955 case OP_EXACT:
2956 cc += 1 + IMM2_SIZE;
2957 break;
2958
2959 case OP_PLUSI:
2960 case OP_MINPLUSI:
2961 case OP_POSPLUSI:
2962 caseless = 1;
2963 cc++;
2964 break;
2965
2966 case OP_EXACTI:
2967 caseless = 1;
2968 cc += 1 + IMM2_SIZE;
2969 break;
2970
2971 default:
2972 must_stop = 2;
2973 break;
2974 }
2975
2976 if (must_stop == 2)
2977 break;
2978
2979 len = 1;
2980 #ifdef SUPPORT_UTF
2981 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2982 #endif
2983
2984 if (caseless && char_has_othercase(common, cc))
2985 {
2986 caseless = char_get_othercase_bit(common, cc);
2987 if (caseless == 0)
2988 return FALSE;
2989 #ifdef COMPILE_PCRE8
2990 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2991 #else
2992 if ((caseless & 0x100) != 0)
2993 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2994 else
2995 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2996 #endif
2997 }
2998 else
2999 caseless = 0;
3000
3001 while (len > 0 && location < MAX_N_CHARS * 2)
3002 {
3003 c = *cc;
3004 bit = 0;
3005 if (len == (caseless & 0xff))
3006 {
3007 bit = caseless >> 8;
3008 c |= bit;
3009 }
3010
3011 chars[location] = c;
3012 chars[location + 1] = bit;
3013
3014 len--;
3015 location += 2;
3016 cc++;
3017 }
3018
3019 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3020 break;
3021 }
3022
3023 /* At least two characters are required. */
3024 if (location < 2 * 2)
3025 return FALSE;
3026
3027 if (firstline)
3028 {
3029 SLJIT_ASSERT(common->first_line_end != 0);
3030 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3031 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3032 }
3033 else
3034 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3035
3036 start = LABEL();
3037 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3038
3039 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3040 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3041 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3042 if (chars[1] != 0)
3043 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3044 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3045 if (location > 2 * 2)
3046 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3047 if (chars[3] != 0)
3048 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3049 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3050 if (location > 2 * 2)
3051 {
3052 if (chars[5] != 0)
3053 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3054 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3055 }
3056 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3057
3058 JUMPHERE(quit);
3059
3060 if (firstline)
3061 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3062 else
3063 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3064 return TRUE;
3065 }
3066
3067 #undef MAX_N_CHARS
3068
3069 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3070 {
3071 DEFINE_COMPILER;
3072 struct sljit_label *start;
3073 struct sljit_jump *quit;
3074 struct sljit_jump *found;
3075 pcre_uchar oc, bit;
3076
3077 if (firstline)
3078 {
3079 SLJIT_ASSERT(common->first_line_end != 0);
3080 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3081 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3082 }
3083
3084 start = LABEL();
3085 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3086 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3087
3088 oc = first_char;
3089 if (caseless)
3090 {
3091 oc = TABLE_GET(first_char, common->fcc, first_char);
3092 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3093 if (first_char > 127 && common->utf)
3094 oc = UCD_OTHERCASE(first_char);
3095 #endif
3096 }
3097 if (first_char == oc)
3098 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3099 else
3100 {
3101 bit = first_char ^ oc;
3102 if (is_powerof2(bit))
3103 {
3104 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3105 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3106 }
3107 else
3108 {
3109 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3110 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3111 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3112 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3113 found = JUMP(SLJIT_C_NOT_ZERO);
3114 }
3115 }
3116
3117 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3118 JUMPTO(SLJIT_JUMP, start);
3119 JUMPHERE(found);
3120 JUMPHERE(quit);
3121
3122 if (firstline)
3123 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3124 }
3125
3126 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3127 {
3128 DEFINE_COMPILER;
3129 struct sljit_label *loop;
3130 struct sljit_jump *lastchar;
3131 struct sljit_jump *firstchar;
3132 struct sljit_jump *quit;
3133 struct sljit_jump *foundcr = NULL;
3134 struct sljit_jump *notfoundnl;
3135 jump_list *newline = NULL;
3136
3137 if (firstline)
3138 {
3139 SLJIT_ASSERT(common->first_line_end != 0);
3140 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3141 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3142 }
3143
3144 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3145 {
3146 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3147 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3148 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3149 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3150 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3151
3152 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3153 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3154 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3155 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3156 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3157 #endif
3158 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3159
3160 loop = LABEL();
3161 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3162 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3163 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3164 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3165 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3166 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3167
3168 JUMPHERE(quit);
3169 JUMPHERE(firstchar);
3170 JUMPHERE(lastchar);
3171
3172 if (firstline)
3173 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3174 return;
3175 }
3176
3177 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3178 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3179 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3180 skip_char_back(common);
3181
3182 loop = LABEL();
3183 read_char(common);
3184 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3185 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3186 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3187 check_newlinechar(common, common->nltype, &newline, FALSE);
3188 set_jumps(newline, loop);
3189
3190 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3191 {
3192 quit = JUMP(SLJIT_JUMP);
3193 JUMPHERE(foundcr);
3194 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3195 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3196 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3197 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3198 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3199 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3200 #endif
3201 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3202 JUMPHERE(notfoundnl);
3203 JUMPHERE(quit);
3204 }
3205 JUMPHERE(lastchar);
3206 JUMPHERE(firstchar);
3207
3208 if (firstline)
3209 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3210 }
3211
3212 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3213
3214 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3215 {
3216 DEFINE_COMPILER;
3217 struct sljit_label *start;
3218 struct sljit_jump *quit;
3219 struct sljit_jump *found = NULL;
3220 jump_list *matches = NULL;
3221 pcre_uint8 inverted_start_bits[32];
3222 int i;
3223 #ifndef COMPILE_PCRE8
3224 struct sljit_jump *jump;
3225 #endif
3226
3227 for (i = 0; i < 32; ++i)
3228 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3229
3230 if (firstline)
3231 {
3232 SLJIT_ASSERT(common->first_line_end != 0);
3233 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3234 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3235 }
3236
3237 start = LABEL();
3238 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3239 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3240 #ifdef SUPPORT_UTF
3241 if (common->utf)
3242 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3243 #endif
3244
3245 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3246 {
3247 #ifndef COMPILE_PCRE8
3248 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3249 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3250 JUMPHERE(jump);
3251 #endif
3252 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3253 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3254 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3255 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3256 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3257 found = JUMP(SLJIT_C_NOT_ZERO);
3258 }
3259
3260 #ifdef SUPPORT_UTF
3261 if (common->utf)
3262 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3263 #endif
3264 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3265 #ifdef SUPPORT_UTF
3266 #if defined COMPILE_PCRE8
3267 if (common->utf)
3268 {
3269 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3270 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3271 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3272 }
3273 #elif defined COMPILE_PCRE16
3274 if (common->utf)
3275 {
3276 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3277 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3278 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3279 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3280 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3281 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3282 }
3283 #endif /* COMPILE_PCRE[8|16] */
3284 #endif /* SUPPORT_UTF */
3285 JUMPTO(SLJIT_JUMP, start);
3286 if (found != NULL)
3287 JUMPHERE(found);
3288 if (matches != NULL)
3289 set_jumps(matches, LABEL());
3290 JUMPHERE(quit);
3291
3292 if (firstline)
3293 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3294 }
3295
3296 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3297 {
3298 DEFINE_COMPILER;
3299 struct sljit_label *loop;
3300 struct sljit_jump *toolong;
3301 struct sljit_jump *alreadyfound;
3302 struct sljit_jump *found;
3303 struct sljit_jump *foundoc = NULL;
3304 struct sljit_jump *notfound;
3305 pcre_uint32 oc, bit;
3306
3307 SLJIT_ASSERT(common->req_char_ptr != 0);
3308 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3309 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3310 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3311 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3312
3313 if (has_firstchar)
3314 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3315 else
3316 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3317
3318 loop = LABEL();
3319 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3320
3321 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3322 oc = req_char;
3323 if (caseless)
3324 {
3325 oc = TABLE_GET(req_char, common->fcc, req_char);
3326 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3327 if (req_char > 127 && common->utf)
3328 oc = UCD_OTHERCASE(req_char);
3329 #endif
3330 }
3331 if (req_char == oc)
3332 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3333 else
3334 {
3335 bit = req_char ^ oc;
3336 if (is_powerof2(bit))
3337 {
3338 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3339 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3340 }
3341 else
3342 {
3343 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3344 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3345 }
3346 }
3347 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3348 JUMPTO(SLJIT_JUMP, loop);
3349
3350 JUMPHERE(found);
3351 if (foundoc)
3352 JUMPHERE(foundoc);
3353 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3354 JUMPHERE(alreadyfound);
3355 JUMPHERE(toolong);
3356 return notfound;
3357 }
3358
3359 static void do_revertframes(compiler_common *common)
3360 {
3361 DEFINE_COMPILER;
3362 struct sljit_jump *jump;
3363 struct sljit_label *mainloop;
3364
3365 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3366 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3367 GET_LOCAL_BASE(TMP3, 0, 0);
3368
3369 /* Drop frames until we reach STACK_TOP. */
3370 mainloop = LABEL();
3371 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3372 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3373 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3374
3375 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3376 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3377 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3378 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3379 JUMPTO(SLJIT_JUMP, mainloop);
3380
3381 JUMPHERE(jump);
3382 jump = JUMP(SLJIT_C_SIG_LESS);
3383 /* End of dropping frames. */
3384 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3385
3386 JUMPHERE(jump);
3387 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3388 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3389 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3390 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3391 JUMPTO(SLJIT_JUMP, mainloop);
3392 }
3393
/* Emits the shared word boundary routine. It is entered by fast call and
keeps its return address in LOCALS0.

The generated code computes a 0/1 "word character" value twice: first for the
character before STR_PTR (stored in LOCALS1; it stays 0 when STR_PTR is not
above arguments->begin), then for the character at STR_PTR (kept in TMP2; it
stays 0 when check_str_end() takes its jump). The final XOR of the two values
is emitted with SLJIT_SET_E, so the caller can test the flags to see whether
the two values differ.

With use_ucp the value is 1 for the underscore and for characters whose type
returned by the getucd routine lies in ucp_Ll..ucp_Lu or ucp_Nd..ucp_No;
otherwise bit 4 (ctype_word) of the ctypes table entry is used, and
characters above 255 count as 0. */
3394 static void check_wordboundary(compiler_common *common)
3395 {
3396 DEFINE_COMPILER;
3397 struct sljit_jump *skipread;
3398 jump_list *skipread_list = NULL;
3399 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3400 struct sljit_jump *jump;
3401 #endif
3402
/* The table based paths shift by 4 to extract the ctype_word bit. */
3403 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3404
3405 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3406 /* Get type of the previous char, and put it to LOCALS1. */
3407 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3408 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
/* Default: no previous character, so its value is 0. */
3409 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3410 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character, then read it again; STR_PTR presumably ends up
at its original position after read_char() - confirm against the helpers. */
3411 skip_char_back(common);
3412 check_start_used_ptr(common);
3413 read_char(common);
3414
3415 /* Testing char type. */
3416 #ifdef SUPPORT_UCP
3417 if (common->use_ucp)
3418 {
/* TMP2 = 1 is the result for the underscore, which skips the type lookup. */
3419 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3420 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3421 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two unsigned range tests on the type: [ucp_Ll, ucp_Lu] and [ucp_Nd, ucp_No]. */
3422 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3423 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3424 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3425 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3426 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3427 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3428 JUMPHERE(jump);
3429 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3430 }
3431 else
3432 #endif
3433 {
/* Characters above 255 have no ctypes entry; they skip the lookup and keep
the 0 already stored in LOCALS1. */
3434 #ifndef COMPILE_PCRE8
3435 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3436 #elif defined SUPPORT_UTF
3437 /* Here LOCALS1 has already been zeroed. */
3438 jump = NULL;
3439 if (common->utf)
3440 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3441 #endif /* COMPILE_PCRE8 */
/* LOCALS1 = (ctypes[c] >> 4) & 1. */
3442 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3443 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3444 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3445 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3446 #ifndef COMPILE_PCRE8
3447 JUMPHERE(jump);
3448 #elif defined SUPPORT_UTF
3449 if (jump != NULL)
3450 JUMPHERE(jump);
3451 #endif /* COMPILE_PCRE8 */
3452 }
3453 JUMPHERE(skipread);
3454
/* Second half: the character at STR_PTR; TMP2 = 0 is the result used when
check_str_end() jumps to skipread_list (presumably at the end of the subject). */
3455 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3456 check_str_end(common, &skipread_list);
3457 peek_char(common);
3458
3459 /* Testing char type. This is a code duplication. */
3460 #ifdef SUPPORT_UCP
3461 if (common->use_ucp)
3462 {
3463 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3464 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3465 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3466 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3467 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3468 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3469 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3470 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3471 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3472 JUMPHERE(jump);
3473 }
3474 else
3475 #endif
3476 {
3477 #ifndef COMPILE_PCRE8
3478 /* TMP2 may be destroyed by peek_char. */
3479 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3480 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3481 #elif defined SUPPORT_UTF
3482 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3483 jump = NULL;
3484 if (common->utf)
3485 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3486 #endif
/* TMP2 = (ctypes[c] >> 4) & 1. */
3487 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3488 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3489 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3490 #ifndef COMPILE_PCRE8
3491 JUMPHERE(jump);
3492 #elif defined SUPPORT_UTF
3493 if (jump != NULL)
3494 JUMPHERE(jump);
3495 #endif /* COMPILE_PCRE8 */
3496 }
3497 set_jumps(skipread_list, LABEL());
3498
/* Flags are set from (current value XOR previous value). */
3499 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3500 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3501 }
3502
3503 /*
3504 range format:
3505
3506 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3507 ranges[1] = first bit (0 or 1)
3508 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3509 */
3510
3511 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3512 {
3513 DEFINE_COMPILER;
3514 struct sljit_jump *jump;
3515
3516 if (ranges[0] < 0)
3517 return FALSE;
3518
3519 switch(ranges[0])
3520 {
3521 case 1:
3522 if (readch)
3523 read_char(common);
3524 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3525 return TRUE;
3526
3527 case 2:
3528 if (readch)
3529 read_char(common);
3530 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3531 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3532 return TRUE;
3533
3534 case 4:
3535 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3536 {
3537 if (readch)
3538 read_char(common);
3539 if (ranges[1] != 0)
3540 {
3541 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3542 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3543 }
3544 else
3545 {
3546 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3547 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3548 JUMPHERE(jump);
3549 }
3550 return TRUE;
3551 }
3552 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3553 {
3554 if (readch)
3555 read_char(common);
3556 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3557 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3558 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3559 return TRUE;
3560 }
3561 return FALSE;
3562
3563 default:
3564 return FALSE;
3565 }
3566 }
3567
3568 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3569 {
3570 int i, bit, length;
3571 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3572
3573 bit = ctypes[0] & flag;
3574 ranges[0] = -1;
3575 ranges[1] = bit != 0 ? 1 : 0;
3576 length = 0;
3577
3578 for (i = 1; i < 256; i++)
3579 if ((ctypes[i] & flag) != bit)
3580 {
3581 if (length >= MAX_RANGE_SIZE)
3582 return;
3583 ranges[2 + length] = i;
3584 length++;
3585 bit ^= flag;
3586 }
3587
3588 if (bit != 0)
3589 {
3590 if (length >= MAX_RANGE_SIZE)
3591 return;
3592 ranges[2 + length] = 256;
3593 length++;
3594 }
3595 ranges[0] = length;
3596 }
3597
3598 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3599 {
3600 int ranges[2 + MAX_RANGE_SIZE];
3601 pcre_uint8 bit, cbit, all;
3602 int i, byte, length = 0;
3603
3604 bit = bits[0] & 0x1;
3605 ranges[1] = bit;
3606 /* Can be 0 or 255. */
3607 all = -bit;
3608
3609 for (i = 0; i < 256; )
3610 {
3611 byte = i >> 3;
3612 if ((i & 0x7) == 0 && bits[byte] == all)
3613 i += 8;
3614 else
3615 {
3616 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3617 if (cbit != bit)
3618 {
3619 if (length >= MAX_RANGE_SIZE)
3620 return FALSE;
3621 ranges[2 + length] = i;
3622 length++;
3623 bit = cbit;
3624 all = -cbit;
3625 }
3626 i++;
3627 }
3628 }
3629
3630 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3631 {
3632 if (length >= MAX_RANGE_SIZE)
3633 return FALSE;
3634 ranges[2 + length] = 256;
3635 length++;
3636 }
3637 ranges[0] = length;
3638
3639 return check_ranges(common, ranges, backtracks, FALSE);
3640 }
3641
3642 static void check_anynewline(compiler_common *common)
3643 {
3644 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3645 DEFINE_COMPILER;
3646
3647 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3648
3649 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3650 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3651 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3652 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3653 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3654 #ifdef COMPILE_PCRE8
3655 if (common->utf)
3656 {
3657 #endif
3658 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3659 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3660 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3661 #ifdef COMPILE_PCRE8
3662 }
3663 #endif
3664 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3665 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3666 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3667 }
3668
3669 static void check_hspace(compiler_common *common)
3670 {
3671 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3672 DEFINE_COMPILER;
3673
3674 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3675
3676 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3677 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3678 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3679 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3680 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3681 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3682 #ifdef COMPILE_PCRE8
3683 if (common->utf)
3684 {
3685 #endif
3686 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3687 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3688 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3689 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3690 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3691 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3692 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3693 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3694 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3695 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3696 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3697 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3698 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3699 #ifdef COMPILE_PCRE8
3700 }
3701 #endif
3702 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3703 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3704
3705 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3706 }
3707
3708 static void check_vspace(compiler_common *common)
3709 {
3710 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3711 DEFINE_COMPILER;
3712
3713 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3714
3715 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3716 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3717 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3718 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3719 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3720 #ifdef COMPILE_PCRE8
3721 if (common->utf)
3722 {
3723 #endif
3724 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3725 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3726 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3727 #ifdef COMPILE_PCRE8
3728 }
3729 #endif
3730 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3731 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3732
3733 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3734 }
3735
3736 #define CHAR1 STR_END
3737 #define CHAR2 STACK_TOP
3738
3739 static void do_casefulcmp(compiler_common *common)
3740 {
3741 DEFINE_COMPILER;
3742 struct sljit_jump *jump;
3743 struct sljit_label *label;
3744
3745 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3746 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3747 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3748 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3749 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3750 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3751
3752 label = LABEL();
3753 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3754 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3755 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3756 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3757 JUMPTO(SLJIT_C_NOT_ZERO, label);
3758
3759 JUMPHERE(jump);
3760 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3761 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3762 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3763 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3764 }
3765
3766 #define LCC_TABLE STACK_LIMIT
3767
3768 static void do_caselesscmp(compiler_common *common)
3769 {
3770 DEFINE_COMPILER;
3771 struct sljit_jump *jump;
3772 struct sljit_label *label;
3773
3774 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3775 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3776
3777 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3778 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3779 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3780 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3781 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3782 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3783
3784 label = LABEL();
3785 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3786 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3787 #ifndef COMPILE_PCRE8
3788 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3789 #endif
3790 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3791 #ifndef COMPILE_PCRE8
3792 JUMPHERE(jump);
3793 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3794 #endif
3795 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3796 #ifndef COMPILE_PCRE8
3797 JUMPHERE(jump);
3798 #endif
3799 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3800 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3801 JUMPTO(SLJIT_C_NOT_ZERO, label);
3802
3803 JUMPHERE(jump);
3804 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3805 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3806 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3807 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3808 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3809 }
3810
3811 #undef LCC_TABLE
3812 #undef CHAR1
3813 #undef CHAR2
3814
3815 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3816
3817 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3818 {
3819 /* This function would be ineffective to do in JIT level. */
3820 pcre_uint32 c1, c2;
3821 const pcre_uchar *src2 = args->uchar_ptr;
3822 const pcre_uchar *end2 = args->end;
3823 const ucd_record *ur;
3824 const pcre_uint32 *pp;
3825
3826 while (src1 < end1)
3827 {
3828 if (src2 >= end2)
3829 return (pcre_uchar*)1;
3830 GETCHARINC(c1, src1);
3831 GETCHARINC(c2, src2);
3832 ur = GET_UCD(c2);
3833 if (c1 != c2 && c1 != c2 + ur->other_case)
3834 {
3835 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3836 for (;;)
3837 {
3838 if (c1 < *pp) return NULL;
3839 if (c1 == *pp++) break;
3840 }
3841 }
3842 }
3843 return src2;
3844 }
3845
3846 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3847
/* Emits the comparison of the pattern character at cc with the subject.

One character is handled per call: a single code unit, or in UTF mode the
lead unit plus its extra units. For every unit context->length is reduced by
IN_UCHARS(1), and subject data is loaded from STR_PTR - context->length, so
context->length appears to be the number of bytes still to be compared,
counted back from STR_PTR (confirm against the callers). Failed comparisons
are added to backtracks.

common      compiler state
caseless    when set and the character has an other case, the differing bit
            returned by char_get_othercase_bit() is OR-ed into the subject
            data and into the expected value before comparing
cc          the pattern character
context     state shared by consecutive calls; sourcereg == -1 marks the
            first call, which emits the initial load into TMP1
backtracks  receives the jumps taken on mismatch

Returns cc advanced past the compared character. */
3848 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3849 compare_context* context, jump_list **backtracks)
3850 {
3851 DEFINE_COMPILER;
3852 unsigned int othercasebit = 0;
3853 pcre_uchar *othercasechar = NULL;
3854 #ifdef SUPPORT_UTF
3855 int utflength;
3856 #endif
3857
3858 if (caseless && char_has_othercase(common, cc))
3859 {
3860 othercasebit = char_get_othercase_bit(common, cc);
3861 SLJIT_ASSERT(othercasebit);
3862 /* Extracting bit difference info. */
/* The upper part of the returned value selects the code unit (relative to
cc) that holds the differing bit, the lower part is the bit itself. */
3863 #if defined COMPILE_PCRE8
3864 othercasechar = cc + (othercasebit >> 8);
3865 othercasebit &= 0xff;
3866 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3867 /* Note that this code only handles characters in the BMP. If there
3868 ever are characters outside the BMP whose othercase differs in only one
3869 bit from itself (there currently are none), this code will need to be
3870 revised for COMPILE_PCRE32. */
3871 othercasechar = cc + (othercasebit >> 9);
3872 if ((othercasebit & 0x100) != 0)
3873 othercasebit = (othercasebit & 0xff) << 8;
3874 else
3875 othercasebit &= 0xff;
3876 #endif /* COMPILE_PCRE[8|16|32] */
3877 }
3878
/* First call for this context: load the first chunk of subject data into
TMP1; the next load will go to TMP2. */
3879 if (context->sourcereg == -1)
3880 {
3881 #if defined COMPILE_PCRE8
3882 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3883 if (context->length >= 4)
3884 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3885 else if (context->length >= 2)
3886 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3887 else
3888 #endif
3889 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3890 #elif defined COMPILE_PCRE16
3891 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3892 if (context->length >= 4)
3893 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3894 else
3895 #endif
3896 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3897 #elif defined COMPILE_PCRE32
3898 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3899 #endif /* COMPILE_PCRE[8|16|32] */
3900 context->sourcereg = TMP2;
3901 }
3902
/* With SUPPORT_UTF the per-unit code below is the body of a do-while loop
that runs once for every code unit of the character. */
3903 #ifdef SUPPORT_UTF
3904 utflength = 1;
3905 if (common->utf && HAS_EXTRALEN(*cc))
3906 utflength += GET_EXTRALEN(*cc);
3907
3908 do
3909 {
3910 #endif
3911
3912 context->length -= IN_UCHARS(1);
3913 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3914
3915 /* Unaligned read is supported. */
/* Collect the expected unit in context->c and the case bit to force in
context->oc; the compare is emitted once a whole chunk has been collected. */
3916 if (othercasebit != 0 && othercasechar == cc)
3917 {
3918 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3919 context->oc.asuchars[context->ucharptr] = othercasebit;
3920 }
3921 else
3922 {
3923 context->c.asuchars[context->ucharptr] = *cc;
3924 context->oc.asuchars[context->ucharptr] = 0;
3925 }
3926 context->ucharptr++;
3927
3928 #if defined COMPILE_PCRE8
3929 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3930 #else
3931 if (context->ucharptr >= 2 || context->length == 0)
3932 #endif
3933 {
/* Load the following chunk into the current sourcereg first, then switch
sourcereg to the other register, which holds the chunk collected above. */
3934 if (context->length >= 4)
3935 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3936 else if (context->length >= 2)
3937 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3938 #if defined COMPILE_PCRE8
3939 else if (context->length >= 1)
3940 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3941 #endif /* COMPILE_PCRE8 */
3942 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3943
/* The number of collected units selects the width of the compare. */
3944 switch(context->ucharptr)
3945 {
3946 case 4 / sizeof(pcre_uchar):
3947 if (context->oc.asint != 0)
3948 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3949 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3950 break;
3951
3952 case 2 / sizeof(pcre_uchar):
3953 if (context->oc.asushort != 0)
3954 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3955 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3956 break;
3957
3958 #ifdef COMPILE_PCRE8
3959 case 1:
3960 if (context->oc.asbyte != 0)
3961 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3962 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3963 break;
3964 #endif
3965
3966 default:
3967 SLJIT_ASSERT_STOP();
3968 break;
3969 }
3970 context->ucharptr = 0;
3971 }
3972
3973 #else
3974
3975 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit per compare: load the next unit into the current sourcereg,
then switch to the other register and compare the unit it holds. */
3976 if (context->length >= 1)
3977 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3978
3979 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3980
3981 if (othercasebit != 0 && othercasechar == cc)
3982 {
3983 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3984 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3985 }
3986 else
3987 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3988
3989 #endif
3990
3991 cc++;
3992 #ifdef SUPPORT_UTF
3993 utflength--;
3994 }
3995 while (utflength > 0);
3996 #endif
3997
3998 return cc;
3999 }
4000
4001 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4002
/* Emits the SUB or ADD that changes the bias applied to typereg from
typeoffset to (value), then records (value) as the new typeoffset. Nothing is
emitted when the bias is already (value). The macro uses the typereg and
typeoffset variables of the expanding function, evaluates (value) several
times, and expands to two statements (an if statement and an assignment), so
it must not be used as the unbraced body of an if or a loop. */
4003 #define SET_TYPE_OFFSET(value) \
4004 if ((value) != typeoffset) \
4005 { \
4006 if ((value) > typeoffset) \
4007 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4008 else \
4009 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4010 } \
4011 typeoffset = (value);
4012
/* Emits the SUB or ADD that changes the bias applied to TMP1 from charoffset
to (value), then records (value) as the new charoffset. Nothing is emitted
when the bias is already (value). The macro uses the charoffset variable of
the expanding function, evaluates (value) several times, and expands to two
statements (an if statement and an assignment), so it must not be used as the
unbraced body of an if or a loop. */
4013 #define SET_CHAR_OFFSET(value) \
4014 if ((value) != charoffset) \
4015 { \
4016 if ((value) > charoffset) \
4017 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
4018 else \
4019 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
4020 } \
4021 charoffset = (value);
4022
4023 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4024 {
4025 DEFINE_COMPILER;
4026 jump_list *found = NULL;
4027 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
4028 pcre_int32 c, charoffset;
4029 const pcre_uint32 *other_cases;
4030 struct sljit_jump *jump = NULL;
4031 pcre_uchar *ccbegin;
4032 int compares, invertcmp, numberofcmps;
4033 #ifdef SUPPORT_UCP
4034 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4035 BOOL charsaved = FALSE;
4036 int typereg = TMP1, scriptreg = TMP1;
4037 pcre_int32 typeoffset;
4038 #endif
4039
4040 /* Although SUPPORT_UTF must be defined, we are
4041 not necessary in utf mode even in 8 bit mode. */
4042 detect_partial_match(common, backtracks);
4043 read_char(common);
4044
4045 if ((*cc++ & XCL_MAP) != 0)
4046 {
4047 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4048 #ifndef COMPILE_PCRE8
4049 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4050 #elif defined SUPPORT_UTF
4051 if (common->utf)
4052 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4053 #endif
4054
4055 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
4056 {
4057 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4058 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4059 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4060 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4061 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4062 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4063 }
4064
4065 #ifndef COMPILE_PCRE8
4066 JUMPHERE(jump);
4067 #elif defined SUPPORT_UTF
4068 if (common->utf)
4069 JUMPHERE(jump);
4070 #endif
4071 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4072 #ifdef SUPPORT_UCP
4073 charsaved = TRUE;
4074 #endif
4075 cc += 32 / sizeof(pcre_uchar);
4076 }
4077
4078 /* Scanning the necessary info. */
4079 ccbegin = cc;
4080 compares = 0;
4081 while (*cc != XCL_END)
4082 {
4083 compares++;
4084 if (*cc == XCL_SINGLE)
4085 {
4086 cc += 2;
4087 #ifdef SUPPORT_UTF
4088 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4089 #endif
4090 #ifdef SUPPORT_UCP
4091 needschar = TRUE;
4092 #endif
4093 }
4094 else if (*cc == XCL_RANGE)
4095 {
4096 cc += 2;
4097 #ifdef SUPPORT_UTF
4098 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4099 #endif
4100 cc++;
4101 #ifdef SUPPORT_UTF
4102 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4103 #endif
4104 #ifdef SUPPORT_UCP
4105 needschar = TRUE;
4106 #endif
4107 }
4108 #ifdef SUPPORT_UCP
4109 else
4110 {
4111 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4112 cc++;
4113 switch(*cc)
4114 {
4115 case PT_ANY:
4116 break;
4117
4118 case PT_LAMP:
4119 case PT_GC:
4120 case PT_PC:
4121 case PT_ALNUM:
4122 needstype = TRUE;
4123 break;
4124
4125 case PT_SC:
4126 needsscript = TRUE;
4127 break;
4128
4129 case PT_SPACE:
4130 case PT_PXSPACE:
4131 case PT_WORD:
4132 needstype = TRUE;
4133 needschar = TRUE;
4134 break;
4135
4136 case PT_CLIST:
4137 case PT_UCNC:
4138 needschar = TRUE;
4139 break;
4140
4141 default:
4142 SLJIT_ASSERT_STOP();
4143 break;
4144 }
4145 cc += 2;
4146 }
4147 #endif
4148 }
4149
4150 #ifdef SUPPORT_UCP
4151 /* Simple register allocation. TMP1 is preferred if possible. */
4152 if (needstype || needsscript)
4153 {
4154 if (needschar && !charsaved)
4155 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4156 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4157 if (needschar)
4158 {
4159 if (needstype)
4160 {
4161 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4162 typereg = RETURN_ADDR;
4163 }
4164
4165 if (needsscript)
4166 scriptreg = TMP3;
4167 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4168 }
4169 else if (needstype && needsscript)
4170 scriptreg = TMP3;
4171 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4172
4173 if (needsscript)
4174 {
4175 if (scriptreg == TMP1)
4176 {
4177 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4178 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4179 }
4180 else
4181 {
4182 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4183 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4184 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4185 }
4186 }
4187 }
4188 #endif
4189
4190 /* Generating code. */
4191 cc = ccbegin;
4192 charoffset = 0;
4193 numberofcmps = 0;
4194 #ifdef SUPPORT_UCP
4195 typeoffset = 0;
4196 #endif
4197
4198 while (*cc != XCL_END)
4199 {
4200 compares--;
4201 invertcmp = (compares == 0 && list != backtracks);
4202 jump = NULL;
4203
4204 if (*cc == XCL_SINGLE)
4205 {
4206 cc ++;
4207 #ifdef SUPPORT_UTF
4208 if (common->utf)
4209 {
4210 GETCHARINC(c, cc);
4211 }
4212 else
4213 #endif
4214 c = *cc++;
4215
4216 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4217 {
4218 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4219 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4220 numberofcmps++;
4221 }
4222 else if (numberofcmps > 0)
4223 {
4224 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4225 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4226 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4227 numberofcmps = 0;
4228 }
4229 else
4230 {
4231 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4232 numberofcmps = 0;
4233 }
4234 }
4235 else if (*cc == XCL_RANGE)
4236 {
4237 cc ++;
4238 #ifdef SUPPORT_UTF
4239 if (common->utf)
4240 {
4241 GETCHARINC(c, cc);
4242 }
4243 else
4244 #endif
4245 c = *cc++;
4246 SET_CHAR_OFFSET(c);
4247 #ifdef SUPPORT_UTF
4248 if (common->utf)
4249 {
4250 GETCHARINC(c, cc);
4251 }
4252 else
4253 #endif
4254 c = *cc++;
4255 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4256 {
4257 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4258 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4259 numberofcmps++;
4260 }
4261 else if (numberofcmps > 0)
4262 {
4263 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4264 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4265 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4266 numberofcmps = 0;
4267 }
4268 else
4269 {
4270 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4271 numberofcmps = 0;
4272 }
4273 }
4274 #ifdef SUPPORT_UCP
4275 else
4276 {
4277 if (*cc == XCL_NOTPROP)
4278 invertcmp ^= 0x1;
4279 cc++;
4280 switch(*cc)
4281 {
4282 case PT_ANY:
4283 if (list != backtracks)
4284 {
4285 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4286 continue;
4287 }
4288 else if (cc[-1] == XCL_NOTPROP)
4289 continue;
4290 jump = JUMP(SLJIT_JUMP);
4291 break;
4292
4293 case PT_LAMP:
4294 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4295 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4296 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4297 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4298 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4299 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4300 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4301 break;
4302
4303 case PT_GC:
4304 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4305 SET_TYPE_OFFSET(c);
4306 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4307 break;
4308
4309 case PT_PC:
4310 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4311 break;
4312
4313 case PT_SC:
4314 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4315 break;
4316
4317 case PT_SPACE:
4318 case PT_PXSPACE:
4319 if (*cc == PT_SPACE)
4320 {
4321 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4322 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4323 }
4324 SET_CHAR_OFFSET(9);
4325 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4326 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4327 if (*cc == PT_SPACE)
4328 JUMPHERE(jump);
4329
4330 SET_TYPE_OFFSET(ucp_Zl);
4331 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4332 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4333 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4334 break;
4335
4336 case PT_WORD:
4337 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4338 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4339 /* Fall through. */
4340
4341 case PT_ALNUM:
4342 SET_TYPE_OFFSET(ucp_Ll);
4343 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4344 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4345 SET_TYPE_OFFSET(ucp_Nd);
4346 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4347 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4348 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4349 break;
4350
4351 case PT_CLIST:
4352 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4353
4354 /* At least three characters are required.
4355 Otherwise this case would be handled by the normal code path. */
4356 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4357 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4358
4359 /* Optimizing character pairs, if their difference is power of 2. */
4360 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4361 {
4362 if (charoffset == 0)
4363 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4364 else
4365 {
4366 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4367 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4368 }
4369 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4370 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4371 other_cases += 2;
4372 }
4373 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4374 {
4375 if (charoffset == 0)
4376 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4377 else
4378 {
4379 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4380 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4381 }
4382 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4383 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4384
4385 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4386 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4387
4388 other_cases += 3;
4389 }
4390 else
4391 {
4392 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4393 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4394 }
4395
4396 while (*other_cases != NOTACHAR)
4397 {
4398 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4399 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4400 }
4401 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4402 break;
4403
4404 case PT_UCNC:
4405 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4406 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4407 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4408 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4409 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4410 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4411
4412 SET_CHAR_OFFSET(0xa0);
4413 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4414 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4415 SET_CHAR_OFFSET(0);
4416 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4417 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4418 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4419 break;
4420 }
4421 cc += 2;
4422 }
4423 #endif
4424
4425 if (jump != NULL)
4426 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4427 }
4428
4429 if (found != NULL)
4430 set_jumps(found, LABEL());
4431 }
4432
4433 #undef SET_TYPE_OFFSET
4434 #undef SET_CHAR_OFFSET
4435
4436 #endif
4437
/* Emits the matching-path code for one opcode `type`. cc is the pattern
   pointer used for the operand data of that opcode (NOTE(review): it appears
   to point just after the opcode unit - confirm against the callers).
   Jumps taken when the subject does not match are appended to *backtracks.

   Returns the pattern pointer advanced past the operand data consumed here;
   several opcodes return cc unchanged. Falling out of the switch means the
   opcode is not handled by this function and hits SLJIT_ASSERT_STOP(). */
4438 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4439 {
4440 DEFINE_COMPILER;
4441 int length;
4442 unsigned int c, oc, bit;
4443 compare_context context;
4444 struct sljit_jump *jump[4];
4445 jump_list *end_list;
4446 #ifdef SUPPORT_UTF
4447 struct sljit_label *label;
4448 #ifdef SUPPORT_UCP
4449 pcre_uchar propdata[5];
4450 #endif
4451 #endif
4452
4453 switch(type)
4454 {
/* Fails unless STR_PTR equals the `begin` field of jit_arguments. */
4455 case OP_SOD:
4456 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4457 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4458 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4459 return cc;
4460
/* Fails unless STR_PTR equals the `str` field of jit_arguments. */
4461 case OP_SOM:
4462 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4463 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4464 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4465 return cc;
4466
/* Calls the shared wordboundary routine and branches on the flags it leaves;
   the two opcodes backtrack on opposite conditions. */
4467 case OP_NOT_WORD_BOUNDARY:
4468 case OP_WORD_BOUNDARY:
4469 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4470 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4471 return cc;
4472
4473 case OP_NOT_DIGIT:
4474 case OP_DIGIT:
4475 /* Digits are usually 0-9, so it is worth optimizing them. */
/* -2 in digits[0] marks the range cache as not yet filled. */
4476 if (common->digits[0] == -2)
4477 get_ctype_ranges(common, ctype_digit, common->digits);
4478 detect_partial_match(common, backtracks);
4479 /* Flip the starting bit in the negative case. */
4480 if (type == OP_NOT_DIGIT)
4481 common->digits[1] ^= 1;
/* Fallback when check_ranges() returns FALSE: test the ctype_digit bit of
   the value left in TMP1 by read_char8_type(). */
4482 if (!check_ranges(common, common->digits, backtracks, TRUE))
4483 {
4484 read_char8_type(common);
4485 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4486 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4487 }
/* Undo the flip above, because common->digits is shared state. */
4488 if (type == OP_NOT_DIGIT)
4489 common->digits[1] ^= 1;
4490 return cc;
4491
/* Tests the ctype_space bit of the value left in TMP1 by read_char8_type(). */
4492 case OP_NOT_WHITESPACE:
4493 case OP_WHITESPACE:
4494 detect_partial_match(common, backtracks);
4495 read_char8_type(common);
4496 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4497 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4498 return cc;
4499
/* Tests the ctype_word bit of the value left in TMP1 by read_char8_type(). */
4500 case OP_NOT_WORDCHAR:
4501 case OP_WORDCHAR:
4502 detect_partial_match(common, backtracks);
4503 read_char8_type(common);
4504 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4505 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4506 return cc;
4507
/* Reads one character and fails if a newline starts there. For a fixed
   newline whose value is above 255 (two units packed as high/low byte), the
   character read is compared with the high byte and, only when that matches
   and the subject has not ended, the next unit is compared with the low
   byte. */
4508 case OP_ANY:
4509 detect_partial_match(common, backtracks);
4510 read_char(common);
4511 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4512 {
4513 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4514 end_list = NULL;
4515 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4516 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4517 else
4518 check_str_end(common, &end_list);
4519
4520 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4521 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4522 set_jumps(end_list, LABEL());
4523 JUMPHERE(jump[0]);
4524 }
4525 else
4526 check_newlinechar(common, common->nltype, backtracks, TRUE);
4527 return cc;
4528
/* Skips one character without examining it. In UTF mode the first unit is
   loaded so that the extra units of a multi-unit character can be skipped:
   8 bit - lead units >= 0xc0 add the utf8_table4 entry to STR_PTR;
   16 bit - units with (unit & 0xfc00) == 0xd800 skip one more unit.
   No extra code is generated when neither COMPILE_PCRE8 nor COMPILE_PCRE16
   is defined. */
4529 case OP_ALLANY:
4530 detect_partial_match(common, backtracks);
4531 #ifdef SUPPORT_UTF
4532 if (common->utf)
4533 {
4534 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4535 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4536 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4537 #if defined COMPILE_PCRE8
4538 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4539 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4540 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4541 #elif defined COMPILE_PCRE16
4542 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4543 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4544 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4545 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4546 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4547 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4548 #endif
4549 JUMPHERE(jump[0]);
4550 #endif /* COMPILE_PCRE[8|16] */
4551 return cc;
4552 }
4553 #endif
4554 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4555 return cc;
4556
/* Always advances by exactly one code unit. */
4557 case OP_ANYBYTE:
4558 detect_partial_match(common, backtracks);
4559 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4560 return cc;
4561
4562 #ifdef SUPPORT_UTF
4563 #ifdef SUPPORT_UCP
/* A single property test is delegated to the extended class generator by
   building a temporary class: flags 0, one (NOT)PROP item made of the two
   operand units, XCL_END. */
4564 case OP_NOTPROP:
4565 case OP_PROP:
4566 propdata[0] = 0;
4567 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4568 propdata[2] = cc[0];
4569 propdata[3] = cc[1];
4570 propdata[4] = XCL_END;
4571 compile_xclass_matchingpath(common, propdata, backtracks);
4572 return cc + 2;
4573 #endif
4574 #endif
4575
/* After a CR: at end of subject, or when the next unit is not NL, the CR
   alone is accepted; a following NL is consumed as well. Any character other
   than CR is passed to check_newlinechar() with common->bsr_nltype. */
4576 case OP_ANYNL:
4577 detect_partial_match(common, backtracks);
4578 read_char(common);
4579 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4580 /* We don't need to handle soft partial matching case. */
4581 end_list = NULL;
4582 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4583 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4584 else
4585 check_str_end(common, &end_list);
4586 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4587 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4588 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4589 jump[2] = JUMP(SLJIT_JUMP);
4590 JUMPHERE(jump[0]);
4591 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4592 set_jumps(end_list, LABEL());
4593 JUMPHERE(jump[1]);
4594 JUMPHERE(jump[2]);
4595 return cc;
4596
/* Reads a character, calls the shared hspace routine and branches on the
   flags it leaves. */
4597 case OP_NOT_HSPACE:
4598 case OP_HSPACE:
4599 detect_partial_match(common, backtracks);
4600 read_char(common);
4601 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4602 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4603 return cc;
4604
/* Same scheme as the hspace case, using the shared vspace routine. */
4605 case OP_NOT_VSPACE:
4606 case OP_VSPACE:
4607 detect_partial_match(common, backtracks);
4608 read_char(common);
4609 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4610 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4611 return cc;
4612
4613 #ifdef SUPPORT_UCP
/* Consumes one character and then keeps consuming following characters
   while the ucp_gbtable entry selected by the previous character's gbprop
   has the bit of the next character's gbprop set. STACK_TOP is borrowed to
   hold the previous gbprop; its value is parked in LOCALS0 and restored
   afterwards. TMP3 remembers the position before the character that ended
   the loop, so STR_PTR can be moved back to it. */
4614 case OP_EXTUNI:
4615 detect_partial_match(common, backtracks);
4616 read_char(common);
4617 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4618 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4619 /* Optimize register allocation: use a real register. */
4620 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4621 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4622
4623 label = LABEL();
4624 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4625 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4626 read_char(common);
4627 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4628 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4629 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4630
/* TMP1 = ucp_gbtable word of the previous gbprop (index scaled by 4);
   the current gbprop becomes the "previous" one for the next iteration. */
4631 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4632 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4633 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4634 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4635 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4636 JUMPTO(SLJIT_C_NOT_ZERO, label);
4637
4638 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4639 JUMPHERE(jump[0]);
4640 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4641
4642 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4643 {
4644 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4645 /* Since we successfully read a char above, partial matching must occur. */
4646 check_partial(common, TRUE);
4647 JUMPHERE(jump[0]);
4648 }
4649 return cc;
4650 #endif
4651
/* Succeeds at the end of the subject (jump[0]) or when exactly one newline
   sequence remains before the end. Three code shapes follow: fixed two-unit
   newline, fixed one-unit newline, and the remaining newline types. */
4652 case OP_EODN:
4653 /* Requires rather complex checks. */
4654 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4655 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4656 {
4657 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4658 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4659 if (common->mode == JIT_COMPILE)
4660 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4661 else
4662 {
/* Partial modes: when STR_PTR + 2 is past the end and the only remaining
   unit equals the first newline unit, check_partial() is invoked before
   backtracking. */
4663 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4664 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4665 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4666 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4667 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4668 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4669 check_partial(common, TRUE);
4670 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4671 JUMPHERE(jump[1]);
4672 }
4673 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4674 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4675 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4676 }
4677 else if (common->nltype == NLTYPE_FIXED)
4678 {
4679 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4680 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4681 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4682 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4683 }
4684 else
4685 {
/* First unit is CR: with exactly two units left the second must be NL;
   with more than two left the match fails; with only the CR left (jump[2])
   the match succeeds. */
4686 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4687 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4688 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4689 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4690 jump[2] = JUMP(SLJIT_C_GREATER);
4691 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4692 /* Equal. */
4693 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4694 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4695 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4696
/* First unit is not CR. */
4697 JUMPHERE(jump[1]);
4698 if (common->nltype == NLTYPE_ANYCRLF)
4699 {
4700 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4701 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4702 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4703 }
4704 else
4705 {
/* STR_PTR is saved in LOCALS1 around read_char(), because this opcode must
   not consume the newline. */
4706 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4707 read_char(common);
4708 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4709 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4710 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4711 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4712 }
4713 JUMPHERE(jump[2]);
4714 JUMPHERE(jump[3]);
4715 }
4716 JUMPHERE(jump[0]);
4717 check_partial(common, FALSE);
4718 return cc;
4719
/* Fails unless STR_PTR has reached STR_END. */
4720 case OP_EOD:
4721 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4722 check_partial(common, FALSE);
4723 return cc;
4724
/* Fails when STR_PTR is beyond `begin` or when the notbol field is non-zero. */
4725 case OP_CIRC:
4726 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4727 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4728 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4729 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4730 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4731 return cc;
4732
/* At `begin` the notbol test of OP_CIRC applies. Elsewhere the position must
   not be the end of the subject and must be preceded by a newline. */
4733 case OP_CIRCM:
4734 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4735 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4736 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4737 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4738 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4739 jump[0] = JUMP(SLJIT_JUMP);
4740 JUMPHERE(jump[1]);
4741
4742 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4743 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4744 {
/* Two-unit newline: both preceding units must exist (not before `begin`,
   which is still in TMP1) and equal the two newline units. */
4745 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4746 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4747 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4748 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4749 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4750 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4751 }
4752 else
4753 {
/* Step back one character and read it again, then test it for a newline.
   NOTE(review): this presumably leaves STR_PTR where it started - confirm
   in skip_char_back()/read_char(). */
4754 skip_char_back(common);
4755 read_char(common);
4756 check_newlinechar(common, common->nltype, backtracks, FALSE);
4757 }
4758 JUMPHERE(jump[0]);
4759 return cc;
4760
/* Fails when the noteol field is non-zero. Otherwise behaves like OP_EODN,
   unless common->endonly is set, in which case only the end of the subject
   is accepted. */
4761 case OP_DOLL:
4762 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4763 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4764 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4765
4766 if (!common->endonly)
4767 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4768 else
4769 {
4770 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4771 check_partial(common, FALSE);
4772 }
4773 return cc;
4774
/* At the end of the subject the noteol field decides. Before the end a
   newline must start at STR_PTR. */
4775 case OP_DOLLM:
4776 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4777 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4778 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4779 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4780 check_partial(common, FALSE);
4781 jump[0] = JUMP(SLJIT_JUMP);
4782 JUMPHERE(jump[1]);
4783
4784 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4785 {
4786 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4787 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4788 if (common->mode == JIT_COMPILE)
4789 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4790 else
4791 {
4792 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4793 /* STR_PTR = STR_END - IN_UCHARS(1) */
4794 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4795 check_partial(common, TRUE);
4796 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4797 JUMPHERE(jump[1]);
4798 }
4799
4800 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4801 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4802 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4803 }
4804 else
4805 {
4806 peek_char(common);
4807 check_newlinechar(common, common->nltype, backtracks, FALSE);
4808 }
4809 JUMPHERE(jump[0]);
4810 return cc;
4811
/* Literal character. `length` is the number of code units of the character
   in the pattern. */
4812 case OP_CHAR:
4813 case OP_CHARI:
4814 length = 1;
4815 #ifdef SUPPORT_UTF
4816 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4817 #endif
/* Fast path (normal mode only): taken for a case-sensitive character, a
   character without other case, or one for which char_get_othercase_bit()
   returns non-zero. STR_PTR is advanced first, bounds are checked once and
   byte_sequence_compare() generates the comparison. */
4818 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4819 {
4820 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4821 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4822
4823 context.length = IN_UCHARS(length);
4824 context.sourcereg = -1;
4825 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4826 context.ucharptr = 0;
4827 #endif
4828 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4829 }
/* Generic path: read one character and compare its value. */
4830 detect_partial_match(common, backtracks);
4831 read_char(common);
4832 #ifdef SUPPORT_UTF
4833 if (common->utf)
4834 {
4835 GETCHAR(c, cc);
4836 }
4837 else
4838 #endif
4839 c = *cc;
4840 if (type == OP_CHAR || !char_has_othercase(common, cc))
4841 {
4842 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4843 return cc + length;
4844 }
4845 oc = char_othercase(common, c);
4846 bit = c ^ oc;
/* When both cases differ in one bit, force that bit and compare once. */
4847 if (is_powerof2(bit))
4848 {
4849 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4850 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4851 return cc + length;
4852 }
/* Otherwise compare with both values and backtrack if neither matched. */
4853 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4854 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4855 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4856 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4857 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4858 return cc + length;
4859
/* Negated literal: any single character except c (and, for OP_NOTI, its
   other case). */
4860 case OP_NOT:
4861 case OP_NOTI:
4862 detect_partial_match(common, backtracks);
4863 length = 1;
4864 #ifdef SUPPORT_UTF
4865 if (common->utf)
4866 {
4867 #ifdef COMPILE_PCRE8
4868 c = *cc;
/* 8 bit UTF with a pattern character below 128: only the first subject unit
   is compared; the remaining units of the subject character are skipped
   using utf8_table4. */
4869 if (c < 128)
4870 {
4871 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4872 if (type == OP_NOT || !char_has_othercase(common, cc))
4873 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4874 else
4875 {
4876 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4877 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4878 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4879 }
4880 /* Skip the variable-length character. */
4881 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4882 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4883 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4884 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4885 JUMPHERE(jump[0]);
4886 return cc + 1;
4887 }
4888 else
4889 #endif /* COMPILE_PCRE8 */
4890 {
4891 GETCHARLEN(c, cc, length);
4892 read_char(common);
4893 }
4894 }
4895 else
4896 #endif /* SUPPORT_UTF */
4897 {
4898 read_char(common);
4899 c = *cc;
4900 }
4901
/* Here the subject character is in TMP1 and the pattern character in c. */
4902 if (type == OP_NOT || !char_has_othercase(common, cc))
4903 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4904 else
4905 {
4906 oc = char_othercase(common, c);
4907 bit = c ^ oc;
4908 if (is_powerof2(bit))
4909 {
4910 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4911 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4912 }
4913 else
4914 {
4915 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4916 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4917 }
4918 }
4919 return cc + length;
4920
/* Bitmap class; the 32 byte bitmap starts at cc. If check_class_ranges()
   returns TRUE it has generated the whole test. Otherwise the generic lookup
   is emitted: byte (ch >> 3), bit (1 << (ch & 7)). */
4921 case OP_CLASS:
4922 case OP_NCLASS:
4923 detect_partial_match(common, backtracks);
4924 read_char(common);
4925 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4926 return cc + 32 / sizeof(pcre_uchar);
4927
4928 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4929 jump[0] = NULL;
4930 #ifdef COMPILE_PCRE8
4931 /* This check only affects 8 bit mode. In other modes, we
4932 always need to compare the value with 255. */
4933 if (common->utf)
4934 #endif /* COMPILE_PCRE8 */
4935 {
/* Characters above 255 are outside the bitmap: they fail OP_CLASS and
   pass OP_NCLASS by jumping over the lookup. */
4936 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4937 if (type == OP_CLASS)
4938 {
4939 add_jump(compiler, backtracks, jump[0]);
4940 jump[0] = NULL;
4941 }
4942 }
4943 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4944 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4945 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4946 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4947 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4948 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4949 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4950 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4951 if (jump[0] != NULL)
4952 JUMPHERE(jump[0]);
4953 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4954 return cc + 32 / sizeof(pcre_uchar);
4955
4956 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended class: the class data starts LINK_SIZE units after cc.
   NOTE(review): GET(cc, 0) looks like the total length of the item including
   the opcode unit, hence the "- 1" - confirm against the pattern compiler. */
4957 case OP_XCLASS:
4958 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4959 return cc + GET(cc, 0) - 1;
4960 #endif
4961
/* Moves STR_PTR back by `length` characters and fails if that would pass the
   `begin` field of jit_arguments. In UTF mode one skip_char_back() is
   generated inside a run-time loop counted in TMP2; otherwise a single
   subtraction is enough. */
4962 case OP_REVERSE:
4963 length = GET(cc, 0);
4964 if (length == 0)
4965 return cc + LINK_SIZE;
4966 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4967 #ifdef SUPPORT_UTF
4968 if (common->utf)
4969 {
4970 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4971 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4972 label = LABEL();
4973 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4974 skip_char_back(common);
4975 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4976 JUMPTO(SLJIT_C_NOT_ZERO, label);
4977 }
4978 else
4979 #endif
4980 {
4981 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4982 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4983 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4984 }
4985 check_start_used_ptr(common);
4986 return cc + LINK_SIZE;
4987 }
/* Only reached for an opcode without a case above. */
4988 SLJIT_ASSERT_STOP();
4989 return cc;
4990 }
4991
/* Compiles the character opcode(s) starting at cc.

   The opcodes from cc up to ccend are scanned first, and the sizes of
   consecutive OP_CHAR / OP_CHARI items that can be compared as fixed data are
   added up in context.length. When that total is non-zero, one subject-length
   check is emitted for the whole run and byte_sequence_compare() is called
   until the run is used up. Otherwise the single opcode at cc is passed to
   compile_char1_matchingpath().

   Jumps taken on a failed match are added to the backtracks list. The return
   value is the opcode pointer returned by the helper that generated the code. */
4992 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4993 {
4994 /* This function consumes at least one input character. */
4995 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4996 DEFINE_COMPILER;
4997 pcre_uchar *ccbegin = cc;
4998 compare_context context;
4999 int size;
5000
5001 context.length = 0;
/* Scanning pass only: no code is generated in this loop. It ends at ccend,
   at the first item with size == 0, or once context.length exceeds 128. */
5002 do
5003 {
5004 if (cc >= ccend)
5005 break;
5006
5007 if (*cc == OP_CHAR)
5008 {
5009 size = 1;
5010 #ifdef SUPPORT_UTF
5011 if (common->utf && HAS_EXTRALEN(cc[1]))
5012 size += GET_EXTRALEN(cc[1]);
5013 #endif
5014 }
5015 else if (*cc == OP_CHARI)
5016 {
/* A caseless character is left out of the run (size = 0) when it has an
   other case and char_get_othercase_bit() returns 0 for it. */
5017 size = 1;
5018 #ifdef SUPPORT_UTF
5019 if (common->utf)
5020 {
5021 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5022 size = 0;
5023 else if (HAS_EXTRALEN(cc[1]))
5024 size += GET_EXTRALEN(cc[1]);
5025 }
5026 else
5027 #endif
5028 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5029 size = 0;
5030 }
5031 else
5032 size = 0;
5033
5034 cc += 1 + size;
5035 context.length += IN_UCHARS(size);
5036 }
5037 while (size > 0 && context.length <= 128);
5038
/* Rewind: code generation starts again from the first opcode. */
5039 cc = ccbegin;
5040 if (context.length > 0)
5041 {
5042 /* We have a fixed-length byte sequence. */
/* STR_PTR is advanced past the whole run before the single end-of-subject
   check. */
5043 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5044 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5045
5046 context.sourcereg = -1;
5047 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5048 context.ucharptr = 0;
5049 #endif
/* The loop ends when byte_sequence_compare() has brought context.length down
   to zero (it receives &context). */
5050 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5051 return cc;
5052 }
5053
5054 /* A non-fixed length character will be checked if length == 0. */
5055 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5056 }
5057
/* Emits the checks that precede matching a back reference. The group number
   is read with GET2(cc, 1), and the group's first ovector entry is loaded
   into TMP1.

   When backtracks is NULL (and jscript_compat is off), a single jump is
   returned which is taken if TMP1 equals OVECTOR(1) or equals
   OVECTOR(offset + 1).

   Otherwise (again only when jscript_compat is off) a jump taken when TMP1
   equals OVECTOR(1) is added to backtracks, and the returned jump is taken
   when TMP1 equals OVECTOR(offset + 1), i.e. when both ends of the captured
   range are the same. The caller decides where the returned jump lands. */
5058 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5059 {
5060 DEFINE_COMPILER;
5061 int offset = GET2(cc, 1) << 1;
5062
5063 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5064 if (!common->jscript_compat)
5065 {
5066 if (backtracks == NULL)
5067 {
5068 /* OVECTOR(1) contains the "string begin - 1" constant. */
/* TMP2 collects the result of the two equality tests; the final OP_FLAGS
   sets the flags so that the jump is taken when either test succeeded. */
5069 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5070 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5071 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5072 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5073 return JUMP(SLJIT_C_NOT_ZERO);
5074 }
5075 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5076 }
5077 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5078 }
5079
5080 /* Forward declarations: both functions are called by the compilers in this part of the file. */
5081 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5082 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5083
/* Allocates a backtrack structure of (size) bytes with sljit_alloc_memory(),
   fills it with zeros, records ccstart in its cc field and makes it the new
   parent->top (the previous top is kept in its prev field). If the compiler
   reports an error after the allocation, the enclosing function returns
   `error`. The expansion uses the names `compiler`, `backtrack` and `parent`,
   which must be in scope where the macro is used. */
5084 #define PUSH_BACKTRACK(size, ccstart, error) \
5085 do \
5086 { \
5087 backtrack = sljit_alloc_memory(compiler, (size)); \
5088 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5089 return error; \
5090 memset(backtrack, 0, size); \
5091 backtrack->prev = parent->top; \
5092 backtrack->cc = (ccstart); \
5093 parent->top = backtrack; \
5094 } \
5095 while (0)
5096
/* Variant of PUSH_BACKTRACK for functions returning void: on a compiler
   error the enclosing function returns without a value. The expansion uses
   the names `compiler`, `backtrack` and `parent`, which must be in scope
   where the macro is used. */
5097 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5098 do \
5099 { \
5100 backtrack = sljit_alloc_memory(compiler, (size)); \
5101 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5102 return; \
5103 memset(backtrack, 0, size); \
5104 backtrack->prev = parent->top; \
5105 backtrack->cc = (ccstart); \
5106 parent->top = backtrack; \
5107 } \
5108 while (0)
5109
/* Casts the in-scope variable `backtrack` to a pointer to the given
   backtrack structure type. */
5110 #define BACKTRACK_AS(type) ((type *)backtrack)
5111
/* Emits the code that matches the subject at STR_PTR against the text of a
   captured group (OP_REF / OP_REFI). The group number is read with
   GET2(cc, 1).

   withchecks: when TRUE, the comparison with OVECTOR(1) is emitted here
               (unless jscript_compat is set), and a jump is created for the
               case where both ovector entries of the group are equal.
   emptyfail:  decides what happens with that jump: TRUE adds it to
               backtracks, FALSE makes it land after the comparison code.

   Failed comparisons jump to backtracks. Returns cc + 1 + IMM2_SIZE. */
5112 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5113 {
5114 DEFINE_COMPILER;
5115 int offset = GET2(cc, 1) << 1;
5116 struct sljit_jump *jump = NULL;
5117 struct sljit_jump *partial;
5118 struct sljit_jump *nopartial;
5119
5120 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5121 /* OVECTOR(1) contains the "string begin - 1" constant. */
5122 if (withchecks && !common->jscript_compat)
5123 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5124
5125 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5126 if (common->utf && *cc == OP_REFI)
5127 {
/* Caseless comparison in UTF mode is done by calling do_utf_caselesscmp().
   The assert below states the register mapping this sequence depends on:
   STACK_TOP shares SLJIT_SCRATCH_REG2, which is overwritten with ARGUMENTS
   for the call, so STACK_TOP is saved in LOCALS0 and reloaded afterwards. */
5128 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5129 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5130 if (withchecks)
5131 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5132
5133 /* Needed to save important temporary registers. */
5134 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5135 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5136 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5137 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5138 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return value handling: 0 always backtracks. 1 backtracks too, but outside
   JIT_COMPILE mode check_partial() is emitted first. Any other value becomes
   the new STR_PTR. */
5139 if (common->mode == JIT_COMPILE)
5140 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5141 else
5142 {
5143 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5144 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5145 check_partial(common, FALSE);
5146 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5147 JUMPHERE(nopartial);
5148 }
5149 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5150 }
5151 else
5152 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5153 {
/* TMP2 = length of the captured text (end pointer minus start pointer). */
5154 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5155 if (withchecks)
5156 jump = JUMP(SLJIT_C_ZERO);
5157
5158 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* `partial` is taken when the captured text is longer than the remaining
   subject. In JIT_COMPILE mode that is a plain failure; in the other modes
   it is handled by the block further down. */
5159 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5160 if (common->mode == JIT_COMPILE)
5161 add_jump(compiler, backtracks, partial);
5162
/* The comparison itself is a shared routine reached by a fast call; the
   match fails if TMP2 is non-zero when it returns. */
5163 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5164 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5165
5166 if (common->mode != JIT_COMPILE)
5167 {
/* Only the part that fits before STR_END is compared (skipped when that
   part is empty); then check_partial() is emitted and the path backtracks. */
5168 nopartial = JUMP(SLJIT_JUMP);
5169 JUMPHERE(partial);
5170 /* TMP2 -= STR_END - STR_PTR */
5171 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5172 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5173 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5174 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5175 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5176 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5177 JUMPHERE(partial);
5178 check_partial(common, FALSE);
5179 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5180 JUMPHERE(nopartial);
5181 }
5182 }
5183
/* `jump` is only set when withchecks is TRUE: it is the start == end case. */
5184 if (jump != NULL)
5185 {
5186 if (emptyfail)
5187 add_jump(compiler, backtracks, jump);
5188 else
5189 JUMPHERE(jump);
5190 }
5191 return cc + 1 + IMM2_SIZE;
5192 }
5193
/* Compiles a back reference that is followed by a repeat opcode
   (OP_CRSTAR ... OP_CRMINRANGE, read from cc[1 + IMM2_SIZE]).

   An iterator_backtrack is pushed on parent first; NULL is returned if that
   fails. The repeat limits are decoded into min and max, where max == 0 is
   handled as "no upper limit" by the code below. Separate code is generated
   for the non-minimizing and the minimizing forms. On success the return
   value points past the repeat opcode and its arguments. */
5194 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5195 {
5196 DEFINE_COMPILER;
5197 backtrack_common *backtrack;
5198 pcre_uchar type;
5199 struct sljit_label *label;
5200 struct sljit_jump *zerolength;
5201 struct sljit_jump *jump = NULL;
5202 pcre_uchar *ccbegin = cc;
5203 int min = 0, max = 0;
5204 BOOL minimize;
5205
5206 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5207
5208 type = cc[1 + IMM2_SIZE];
/* NOTE(review): this relies on the OP_CRMIN* opcodes being the ones with the
   lowest bit set; the opcode values are defined outside this part of the
   file - confirm there. */
5209 minimize = (type & 0x1) != 0;
5210 switch(type)
5211 {
5212 case OP_CRSTAR:
5213 case OP_CRMINSTAR:
5214 min = 0;
5215 max = 0;
5216 cc += 1 + IMM2_SIZE + 1;
5217 break;
5218 case OP_CRPLUS:
5219 case OP_CRMINPLUS:
5220 min = 1;
5221 max = 0;
5222 cc += 1 + IMM2_SIZE + 1;
5223 break;
5224 case OP_CRQUERY:
5225 case OP_CRMINQUERY:
5226 min = 0;
5227 max = 1;
5228 cc += 1 + IMM2_SIZE + 1;
5229 break;
5230 case OP_CRRANGE:
5231 case OP_CRMINRANGE:
5232 min = GET2(cc, 1 + IMM2_SIZE + 1);
5233 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5234 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5235 break;
5236 default:
5237 SLJIT_ASSERT_STOP();
5238 break;
5239 }
5240
5241 if (!minimize)
5242 {
/* Non-minimizing repeat: the reference is matched in a loop at `label`, and
   STR_PTR values are pushed on the stack between iterations. */
5243 if (min == 0)
5244 {
5245 allocate_stack(common, 2);
5246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5247 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5248 /* Temporary release of STR_PTR. */
5249 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5250 zerolength = compile_ref_checks(common, ccbegin, NULL);
5251 /* Restore if not zero length. */
5252 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5253 }
5254 else
5255 {
5256 allocate_stack(common, 1);
5257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5258 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5259 }
5260
/* An iteration counter is only needed when one of the limits is above 1;
   it is kept in the POSSESSIVE0 local. */
5261 if (min > 1 || max > 1)
5262 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5263
5264 label = LABEL();
5265 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5266
5267 if (min > 1 || max > 1)
5268 {
5269 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5270 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5271 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below min: loop again without saving a position. */
5272 if (min > 1)
5273 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5274 if (max > 1)
5275 {
/* Leave the loop once the counter reaches max; otherwise push STR_PTR and
   try another iteration. */
5276 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5277 allocate_stack(common, 1);
5278 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5279 JUMPTO(SLJIT_JUMP, label);
5280 JUMPHERE(jump);
5281 }
5282 }
5283
5284 if (max == 0)
5285 {
5286 /* Includes min > 1 case as well. */
5287 allocate_stack(common, 1);
5288 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5289 JUMPTO(SLJIT_JUMP, label);
5290 }
5291
/* The jump returned by compile_ref_checks() lands here, past the loop. */
5292 JUMPHERE(zerolength);
5293 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5294
5295 decrease_call_count(common);
5296 return cc;
5297 }
5298
/* Minimizing repeat. Two stack slots are allocated: STACK(0) receives
   STR_PTR, and STACK(1) is the iteration counter (not initialised for
   OP_CRMINSTAR, where it is never read because min <= 1 and max == 0). */
5299 allocate_stack(common, 2);
5300 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5301 if (type != OP_CRMINSTAR)
5302 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5303
5304 if (min == 0)
5305 {
/* With min == 0 the first pass saves STR_PTR and jumps over the reference
   match entirely. */
5306 zerolength = compile_ref_checks(common, ccbegin, NULL);
5307 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5308 jump = JUMP(SLJIT_JUMP);
5309 }
5310 else
5311 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5312
/* The matchingpath label recorded in the backtrack structure is the start
   of one iteration. */
5313 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5314 if (max > 0)
5315 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5316
5317 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5318 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5319
5320 if (min > 1)
5321 {
5322 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5323 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5324 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5325 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5326 }
5327 else if (max > 0)
5328 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5329
5330 if (jump != NULL)
5331 JUMPHERE(jump);
5332 JUMPHERE(zerolength);
5333
5334 decrease_call_count(common);
5335 return cc;
5336 }
5337
/* Compiles a recursion opcode. The target is given as an offset from
   common->start, read with GET(cc, 1).

   A recurse_backtrack is pushed on parent first. If get_framesize() reports
   no_stack for the target, the target's body is compiled in place and the
   backtrack is marked as an inlined pattern. Otherwise the recurse_entry for
   this target is looked up in common->entries (and appended if missing), the
   values selected by has_set_som / mark_ptr are pushed on the stack, and a
   fast call to the entry is emitted.

   Returns cc + 1 + LINK_SIZE, or NULL when an allocation fails. */
5338 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5339 {
5340 DEFINE_COMPILER;
5341 backtrack_common *backtrack;
5342 recurse_entry *entry = common->entries;
5343 recurse_entry *prev = NULL;
5344 sljit_sw start = GET(cc, 1);
5345 pcre_uchar *start_cc;
5346 BOOL needs_control_head;
5347
5348 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5349
5350 /* Inlining simple patterns. */
5351 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5352 {
5353 start_cc = common->start + start;
5354 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5355 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5356 return cc + 1 + LINK_SIZE;
5357 }
5358
/* Linear search of the entry list; `prev` ends up as the last element when
   nothing is found, so a new entry can be appended. */
5359 while (entry != NULL)
5360 {
5361 if (entry->start == start)
5362 break;
5363 prev = entry;
5364 entry = entry->next;
5365 }
5366
5367 if (entry == NULL)
5368 {
5369 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5370 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5371 return NULL;
5372 entry->next = NULL;
5373 entry->entry = NULL;
5374 entry->calls = NULL;
5375 entry->start = start;
5376
5377 if (prev != NULL)
5378 prev->next = entry;
5379 else
5380 common->entries = entry;
5381 }
5382
/* Push OVECTOR(0) and/or the mark pointer before the call: two slots when
   both are in use, one slot when only one of them is. */
5383 if (common->has_set_som && common->mark_ptr != 0)
5384 {
5385 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5386 allocate_stack(common, 2);
5387 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5388 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5389 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5390 }
5391 else if (common->has_set_som || common->mark_ptr != 0)
5392 {
5393 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5394 allocate_stack(common, 1);
5395 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5396 }
5397
/* If the entry has no label yet, the call is queued in entry->calls;
   otherwise it jumps straight to the existing label. */
5398 if (entry->entry == NULL)
5399 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5400 else
5401 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5402 /* Leave if the match is failed. */
5403 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5404 return cc + 1 + LINK_SIZE;
5405 }
5406
/* Helper that the generated code calls for a callout (see the SLJIT_CALL3
   emitted in compile_callout_matchingpath).

   arguments:     the jit_arguments of the running match.
   callout_block: block partly filled in by the generated code; the remaining
                  fields are completed here.
   jit_ovector:   the capture vector used by the generated code, holding
                  pointers into the subject.

   Returns 0 without touching the block when no callout function is set,
   otherwise the value returned by the callout function. */
5407 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5408 {
5409 const pcre_uchar *begin = arguments->begin;
5410 int *offset_vector = arguments->offsets;
5411 int offset_count = arguments->offset_count;
5412 int i;
5413
5414 if (PUBL(callout) == NULL)
5415 return 0;
5416
5417 callout_block->version = 2;
5418 callout_block->callout_data = arguments->callout_data;
5419
5420 /* Offsets in subject. */
/* On entry the `subject` and `offset_vector` fields hold subject pointers
   stored there by the generated code (OVECTOR(0) and STR_PTR respectively).
   They are converted to offsets here, before both fields are overwritten
   with their real values below, so the order of these statements matters. */
5421 callout_block->subject_length = arguments->end - arguments->begin;
5422 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5423 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5424 #if defined COMPILE_PCRE8
5425 callout_block->subject = (PCRE_SPTR)begin;
5426 #elif defined COMPILE_PCRE16
5427 callout_block->subject = (PCRE_SPTR16)begin;
5428 #elif defined COMPILE_PCRE32
5429 callout_block->subject = (PCRE_SPTR32)begin;
5430 #endif
5431
5432 /* Convert and copy the JIT offset vector to the offset_vector array. */
5433 callout_block->capture_top = 0;
5434 callout_block->offset_vector = offset_vector;
/* Pairs are copied from index 2 upwards. capture_top tracks the highest
   pair index whose start pointer is not below `begin`. */
5435 for (i = 2; i < offset_count; i += 2)
5436 {
5437 offset_vector[i] = jit_ovector[i] - begin;
5438 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5439 if (jit_ovector[i] >= begin)
5440 callout_block->capture_top = i;
5441 }
5442
/* Turn the vector index into a group number plus one. */
5443 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
/* The first pair is not taken from the JIT vector: both entries are set
   to -1 when they exist. */
5444 if (offset_count > 0)
5445 offset_vector[0] = -1;
5446 if (offset_count > 1)
5447 offset_vector[1] = -1;
5448 return (*PUBL(callout))(callout_block);
5449 }
5450
5451 /* Size of the callout block rounded up to a multiple of 8 bytes. */
5452 #define CALLOUT_ARG_SIZE \
5453 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5454
/* Offset of a callout block field relative to an address that lies
   CALLOUT_ARG_SIZE bytes above the start of the block, so the result is
   negative. */
5455 #define CALLOUT_ARG_OFFSET(arg) \
5456 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5457
/* Compiles a callout opcode. A backtrack_common is pushed on parent (NULL is
   returned if that fails), room for a callout block is reserved on the
   stack, the fields known at this point are stored into it and do_callout()
   is called with ARGUMENTS, the address of the block and the address of the
   local capture vector.

   After the call the reserved stack space is released and the return value
   is tested: a positive value backtracks, a negative value leaves through
   the forced quit path, zero continues with the next opcode.

   Returns cc + 2 + 2 * LINK_SIZE. */
5458 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5459 {
5460 DEFINE_COMPILER;
5461 backtrack_common *backtrack;
5462
5463 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5464
5465 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5466
5467 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5468 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5469 SLJIT_ASSERT(common->capture_last_ptr != 0);
5470 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5471 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5472
5473 /* These pointer sized fields temporarily store internal variables. */
/* do_callout() reads STR_PTR from `offset_vector` and OVECTOR(0) from
   `subject` before giving both fields their final values. */
5474 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5477
/* The mark field gets arguments->mark_ptr when marks are in use, and an
   immediate 0 otherwise. */
5478 if (common->mark_ptr != 0)
5479 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5480 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5481 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5483
5484 /* Needed to save important temporary registers. */
5485 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* Second argument: start address of the callout block. Third argument:
   address of the capture vector in the locals area. */
5486 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5487 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5488 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5489 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5490 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5491 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5492
5493 /* Check return value. */
5494 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5495 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
/* If the forced quit label does not exist yet, the jump is queued on the
   forced_quit list instead of being bound directly. */
5496 if (common->forced_quit_label == NULL)
5497 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5498 else
5499 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5500 return cc + 2 + 2 * LINK_SIZE;
5501 }
5502
/* The callout layout macros are only used by compile_callout_matchingpath(). */
5503 #undef CALLOUT_ARG_SIZE
5504 #undef CALLOUT_ARG_OFFSET
5505
5506 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5507 {
5508 DEFINE_COMPILER;
5509 int framesize;
5510 int extrasize;
5511 BOOL needs_control_head;
5512 int private_data_ptr;
5513 backtrack_common altbacktrack;
5514 pcre_uchar *ccbegin;
5515 pcre_uchar opcode;
5516 pcre_uchar bra = OP_BRA;
5517 jump_list *tmp = NULL;
5518 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5519 jump_list **found;
5520 /* Saving previous accept variables. */
5521 BOOL save_local_exit = common->local_exit;
5522 BOOL save_positive_assert = common->positive_assert;
5523 then_trap_backtrack *save_then_trap = common->then_trap;
5524 struct sljit_label *save_quit_label = common->quit_label;
5525 struct sljit_label *save_accept_label = common->accept_label;
5526 jump_list *save_quit = common->quit;
5527 jump_list *save_positive_assert_quit = common->positive_assert_quit;
5528 jump_list *save_accept = common->accept;
5529 struct sljit_jump *jump;
5530 struct sljit_jump *brajump = NULL;
5531
5532 /* Assert captures then. */
5533 common->then_trap = NULL;
5534
5535 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5536 {
5537 SLJIT_ASSERT(!conditional);
5538 bra = *cc;
5539 cc++;
5540 }
5541 private_data_ptr = PRIVATE_DATA(cc);
5542 SLJIT_ASSERT(private_data_ptr != 0);
5543 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5544 backtrack->framesize = framesize;
5545 backtrack->private_data_ptr = private_data_ptr;
5546 opcode = *cc;
5547 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
5548 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5549 ccbegin = cc;
5550 cc += GET(cc, 1);
5551
5552 if (bra == OP_BRAMINZERO)
5553 {
5554 /* This is a braminzero backtrack path. */
5555 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5556 free_stack(common, 1);
5557 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5558 }
5559
5560 if (framesize < 0)
5561 {
5562 extrasize = needs_control_head ? 2 : 1;
5563 if (framesize == no_frame)
5564 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5565 allocate_stack(common, extrasize);
5566 if (needs_control_head)
5567 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5568 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5569 if (needs_control_head)
5570 {
5571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5572 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5573 }
5574 }
5575 else
5576 {
5577 extrasize = needs_control_head ? 3 : 2;
5578 allocate_stack(common, framesize + extrasize);
5579 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5580 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5581 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5582 if (needs_control_head)
5583 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5584 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5585 if (needs_control_head)
5586 {
5587 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5588 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5589 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5590 }
5591 else
5592 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5593 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
5594 }
5595
5596 memset(&altbacktrack, 0, sizeof(backtrack_common));
5597 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5598 {
5599 /* Negative assert is stronger than positive assert. */
5600 common->local_exit = TRUE;
5601 common->quit_label = NULL;
5602 common->quit = NULL;
5603 common->positive_assert = FALSE;
5604 }
5605 else
5606 common->positive_assert = TRUE;
5607 common->positive_assert_quit = NULL;
5608
5609 while (1)
5610 {
5611 common->accept_label = NULL;
5612 common->accept = NULL;
5613 altbacktrack.top = NULL;
5614 altbacktrack.topbacktracks = NULL;
5615
5616 if (*ccbegin == OP_ALT)
5617 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5618
5619 altbacktrack.cc = ccbegin;
5620 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5621 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5622 {
5623 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5624 {
5625 common->local_exit = save_local_exit;
5626 common->quit_label = save_quit_label;
5627 common->quit = save_quit;
5628 }
5629 common->positive_assert = save_positive_assert;
5630 common->then_trap = save_then_trap;
5631 common->accept_label = save_accept_label;
5632 common->positive_assert_quit = save_positive_assert_quit;
5633 common->accept = save_accept;
5634 return NULL;
5635 }
5636 common->accept_label = LABEL();
5637 if (common->accept != NULL)
5638 set_jumps(common->accept, common->accept_label);
5639
5640 /* Reset stack. */
5641 if (framesize < 0)
5642 {
5643 if (framesize == no_frame)
5644 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5645 else
5646 free_stack(common, extrasize);
5647 if (needs_control_head)
5648 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5649 }
5650 else
5651 {
5652 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5653 {
5654 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5655 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5656 if (needs_control_head)
5657 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5658 }
5659 else
5660 {
5661 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5662 if (needs_control_head)
5663 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5664 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5665 }
5666 }
5667
5668 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5669 {
5670 /* We know that STR_PTR was stored on the top of the stack. */
5671 if (conditional)
5672 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5673 else if (bra == OP_BRAZERO)
5674 {
5675 if (framesize < 0)
5676 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5677 else
5678 {
5679 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5680 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5681 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5682 }
5683 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5684 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5685 }
5686 else if (framesize >= 0)
5687 {
5688 /* For OP_BRA and OP_BRAMINZERO. */
5689 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5690 }
5691 }
5692 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5693
5694 compile_backtrackingpath(common, altbacktrack.top);
5695 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5696 {
5697 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5698 {
5699 common->local_exit = save_local_exit;
5700 common->quit_label = save_quit_label;
5701 common->quit = save_quit;
5702 }
5703 common->positive_assert = save_positive_assert;
5704 common->then_trap = save_then_trap;
5705 common->accept_label = save_accept_label;
5706 common->positive_assert_quit = save_positive_assert_quit;
5707 common->accept = save_accept;
5708 return NULL;
5709 }
5710 set_jumps(altbacktrack.topbacktracks, LABEL());
5711
5712 if (*cc != OP_ALT)
5713 break;
5714
5715 ccbegin = cc;
5716 cc += GET(cc, 1);
5717 }
5718
5719 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5720 {
5721 SLJIT_ASSERT(common->positive_assert_quit == NULL);
5722 /* Makes the check less complicated below. */
5723 common->positive_assert_quit = common->quit;
5724 }
5725
5726 /* None of them matched. */
5727 if (common->positive_assert_quit != NULL)
5728 {
5729 jump = JUMP(SLJIT_JUMP);
5730 set_jumps(common->positive_assert_quit, LABEL());
5731 SLJIT_ASSERT(framesize != no_stack);
5732 if (framesize < 0)
5733 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5734 else
5735 {
5736 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5737 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5738 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5739 }
5740 JUMPHERE(jump);
5741 }
5742
5743 if (needs_control_head)
5744 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5745
5746 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5747 {
5748 /* Assert is failed. */
5749 if (conditional || bra == OP_BRAZERO)
5750 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5751
5752 if (framesize < 0)
5753 {
5754 /* The topmost item should be 0. */
5755 if (bra == OP_BRAZERO)
5756 {
5757 if (extrasize == 2)
5758 free_stack(common, 1);
5759 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5760 }
5761 else
5762 free_stack(common, extrasize);
5763 }
5764 else
5765 {
5766 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5767 /* The topmost item should be 0. */
5768 if (bra == OP_BRAZERO)
5769 {
5770 free_stack(common, framesize + extrasize - 1);
5771 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5772 }
5773 else
5774 free_stack(common, framesize + extrasize);
5775 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5776 }
5777 jump = JUMP(SLJIT_JUMP);
5778 if (bra != OP_BRAZERO)
5779 add_jump(compiler, target, jump);
5780
5781 /* Assert is successful. */
5782 set_jumps(tmp, LABEL());
5783 if (framesize < 0)
5784 {
5785 /* We know that STR_PTR was stored on the top of the stack. */
5786 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5787 /* Keep the STR_PTR on the top of the stack. */
5788 if (bra == OP_BRAZERO)
5789 {
5790 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5791 if (extrasize == 2)
5792 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5793 }
5794 else if (bra == OP_BRAMINZERO)
5795 {
5796 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5797 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5798 }
5799 }
5800 else
5801 {
5802 if (bra == OP_BRA)
5803 {
5804 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5805 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5806 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5807 }
5808 else
5809 {
5810 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5811 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5812 if (extrasize == 2)
5813 {
5814 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5815 if (bra == OP_BRAMINZERO)
5816 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5817 }
5818 else
5819 {
5820 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5821 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5822 }
5823 }
5824 }
5825
5826 if (bra == OP_BRAZERO)
5827 {
5828 backtrack->matchingpath = LABEL();
5829 SET_LABEL(jump, backtrack->matchingpath);
5830 }
5831 else if (bra == OP_BRAMINZERO)
5832 {
5833 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5834 JUMPHERE(brajump);
5835 if (framesize >= 0)
5836 {
5837 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5838 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5839 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5840 }
5841 set_jumps(backtrack->common.topbacktracks, LABEL());
5842 }
5843 }
5844 else
5845 {
5846 /* AssertNot is successful. */
5847 if (framesize < 0)
5848 {
5849 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5850 if (bra != OP_BRA)
5851 {
5852 if (extrasize == 2)
5853 free_stack(common, 1);
5854 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5855 }
5856 else
5857 free_stack(common, extrasize);
5858 }
5859 else
5860 {
5861 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5862 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5863 /* The topmost item should be 0. */
5864 if (bra != OP_BRA)
5865 {
5866 free_stack(common, framesize + extrasize - 1);
5867 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5868 }
5869 else
5870 free_stack(common, framesize + extrasize);
5871 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5872 }
5873
5874 if (bra == OP_BRAZERO)
5875 backtrack->matchingpath = LABEL();
5876 else if (bra == OP_BRAMINZERO)
5877 {
5878 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5879 JUMPHERE(brajump);
5880 }
5881
5882 if (bra != OP_BRA)
5883 {
5884 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5885 set_jumps(backtrack->common.topbacktracks, LABEL());
5886 backtrack->common.topbacktracks = NULL;
5887 }
5888 }
5889
5890 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5891 {
5892 common->local_exit = save_local_exit;
5893 common->quit_label = save_quit_label;
5894 common->quit = save_quit;
5895 }
5896 common->positive_assert = save_positive_assert;
5897 common->then_trap = save_then_trap;
5898 common->accept_label = save_accept_label;
5899 common->positive_assert_quit = save_positive_assert_quit;
5900 common->accept = save_accept;
5901 return cc + 1 + LINK_SIZE;
5902 }
5903
5904 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5905 {
5906 int condition = FALSE;
5907 pcre_uchar *slotA = name_table;
5908 pcre_uchar *slotB;
5909 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5910 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5911 sljit_sw no_capture;
5912 int i;
5913
5914 locals += refno & 0xff;
5915 refno >>= 8;
5916 no_capture = locals[1];
5917
5918 for (i = 0; i < name_count; i++)
5919 {
5920 if (GET2(slotA, 0) == refno) break;
5921 slotA += name_entry_size;
5922 }
5923
5924 if (i < name_count)
5925 {
5926 /* Found a name for the number - there can be only one; duplicate names
5927 for different numbers are allowed, but not vice versa. First scan down
5928 for duplicates. */
5929
5930 slotB = slotA;
5931 while (slotB > name_table)
5932 {
5933 slotB -= name_entry_size;
5934 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5935 {
5936 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5937 if (condition) break;
5938 }
5939 else break;
5940 }
5941
5942 /* Scan up for duplicates */
5943 if (!condition)
5944 {
5945 slotB = slotA;
5946 for (i++; i < name_count; i++)
5947 {
5948 slotB += name_entry_size;
5949 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5950 {
5951 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5952 if (condition) break;
5953 }
5954 else break;
5955 }
5956 }
5957 }
5958 return condition;
5959 }
5960
5961 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5962 {
5963 int condition = FALSE;
5964 pcre_uchar *slotA = name_table;
5965 pcre_uchar *slotB;
5966 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5967 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5968 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5969 sljit_uw i;
5970
5971 for (i = 0; i < name_count; i++)
5972 {
5973 if (GET2(slotA, 0) == recno) break;
5974 slotA += name_entry_size;
5975 }
5976
5977 if (i < name_count)
5978 {
5979 /* Found a name for the number - there can be only one; duplicate
5980 names for different numbers are allowed, but not vice versa. First
5981 scan down for duplicates. */
5982
5983 slotB = slotA;
5984 while (slotB > name_table)
5985 {
5986 slotB -= name_entry_size;
5987 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5988 {
5989 condition = GET2(slotB, 0) == group_num;
5990 if (condition) break;
5991 }
5992 else break;
5993 }
5994
5995 /* Scan up for duplicates */
5996 if (!condition)
5997 {
5998 slotB = slotA;
5999 for (i++; i < name_count; i++)
6000 {
6001 slotB += name_entry_size;
6002 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
6003 {
6004 condition = GET2(slotB, 0) == group_num;
6005 if (condition) break;
6006 }
6007 else break;
6008 }
6009 }
6010 }
6011 return condition;
6012 }
6013
6014 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6015 {
6016 DEFINE_COMPILER;
6017 int stacksize;
6018
6019 if (framesize < 0)
6020 {
6021 if (framesize == no_frame)
6022 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6023 else
6024 {
6025 stacksize = needs_control_head ? 1 : 0;
6026 if (ket != OP_KET || has_alternatives)
6027 stacksize++;
6028 free_stack(common, stacksize);
6029 }
6030
6031 if (needs_control_head)
6032 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6033
6034 /* TMP2 which is set here used by OP_KETRMAX below. */
6035 if (ket == OP_KETRMAX)
6036 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6037 else if (ket == OP_KETRMIN)
6038 {
6039 /* Move the STR_PTR to the private_data_ptr. */
6040 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6041 }
6042 }
6043 else
6044 {
6045 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6046 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6047 if (needs_control_head)
6048 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6049
6050 if (ket == OP_KETRMAX)
6051 {
6052 /* TMP2 which is set here used by OP_KETRMAX below. */
6053 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6054 }
6055 }
6056 if (needs_control_head)
6057 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6058 }
6059
6060 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6061 {
6062 DEFINE_COMPILER;
6063
6064 if (common->capture_last_ptr != 0)
6065 {
6066 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6067 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6068 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6069 stacksize++;
6070 }
6071 if (common->optimized_cbracket[offset >> 1] == 0)
6072 {
6073 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6074 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6075 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6076 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6077 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6079 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6080 stacksize += 2;
6081 }
6082 return stacksize;
6083 }
6084
6085 /*
6086 Handling bracketed expressions is probably the most complex part.
6087
6088 Stack layout naming characters:
6089 S - Push the current STR_PTR
6090 0 - Push a 0 (NULL)
6091 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6092 before the next alternative. Not pushed if there are no alternatives.
6093 M - Any values pushed by the current alternative. Can be empty, or anything.
6094 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6095 L - Push the previous local (pointed by localptr) to the stack
6096 () - optional values stored on the stack
6097 ()* - optional, can be stored multiple times
6098
6099 The following list shows the regular expression templates, their PCRE byte codes
6100 and stack layout supported by pcre-sljit.
6101
6102 (?:) OP_BRA | OP_KET A M
6103 () OP_CBRA | OP_KET C M
6104 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6105 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6106 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6107 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6108 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6109 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6110 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6111 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6112 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6113 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6114 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6115 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6116 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6117 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6118 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6119 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6120 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6121 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6122 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6123 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6124
6125
6126 Stack layout naming characters:
6127 A - Push the alternative index (starting from 0) on the stack.
6128 Not pushed if there are no alternatives.
6129 M - Any values pushed by the current alternative. Can be empty, or anything.
6130
6131 The next list shows the possible content of a bracket:
6132 (|) OP_*BRA | OP_ALT ... M A
6133 (?()|) OP_*COND | OP_ALT M A
6134 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6135 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6136 Or nothing, if trace is unnecessary
6137 */
6138
/* Generates the matching-path code for one bracketed group, optionally
preceded by OP_BRAZERO or OP_BRAMINZERO.

Arguments:
  common   compiler state shared by the code generator
  cc       points to the first opcode of the group (the OP_BRAZERO or
           OP_BRAMINZERO prefix if present, otherwise the bracket opcode)
  parent   backtrack record of the enclosing construct; when the prefix is
           OP_BRAMINZERO it is accessed as a braminzero_backtrack

Returns: pointer to the byte code which follows the group (advanced by
         repeat_length as well when the closing OP_KET carries repeat
         private data), or NULL if the sljit compiler reported an error.

Note: stacksize is reused as a scratch variable in several places (e.g. for
the group number read by GET2 in the condition handling). */
6139 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6140 {
6141 DEFINE_COMPILER;
6142 backtrack_common *backtrack;
6143 pcre_uchar opcode;
6144 int private_data_ptr = 0;
6145 int offset = 0;
6146 int stacksize;
6147 int repeat_ptr = 0, repeat_length = 0;
6148 int repeat_type = 0, repeat_count = 0;
6149 pcre_uchar *ccbegin;
6150 pcre_uchar *matchingpath;
6151 pcre_uchar bra = OP_BRA;
6152 pcre_uchar ket;
6153 assert_backtrack *assert;
6154 BOOL has_alternatives;
6155 BOOL needs_control_head = FALSE;
6156 struct sljit_jump *jump;
6157 struct sljit_jump *skip;
6158 struct sljit_label *rmax_label = NULL;
6159 struct sljit_jump *braminzero = NULL;
6160
6161 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6162
/* Remember the optional zero-repeat prefix in bra, and step over it. */
6163 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6164 {
6165 bra = *cc;
6166 cc++;
/* NOTE(review): this store is redundant, opcode is assigned again right
after this block. */
6167 opcode = *cc;
6168 }
6169
6170 opcode = *cc;
6171 ccbegin = cc;
/* matchingpath temporarily points to the closing ket of the group. */
6172 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6173 ket = *matchingpath;
/* A plain OP_KET with non-zero private data describes a counted repeat:
repeat_ptr is the location of the counter in the locals, repeat_length is
added to the returned code pointer. OP_UPTO and OP_MINUPTO are handled below
as OP_KETRMAX and OP_KETRMIN respectively. */
6174 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6175 {
6176 repeat_ptr = PRIVATE_DATA(matchingpath);
6177 repeat_length = PRIVATE_DATA(matchingpath + 1);
6178 repeat_type = PRIVATE_DATA(matchingpath + 2);
6179 repeat_count = PRIVATE_DATA(matchingpath + 3);
6180 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6181 if (repeat_type == OP_UPTO)
6182 ket = OP_KETRMAX;
6183 if (repeat_type == OP_MINUPTO)
6184 ket = OP_KETRMIN;
6185 }
6186
/* A conditional group whose condition is OP_DEF: no code is generated, the
whole group is skipped. */
6187 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6188 {
6189 /* Drop this bracket_backtrack. */
6190 parent->top = backtrack->prev;
6191 return matchingpath + 1 + LINK_SIZE + repeat_length;
6192 }
6193
/* From here matchingpath points to the first item inside the bracket, and cc
is advanced by the link stored after the bracket opcode (the check below
tests whether it landed on an OP_ALT). */
6194 matchingpath = ccbegin + 1 + LINK_SIZE;
6195 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6196 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6197 cc += GET(cc, 1);
6198
6199 has_alternatives = *cc == OP_ALT;
/* Conditional groups recompute has_alternatives from the type of the
condition. For OP_NRREF, stacksize temporarily holds the referenced number. */
6200 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6201 {
6202 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
6203 if (*matchingpath == OP_NRREF)
6204 {
6205 stacksize = GET2(matchingpath, 1);
6206 if (common->currententry == NULL || stacksize == RREF_ANY)
6207 has_alternatives = FALSE;
6208 else if (common->currententry->start == 0)
6209 has_alternatives = stacksize != 0;
6210 else
6211 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6212 }
6213 }
6214
/* A repeated OP_COND is treated as OP_SCOND, and OP_ONCE_NC as OP_ONCE from
this point. */
6215 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6216 opcode = OP_SCOND;
6217 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6218 opcode = OP_ONCE;
6219
6220 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6221 {
6222 /* Capturing brackets has a pre-allocated space. */
6223 offset = GET2(ccbegin, 1 + LINK_SIZE);
6224 if (common->optimized_cbracket[offset] == 0)
6225 {
6226 private_data_ptr = OVECTOR_PRIV(offset);
6227 offset <<= 1;
6228 }
6229 else
6230 {
6231 offset <<= 1;
6232 private_data_ptr = OVECTOR(offset);
6233 }
6234 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6235 matchingpath += IMM2_SIZE;
6236 }
6237 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6238 {
6239 /* Other brackets simply allocate the next entry. */
6240 private_data_ptr = PRIVATE_DATA(ccbegin);
6241 SLJIT_ASSERT(private_data_ptr != 0);
6242 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6243 if (opcode == OP_ONCE)
6244 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6245 }
6246
/* The slots are counted first, allocated in one step, then filled in. */
6247 /* Instructions before the first alternative. */
6248 stacksize = 0;
6249 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6250 stacksize++;
6251 if (bra == OP_BRAZERO)
6252 stacksize++;
6253
6254 if (stacksize > 0)
6255 allocate_stack(common, stacksize);
6256
6257 stacksize = 0;
6258 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6259 {
6260 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6261 stacksize++;
6262 }
6263
6264 if (bra == OP_BRAZERO)
6265 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6266
6267 if (bra == OP_BRAMINZERO)
6268 {
6269 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6270 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6271 if (ket != OP_KETRMIN)
6272 {
6273 free_stack(common, 1);
6274 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6275 }
6276 else
6277 {
6278 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6279 {
6280 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6281 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6282 /* Nothing stored during the first run. */
6283 skip = JUMP(SLJIT_JUMP);
6284 JUMPHERE(jump);
6285 /* Checking zero-length iteration. */
6286 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6287 {
6288 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6289 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6290 }
6291 else
6292 {
6293 /* Except when the whole stack frame must be saved. */
6294 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6295 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6296 }
6297 JUMPHERE(skip);
6298 }
6299 else
6300 {
6301 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6302 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6303 JUMPHERE(jump);
6304 }
6305 }
6306 }
6307
/* Counted repeats: load the counter. For OP_EXACT the loop start label is
taken here, it is the target of the jump emitted near the end of this
function. */
6308 if (repeat_type != 0)
6309 {
6310 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6311 if (repeat_type == OP_EXACT)
6312 rmax_label = LABEL();
6313 }
6314
6315 if (ket == OP_KETRMIN)
6316 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6317
6318 if (ket == OP_KETRMAX)
6319 {
6320 rmax_label = LABEL();
6321 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6322 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
6323 }
6324
6325 /* Handling capturing brackets and alternatives. */
6326 if (opcode == OP_ONCE)
6327 {
6328 stacksize = 0;
6329 if (needs_control_head)
6330 {
6331 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6332 stacksize++;
6333 }
6334
6335 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6336 {
6337 /* Neither capturing brackets nor recursions are found in the block. */
6338 if (ket == OP_KETRMIN)
6339 {
6340 stacksize += 2;
6341 if (!needs_control_head)
6342 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6343 }
6344 else
6345 {
6346 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6347 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6348 if (ket == OP_KETRMAX || has_alternatives)
6349 stacksize++;
6350 }
6351
6352 if (stacksize > 0)
6353 allocate_stack(common, stacksize);
6354
6355 stacksize = 0;
6356 if (needs_control_head)
6357 {
6358 stacksize++;
6359 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6360 }
6361
6362 if (ket == OP_KETRMIN)
6363 {
6364 if (needs_control_head)
6365 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6366 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6367 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6368 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6369 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6370 }
6371 else if (ket == OP_KETRMAX || has_alternatives)
6372 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6373 }
6374 else
6375 {
/* A frame is required: reserve room for it together with the extra slots,
save the old private data value, and let init_frame fill the frame. */
6376 if (ket != OP_KET || has_alternatives)
6377 stacksize++;
6378
6379 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6380 allocate_stack(common, stacksize);
6381
6382 if (needs_control_head)
6383 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6384
6385 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6386 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6387
6388 stacksize = needs_control_head ? 1 : 0;
6389 if (ket != OP_KET || has_alternatives)
6390 {
6391 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6392 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6393 stacksize++;
6394 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6395 }
6396 else
6397 {
6398 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6399 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6400 }
6401 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6402 }
6403 }
6404 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6405 {
6406 /* Saving the previous values. */
6407 if (common->optimized_cbracket[offset >> 1] != 0)
6408 {
6409 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6410 allocate_stack(common, 2);
6411 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6412 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6413 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6414 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6416 }
6417 else
6418 {
6419 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6420 allocate_stack(common, 1);
6421 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6422 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6423 }
6424 }
6425 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6426 {
6427 /* Saving the previous value. */
6428 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6429 allocate_stack(common, 1);
6430 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6431 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6432 }
6433 else if (has_alternatives)
6434 {
6435 /* Pushing the starting string pointer. */
6436 allocate_stack(common, 1);
6437 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6438 }
6439
6440 /* Generating code for the first alternative. */
6441 if (opcode == OP_COND || opcode == OP_SCOND)
6442 {
6443 if (*matchingpath == OP_CREF)
6444 {
/* Condition on a numbered group: it fails when the start slot of the group
is equal to OVECTOR(1). */
6445 SLJIT_ASSERT(has_alternatives);
6446 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6447 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6448 matchingpath += 1 + IMM2_SIZE;
6449 }
6450 else if (*matchingpath == OP_NCREF)
6451 {
/* Condition on a named group: the group itself is checked inline, and
do_searchovector is called only when that check does not succeed. STACK_TOP
is kept in POSSESSIVE1 across the call; the first argument packs the group
number and the word index of the output vector. */
6452 SLJIT_ASSERT(has_alternatives);
6453 stacksize = GET2(matchingpath, 1);
6454 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6455
6456 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6457 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6458 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6459 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6460 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6461 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6462 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6463 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6464 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6465
6466 JUMPHERE(jump);
6467 matchingpath += 1 + IMM2_SIZE;
6468 }
6469 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6470 {
6471 /* Never has other case. */
6472 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6473
/* stacksize becomes a flag here: non-zero when the recursion condition is
known to hold while compiling. */
6474 stacksize = GET2(matchingpath, 1);
6475 if (common->currententry == NULL)
6476 stacksize = 0;
6477 else if (stacksize == RREF_ANY)
6478 stacksize = 1;
6479 else if (common->currententry->start == 0)
6480 stacksize = stacksize == 0;
6481 else
6482 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6483
6484 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6485 {
/* Decided at compile time: only the selected branch is compiled. */
6486 SLJIT_ASSERT(!has_alternatives);
6487 if (stacksize != 0)
6488 matchingpath += 1 + IMM2_SIZE;
6489 else
6490 {
6491 if (*cc == OP_ALT)
6492 {
6493 matchingpath = cc + 1 + LINK_SIZE;
6494 cc += GET(cc, 1);
6495 }
6496 else
6497 matchingpath = cc;
6498 }
6499 }
6500 else
6501 {
/* Named recursion check which needs a name table search at run time: see
do_searchgroups. STACK_TOP is kept in POSSESSIVE1 across the call. */
6502 SLJIT_ASSERT(has_alternatives);
6503
6504 stacksize = GET2(matchingpath, 1);
6505 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6506 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6508 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6509 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6510 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6511 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6512 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6513 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6514 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6515 matchingpath += 1 + IMM2_SIZE;
6516 }
6517 }
6518 else
6519 {
/* The condition is an assertion: it gets its own assert_backtrack, and it
is compiled by compile_assert_matchingpath. */
6520 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6521 /* Similar code as PUSH_BACKTRACK macro. */
6522 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6523 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6524 return NULL;
6525 memset(assert, 0, sizeof(assert_backtrack));
6526 assert->common.cc = matchingpath;
6527 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6528 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6529 }
6530 }
6531
/* Compile the byte code between matchingpath and cc. */
6532 compile_matchingpath(common, matchingpath, cc, backtrack);
6533 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6534 return NULL;
6535
6536 if (opcode == OP_ONCE)
6537 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6538
/* Count the stack slots required after the body, allocate them in one
step, then fill them in the same order. */
6539 stacksize = 0;
6540 if (repeat_type == OP_MINUPTO)
6541 {
6542 /* We need to preserve the counter. TMP2 will be used below. */
6543 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6544 stacksize++;
6545 }
6546 if (ket != OP_KET || bra != OP_BRA)
6547 stacksize++;
6548 if (offset != 0)
6549 {
6550 if (common->capture_last_ptr != 0)
6551 stacksize++;
6552 if (common->optimized_cbracket[offset >> 1] == 0)
6553 stacksize += 2;
6554 }
6555 if (has_alternatives && opcode != OP_ONCE)
6556 stacksize++;
6557
6558 if (stacksize > 0)
6559 allocate_stack(common, stacksize);
6560
6561 stacksize = 0;
6562 if (repeat_type == OP_MINUPTO)
6563 {
6564 /* TMP2 was set above. */
6565 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
6566 stacksize++;
6567 }
6568
6569 if (ket != OP_KET || bra != OP_BRA)
6570 {
6571 if (ket != OP_KET)
6572 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6573 else
6574 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6575 stacksize++;
6576 }
6577
6578 if (offset != 0)
6579 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6580
6581 if (has_alternatives)
6582 {
6583 if (opcode != OP_ONCE)
6584 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6585 if (ket != OP_KETRMAX)
6586 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6587 }
6588
6589 /* Must be after the matchingpath label. */
6590 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6591 {
6592 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6593 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6594 }
6595
/* Greedy repeat: jump back to rmax_label for the next iteration. */
6596 if (ket == OP_KETRMAX)
6597 {
6598 if (repeat_type != 0)
6599 {
6600 if (has_alternatives)
6601 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6602 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6603 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6604 /* Drop STR_PTR for greedy plus quantifier. */
6605 if (opcode != OP_ONCE)
6606 free_stack(common, 1);
6607 }
6608 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
6609 {
6610 if (has_alternatives)
6611 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6612 /* Checking zero-length iteration. */
6613 if (opcode != OP_ONCE)
6614 {
6615 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label);
6616 /* Drop STR_PTR for greedy plus quantifier. */
6617 if (bra != OP_BRAZERO)
6618 free_stack(common, 1);
6619 }
6620 else
6621 /* TMP2 must contain the starting STR_PTR. */
6622 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
6623 }
6624 else
6625 JUMPTO(SLJIT_JUMP, rmax_label);
6626 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6627 }
6628
/* OP_EXACT: decrease the counter, and loop while it is not zero. */
6629 if (repeat_type == OP_EXACT)
6630 {
6631 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6632 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6633 }
6634 else if (repeat_type == OP_UPTO)
6635 {
6636 /* We need to preserve the counter. */
6637 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6638 allocate_stack(common, 1);
6639 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6640 }
6641
6642 if (bra == OP_BRAZERO)
6643 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6644
6645 if (bra == OP_BRAMINZERO)
6646 {
6647 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6648 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6649 if (braminzero != NULL)
6650 {
6651 JUMPHERE(braminzero);
6652 /* We need to release the end pointer to perform the
6653 backtrack for the zero-length iteration. When
6654 framesize is < 0, OP_ONCE will do the release itself. */
6655 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6656 {
6657 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6658 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6659 }
6660 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6661 free_stack(common, 1);
6662 }
6663 /* Continue to the normal backtrack. */
6664 }
6665
6666 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6667 decrease_call_count(common);
6668
6669 /* Skip the other alternatives. */
6670 while (*cc == OP_ALT)
6671 cc += GET(cc, 1);
6672 cc += 1 + LINK_SIZE;
6673
6674 /* Temporarily encoding the needs_control_head in framesize. */
6675 if (opcode == OP_ONCE)
6676 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6677 return cc + repeat_length;
6678 }
6679
6680 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6681 {
6682 DEFINE_COMPILER;
6683 backtrack_common *backtrack;
6684 pcre_uchar opcode;
6685 int private_data_ptr;
6686 int cbraprivptr = 0;
6687 BOOL needs_control_head;
6688 int framesize;
6689 int stacksize;
6690 int offset = 0;
6691 BOOL zero = FALSE;
6692 pcre_uchar *ccbegin = NULL;
6693 int stack; /* Also contains the offset of control head. */
6694 struct sljit_label *loop = NULL;
6695 struct jump_list *emptymatch = NULL;
6696
6697 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6698 if (*cc == OP_BRAPOSZERO)
6699 {
6700 zero = TRUE;
6701 cc++;
6702 }
6703
6704 opcode = *cc;
6705 private_data_ptr = PRIVATE_DATA(cc);
6706 SLJIT_ASSERT(private_data_ptr != 0);
6707 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6708 switch(opcode)
6709 {
6710 case OP_BRAPOS:
6711 case OP_SBRAPOS:
6712 ccbegin = cc + 1 + LINK_SIZE;
6713 break;
6714
6715 case OP_CBRAPOS:
6716 case OP_SCBRAPOS:
6717 offset = GET2(cc, 1 + LINK_SIZE);
6718 /* This case cannot be optimized in the same was as
6719 normal capturing brackets. */
6720 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6721 cbraprivptr = OVECTOR_PRIV(offset);
6722 offset <<= 1;
6723 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6724 break;
6725
6726 default:
6727 SLJIT_ASSERT_STOP();
6728 break;
6729 }
6730
6731 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6732 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6733 if (framesize < 0)
6734 {
6735 if (offset != 0)
6736 {
6737 stacksize = 2;
6738 if (common->capture_last_ptr != 0)