/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1308 - (show annotations)
Tue Apr 2 06:58:55 2013 UTC (6 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 306305 byte(s)
Fix for recursions to preserve repeat counters.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
/* Memory management hooks handed to sljit: both go through the
PUBL(malloc) / PUBL(free) function pointers. */
#define SLJIT_MALLOC(size) (PUBL(malloc))(size)
#define SLJIT_FREE(ptr) (PUBL(free))(ptr)

/* sljit configuration switches for this all-in-one build. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1

/* sljit verbose and debug support are switched off. */
#define SLJIT_VERBOSE 0
#define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
/* Size of the regex stack which is allocated on the real machine stack.
This is fast, but the size is limited. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* When sljit debugging is enabled, also check whether an allocation
could destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
90 /*
Short summary about the backtracking mechanism employed by the jit code generator:
92
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 int real_offset_count;
172 int offset_count;
173 int call_limit;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
186 } executable_functions;
187
/* Singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;   /* The jump stored in this node. */
  struct jump_list *next;    /* Following node of the list. */
} jump_list;
192
/* Singly linked list of stubs. Each node stores a starting jump
and a quit label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
198
/* Negative marker values. NOTE(review): presumably stored in the
framesize members, which are documented as "less than 0 if a frame
is not needed" - confirm against the users of this enum. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
203
/* Kinds of control entries: a mark or a then trap. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
208
209 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
210
211 /* The following structure is the key data type for the recursive
212 code generator. It is allocated by compile_matchingpath, and contains
213 the aguments for compile_backtrackingpath. Must be the first member
214 of its descendants. */
215 typedef struct backtrack_common {
216 /* Concatenation stack. */
217 struct backtrack_common *prev;
218 jump_list *nextbacktracks;
219 /* Internal stack (for component operators). */
220 struct backtrack_common *top;
221 jump_list *topbacktracks;
222 /* Opcode pointer. */
223 pcre_uchar *cc;
224 } backtrack_common;
225
226 typedef struct assert_backtrack {
227 backtrack_common common;
228 jump_list *condfailed;
229 /* Less than 0 if a frame is not needed. */
230 int framesize;
231 /* Points to our private memory word on the stack. */
232 int private_data_ptr;
233 /* For iterators. */
234 struct sljit_label *matchingpath;
235 } assert_backtrack;
236
237 typedef struct bracket_backtrack {
238 backtrack_common common;
239 /* Where to coninue if an alternative is successfully matched. */
240 struct sljit_label *alternative_matchingpath;
241 /* For rmin and rmax iterators. */
242 struct sljit_label *recursive_matchingpath;
243 /* For greedy ? operator. */
244 struct sljit_label *zero_matchingpath;
245 /* Contains the branches of a failed condition. */
246 union {
247 /* Both for OP_COND, OP_SCOND. */
248 jump_list *condfailed;
249 assert_backtrack *assert;
250 /* For OP_ONCE. Less than 0 if not needed. */
251 int framesize;
252 } u;
253 /* Points to our private memory word on the stack. */
254 int private_data_ptr;
255 } bracket_backtrack;
256
257 typedef struct bracketpos_backtrack {
258 backtrack_common common;
259 /* Points to our private memory word on the stack. */
260 int private_data_ptr;
261 /* Reverting stack is needed. */
262 int framesize;
263 /* Allocated stack size. */
264 int stacksize;
265 } bracketpos_backtrack;
266
267 typedef struct braminzero_backtrack {
268 backtrack_common common;
269 struct sljit_label *matchingpath;
270 } braminzero_backtrack;
271
272 typedef struct iterator_backtrack {
273 backtrack_common common;
274 /* Next iteration. */
275 struct sljit_label *matchingpath;
276 } iterator_backtrack;
277
278 typedef struct recurse_entry {
279 struct recurse_entry *next;
280 /* Contains the function entry. */
281 struct sljit_label *entry;
282 /* Collects the calls until the function is not created. */
283 jump_list *calls;
284 /* Points to the starting opcode. */
285 sljit_sw start;
286 } recurse_entry;
287
288 typedef struct recurse_backtrack {
289 backtrack_common common;
290 BOOL inlined_pattern;
291 } recurse_backtrack;
292
293 #define OP_THEN_TRAP OP_TABLE_LENGTH
294
295 typedef struct then_trap_backtrack {
296 backtrack_common common;
297 /* If then_trap is not NULL, this structure contains the real
298 then_trap for the backtracking path. */
299 struct then_trap_backtrack *then_trap;
300 /* Points to the starting opcode. */
301 sljit_sw start;
302 /* Exit point for the then opcodes of this alternative. */
303 jump_list *quit;
304 /* Frame size of the current alternative. */
305 int framesize;
306 } then_trap_backtrack;
307
308 #define MAX_RANGE_SIZE 6
309
310 typedef struct compiler_common {
311 /* The sljit ceneric compiler. */
312 struct sljit_compiler *compiler;
313 /* First byte code. */
314 pcre_uchar *start;
315 /* Maps private data offset to each opcode. */
316 sljit_si *private_data_ptrs;
317 /* Tells whether the capturing bracket is optimized. */
318 pcre_uint8 *optimized_cbracket;
319 /* Tells whether the starting offset is a target of then. */
320 pcre_uint8 *then_offsets;
321 /* Current position where a THEN must jump. */
322 then_trap_backtrack *then_trap;
323 /* Starting offset of private data for capturing brackets. */
324 int cbra_ptr;
325 /* Output vector starting point. Must be divisible by 2. */
326 int ovector_start;
327 /* Last known position of the requested byte. */
328 int req_char_ptr;
329 /* Head of the last recursion. */
330 int recursive_head_ptr;
331 /* First inspected character for partial matching. */
332 int start_used_ptr;
333 /* Starting pointer for partial soft matches. */
334 int hit_start;
335 /* End pointer of the first line. */
336 int first_line_end;
337 /* Points to the marked string. */
338 int mark_ptr;
339 /* Recursive control verb management chain. */
340 int control_head_ptr;
341 /* Points to the last matched capture block index. */
342 int capture_last_ptr;
343 /* Points to the starting position of the current match. */
344 int start_ptr;
345
346 /* Flipped and lower case tables. */
347 const pcre_uint8 *fcc;
348 sljit_sw lcc;
349 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
350 int mode;
351 /* \K is found in the pattern. */
352 BOOL has_set_som;
353 /* (*SKIP:arg) is found in the pattern. */
354 BOOL has_skip_arg;
355 /* (*THEN) is found in the pattern. */
356 BOOL has_then;
357 /* Needs to know the start position anytime. */
358 BOOL needs_start_ptr;
359 /* Currently in recurse or negative assert. */
360 BOOL local_exit;
361 /* Currently in a positive assert. */
362 BOOL positive_assert;
363 /* Newline control. */
364 int nltype;
365 int newline;
366 int bsr_nltype;
367 /* Dollar endonly. */
368 int endonly;
369 /* Tables. */
370 sljit_sw ctypes;
371 int digits[2 + MAX_RANGE_SIZE];
372 /* Named capturing brackets. */
373 sljit_uw name_table;
374 sljit_sw name_count;
375 sljit_sw name_entry_size;
376
377 /* Labels and jump lists. */
378 struct sljit_label *partialmatchlabel;
379 struct sljit_label *quit_label;
380 struct sljit_label *forced_quit_label;
381 struct sljit_label *accept_label;
382 stub_list *stubs;
383 recurse_entry *entries;
384 recurse_entry *currententry;
385 jump_list *partialmatch;
386 jump_list *quit;
387 jump_list *positive_assert_quit;
388 jump_list *forced_quit;
389 jump_list *accept;
390 jump_list *calllimit;
391 jump_list *stackalloc;
392 jump_list *revertframes;
393 jump_list *wordboundary;
394 jump_list *anynewline;
395 jump_list *hspace;
396 jump_list *vspace;
397 jump_list *casefulcmp;
398 jump_list *caselesscmp;
399 jump_list *reset_match;
400 BOOL jscript_compat;
401 #ifdef SUPPORT_UTF
402 BOOL utf;
403 #ifdef SUPPORT_UCP
404 BOOL use_ucp;
405 #endif
406 #ifndef COMPILE_PCRE32
407 jump_list *utfreadchar;
408 #endif
409 #ifdef COMPILE_PCRE8
410 jump_list *utfreadtype8;
411 #endif
412 #endif /* SUPPORT_UTF */
413 #ifdef SUPPORT_UCP
414 jump_list *getucd;
415 #endif
416 } compiler_common;
417
/* Context structure for byte_sequence_compare. The character unions
are only present when SLJIT_UNALIGNED is enabled; the layout of their
asuchars member depends on the character width being compiled. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  /* First character buffer. */
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  /* Second character buffer, same layout as the first. */
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
451
/* Undefine sljit macros (CMP gets a new definition among the
shortcuts below). */
#undef CMP

/* Used for accessing the elements of the stack: element i is found
at a negative offset of (i + 1) machine words. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Register aliases. */
/* Scratch registers. */
#define TMP1 SLJIT_SCRATCH_REG1
#define STACK_TOP SLJIT_SCRATCH_REG2
#define TMP2 SLJIT_SCRATCH_REG3
/* Saved registers. */
#define STR_PTR SLJIT_SAVED_REG1
#define STR_END SLJIT_SAVED_REG2
#define STACK_LIMIT SLJIT_SAVED_REG3
/* Extra registers. */
#define RETURN_ADDR SLJIT_TEMPORARY_EREG1
#define TMP3 SLJIT_TEMPORARY_EREG2
#define ARGUMENTS SLJIT_SAVED_EREG1
#define CALL_COUNT SLJIT_SAVED_EREG2
468
/* Local space layout, in machine words:
  word 0-1: free for the current opcode,
  word 2-3: possessive quantifiers (char1 cannot use them),
  word 4:   max limit of recursions. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
#define CALL_LIMIT (4 * sizeof(sljit_sw))

/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet
been reached. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))

/* Private data offset assigned to the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
486
487 #if defined COMPILE_PCRE8
488 #define MOV_UCHAR SLJIT_MOV_UB
489 #define MOVU_UCHAR SLJIT_MOVU_UB
490 #elif defined COMPILE_PCRE16
491 #define MOV_UCHAR SLJIT_MOV_UH
492 #define MOVU_UCHAR SLJIT_MOVU_UH
493 #elif defined COMPILE_PCRE32
494 #define MOV_UCHAR SLJIT_MOV_UI
495 #define MOVU_UCHAR SLJIT_MOVU_UI
496 #else
497 #error Unsupported compiling mode
498 #endif
499
/* Shortcuts for the sljit emitter functions. Apart from DEFINE_COMPILER
they all expect a local variable called 'compiler', and DEFINE_COMPILER
declares exactly that from a 'common' variable in scope. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler

/* Operations. */
#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* Labels and jumps. */
#define LABEL() sljit_emit_label(compiler)
#define JUMP(type) sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) sljit_set_label((jump), (label))

/* Compare and jump. */
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
525
526 static pcre_uchar* bracketend(pcre_uchar* cc)
527 {
528 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
529 do cc += GET(cc, 1); while (*cc == OP_ALT);
530 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
531 cc += 1 + LINK_SIZE;
532 return cc;
533 }
534
/* Functions which might need modification for all new supported opcodes:
536 next_opcode
537 check_opcode_types
538 set_private_data_ptrs
539 get_framesize
540 init_frame
541 get_private_data_copy_length
542 copy_private_data
543 compile_matchingpath
544 compile_backtrackingpath
545 */
546
547 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
548 {
549 SLJIT_UNUSED_ARG(common);
550 switch(*cc)
551 {
552 case OP_SOD:
553 case OP_SOM:
554 case OP_SET_SOM:
555 case OP_NOT_WORD_BOUNDARY:
556 case OP_WORD_BOUNDARY:
557 case OP_NOT_DIGIT:
558 case OP_DIGIT:
559 case OP_NOT_WHITESPACE:
560 case OP_WHITESPACE:
561 case OP_NOT_WORDCHAR:
562 case OP_WORDCHAR:
563 case OP_ANY:
564 case OP_ALLANY:
565 case OP_NOTPROP:
566 case OP_PROP:
567 case OP_ANYNL:
568 case OP_NOT_HSPACE:
569 case OP_HSPACE:
570 case OP_NOT_VSPACE:
571 case OP_VSPACE:
572 case OP_EXTUNI:
573 case OP_EODN:
574 case OP_EOD:
575 case OP_CIRC:
576 case OP_CIRCM:
577 case OP_DOLL:
578 case OP_DOLLM:
579 case OP_CRSTAR:
580 case OP_CRMINSTAR:
581 case OP_CRPLUS:
582 case OP_CRMINPLUS:
583 case OP_CRQUERY:
584 case OP_CRMINQUERY:
585 case OP_CRRANGE:
586 case OP_CRMINRANGE:
587 case OP_CLASS:
588 case OP_NCLASS:
589 case OP_REF:
590 case OP_REFI:
591 case OP_RECURSE:
592 case OP_CALLOUT:
593 case OP_ALT:
594 case OP_KET:
595 case OP_KETRMAX:
596 case OP_KETRMIN:
597 case OP_KETRPOS:
598 case OP_REVERSE:
599 case OP_ASSERT:
600 case OP_ASSERT_NOT:
601 case OP_ASSERTBACK:
602 case OP_ASSERTBACK_NOT:
603 case OP_ONCE:
604 case OP_ONCE_NC:
605 case OP_BRA:
606 case OP_BRAPOS:
607 case OP_CBRA:
608 case OP_CBRAPOS:
609 case OP_COND:
610 case OP_SBRA:
611 case OP_SBRAPOS:
612 case OP_SCBRA:
613 case OP_SCBRAPOS:
614 case OP_SCOND:
615 case OP_CREF:
616 case OP_NCREF:
617 case OP_RREF:
618 case OP_NRREF:
619 case OP_DEF:
620 case OP_BRAZERO:
621 case OP_BRAMINZERO:
622 case OP_BRAPOSZERO:
623 case OP_PRUNE:
624 case OP_SKIP:
625 case OP_THEN:
626 case OP_COMMIT:
627 case OP_FAIL:
628 case OP_ACCEPT:
629 case OP_ASSERT_ACCEPT:
630 case OP_CLOSE:
631 case OP_SKIPZERO:
632 return cc + PRIV(OP_lengths)[*cc];
633
634 case OP_CHAR:
635 case OP_CHARI:
636 case OP_NOT:
637 case OP_NOTI:
638 case OP_STAR:
639 case OP_MINSTAR:
640 case OP_PLUS:
641 case OP_MINPLUS:
642 case OP_QUERY:
643 case OP_MINQUERY:
644 case OP_UPTO:
645 case OP_MINUPTO:
646 case OP_EXACT:
647 case OP_POSSTAR:
648 case OP_POSPLUS:
649 case OP_POSQUERY:
650 case OP_POSUPTO:
651 case OP_STARI:
652 case OP_MINSTARI:
653 case OP_PLUSI:
654 case OP_MINPLUSI:
655 case OP_QUERYI:
656 case OP_MINQUERYI:
657 case OP_UPTOI:
658 case OP_MINUPTOI:
659 case OP_EXACTI:
660 case OP_POSSTARI:
661 case OP_POSPLUSI:
662 case OP_POSQUERYI:
663 case OP_POSUPTOI:
664 case OP_NOTSTAR:
665 case OP_NOTMINSTAR:
666 case OP_NOTPLUS:
667 case OP_NOTMINPLUS:
668 case OP_NOTQUERY:
669 case OP_NOTMINQUERY:
670 case OP_NOTUPTO:
671 case OP_NOTMINUPTO:
672 case OP_NOTEXACT:
673 case OP_NOTPOSSTAR:
674 case OP_NOTPOSPLUS:
675 case OP_NOTPOSQUERY:
676 case OP_NOTPOSUPTO:
677 case OP_NOTSTARI:
678 case OP_NOTMINSTARI:
679 case OP_NOTPLUSI:
680 case OP_NOTMINPLUSI:
681 case OP_NOTQUERYI:
682 case OP_NOTMINQUERYI:
683 case OP_NOTUPTOI:
684 case OP_NOTMINUPTOI:
685 case OP_NOTEXACTI:
686 case OP_NOTPOSSTARI:
687 case OP_NOTPOSPLUSI:
688 case OP_NOTPOSQUERYI:
689 case OP_NOTPOSUPTOI:
690 cc += PRIV(OP_lengths)[*cc];
691 #ifdef SUPPORT_UTF
692 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
693 #endif
694 return cc;
695
696 /* Special cases. */
697 case OP_TYPESTAR:
698 case OP_TYPEMINSTAR:
699 case OP_TYPEPLUS:
700 case OP_TYPEMINPLUS:
701 case OP_TYPEQUERY:
702 case OP_TYPEMINQUERY:
703 case OP_TYPEUPTO:
704 case OP_TYPEMINUPTO:
705 case OP_TYPEEXACT:
706 case OP_TYPEPOSSTAR:
707 case OP_TYPEPOSPLUS:
708 case OP_TYPEPOSQUERY:
709 case OP_TYPEPOSUPTO:
710 return cc + PRIV(OP_lengths)[*cc] - 1;
711
712 case OP_ANYBYTE:
713 #ifdef SUPPORT_UTF
714 if (common->utf) return NULL;
715 #endif
716 return cc + 1;
717
718 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
719 case OP_XCLASS:
720 return cc + GET(cc, 1);
721 #endif
722
723 case OP_MARK:
724 case OP_PRUNE_ARG:
725 case OP_SKIP_ARG:
726 case OP_THEN_ARG:
727 return cc + 1 + 2 + cc[1];
728
729 default:
730 /* All opcodes are supported now! */
731 SLJIT_ASSERT_STOP();
732 return NULL;
733 }
734 }
735
736 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
737 {
738 pcre_uchar *name;
739 pcre_uchar *name2;
740 int i, cbra_index;
741
742 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
743 while (cc < ccend)
744 {
745 switch(*cc)
746 {
747 case OP_SET_SOM:
748 common->has_set_som = TRUE;
749 cc += 1;
750 break;
751
752 case OP_REF:
753 case OP_REFI:
754 common->optimized_cbracket[GET2(cc, 1)] = 0;
755 cc += 1 + IMM2_SIZE;
756 break;
757
758 case OP_CBRAPOS:
759 case OP_SCBRAPOS:
760 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
761 cc += 1 + LINK_SIZE + IMM2_SIZE;
762 break;
763
764 case OP_COND:
765 case OP_SCOND:
766 /* Only AUTO_CALLOUT can insert this opcode. We do
767 not intend to support this case. */
768 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
769 return FALSE;
770 cc += 1 + LINK_SIZE;
771 break;
772
773 case OP_CREF:
774 i = GET2(cc, 1);
775 common->optimized_cbracket[i] = 0;
776 cc += 1 + IMM2_SIZE;
777 break;
778
779 case OP_NCREF:
780 cbra_index = GET2(cc, 1);
781 name = (pcre_uchar *)common->name_table;
782 name2 = name;
783 for (i = 0; i < common->name_count; i++)
784 {
785 if (GET2(name, 0) == cbra_index) break;
786 name += common->name_entry_size;
787 }
788 SLJIT_ASSERT(i != common->name_count);
789
790 for (i = 0; i < common->name_count; i++)
791 {
792 if (STRCMP_UC_UC(name2 + IMM2_SIZE, name + IMM2_SIZE) == 0)
793 common->optimized_cbracket[GET2(name2, 0)] = 0;
794 name2 += common->name_entry_size;
795 }
796 cc += 1 + IMM2_SIZE;
797 break;
798
799 case OP_RECURSE:
800 /* Set its value only once. */
801 if (common->recursive_head_ptr == 0)
802 {
803 common->recursive_head_ptr = common->ovector_start;
804 common->ovector_start += sizeof(sljit_sw);
805 }
806 cc += 1 + LINK_SIZE;
807 break;
808
809 case OP_CALLOUT:
810 if (common->capture_last_ptr == 0)
811 {
812 common->capture_last_ptr = common->ovector_start;
813 common->ovector_start += sizeof(sljit_sw);
814 }
815 cc += 2 + 2 * LINK_SIZE;
816 break;
817
818 case OP_THEN_ARG:
819 common->has_then = TRUE;
820 common->control_head_ptr = 1;
821 /* Fall through. */
822
823 case OP_PRUNE_ARG:
824 common->needs_start_ptr = TRUE;
825 /* Fall through. */
826
827 case OP_MARK:
828 if (common->mark_ptr == 0)
829 {
830 common->mark_ptr = common->ovector_start;
831 common->ovector_start += sizeof(sljit_sw);
832 }
833 cc += 1 + 2 + cc[1];
834 break;
835
836 case OP_THEN:
837 common->has_then = TRUE;
838 common->control_head_ptr = 1;
839 /* Fall through. */
840
841 case OP_PRUNE:
842 case OP_SKIP:
843 common->needs_start_ptr = TRUE;
844 cc += 1;
845 break;
846
847 case OP_SKIP_ARG:
848 common->control_head_ptr = 1;
849 common->has_skip_arg = TRUE;
850 cc += 1 + 2 + cc[1];
851 break;
852
853 default:
854 cc = next_opcode(common, cc);
855 if (cc == NULL)
856 return FALSE;
857 break;
858 }
859 }
860 return TRUE;
861 }
862
863 static int get_class_iterator_size(pcre_uchar *cc)
864 {
865 switch(*cc)
866 {
867 case OP_CRSTAR:
868 case OP_CRPLUS:
869 return 2;
870
871 case OP_CRMINSTAR:
872 case OP_CRMINPLUS:
873 case OP_CRQUERY:
874 case OP_CRMINQUERY:
875 return 1;
876
877 case OP_CRRANGE:
878 case OP_CRMINRANGE:
879 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
880 return 0;
881 return 2;
882
883 default:
884 return 0;
885 }
886 }
887
888 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
889 {
890 pcre_uchar *end = bracketend(begin);
891 pcre_uchar *next;
892 pcre_uchar *next_end;
893 pcre_uchar *max_end;
894 pcre_uchar type;
895 sljit_uw length = end - begin;
896 int min, max, i;
897
898 /* Detect fixed iterations first. */
899 if (end[-(1 + LINK_SIZE)] != OP_KET)
900 return FALSE;
901
902 /* Already detected repeat. */
903 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
904 return TRUE;
905
906 next = end;
907 min = 1;
908 while (1)
909 {
910 if (*next != *begin)
911 break;
912 next_end = bracketend(next);
913 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
914 break;
915 next = next_end;
916 min++;
917 }
918
919 if (min == 2)
920 return FALSE;
921
922 max = 0;
923 max_end = next;
924 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
925 {
926 type = *next;
927 while (1)
928 {
929 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
930 break;
931 next_end = bracketend(next + 2 + LINK_SIZE);
932 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
933 break;
934 next = next_end;
935 max++;
936 }
937
938 if (next[0] == type && next[1] == *begin && max >= 1)
939 {
940 next_end = bracketend(next + 1);
941 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
942 {
943 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
944 if (*next_end != OP_KET)
945 break;
946
947 if (i == max)
948 {
949 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
950 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
951 /* +2 the original and the last. */
952 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
953 if (min == 1)
954 return TRUE;
955 min--;
956 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
957 }
958 }
959 }
960 }
961
962 if (min >= 3)
963 {
964 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
965 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
966 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
967 return TRUE;
968 }
969
970 return FALSE;
971 }
972
/* Case label groups for the iterator opcodes. The number in the name
is the private data space (in words) set_private_data_ptrs assigns to
the group; the TYPE 2A / 2B groups get it only when the repeated type
is not OP_ANYNL or OP_EXTUNI. The 2B groups are the UPTO variants. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: case OP_NOTQUERYI: case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1024
1025 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1026 {
1027 pcre_uchar *cc = common->start;
1028 pcre_uchar *alternative;
1029 pcre_uchar *end = NULL;
1030 int private_data_ptr = *private_data_start;
1031 int space, size, bracketlen;
1032
1033 while (cc < ccend)
1034 {
1035 space = 0;
1036 size = 0;
1037 bracketlen = 0;
1038 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1039 return;
1040
1041 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1042 if (detect_repeat(common, cc))
1043 {
1044 /* These brackets are converted to repeats, so no global
1045 based single character repeat is allowed. */
1046 if (cc >= end)
1047 end = bracketend(cc);
1048 }
1049
1050 switch(*cc)
1051 {
1052 case OP_KET:
1053 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1054 {
1055 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1056 private_data_ptr += sizeof(sljit_sw);
1057 cc += common->private_data_ptrs[cc + 1 - common->start];
1058 }
1059 cc += 1 + LINK_SIZE;
1060 break;
1061
1062 case OP_ASSERT:
1063 case OP_ASSERT_NOT:
1064 case OP_ASSERTBACK:
1065 case OP_ASSERTBACK_NOT:
1066 case OP_ONCE:
1067 case OP_ONCE_NC:
1068 case OP_BRAPOS:
1069 case OP_SBRA:
1070 case OP_SBRAPOS:
1071 case OP_SCOND:
1072 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1073 private_data_ptr += sizeof(sljit_sw);
1074 bracketlen = 1 + LINK_SIZE;
1075 break;
1076
1077 case OP_CBRAPOS:
1078 case OP_SCBRAPOS:
1079 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1080 private_data_ptr += sizeof(sljit_sw);
1081 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1082 break;
1083
1084 case OP_COND:
1085 /* Might be a hidden SCOND. */
1086 alternative = cc + GET(cc, 1);
1087 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1088 {
1089 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1090 private_data_ptr += sizeof(sljit_sw);
1091 }
1092 bracketlen = 1 + LINK_SIZE;
1093 break;
1094
1095 case OP_BRA:
1096 bracketlen = 1 + LINK_SIZE;
1097 break;
1098
1099 case OP_CBRA:
1100 case OP_SCBRA:
1101 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1102 break;
1103
1104 CASE_ITERATOR_PRIVATE_DATA_1
1105 space = 1;
1106 size = -2;
1107 break;
1108
1109 CASE_ITERATOR_PRIVATE_DATA_2A
1110 space = 2;
1111 size = -2;
1112 break;
1113
1114 CASE_ITERATOR_PRIVATE_DATA_2B
1115 space = 2;
1116 size = -(2 + IMM2_SIZE);
1117 break;
1118
1119 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1120 space = 1;
1121 size = 1;
1122 break;
1123
1124 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1125 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1126 space = 2;
1127 size = 1;
1128 break;
1129
1130 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1131 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1132 space = 2;
1133 size = 1 + IMM2_SIZE;
1134 break;
1135
1136 case OP_CLASS:
1137 case OP_NCLASS:
1138 size += 1 + 32 / sizeof(pcre_uchar);
1139 space = get_class_iterator_size(cc + size);
1140 break;
1141
1142 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1143 case OP_XCLASS:
1144 size = GET(cc, 1);
1145 space = get_class_iterator_size(cc + size);
1146 break;
1147 #endif
1148
1149 default:
1150 cc = next_opcode(common, cc);
1151 SLJIT_ASSERT(cc != NULL);
1152 break;
1153 }
1154
1155 /* Character iterators, which are not inside a repeated bracket,
1156 gets a private slot instead of allocating it on the stack. */
1157 if (space > 0 && cc >= end)
1158 {
1159 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1160 private_data_ptr += sizeof(sljit_sw) * space;
1161 }
1162
1163 if (size != 0)
1164 {
1165 if (size < 0)
1166 {
1167 cc += -size;
1168 #ifdef SUPPORT_UTF
1169 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1170 #endif
1171 }
1172 else
1173 cc += size;
1174 }
1175
1176 if (bracketlen > 0)
1177 {
1178 if (cc >= end)
1179 {
1180 end = bracketend(cc);
1181 if (end[-1 - LINK_SIZE] == OP_KET)
1182 end = NULL;
1183 }
1184 cc += bracketlen;
1185 }
1186 }
1187 *private_data_start = private_data_ptr;
1188 }
1189
1190 /* Returns with a frame_types (always < 0) if no need for frame. */
1191 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1192 {
1193 int length = 0;
1194 int possessive = 0;
1195 BOOL stack_restore = FALSE;
1196 BOOL setsom_found = recursive;
1197 BOOL setmark_found = recursive;
1198 /* The last capture is a local variable even for recursions. */
1199 BOOL capture_last_found = FALSE;
1200
1201 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1202 SLJIT_ASSERT(common->control_head_ptr != 0);
1203 *needs_control_head = TRUE;
1204 #else
1205 *needs_control_head = FALSE;
1206 #endif
1207
1208 if (ccend == NULL)
1209 {
1210 ccend = bracketend(cc) - (1 + LINK_SIZE);
1211 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1212 {
1213 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1214 /* This is correct regardless of common->capture_last_ptr. */
1215 capture_last_found = TRUE;
1216 }
1217 cc = next_opcode(common, cc);
1218 }
1219
1220 SLJIT_ASSERT(cc != NULL);
1221 while (cc < ccend)
1222 switch(*cc)
1223 {
1224 case OP_SET_SOM:
1225 SLJIT_ASSERT(common->has_set_som);
1226 stack_restore = TRUE;
1227 if (!setsom_found)
1228 {
1229 length += 2;
1230 setsom_found = TRUE;
1231 }
1232 cc += 1;
1233 break;
1234
1235 case OP_MARK:
1236 case OP_PRUNE_ARG:
1237 case OP_THEN_ARG:
1238 SLJIT_ASSERT(common->mark_ptr != 0);
1239 stack_restore = TRUE;
1240 if (!setmark_found)
1241 {
1242 length += 2;
1243 setmark_found = TRUE;
1244 }
1245 if (common->control_head_ptr != 0)
1246 *needs_control_head = TRUE;
1247 cc += 1 + 2 + cc[1];
1248 break;
1249
1250 case OP_RECURSE:
1251 stack_restore = TRUE;
1252 if (common->has_set_som && !setsom_found)
1253 {
1254 length += 2;
1255 setsom_found = TRUE;
1256 }
1257 if (common->mark_ptr != 0 && !setmark_found)
1258 {
1259 length += 2;
1260 setmark_found = TRUE;
1261 }
1262 if (common->capture_last_ptr != 0 && !capture_last_found)
1263 {
1264 length += 2;
1265 capture_last_found = TRUE;
1266 }
1267 cc += 1 + LINK_SIZE;
1268 break;
1269
1270 case OP_CBRA:
1271 case OP_CBRAPOS:
1272 case OP_SCBRA:
1273 case OP_SCBRAPOS:
1274 stack_restore = TRUE;
1275 if (common->capture_last_ptr != 0 && !capture_last_found)
1276 {
1277 length += 2;
1278 capture_last_found = TRUE;
1279 }
1280 length += 3;
1281 cc += 1 + LINK_SIZE + IMM2_SIZE;
1282 break;
1283
1284 default:
1285 stack_restore = TRUE;
1286 /* Fall through. */
1287
1288 case OP_NOT_WORD_BOUNDARY:
1289 case OP_WORD_BOUNDARY:
1290 case OP_NOT_DIGIT:
1291 case OP_DIGIT:
1292 case OP_NOT_WHITESPACE:
1293 case OP_WHITESPACE:
1294 case OP_NOT_WORDCHAR:
1295 case OP_WORDCHAR:
1296 case OP_ANY:
1297 case OP_ALLANY:
1298 case OP_ANYBYTE:
1299 case OP_NOTPROP:
1300 case OP_PROP:
1301 case OP_ANYNL:
1302 case OP_NOT_HSPACE:
1303 case OP_HSPACE:
1304 case OP_NOT_VSPACE:
1305 case OP_VSPACE:
1306 case OP_EXTUNI:
1307 case OP_EODN:
1308 case OP_EOD:
1309 case OP_CIRC:
1310 case OP_CIRCM:
1311 case OP_DOLL:
1312 case OP_DOLLM:
1313 case OP_CHAR:
1314 case OP_CHARI:
1315 case OP_NOT:
1316 case OP_NOTI:
1317
1318 case OP_EXACT:
1319 case OP_POSSTAR:
1320 case OP_POSPLUS:
1321 case OP_POSQUERY:
1322 case OP_POSUPTO:
1323
1324 case OP_EXACTI:
1325 case OP_POSSTARI:
1326 case OP_POSPLUSI:
1327 case OP_POSQUERYI:
1328 case OP_POSUPTOI:
1329
1330 case OP_NOTEXACT:
1331 case OP_NOTPOSSTAR:
1332 case OP_NOTPOSPLUS:
1333 case OP_NOTPOSQUERY:
1334 case OP_NOTPOSUPTO:
1335
1336 case OP_NOTEXACTI:
1337 case OP_NOTPOSSTARI:
1338 case OP_NOTPOSPLUSI:
1339 case OP_NOTPOSQUERYI:
1340 case OP_NOTPOSUPTOI:
1341
1342 case OP_TYPEEXACT:
1343 case OP_TYPEPOSSTAR:
1344 case OP_TYPEPOSPLUS:
1345 case OP_TYPEPOSQUERY:
1346 case OP_TYPEPOSUPTO:
1347
1348 case OP_CLASS:
1349 case OP_NCLASS:
1350 case OP_XCLASS:
1351
1352 cc = next_opcode(common, cc);
1353 SLJIT_ASSERT(cc != NULL);
1354 break;
1355 }
1356
1357 /* Possessive quantifiers can use a special case. */
1358 if (SLJIT_UNLIKELY(possessive == length))
1359 return stack_restore ? no_frame : no_stack;
1360
1361 if (length > 0)
1362 return length + 1;
1363 return stack_restore ? no_frame : no_stack;
1364 }
1365
1366 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1367 {
1368 DEFINE_COMPILER;
1369 BOOL setsom_found = recursive;
1370 BOOL setmark_found = recursive;
1371 /* The last capture is a local variable even for recursions. */
1372 BOOL capture_last_found = FALSE;
1373 int offset;
1374
1375 /* >= 1 + shortest item size (2) */
1376 SLJIT_UNUSED_ARG(stacktop);
1377 SLJIT_ASSERT(stackpos >= stacktop + 2);
1378
1379 stackpos = STACK(stackpos);
1380 if (ccend == NULL)
1381 {
1382 ccend = bracketend(cc) - (1 + LINK_SIZE);
1383 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1384 cc = next_opcode(common, cc);
1385 }
1386
1387 SLJIT_ASSERT(cc != NULL);
1388 while (cc < ccend)
1389 switch(*cc)
1390 {
1391 case OP_SET_SOM:
1392 SLJIT_ASSERT(common->has_set_som);
1393 if (!setsom_found)
1394 {
1395 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1396 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1397 stackpos += (int)sizeof(sljit_sw);
1398 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1399 stackpos += (int)sizeof(sljit_sw);
1400 setsom_found = TRUE;
1401 }
1402 cc += 1;
1403 break;
1404
1405 case OP_MARK:
1406 case OP_PRUNE_ARG:
1407 case OP_THEN_ARG:
1408 SLJIT_ASSERT(common->mark_ptr != 0);
1409 if (!setmark_found)
1410 {
1411 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1412 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1413 stackpos += (int)sizeof(sljit_sw);
1414 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1415 stackpos += (int)sizeof(sljit_sw);
1416 setmark_found = TRUE;
1417 }
1418 cc += 1 + 2 + cc[1];
1419 break;
1420
1421 case OP_RECURSE:
1422 if (common->has_set_som && !setsom_found)
1423 {
1424 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1425 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1426 stackpos += (int)sizeof(sljit_sw);
1427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1428 stackpos += (int)sizeof(sljit_sw);
1429 setsom_found = TRUE;
1430 }
1431 if (common->mark_ptr != 0 && !setmark_found)
1432 {
1433 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1435 stackpos += (int)sizeof(sljit_sw);
1436 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1437 stackpos += (int)sizeof(sljit_sw);
1438 setmark_found = TRUE;
1439 }
1440 if (common->capture_last_ptr != 0 && !capture_last_found)
1441 {
1442 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1444 stackpos += (int)sizeof(sljit_sw);
1445 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1446 stackpos += (int)sizeof(sljit_sw);
1447 capture_last_found = TRUE;
1448 }
1449 cc += 1 + LINK_SIZE;
1450 break;
1451
1452 case OP_CBRA:
1453 case OP_CBRAPOS:
1454 case OP_SCBRA:
1455 case OP_SCBRAPOS:
1456 if (common->capture_last_ptr != 0 && !capture_last_found)
1457 {
1458 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1459 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1460 stackpos += (int)sizeof(sljit_sw);
1461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1462 stackpos += (int)sizeof(sljit_sw);
1463 capture_last_found = TRUE;
1464 }
1465 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1467 stackpos += (int)sizeof(sljit_sw);
1468 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1469 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1471 stackpos += (int)sizeof(sljit_sw);
1472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1473 stackpos += (int)sizeof(sljit_sw);
1474
1475 cc += 1 + LINK_SIZE + IMM2_SIZE;
1476 break;
1477
1478 default:
1479 cc = next_opcode(common, cc);
1480 SLJIT_ASSERT(cc != NULL);
1481 break;
1482 }
1483
1484 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1485 SLJIT_ASSERT(stackpos == STACK(stacktop));
1486 }
1487
1488 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1489 {
1490 int private_data_length = needs_control_head ? 3 : 2;
1491 int size;
1492 pcre_uchar *alternative;
1493 /* Calculate the sum of the private machine words. */
1494 while (cc < ccend)
1495 {
1496 size = 0;
1497 switch(*cc)
1498 {
1499 case OP_KET:
1500 if (PRIVATE_DATA(cc) != 0)
1501 private_data_length++;
1502 cc += 1 + LINK_SIZE;
1503 break;
1504
1505 case OP_ASSERT:
1506 case OP_ASSERT_NOT:
1507 case OP_ASSERTBACK:
1508 case OP_ASSERTBACK_NOT:
1509 case OP_ONCE:
1510 case OP_ONCE_NC:
1511 case OP_BRAPOS:
1512 case OP_SBRA:
1513 case OP_SBRAPOS:
1514 case OP_SCOND:
1515 private_data_length++;
1516 cc += 1 + LINK_SIZE;
1517 break;
1518
1519 case OP_CBRA:
1520 case OP_SCBRA:
1521 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1522 private_data_length++;
1523 cc += 1 + LINK_SIZE + IMM2_SIZE;
1524 break;
1525
1526 case OP_CBRAPOS:
1527 case OP_SCBRAPOS:
1528 private_data_length += 2;
1529 cc += 1 + LINK_SIZE + IMM2_SIZE;
1530 break;
1531
1532 case OP_COND:
1533 /* Might be a hidden SCOND. */
1534 alternative = cc + GET(cc, 1);
1535 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1536 private_data_length++;
1537 cc += 1 + LINK_SIZE;
1538 break;
1539
1540 CASE_ITERATOR_PRIVATE_DATA_1
1541 if (PRIVATE_DATA(cc))
1542 private_data_length++;
1543 cc += 2;
1544 #ifdef SUPPORT_UTF
1545 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1546 #endif
1547 break;
1548
1549 CASE_ITERATOR_PRIVATE_DATA_2A
1550 if (PRIVATE_DATA(cc))
1551 private_data_length += 2;
1552 cc += 2;
1553 #ifdef SUPPORT_UTF
1554 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1555 #endif
1556 break;
1557
1558 CASE_ITERATOR_PRIVATE_DATA_2B
1559 if (PRIVATE_DATA(cc))
1560 private_data_length += 2;
1561 cc += 2 + IMM2_SIZE;
1562 #ifdef SUPPORT_UTF
1563 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1564 #endif
1565 break;
1566
1567 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1568 if (PRIVATE_DATA(cc))
1569 private_data_length++;
1570 cc += 1;
1571 break;
1572
1573 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1574 if (PRIVATE_DATA(cc))
1575 private_data_length += 2;
1576 cc += 1;
1577 break;
1578
1579 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1580 if (PRIVATE_DATA(cc))
1581 private_data_length += 2;
1582 cc += 1 + IMM2_SIZE;
1583 break;
1584
1585 case OP_CLASS:
1586 case OP_NCLASS:
1587 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1588 case OP_XCLASS:
1589 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1590 #else
1591 size = 1 + 32 / (int)sizeof(pcre_uchar);
1592 #endif
1593 if (PRIVATE_DATA(cc))
1594 private_data_length += get_class_iterator_size(cc + size);
1595 cc += size;
1596 break;
1597
1598 default:
1599 cc = next_opcode(common, cc);
1600 SLJIT_ASSERT(cc != NULL);
1601 break;
1602 }
1603 }
1604 SLJIT_ASSERT(cc == ccend);
1605 return private_data_length;
1606 }
1607
1608 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1609 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1610 {
1611 DEFINE_COMPILER;
1612 int srcw[2];
1613 int count, size;
1614 BOOL tmp1next = TRUE;
1615 BOOL tmp1empty = TRUE;
1616 BOOL tmp2empty = TRUE;
1617 pcre_uchar *alternative;
1618 enum {
1619 start,
1620 loop,
1621 end
1622 } status;
1623
1624 status = save ? start : loop;
1625 stackptr = STACK(stackptr - 2);
1626 stacktop = STACK(stacktop - 1);
1627
1628 if (!save)
1629 {
1630 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1631 if (stackptr < stacktop)
1632 {
1633 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1634 stackptr += sizeof(sljit_sw);
1635 tmp1empty = FALSE;
1636 }
1637 if (stackptr < stacktop)
1638 {
1639 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1640 stackptr += sizeof(sljit_sw);
1641 tmp2empty = FALSE;
1642 }
1643 /* The tmp1next must be TRUE in either way. */
1644 }
1645
1646 do
1647 {
1648 count = 0;
1649 switch(status)
1650 {
1651 case start:
1652 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1653 count = 1;
1654 srcw[0] = common->recursive_head_ptr;
1655 if (needs_control_head)
1656 {
1657 SLJIT_ASSERT(common->control_head_ptr != 0);
1658 count = 2;
1659 srcw[1] = common->control_head_ptr;
1660 }
1661 status = loop;
1662 break;
1663
1664 case loop:
1665 if (cc >= ccend)
1666 {
1667 status = end;
1668 break;
1669 }
1670
1671 switch(*cc)
1672 {
1673 case OP_KET:
1674 if (PRIVATE_DATA(cc) != 0)
1675 {
1676 count = 1;
1677 srcw[0] = PRIVATE_DATA(cc);
1678 }
1679 cc += 1 + LINK_SIZE;
1680 break;
1681
1682 case OP_ASSERT:
1683 case OP_ASSERT_NOT:
1684 case OP_ASSERTBACK:
1685 case OP_ASSERTBACK_NOT:
1686 case OP_ONCE:
1687 case OP_ONCE_NC:
1688 case OP_BRAPOS:
1689 case OP_SBRA:
1690 case OP_SBRAPOS:
1691 case OP_SCOND:
1692 count = 1;
1693 srcw[0] = PRIVATE_DATA(cc);
1694 SLJIT_ASSERT(srcw[0] != 0);
1695 cc += 1 + LINK_SIZE;
1696 break;
1697
1698 case OP_CBRA:
1699 case OP_SCBRA:
1700 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1701 {
1702 count = 1;
1703 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1704 }
1705 cc += 1 + LINK_SIZE + IMM2_SIZE;
1706 break;
1707
1708 case OP_CBRAPOS:
1709 case OP_SCBRAPOS:
1710 count = 2;
1711 srcw[0] = PRIVATE_DATA(cc);
1712 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1713 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1714 cc += 1 + LINK_SIZE + IMM2_SIZE;
1715 break;
1716
1717 case OP_COND:
1718 /* Might be a hidden SCOND. */
1719 alternative = cc + GET(cc, 1);
1720 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1721 {
1722 count = 1;
1723 srcw[0] = PRIVATE_DATA(cc);
1724 SLJIT_ASSERT(srcw[0] != 0);
1725 }
1726 cc += 1 + LINK_SIZE;
1727 break;
1728
1729 CASE_ITERATOR_PRIVATE_DATA_1
1730 if (PRIVATE_DATA(cc))
1731 {
1732 count = 1;
1733 srcw[0] = PRIVATE_DATA(cc);
1734 }
1735 cc += 2;
1736 #ifdef SUPPORT_UTF
1737 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1738 #endif
1739 break;
1740
1741 CASE_ITERATOR_PRIVATE_DATA_2A
1742 if (PRIVATE_DATA(cc))
1743 {
1744 count = 2;
1745 srcw[0] = PRIVATE_DATA(cc);
1746 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1747 }
1748 cc += 2;
1749 #ifdef SUPPORT_UTF
1750 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1751 #endif
1752 break;
1753
1754 CASE_ITERATOR_PRIVATE_DATA_2B
1755 if (PRIVATE_DATA(cc))
1756 {
1757 count = 2;
1758 srcw[0] = PRIVATE_DATA(cc);
1759 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1760 }
1761 cc += 2 + IMM2_SIZE;
1762 #ifdef SUPPORT_UTF
1763 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1764 #endif
1765 break;
1766
1767 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1768 if (PRIVATE_DATA(cc))
1769 {
1770 count = 1;
1771 srcw[0] = PRIVATE_DATA(cc);
1772 }
1773 cc += 1;
1774 break;
1775
1776 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1777 if (PRIVATE_DATA(cc))
1778 {
1779 count = 2;
1780 srcw[0] = PRIVATE_DATA(cc);
1781 srcw[1] = srcw[0] + sizeof(sljit_sw);
1782 }
1783 cc += 1;
1784 break;
1785
1786 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1787 if (PRIVATE_DATA(cc))
1788 {
1789 count = 2;
1790 srcw[0] = PRIVATE_DATA(cc);
1791 srcw[1] = srcw[0] + sizeof(sljit_sw);
1792 }
1793 cc += 1 + IMM2_SIZE;
1794 break;
1795
1796 case OP_CLASS:
1797 case OP_NCLASS:
1798 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1799 case OP_XCLASS:
1800 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1801 #else
1802 size = 1 + 32 / (int)sizeof(pcre_uchar);
1803 #endif
1804 if (PRIVATE_DATA(cc))
1805 switch(get_class_iterator_size(cc + size))
1806 {
1807 case 1:
1808 count = 1;
1809 srcw[0] = PRIVATE_DATA(cc);
1810 break;
1811
1812 case 2:
1813 count = 2;
1814 srcw[0] = PRIVATE_DATA(cc);
1815 srcw[1] = srcw[0] + sizeof(sljit_sw);
1816 break;
1817
1818 default:
1819 SLJIT_ASSERT_STOP();
1820 break;
1821 }
1822 cc += size;
1823 break;
1824
1825 default:
1826 cc = next_opcode(common, cc);
1827 SLJIT_ASSERT(cc != NULL);
1828 break;
1829 }
1830 break;
1831
1832 case end:
1833 SLJIT_ASSERT_STOP();
1834 break;
1835 }
1836
1837 while (count > 0)
1838 {
1839 count--;
1840 if (save)
1841 {
1842 if (tmp1next)
1843 {
1844 if (!tmp1empty)
1845 {
1846 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1847 stackptr += sizeof(sljit_sw);
1848 }
1849 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1850 tmp1empty = FALSE;
1851 tmp1next = FALSE;
1852 }
1853 else
1854 {
1855 if (!tmp2empty)
1856 {
1857 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1858 stackptr += sizeof(sljit_sw);
1859 }
1860 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1861 tmp2empty = FALSE;
1862 tmp1next = TRUE;
1863 }
1864 }
1865 else
1866 {
1867 if (tmp1next)
1868 {
1869 SLJIT_ASSERT(!tmp1empty);
1870 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1871 tmp1empty = stackptr >= stacktop;
1872 if (!tmp1empty)
1873 {
1874 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1875 stackptr += sizeof(sljit_sw);
1876 }
1877 tmp1next = FALSE;
1878 }
1879 else
1880 {
1881 SLJIT_ASSERT(!tmp2empty);
1882 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1883 tmp2empty = stackptr >= stacktop;
1884 if (!tmp2empty)
1885 {
1886 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1887 stackptr += sizeof(sljit_sw);
1888 }
1889 tmp1next = TRUE;
1890 }
1891 }
1892 }
1893 }
1894 while (status != end);
1895
1896 if (save)
1897 {
1898 if (tmp1next)
1899 {
1900 if (!tmp1empty)
1901 {
1902 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1903 stackptr += sizeof(sljit_sw);
1904 }
1905 if (!tmp2empty)
1906 {
1907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1908 stackptr += sizeof(sljit_sw);
1909 }
1910 }
1911 else
1912 {
1913 if (!tmp2empty)
1914 {
1915 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1916 stackptr += sizeof(sljit_sw);
1917 }
1918 if (!tmp1empty)
1919 {
1920 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1921 stackptr += sizeof(sljit_sw);
1922 }
1923 }
1924 }
1925 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1926 }
1927
1928 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1929 {
1930 pcre_uchar *end = bracketend(cc);
1931 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1932
1933 /* Assert captures then. */
1934 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1935 current_offset = NULL;
1936 /* Conditional block does not. */
1937 if (*cc == OP_COND || *cc == OP_SCOND)
1938 has_alternatives = FALSE;
1939
1940 cc = next_opcode(common, cc);
1941 if (has_alternatives)
1942 current_offset = common->then_offsets + (cc - common->start);
1943
1944 while (cc < end)
1945 {
1946 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1947 cc = set_then_offsets(common, cc, current_offset);
1948 else
1949 {
1950 if (*cc == OP_ALT && has_alternatives)
1951 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1952 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1953 *current_offset = 1;
1954 cc = next_opcode(common, cc);
1955 }
1956 }
1957
1958 return end;
1959 }
1960
1961 #undef CASE_ITERATOR_PRIVATE_DATA_1
1962 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1963 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1964 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1965 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1966 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1967
1968 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1969 {
1970 return (value & (value - 1)) == 0;
1971 }
1972
1973 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1974 {
1975 while (list)
1976 {
1977 /* sljit_set_label is clever enough to do nothing
1978 if either the jump or the label is NULL. */
1979 SET_LABEL(list->jump, label);
1980 list = list->next;
1981 }
1982 }
1983
1984 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1985 {
1986 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1987 if (list_item)
1988 {
1989 list_item->next = *list;
1990 list_item->jump = jump;
1991 *list = list_item;
1992 }
1993 }
1994
1995 static void add_stub(compiler_common *common, struct sljit_jump *start)
1996 {
1997 DEFINE_COMPILER;
1998 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1999
2000 if (list_item)
2001 {
2002 list_item->start = start;
2003 list_item->quit = LABEL();
2004 list_item->next = common->stubs;
2005 common->stubs = list_item;
2006 }
2007 }
2008
2009 static void flush_stubs(compiler_common *common)
2010 {
2011 DEFINE_COMPILER;
2012 stub_list* list_item = common->stubs;
2013
2014 while (list_item)
2015 {
2016 JUMPHERE(list_item->start);
2017 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2018 JUMPTO(SLJIT_JUMP, list_item->quit);
2019 list_item = list_item->next;
2020 }
2021 common->stubs = NULL;
2022 }
2023
2024 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
2025 {
2026 DEFINE_COMPILER;
2027
2028 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
2029 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2030 }
2031
2032 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2033 {
2034 /* May destroy all locals and registers except TMP2. */
2035 DEFINE_COMPILER;
2036
2037 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2038 #ifdef DESTROY_REGISTERS
2039 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2040 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2041 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2042 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2043 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2044 #endif
2045 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2046 }
2047
2048 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2049 {
2050 DEFINE_COMPILER;
2051 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2052 }
2053
2054 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2055 {
2056 DEFINE_COMPILER;
2057 struct sljit_label *loop;
2058 int i;
2059
2060 /* At this point we can freely use all temporary registers. */
2061 SLJIT_ASSERT(length > 1);
2062 /* TMP1 returns with begin - 1. */
2063 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2064 if (length < 8)
2065 {
2066 for (i = 1; i < length; i++)
2067 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2068 }
2069 else
2070 {
2071 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2072 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2073 loop = LABEL();
2074 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2075 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2076 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2077 }
2078 }
2079
2080 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2081 {
2082 DEFINE_COMPILER;
2083 struct sljit_label *loop;
2084 int i;
2085
2086 SLJIT_ASSERT(length > 1);
2087 /* OVECTOR(1) contains the "string begin - 1" constant. */
2088 if (length > 2)
2089 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2090 if (length < 8)
2091 {
2092 for (i = 2; i < length; i++)
2093 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2094 }
2095 else
2096 {
2097 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2098 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2099 loop = LABEL();
2100 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2101 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2102 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2103 }
2104
2105 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2106 if (common->mark_ptr != 0)
2107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2108 if (common->control_head_ptr != 0)
2109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2110 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2111 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2112 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2113 }
2114
2115 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2116 {
2117 while (current != NULL)
2118 {
2119 switch (current[-2])
2120 {
2121 case type_then_trap:
2122 break;
2123
2124 case type_mark:
2125 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2126 return current[-4];
2127 break;
2128
2129 default:
2130 SLJIT_ASSERT_STOP();
2131 break;
2132 }
2133 current = (sljit_sw*)current[-1];
2134 }
2135 return -1;
2136 }
2137
2138 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2139 {
2140 DEFINE_COMPILER;
2141 struct sljit_label *loop;
2142 struct sljit_jump *early_quit;
2143
2144 /* At this point we can freely use all registers. */
2145 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2146 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2147
2148 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2149 if (common->mark_ptr != 0)
2150 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2151 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2152 if (common->mark_ptr != 0)
2153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2154 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2155 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2156 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2157 /* Unlikely, but possible */
2158 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2159 loop = LABEL();
2160 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2161 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2162 /* Copy the integer value to the output buffer */
2163 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2164 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2165 #endif
2166 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2167 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2168 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2169 JUMPHERE(early_quit);
2170
2171 /* Calculate the return value, which is the maximum ovector value. */
2172 if (topbracket > 1)
2173 {
2174 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2175 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2176
2177 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2178 loop = LABEL();
2179 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2180 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2181 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2182 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2183 }
2184 else
2185 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2186 }
2187
2188 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2189 {
2190 DEFINE_COMPILER;
2191 struct sljit_jump *jump;
2192
2193 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2194 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2195 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2196
2197 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2198 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2199 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2200 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2201
2202 /* Store match begin and end. */
2203 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2204 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2205
2206 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2207 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2208 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2209 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2210 #endif
2211 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2212 JUMPHERE(jump);
2213
2214 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2215 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2216 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2217 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2218 #endif
2219 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2220
2221 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2222 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2223 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2224 #endif
2225 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2226
2227 JUMPTO(SLJIT_JUMP, quit);
2228 }
2229
2230 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2231 {
2232 /* May destroy TMP1. */
2233 DEFINE_COMPILER;
2234 struct sljit_jump *jump;
2235
2236 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2237 {
2238 /* The value of -1 must be kept for start_used_ptr! */
2239 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2240 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2241 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2242 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2243 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2244 JUMPHERE(jump);
2245 }
2246 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2247 {
2248 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2249 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2250 JUMPHERE(jump);
2251 }
2252 }
2253
2254 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2255 {
2256 /* Detects if the character has an othercase. */
2257 unsigned int c;
2258
2259 #ifdef SUPPORT_UTF
2260 if (common->utf)
2261 {
2262 GETCHAR(c, cc);
2263 if (c > 127)
2264 {
2265 #ifdef SUPPORT_UCP
2266 return c != UCD_OTHERCASE(c);
2267 #else
2268 return FALSE;
2269 #endif
2270 }
2271 #ifndef COMPILE_PCRE8
2272 return common->fcc[c] != c;
2273 #endif
2274 }
2275 else
2276 #endif
2277 c = *cc;
2278 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2279 }
2280
2281 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2282 {
2283 /* Returns with the othercase. */
2284 #ifdef SUPPORT_UTF
2285 if (common->utf && c > 127)
2286 {
2287 #ifdef SUPPORT_UCP
2288 return UCD_OTHERCASE(c);
2289 #else
2290 return c;
2291 #endif
2292 }
2293 #endif
2294 return TABLE_GET(c, common->fcc, c);
2295 }
2296
2297 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2298 {
2299 /* Detects if the character and its othercase has only 1 bit difference. */
2300 unsigned int c, oc, bit;
2301 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2302 int n;
2303 #endif
2304
2305 #ifdef SUPPORT_UTF
2306 if (common->utf)
2307 {
2308 GETCHAR(c, cc);
2309 if (c <= 127)
2310 oc = common->fcc[c];
2311 else
2312 {
2313 #ifdef SUPPORT_UCP
2314 oc = UCD_OTHERCASE(c);
2315 #else
2316 oc = c;
2317 #endif
2318 }
2319 }
2320 else
2321 {
2322 c = *cc;
2323 oc = TABLE_GET(c, common->fcc, c);
2324 }
2325 #else
2326 c = *cc;
2327 oc = TABLE_GET(c, common->fcc, c);
2328 #endif
2329
2330 SLJIT_ASSERT(c != oc);
2331
2332 bit = c ^ oc;
2333 /* Optimized for English alphabet. */
2334 if (c <= 127 && bit == 0x20)
2335 return (0 << 8) | 0x20;
2336
2337 /* Since c != oc, they must have at least 1 bit difference. */
2338 if (!is_powerof2(bit))
2339 return 0;
2340
2341 #if defined COMPILE_PCRE8
2342
2343 #ifdef SUPPORT_UTF
2344 if (common->utf && c > 127)
2345 {
2346 n = GET_EXTRALEN(*cc);
2347 while ((bit & 0x3f) == 0)
2348 {
2349 n--;
2350 bit >>= 6;
2351 }
2352 return (n << 8) | bit;
2353 }
2354 #endif /* SUPPORT_UTF */
2355 return (0 << 8) | bit;
2356
2357 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2358
2359 #ifdef SUPPORT_UTF
2360 if (common->utf && c > 65535)
2361 {
2362 if (bit >= (1 << 10))
2363 bit >>= 10;
2364 else
2365 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2366 }
2367 #endif /* SUPPORT_UTF */
2368 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2369
2370 #endif /* COMPILE_PCRE[8|16|32] */
2371 }
2372
2373 static void check_partial(compiler_common *common, BOOL force)
2374 {
2375 /* Checks whether a partial matching is occured. Does not modify registers. */
2376 DEFINE_COMPILER;
2377 struct sljit_jump *jump = NULL;
2378
2379 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2380
2381 if (common->mode == JIT_COMPILE)
2382 return;
2383
2384 if (!force)
2385 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2386 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2387 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2388
2389 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2390 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2391 else
2392 {
2393 if (common->partialmatchlabel != NULL)
2394 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2395 else
2396 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2397 }
2398
2399 if (jump != NULL)
2400 JUMPHERE(jump);
2401 }
2402
2403 static void check_str_end(compiler_common *common, jump_list **end_reached)
2404 {
2405 /* Does not affect registers. Usually used in a tight spot. */
2406 DEFINE_COMPILER;
2407 struct sljit_jump *jump;
2408
2409 if (common->mode == JIT_COMPILE)
2410 {
2411 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2412 return;
2413 }
2414
2415 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2416 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2417 {
2418 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2419 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2420 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2421 }
2422 else
2423 {
2424 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2425 if (common->partialmatchlabel != NULL)
2426 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2427 else
2428 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2429 }
2430 JUMPHERE(jump);
2431 }
2432
2433 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2434 {
2435 DEFINE_COMPILER;
2436 struct sljit_jump *jump;
2437
2438 if (common->mode == JIT_COMPILE)
2439 {
2440 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2441 return;
2442 }
2443
2444 /* Partial matching mode. */
2445 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2446 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2447 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2448 {
2449 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2450 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2451 }
2452 else
2453 {
2454 if (common->partialmatchlabel != NULL)
2455 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2456 else
2457 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2458 }
2459 JUMPHERE(jump);
2460 }
2461
/* Emits code that reads one character at STR_PTR and steps over it. */
2462 static void read_char(compiler_common *common)
2463 {
2464 /* Reads the character into TMP1, updates STR_PTR.
2465 Does not check STR_END. TMP2 is destroyed. */
2466 DEFINE_COMPILER;
2467 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2468 struct sljit_jump *jump;
2469 #endif
2470
/* Load the first code unit. */
2471 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2472 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2473 if (common->utf)
2474 {
/* Units below the threshold (0xc0 for 8 bit, 0xd800 for 16 bit) are complete
characters; anything else is handed to the shared routine collected in
common->utfreadchar through a fast call. */
2475 #if defined COMPILE_PCRE8
2476 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2477 #elif defined COMPILE_PCRE16
2478 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2479 #endif /* COMPILE_PCRE[8|16] */
2480 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2481 JUMPHERE(jump);
2482 }
2483 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Step over the first code unit (both paths reach this instruction). */
2484 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2485 }
2486
/* Emits code that reads the character at STR_PTR without consuming it. */
2487 static void peek_char(compiler_common *common)
2488 {
2489 /* Reads the character into TMP1, keeps STR_PTR.
2490 Does not check STR_END. TMP2 is destroyed. */
2491 DEFINE_COMPILER;
2492 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2493 struct sljit_jump *jump;
2494 #endif
2495
2496 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2497 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2498 if (common->utf)
2499 {
2500 #if defined COMPILE_PCRE8
2501 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2502 #elif defined COMPILE_PCRE16
2503 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2504 #endif /* COMPILE_PCRE[8|16] */
2505 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
/* Undo the STR_PTR advance made by the called routine; this relies on the
routine leaving the amount it advanced in TMP2. */
2506 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2507 JUMPHERE(jump);
2508 }
2509 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2510 }
2511
/* Emits code that consumes one character and fetches its ctypes entry.
Characters above 255 yield a type of 0, as the table only covers 256 values. */
2512 static void read_char8_type(compiler_common *common)
2513 {
2514 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
TMP2 is overwritten as well. */
2515 DEFINE_COMPILER;
2516 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2517 struct sljit_jump *jump;
2518 #endif
2519
2520 #ifdef SUPPORT_UTF
2521 if (common->utf)
2522 {
/* First code unit goes to TMP2; STR_PTR already points past it afterwards. */
2523 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2524 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2525 #if defined COMPILE_PCRE8
2526 /* This can be an extra read in some situations, but hopefully
2527 it is needed in most cases. */
2528 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* Lead bytes >= 0xc0 are resolved by the routine collected in
common->utfreadtype8. */
2529 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2530 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2531 JUMPHERE(jump);
2532 #elif defined COMPILE_PCRE16
/* Type defaults to 0; the table is read only for units <= 255. */
2533 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2534 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2535 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2536 JUMPHERE(jump);
2537 /* Skip low surrogate if necessary. */
/* TMP2 becomes 1 when the unit just read is a high surrogate
((unit & 0xfc00) == 0xd800), then 2 after the shift: the byte size of the
following unit, which is added to STR_PTR. */
2538 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2539 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2540 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2541 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2542 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2543 #elif defined COMPILE_PCRE32
2544 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2545 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2546 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2547 JUMPHERE(jump);
2548 #endif /* COMPILE_PCRE[8|16|32] */
2549 return;
2550 }
2551 #endif /* SUPPORT_UTF */
/* Non-UTF path: one code unit is one character. */
2552 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2553 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2554 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2555 /* The ctypes array contains only 256 values. */
2556 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2557 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2558 #endif
2559 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2560 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2561 JUMPHERE(jump);
2562 #endif
2563 }
2564
/* Emits code that moves STR_PTR back to the start of the previous character. */
2565 static void skip_char_back(compiler_common *common)
2566 {
2567 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2568 DEFINE_COMPILER;
2569 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2570 #if defined COMPILE_PCRE8
2571 struct sljit_label *label;
2572
2573 if (common->utf)
2574 {
/* UTF-8: step back one byte at a time while the byte just stepped over is a
continuation byte ((byte & 0xc0) == 0x80). */
2575 label = LABEL();
2576 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2577 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2578 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2579 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2580 return;
2581 }
2582 #elif defined COMPILE_PCRE16
2583 if (common->utf)
2584 {
/* UTF-16: step back one unit, then one more if that unit was a low
surrogate ((unit & 0xfc00) == 0xdc00). */
2585 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2586 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2587 /* Skip the high surrogate too if a low surrogate was stepped over. */
2588 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2589 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2590 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2591 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2592 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2593 return;
2594 }
2595 #endif /* COMPILE_PCRE[8|16] */
2596 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Non-UTF (and 32 bit) case: a character is exactly one code unit. */
2597 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2598 }
2599
2600 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2601 {
2602 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2603 DEFINE_COMPILER;
2604
2605 if (nltype == NLTYPE_ANY)
2606 {
2607 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2608 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2609 }
2610 else if (nltype == NLTYPE_ANYCRLF)
2611 {
2612 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2613 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2614 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2615 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2616 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2617 }
2618 else
2619 {
2620 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2621 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2622 }
2623 }
2624
2625 #ifdef SUPPORT_UTF
2626
2627 #if defined COMPILE_PCRE8
/* Emits the shared fast-call routine that decodes a multi-byte UTF-8
character. Besides the results named below, the routine advances STR_PTR by
the number of continuation bytes, leaving it on the last byte of the
sequence. No validity checking of the continuation bytes is performed. */
2628 static void do_utfreadchar(compiler_common *common)
2629 {
2630 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2631 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2
(expressed with IN_UCHARS). */
2632 DEFINE_COMPILER;
2633 struct sljit_jump *jump;
2634
2635 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2636 /* Searching for the first zero. */
/* Bit 0x20 clear in the lead byte: 110xxxxx, one continuation byte. */
2637 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2638 jump = JUMP(SLJIT_C_NOT_ZERO);
2639 /* Two byte sequence. */
2640 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2641 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2642 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2643 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2644 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2645 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2646 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2647 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2648 JUMPHERE(jump);
2649
/* Bit 0x10 clear in the lead byte: 1110xxxx, two continuation bytes. */
2650 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2651 jump = JUMP(SLJIT_C_NOT_ZERO);
2652 /* Three byte sequence. */
2653 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2654 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2655 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2656 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2657 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2658 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2659 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2660 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2661 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2662 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2663 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2665 JUMPHERE(jump);
2666
/* Every remaining lead byte is treated as the start of a four byte sequence. */
2667 /* Four byte sequence. */
2668 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2669 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2670 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2671 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2672 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2673 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2674 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2675 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2676 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2677 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2678 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2679 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2680 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2681 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2682 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2683 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2684 }
2685
/* Emits the shared fast-call routine that yields the ctypes entry of a
multi-byte UTF-8 character. It expects STR_PTR to point just after the lead
byte (the first continuation byte is read at offset 0) and leaves STR_PTR
after the whole character. */
2686 static void do_utfreadtype8(compiler_common *common)
2687 {
2688 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2689 of the character (>= 0xc0). Return value in TMP1. */
2690 DEFINE_COMPILER;
2691 struct sljit_jump *jump;
2692 struct sljit_jump *compare;
2693
2694 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2695
/* Only two byte sequences (bit 0x20 of the lead byte clear) can encode a
value below 256, so only those are decoded. */
2696 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2697 jump = JUMP(SLJIT_C_NOT_ZERO);
2698 /* Two byte sequence. */
2699 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2700 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2701 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2702 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2703 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2704 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* Decoded value in TMP2: look up its type if it fits the table, else 0. */
2705 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2706 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2707 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2708
2709 JUMPHERE(compare);
2710 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2711 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2712 JUMPHERE(jump);
2713
2714 /* We only have types for characters less than 256. */
/* Longer sequences: skip the rest of the character using the value stored in
utf8_table4 for this lead byte (indexed by lead - 0xc0; presumably the number
of additional bytes - confirm against the table definition) and return 0. */
2715 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2716 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2717 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2718 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2719 }
2720
2721 #elif defined COMPILE_PCRE16
2722
2723 static void do_utfreadchar(compiler_common *common)
2724 {
2725 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2726 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2727 DEFINE_COMPILER;
2728 struct sljit_jump *jump;
2729
2730 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2731 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2732 /* Do nothing, only return. */
2733 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2734
2735 JUMPHERE(jump);
2736 /* Combine two 16 bit characters. */
2737 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2738 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2739 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2740 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2741 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2742 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2743 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2744 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2745 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2746 }
2747
2748 #endif /* COMPILE_PCRE[8|16] */
2749
2750 #endif /* SUPPORT_UTF */
2751
2752 #ifdef SUPPORT_UCP
2753
2754 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift that split a character into block index and in-block offset. */
2755 #define UCD_BLOCK_MASK 127
2756 #define UCD_BLOCK_SHIFT 7
2757
/* Emits the shared fast-call routine that performs the two stage Unicode
table lookup for a character and fetches the chartype field of its record. */
2758 static void do_getucd(compiler_common *common)
2759 {
2760 /* Searches the UCD record for the character that comes in TMP1.
2761 Returns chartype in TMP1 and UCD offset in TMP2. */
2762 DEFINE_COMPILER;
2763
2764 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2765
2766 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[c >> 7] (byte entries). */
2767 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2768 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (stage1 value << 7) + (c & 127): the index into ucd_stage2. */
2769 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2770 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2771 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[index]; entries are 16 bit, hence the index shift of 1. */
2772 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2773 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = ucd_records[TMP2].chartype; the shift of 3 matches the asserted
record size of 8 bytes. */
2774 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2775 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2776 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2777 }
2778 #endif
2779
/* Emits the entry code of the main matching loop and returns the label that
advances STR_PTR to the next start position. Emitted parts, in order:
  1. (firstline only) a scan that stores the end of the first line into the
     first_line_end local; STR_PTR is preserved through TMP3;
  2. an unconditional jump over the advance code, so the first attempt runs
     at the initial STR_PTR;
  3. (newlinecheck only) a helper that also steps over the second code unit
     of a two unit newline;
  4. the returned label followed by the code advancing STR_PTR by one
     character (UTF aware in the 8 and 16 bit libraries).
TMP1 is overwritten by the generated code; TMP2 and TMP3 are overwritten in
the firstline scan. */
2780 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2781 {
2782 DEFINE_COMPILER;
2783 struct sljit_label *mainloop;
2784 struct sljit_label *newlinelabel = NULL;
2785 struct sljit_jump *start;
2786 struct sljit_jump *end = NULL;
2787 struct sljit_jump *nl = NULL;
2788 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2789 struct sljit_jump *singlechar;
2790 #endif
2791 jump_list *newline = NULL;
2792 BOOL newlinecheck = FALSE;
2793 BOOL readuchar = FALSE;
2794
/* The two unit newline handling in the advance code is used only when
neither hascrorlf nor firstline is set. */
2795 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2796 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2797 newlinecheck = TRUE;
2798
2799 if (firstline)
2800 {
2801 /* Search for the end of the first line. */
2802 SLJIT_ASSERT(common->first_line_end != 0);
2803 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2804
2805 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2806 {
/* Fixed two unit newline: compare adjacent units against the high and low
byte of common->newline; first_line_end is STR_PTR minus one unit. */
2807 mainloop = LABEL();
2808 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2809 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2810 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2811 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2812 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2813 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2814 JUMPHERE(end);
2815 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2816 }
2817 else
2818 {
/* Other newline types: remember the position before each character, so that
first_line_end holds the start of the newline once one is found. */
2819 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2820 mainloop = LABEL();
2821 /* Continual stores do not cause data dependency. */
2822 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2823 read_char(common);
2824 check_newlinechar(common, common->nltype, &newline, TRUE);
2825 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2826 JUMPHERE(end);
/* No newline found: the first line ends where the scan stopped. */
2827 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2828 set_jumps(newline, LABEL());
2829 }
2830
2831 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2832 }
2833
/* Skips the advance code below on the initial entry. */
2834 start = JUMP(SLJIT_JUMP);
2835
2836 if (newlinecheck)
2837 {
/* Reached when the unit at STR_PTR equals the high byte of common->newline:
step over it and, unless the end is reached, also over the next unit if that
equals the low byte (TMP1 becomes 0 or the size of one unit). */
2838 newlinelabel = LABEL();
2839 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2840 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2841 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2842 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2843 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2844 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2845 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2846 #endif
2847 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2848 nl = JUMP(SLJIT_JUMP);
2849 }
2850
/* This is the label returned to the caller. */
2851 mainloop = LABEL();
2852
2853 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2854 #ifdef SUPPORT_UTF
2855 if (common->utf) readuchar = TRUE;
2856 #endif
2857 if (newlinecheck) readuchar = TRUE;
2858
/* The current unit is only loaded when one of the checks below needs it. */
2859 if (readuchar)
2860 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2861
2862 if (newlinecheck)
2863 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2864
2865 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2866 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2867 #if defined COMPILE_PCRE8
2868 if (common->utf)
2869 {
/* UTF-8: for lead bytes >= 0xc0 add the utf8_table4 entry of the lead byte
(indexed by lead - 0xc0) to STR_PTR. */
2870 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2871 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2872 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2873 JUMPHERE(singlechar);
2874 }
2875 #elif defined COMPILE_PCRE16
2876 if (common->utf)
2877 {
/* UTF-16: skip one more unit (2 bytes) when the unit is a high surrogate. */
2878 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2879 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2880 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2881 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2882 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2883 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2884 JUMPHERE(singlechar);
2885 }
2886 #endif /* COMPILE_PCRE[8|16] */
2887 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Landing point of the initial jump and of the newline helper exits. */
2888 JUMPHERE(start);
2889
2890 if (newlinecheck)
2891 {
2892 JUMPHERE(end);
2893 JUMPHERE(nl);
2894 }
2895
2896 return mainloop;
2897 }
2898
/* Maximum number of leading code units collected for the fast forward scan. */
2899 #define MAX_N_CHARS 3
2900
/* Tries to emit a fast forward loop that looks for the first two or three
literal code units of the pattern. Returns TRUE when the loop was emitted.
Returns FALSE, without emitting anything, when the pattern has alternatives,
when a caseless character and its other case differ in more than one bit, or
when fewer than two code units could be collected.

chars[] holds pairs: chars[2 * i] is the expected value of code unit i (with
the case bit already set) and chars[2 * i + 1] is the bit mask OR-ed into the
subject unit before comparing (0 for an exact comparison).

The generated code temporarily lowers STR_END so that all collected units can
be read safely, restores it afterwards, and leaves STR_PTR either at the
first position where all units match or at/after the lowered end. TMP1 and
TMP2 are overwritten; TMP3 is used to save STR_END when firstline is set. */
2901 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2902 {
2903 DEFINE_COMPILER;
2904 struct sljit_label *start;
2905 struct sljit_jump *quit;
2906 pcre_uint32 chars[MAX_N_CHARS * 2];
2907 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2908 int location = 0;
2909 pcre_int32 len, c, bit, caseless;
2910 int must_stop;
2911
2912 /* We do not support alternatives now. */
2913 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2914 return FALSE;
2915
/* Collect literal code units from the start of the pattern. must_stop:
0 = keep scanning after this character, 1 = take this character and stop,
2 = unsupported opcode, stop without taking anything. */
2916 while (TRUE)
2917 {
2918 caseless = 0;
2919 must_stop = 1;
2920 switch(*cc)
2921 {
2922 case OP_CHAR:
2923 must_stop = 0;
2924 cc++;
2925 break;
2926
2927 case OP_CHARI:
2928 caseless = 1;
2929 must_stop = 0;
2930 cc++;
2931 break;
2932
2933 case OP_SOD:
2934 case OP_SOM:
2935 case OP_SET_SOM:
2936 case OP_NOT_WORD_BOUNDARY:
2937 case OP_WORD_BOUNDARY:
2938 case OP_EODN:
2939 case OP_EOD:
2940 case OP_CIRC:
2941 case OP_CIRCM:
2942 case OP_DOLL:
2943 case OP_DOLLM:
2944 /* Zero width assertions. */
2945 cc++;
2946 continue;
2947
/* Repeats with a minimum of at least one: the repeated character is taken
once and scanning stops there (must_stop stays 1). */
2948 case OP_PLUS:
2949 case OP_MINPLUS:
2950 case OP_POSPLUS:
2951 cc++;
2952 break;
2953
2954 case OP_EXACT:
2955 cc += 1 + IMM2_SIZE;
2956 break;
2957
2958 case OP_PLUSI:
2959 case OP_MINPLUSI:
2960 case OP_POSPLUSI:
2961 caseless = 1;
2962 cc++;
2963 break;
2964
2965 case OP_EXACTI:
2966 caseless = 1;
2967 cc += 1 + IMM2_SIZE;
2968 break;
2969
2970 default:
2971 must_stop = 2;
2972 break;
2973 }
2974
2975 if (must_stop == 2)
2976 break;
2977
/* len = number of code units of the character at cc. */
2978 len = 1;
2979 #ifdef SUPPORT_UTF
2980 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2981 #endif
2982
2983 if (caseless && char_has_othercase(common, cc))
2984 {
2985 caseless = char_get_othercase_bit(common, cc);
2986 if (caseless == 0)
2987 return FALSE;
/* Re-encode the result: the low 8 bits become the value of len at which the
unit carrying the case bit is reached in the copy loop below; the upper bits
hold the mask to OR in (shifted into the high byte of the unit when the bit
lives there). */
2988 #ifdef COMPILE_PCRE8
2989 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2990 #else
2991 if ((caseless & 0x100) != 0)
2992 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2993 else
2994 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2995 #endif
2996 }
2997 else
2998 caseless = 0;
2999
/* Copy the code units of this character, up to the capacity of chars[]. */
3000 while (len > 0 && location < MAX_N_CHARS * 2)
3001 {
3002 c = *cc;
3003 bit = 0;
3004 if (len == (caseless & 0xff))
3005 {
3006 bit = caseless >> 8;
3007 c |= bit;
3008 }
3009
3010 chars[location] = c;
3011 chars[location + 1] = bit;
3012
3013 len--;
3014 location += 2;
3015 cc++;
3016 }
3017
3018 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3019 break;
3020 }
3021
3022 /* At least two characters are required. */
3023 if (location < 2 * 2)
3024 return FALSE;
3025
/* Lower STR_END by (number of units - 1), so that the loop below can read
every unit it compares without a further end check. */
3026 if (firstline)
3027 {
3028 SLJIT_ASSERT(common->first_line_end != 0);
3029 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3030 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3031 }
3032 else
3033 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3034
/* Scan loop. STR_PTR is advanced by one unit before the comparisons, so the
offsets below are relative to the already advanced pointer; any mismatch
restarts the loop at the next position. */
3035 start = LABEL();
3036 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3037
3038 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3039 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3040 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3041 if (chars[1] != 0)
3042 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3043 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* With three units collected, TMP1 is reused for the third one. */
3044 if (location > 2 * 2)
3045 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3046 if (chars[3] != 0)
3047 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3048 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3049 if (location > 2 * 2)
3050 {
3051 if (chars[5] != 0)
3052 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3053 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3054 }
/* All units matched: move STR_PTR back to the first matching unit. */
3055 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3056
3057 JUMPHERE(quit);
3058
/* Restore the real STR_END. */
3059 if (firstline)
3060 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3061 else
3062 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3063 return TRUE;
3064 }
3065
3066 #undef MAX_N_CHARS
3067
3068 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3069 {
3070 DEFINE_COMPILER;
3071 struct sljit_label *start;
3072 struct sljit_jump *quit;
3073 struct sljit_jump *found;
3074 pcre_uchar oc, bit;
3075
3076 if (firstline)
3077 {
3078 SLJIT_ASSERT(common->first_line_end != 0);
3079 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3080 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3081 }
3082
3083 start = LABEL();
3084 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3085 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3086
3087 oc = first_char;
3088 if (caseless)
3089 {
3090 oc = TABLE_GET(first_char, common->fcc, first_char);
3091 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3092 if (first_char > 127 && common->utf)
3093 oc = UCD_OTHERCASE(first_char);
3094 #endif
3095 }
3096 if (first_char == oc)
3097 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3098 else
3099 {
3100 bit = first_char ^ oc;
3101 if (is_powerof2(bit))
3102 {
3103 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3104 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3105 }
3106 else
3107 {
3108 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3109 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3110 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3111 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3112 found = JUMP(SLJIT_C_NOT_ZERO);
3113 }
3114 }
3115
3116 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3117 JUMPTO(SLJIT_JUMP, start);
3118 JUMPHERE(found);
3119 JUMPHERE(quit);
3120
3121 if (firstline)
3122 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3123 }
3124
3125 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3126 {
3127 DEFINE_COMPILER;
3128 struct sljit_label *loop;
3129 struct sljit_jump *lastchar;
3130 struct sljit_jump *firstchar;
3131 struct sljit_jump *quit;
3132 struct sljit_jump *foundcr = NULL;
3133 struct sljit_jump *notfoundnl;
3134 jump_list *newline = NULL;
3135
3136 if (firstline)
3137 {
3138 SLJIT_ASSERT(common->first_line_end != 0);
3139 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3140 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3141 }
3142
3143 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3144 {
3145 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3146 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3147 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3148 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3149 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3150
3151 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3152 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3153 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3154 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3155 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3156 #endif
3157 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3158
3159 loop = LABEL();
3160 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3161 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3162 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3163 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3164 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3165 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3166
3167 JUMPHERE(quit);
3168 JUMPHERE(firstchar);
3169 JUMPHERE(lastchar);
3170
3171 if (firstline)
3172 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3173 return;
3174 }
3175
3176 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3177 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3178 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3179 skip_char_back(common);
3180
3181 loop = LABEL();
3182 read_char(common);
3183 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3184 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3185 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3186 check_newlinechar(common, common->nltype, &newline, FALSE);
3187 set_jumps(newline, loop);
3188
3189 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3190 {
3191 quit = JUMP(SLJIT_JUMP);
3192 JUMPHERE(foundcr);
3193 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3194 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3195 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3196 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3197 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3198 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3199 #endif
3200 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3201 JUMPHERE(notfoundnl);
3202 JUMPHERE(quit);
3203 }
3204 JUMPHERE(lastchar);
3205 JUMPHERE(firstchar);
3206
3207 if (firstline)
3208 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3209 }
3210
3211 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3212
3213 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3214 {
3215 DEFINE_COMPILER;
3216 struct sljit_label *start;
3217 struct sljit_jump *quit;
3218 struct sljit_jump *found = NULL;
3219 jump_list *matches = NULL;
3220 pcre_uint8 inverted_start_bits[32];
3221 int i;
3222 #ifndef COMPILE_PCRE8
3223 struct sljit_jump *jump;
3224 #endif
3225
3226 for (i = 0; i < 32; ++i)
3227 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3228
3229 if (firstline)
3230 {
3231 SLJIT_ASSERT(common->first_line_end != 0);
3232 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3233 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3234 }
3235
3236 start = LABEL();
3237 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3238 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3239 #ifdef SUPPORT_UTF
3240 if (common->utf)
3241 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3242 #endif
3243
3244 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3245 {
3246 #ifndef COMPILE_PCRE8
3247 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3248 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3249 JUMPHERE(jump);
3250 #endif
3251 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3252 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3253 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3254 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3255 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3256 found = JUMP(SLJIT_C_NOT_ZERO);
3257 }
3258
3259 #ifdef SUPPORT_UTF
3260 if (common->utf)
3261 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3262 #endif
3263 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3264 #ifdef SUPPORT_UTF
3265 #if defined COMPILE_PCRE8
3266 if (common->utf)
3267 {
3268 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3269 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3270 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3271 }
3272 #elif defined COMPILE_PCRE16
3273 if (common->utf)
3274 {
3275 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3276 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3277 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3278 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3279 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3280 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3281 }
3282 #endif /* COMPILE_PCRE[8|16] */
3283 #endif /* SUPPORT_UTF */
3284 JUMPTO(SLJIT_JUMP, start);
3285 if (found != NULL)
3286 JUMPHERE(found);
3287 if (matches != NULL)
3288 set_jumps(matches, LABEL());
3289 JUMPHERE(quit);
3290
3291 if (firstline)
3292 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3293 }
3294
3295 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3296 {
3297 DEFINE_COMPILER;
3298 struct sljit_label *loop;
3299 struct sljit_jump *toolong;
3300 struct sljit_jump *alreadyfound;
3301 struct sljit_jump *found;
3302 struct sljit_jump *foundoc = NULL;
3303 struct sljit_jump *notfound;
3304 pcre_uint32 oc, bit;
3305
3306 SLJIT_ASSERT(common->req_char_ptr != 0);
3307 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3308 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3309 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3310 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3311
3312 if (has_firstchar)
3313 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3314 else
3315 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3316
3317 loop = LABEL();
3318 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3319
3320 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3321 oc = req_char;
3322 if (caseless)
3323 {
3324 oc = TABLE_GET(req_char, common->fcc, req_char);
3325 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3326 if (req_char > 127 && common->utf)
3327 oc = UCD_OTHERCASE(req_char);
3328 #endif
3329 }
3330 if (req_char == oc)
3331 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3332 else
3333 {
3334 bit = req_char ^ oc;
3335 if (is_powerof2(bit))
3336 {
3337 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3338 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3339 }
3340 else
3341 {
3342 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3343 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3344 }
3345 }
3346 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3347 JUMPTO(SLJIT_JUMP, loop);
3348
3349 JUMPHERE(found);
3350 if (foundoc)
3351 JUMPHERE(foundoc);
3352 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3353 JUMPHERE(alreadyfound);
3354 JUMPHERE(toolong);
3355 return notfound;
3356 }
3357
3358 static void do_revertframes(compiler_common *common)
3359 {
3360 DEFINE_COMPILER;
3361 struct sljit_jump *jump;
3362 struct sljit_label *mainloop;
3363
3364 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3365 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3366 GET_LOCAL_BASE(TMP3, 0, 0);
3367
3368 /* Drop frames until we reach STACK_TOP. */
3369 mainloop = LABEL();
3370 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3371 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3372 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3373
3374 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3375 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3376 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3377 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3378 JUMPTO(SLJIT_JUMP, mainloop);
3379
3380 JUMPHERE(jump);
3381 jump = JUMP(SLJIT_C_SIG_LESS);
3382 /* End of dropping frames. */
3383 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3384
3385 JUMPHERE(jump);
3386 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3387 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3388 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3389 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3390 JUMPTO(SLJIT_JUMP, mainloop);
3391 }
3392
/* Emits the fast-call helper for word boundary tests. The return address is
kept in the LOCALS0 slot. The helper computes a 0/1 "word character" value for
the character before STR_PTR (stored in LOCALS1) and for the character at
STR_PTR (kept in TMP2), then XORs them with SLJIT_SET_E, so the flags tell the
caller whether the two values differ. TMP1 and TMP2 are used as scratch. */
3393 static void check_wordboundary(compiler_common *common)
3394 {
3395 DEFINE_COMPILER;
3396 struct sljit_jump *skipread;
3397 jump_list *skipread_list = NULL;
3398 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3399 struct sljit_jump *jump;
3400 #endif
3401
/* The non-UCP paths below shift the ctype byte right by 4 to isolate this bit. */
3402 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3403
3404 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3405 /* Get type of the previous char, and put it to LOCALS1. LOCALS1 stays 0
when STR_PTR is not beyond the "begin" field of jit_arguments. */
3406 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3407 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3408 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3409 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3410 skip_char_back(common);
3411 check_start_used_ptr(common);
3412 read_char(common);
3413
3414 /* Testing char type. */
3415 #ifdef SUPPORT_UCP
3416 if (common->use_ucp)
3417 {
/* TMP2 = 1 for an underscore. Otherwise the getucd helper is called
(presumably leaving the character type in TMP1) and TMP2 is set when that
value lies in ucp_Ll..ucp_Lu or in ucp_Nd..ucp_No. */
3418 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3419 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3420 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3421 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3422 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3423 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3424 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3425 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3426 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3427 JUMPHERE(jump);
3428 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3429 }
3430 else
3431 #endif
3432 {
/* Table lookup: characters above 255 skip the lookup, leaving LOCALS1 at 0. */
3433 #ifndef COMPILE_PCRE8
3434 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3435 #elif defined SUPPORT_UTF
3436 /* Here LOCALS1 has already been zeroed. */
3437 jump = NULL;
3438 if (common->utf)
3439 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3440 #endif /* COMPILE_PCRE8 */
3441 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3442 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3443 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3444 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3445 #ifndef COMPILE_PCRE8
3446 JUMPHERE(jump);
3447 #elif defined SUPPORT_UTF
3448 if (jump != NULL)
3449 JUMPHERE(jump);
3450 #endif /* COMPILE_PCRE8 */
3451 }
3452 JUMPHERE(skipread);
3453
/* Now the character at STR_PTR; TMP2 stays 0 when a jump added by
check_str_end is taken. */
3454 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3455 check_str_end(common, &skipread_list);
3456 peek_char(common);
3457
3458 /* Testing char type. This is a code duplication. */
3459 #ifdef SUPPORT_UCP
3460 if (common->use_ucp)
3461 {
3462 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3463 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3464 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3465 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3466 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3467 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3468 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3469 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3470 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3471 JUMPHERE(jump);
3472 }
3473 else
3474 #endif
3475 {
3476 #ifndef COMPILE_PCRE8
3477 /* TMP2 may be destroyed by peek_char. */
3478 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3479 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3480 #elif defined SUPPORT_UTF
3481 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3482 jump = NULL;
3483 if (common->utf)
3484 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3485 #endif
3486 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3487 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3488 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3489 #ifndef COMPILE_PCRE8
3490 JUMPHERE(jump);
3491 #elif defined SUPPORT_UTF
3492 if (jump != NULL)
3493 JUMPHERE(jump);
3494 #endif /* COMPILE_PCRE8 */
3495 }
3496 set_jumps(skipread_list, LABEL());
3497
/* The result is non-zero exactly when the two 0/1 values differ. */
3498 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3499 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3500 }
3501
3502 /*
3503 range format:
3504
3505 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3506 ranges[1] = first bit (0 or 1)
3507 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3508 */
3509
3510 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3511 {
3512 DEFINE_COMPILER;
3513 struct sljit_jump *jump;
3514
3515 if (ranges[0] < 0)
3516 return FALSE;
3517
3518 switch(ranges[0])
3519 {
3520 case 1:
3521 if (readch)
3522 read_char(common);
3523 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3524 return TRUE;
3525
3526 case 2:
3527 if (readch)
3528 read_char(common);
3529 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3530 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3531 return TRUE;
3532
3533 case 4:
3534 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3535 {
3536 if (readch)
3537 read_char(common);
3538 if (ranges[1] != 0)
3539 {
3540 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3541 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3542 }
3543 else
3544 {
3545 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3546 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3547 JUMPHERE(jump);
3548 }
3549 return TRUE;
3550 }
3551 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3552 {
3553 if (readch)
3554 read_char(common);
3555 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3556 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3557 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3558 return TRUE;
3559 }
3560 return FALSE;
3561
3562 default:
3563 return FALSE;
3564 }
3565 }
3566
3567 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3568 {
3569 int i, bit, length;
3570 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3571
3572 bit = ctypes[0] & flag;
3573 ranges[0] = -1;
3574 ranges[1] = bit != 0 ? 1 : 0;
3575 length = 0;
3576
3577 for (i = 1; i < 256; i++)
3578 if ((ctypes[i] & flag) != bit)
3579 {
3580 if (length >= MAX_RANGE_SIZE)
3581 return;
3582 ranges[2 + length] = i;
3583 length++;
3584 bit ^= flag;
3585 }
3586
3587 if (bit != 0)
3588 {
3589 if (length >= MAX_RANGE_SIZE)
3590 return;
3591 ranges[2 + length] = 256;
3592 length++;
3593 }
3594 ranges[0] = length;
3595 }
3596
3597 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3598 {
3599 int ranges[2 + MAX_RANGE_SIZE];
3600 pcre_uint8 bit, cbit, all;
3601 int i, byte, length = 0;
3602
3603 bit = bits[0] & 0x1;
3604 ranges[1] = bit;
3605 /* Can be 0 or 255. */
3606 all = -bit;
3607
3608 for (i = 0; i < 256; )
3609 {
3610 byte = i >> 3;
3611 if ((i & 0x7) == 0 && bits[byte] == all)
3612 i += 8;
3613 else
3614 {
3615 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3616 if (cbit != bit)
3617 {
3618 if (length >= MAX_RANGE_SIZE)
3619 return FALSE;
3620 ranges[2 + length] = i;
3621 length++;
3622 bit = cbit;
3623 all = -cbit;
3624 }
3625 i++;
3626 }
3627 }
3628
3629 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3630 {
3631 if (length >= MAX_RANGE_SIZE)
3632 return FALSE;
3633 ranges[2 + length] = 256;
3634 length++;
3635 }
3636 ranges[0] = length;
3637
3638 return check_ranges(common, ranges, backtracks, FALSE);
3639 }
3640
/* Emits the fast-call helper that tests TMP1 against the newline characters
0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in the 16/32 bit builds and in 8 bit
UTF mode. The partial results are collected in TMP2; the last OP_FLAGS uses
SLJIT_SET_E, so the flags reflect whether TMP2 is non-zero. */
3641 static void check_anynewline(compiler_common *common)
3642 {
3643 /* Check whether TMP1 contains a newline character. TMP2 destroyed.
TMP1 is modified as well (0x0a is subtracted from it). */
3644 DEFINE_COMPILER;
3645
3646 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3647
/* TMP1 - 0x0a <= 3 (unsigned) covers 0x0a..0x0d. */
3648 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3649 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3650 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3651 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3652 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3653 #ifdef COMPILE_PCRE8
3654 if (common->utf)
3655 {
3656 #endif
/* Setting the lowest bit maps both 0x2028 and 0x2029 (minus 0x0a) to the
same value, so one comparison covers the pair. */
3657 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3658 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3659 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3660 #ifdef COMPILE_PCRE8
3661 }
3662 #endif
3663 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison and set the flags for the caller. */
3664 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3665 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3666 }
3667
/* Emits the fast-call helper that tests TMP1 against the horizontal space
characters 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000-0x200a, 0x202f,
0x205f and 0x3000 in the 16/32 bit builds and in 8 bit UTF mode. The partial
results are collected in TMP2; the last OP_FLAGS uses SLJIT_SET_E, so the
flags reflect whether TMP2 is non-zero. */
3668 static void check_hspace(compiler_common *common)
3669 {
3670 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed.
In the wide character path TMP1 is modified as well (0x2000 is subtracted). */
3671 DEFINE_COMPILER;
3672
3673 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3674
3675 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3676 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3677 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3678 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3679 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3680 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3681 #ifdef COMPILE_PCRE8
3682 if (common->utf)
3683 {
3684 #endif
3685 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3686 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3687 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3688 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3689 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 holds the character minus 0x2000. */
3690 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3691 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3692 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3693 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3694 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3695 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3696 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3697 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3698 #ifdef COMPILE_PCRE8
3699 }
3700 #endif
3701 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison and set the flags for the caller. */
3702 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3703
3704 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3705 }
3706
/* Emits the fast-call helper that tests TMP1 against the vertical space
characters 0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 in the 16/32 bit builds
and in 8 bit UTF mode. The partial results are collected in TMP2; the last
OP_FLAGS uses SLJIT_SET_E, so the flags reflect whether TMP2 is non-zero. */
3707 static void check_vspace(compiler_common *common)
3708 {
3709 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed.
TMP1 is modified as well (0x0a is subtracted from it). */
3710 DEFINE_COMPILER;
3711
3712 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3713
/* TMP1 - 0x0a <= 3 (unsigned) covers 0x0a..0x0d. */
3714 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3715 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3716 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3717 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3718 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3719 #ifdef COMPILE_PCRE8
3720 if (common->utf)
3721 {
3722 #endif
/* Setting the lowest bit maps both 0x2028 and 0x2029 (minus 0x0a) to the
same value, so one comparison covers the pair. */
3723 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3724 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3725 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3726 #ifdef COMPILE_PCRE8
3727 }
3728 #endif
3729 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison and set the flags for the caller. */
3730 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3731
3732 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3733 }
3734
3735 #define CHAR1 STR_END
3736 #define CHAR2 STACK_TOP
3737
3738 static void do_casefulcmp(compiler_common *common)
3739 {
3740 DEFINE_COMPILER;
3741 struct sljit_jump *jump;
3742 struct sljit_label *label;
3743
3744 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3745 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3746 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3747 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3748 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3749 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3750
3751 label = LABEL();
3752 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3753 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3754 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3755 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3756 JUMPTO(SLJIT_C_NOT_ZERO, label);
3757
3758 JUMPHERE(jump);
3759 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3760 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3761 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3762 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3763 }
3764
3765 #define LCC_TABLE STACK_LIMIT
3766
3767 static void do_caselesscmp(compiler_common *common)
3768 {
3769 DEFINE_COMPILER;
3770 struct sljit_jump *jump;
3771 struct sljit_label *label;
3772
3773 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3774 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3775
3776 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3777 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3778 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3779 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3780 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3781 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3782
3783 label = LABEL();
3784 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3785 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3786 #ifndef COMPILE_PCRE8
3787 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3788 #endif
3789 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3790 #ifndef COMPILE_PCRE8
3791 JUMPHERE(jump);
3792 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3793 #endif
3794 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3795 #ifndef COMPILE_PCRE8
3796 JUMPHERE(jump);
3797 #endif
3798 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3799 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3800 JUMPTO(SLJIT_C_NOT_ZERO, label);
3801
3802 JUMPHERE(jump);
3803 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3804 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3805 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3806 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3807 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3808 }
3809
3810 #undef LCC_TABLE
3811 #undef CHAR1
3812 #undef CHAR2
3813
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF strings, called from the generated code.

src1, end1  the first string
args        the second string starts at args->uchar_ptr and may not run
            beyond args->end

Returns the position reached in the second string when every character of
the first string has a caseless match, NULL on the first mismatch, and
(pcre_uchar*)1 when the second string ends before the first one does. */
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
/* This function would be ineffective to do in JIT level. */
pcre_uint32 left, right;
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *record;
const pcre_uint32 *set;

for (; src1 < end1; )
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(left, src1);
  GETCHARINC(right, src2);
  record = GET_UCD(right);

  /* Same character, or the simple other case of it. */
  if (left == right || left == right + record->other_case)
    continue;

  /* Otherwise look for it in the caseless set of the second character. The
  scan stops with a mismatch at the first entry greater than the character. */
  set = PRIV(ucd_caseless_sets) + record->caseset;
  while (left != *set)
    {
    if (left < *set)
      return NULL;
    set++;
    }
  }

return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
3846
/* Emits the compare code for one character of a literal sequence: the code
unit at cc, plus its extra units in UTF mode. The subject data is addressed at
negative offsets (-context->length) from STR_PTR, and every failed compare
adds a jump to "backtracks".

caseless  when TRUE and the character has another case, the differing bit is
          OR-ed into the loaded value before it is compared
context   running state shared by the calls for one sequence: remaining
          length, the register that receives the next load (sourcereg, -1
          before the first load) and, when unaligned reads are used, the
          units collected so far

Returns the pattern pointer advanced past the units that were consumed. */
3847 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3848 compare_context* context, jump_list **backtracks)
3849 {
3850 DEFINE_COMPILER;
3851 unsigned int othercasebit = 0;
3852 pcre_uchar *othercasechar = NULL;
3853 #ifdef SUPPORT_UTF
3854 int utflength;
3855 #endif
3856
3857 if (caseless && char_has_othercase(common, cc))
3858 {
3859 othercasebit = char_get_othercase_bit(common, cc);
3860 SLJIT_ASSERT(othercasebit);
3861 /* Extracting bit difference info. The upper part of the value selects the
code unit that carries the differing bit, the low byte is the bit itself. */
3862 #if defined COMPILE_PCRE8
3863 othercasechar = cc + (othercasebit >> 8);
3864 othercasebit &= 0xff;
3865 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3866 /* Note that this code only handles characters in the BMP. If there
3867 ever are characters outside the BMP whose othercase differs in only one
3868 bit from itself (there currently are none), this code will need to be
3869 revised for COMPILE_PCRE32. */
3870 othercasechar = cc + (othercasebit >> 9);
3871 if ((othercasebit & 0x100) != 0)
3872 othercasebit = (othercasebit & 0xff) << 8;
3873 else
3874 othercasebit &= 0xff;
3875 #endif /* COMPILE_PCRE[8|16|32] */
3876 }
3877
/* First call for this sequence: issue the initial load into TMP1 and make
TMP2 the target of the next load. */
3878 if (context->sourcereg == -1)
3879 {
3880 #if defined COMPILE_PCRE8
3881 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3882 if (context->length >= 4)
3883 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3884 else if (context->length >= 2)
3885 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3886 else
3887 #endif
3888 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3889 #elif defined COMPILE_PCRE16
3890 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3891 if (context->length >= 4)
3892 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3893 else
3894 #endif
3895 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3896 #elif defined COMPILE_PCRE32
3897 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3898 #endif /* COMPILE_PCRE[8|16|32] */
3899 context->sourcereg = TMP2;
3900 }
3901
3902 #ifdef SUPPORT_UTF
3903 utflength = 1;
3904 if (common->utf && HAS_EXTRALEN(*cc))
3905 utflength += GET_EXTRALEN(*cc);
3906
3907 do
3908 {
3909 #endif
3910
3911 context->length -= IN_UCHARS(1);
3912 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3913
3914 /* Unaligned read is supported. The units are collected in context->c
(expected value) and context->oc (bits to OR in), and compared together
once a full group is available. */
3915 if (othercasebit != 0 && othercasechar == cc)
3916 {
3917 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3918 context->oc.asuchars[context->ucharptr] = othercasebit;
3919 }
3920 else
3921 {
3922 context->c.asuchars[context->ucharptr] = *cc;
3923 context->oc.asuchars[context->ucharptr] = 0;
3924 }
3925 context->ucharptr++;
3926
3927 #if defined COMPILE_PCRE8
3928 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3929 #else
3930 if (context->ucharptr >= 2 || context->length == 0)
3931 #endif
3932 {
/* Start the load of the following group first, then switch to the register
that holds the data loaded earlier and compare it. */
3933 if (context->length >= 4)
3934 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3935 else if (context->length >= 2)
3936 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3937 #if defined COMPILE_PCRE8
3938 else if (context->length >= 1)
3939 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3940 #endif /* COMPILE_PCRE8 */
3941 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3942
3943 switch(context->ucharptr)
3944 {
3945 case 4 / sizeof(pcre_uchar):
3946 if (context->oc.asint != 0)
3947 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3948 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3949 break;
3950
3951 case 2 / sizeof(pcre_uchar):
3952 if (context->oc.asushort != 0)
3953 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3954 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3955 break;
3956
3957 #ifdef COMPILE_PCRE8
3958 case 1:
3959 if (context->oc.asbyte != 0)
3960 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3961 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3962 break;
3963 #endif
3964
3965 default:
3966 SLJIT_ASSERT_STOP();
3967 break;
3968 }
3969 context->ucharptr = 0;
3970 }
3971
3972 #else
3973
3974 /* Unaligned read is unsupported or in 32 bit mode. One unit is loaded and
compared at a time, alternating between TMP1 and TMP2. */
3975 if (context->length >= 1)
3976 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3977
3978 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3979
3980 if (othercasebit != 0 && othercasechar == cc)
3981 {
3982 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3983 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3984 }
3985 else
3986 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3987
3988 #endif
3989
3990 cc++;
3991 #ifdef SUPPORT_UTF
3992 utflength--;
3993 }
3994 while (utflength > 0);
3995 #endif
3996
3997 return cc;
3998 }
3999
4000 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4001
/* Rebases the value held in typereg: when (value) differs from the current
typeoffset, a SUB or an ADD of the difference is emitted on typereg, and
typeoffset is then set to (value). Expects typereg and typeoffset to be in
scope at the point of use. Note that the expansion is two statements (an if
block followed by an assignment), not a single one. */
4002 #define SET_TYPE_OFFSET(value) \
4003 if ((value) != typeoffset) \
4004 { \
4005 if ((value) > typeoffset) \
4006 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4007 else \
4008 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4009 } \
4010 typeoffset = (value);
4011
/* Rebases the character held in TMP1: when (value) differs from the current
charoffset, a SUB or an ADD of the difference is emitted on TMP1, and
charoffset is then set to (value). Expects charoffset to be in scope at the
point of use. Note that the expansion is two statements (an if block followed
by an assignment), not a single one. */
4012 #define SET_CHAR_OFFSET(value) \
4013 if ((value) != charoffset) \
4014 { \
4015 if ((value) > charoffset) \
4016 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
4017 else \
4018 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
4019 } \
4020 charoffset = (value);
4021
4022 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4023 {
4024 DEFINE_COMPILER;
4025 jump_list *found = NULL;
4026 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
4027 pcre_int32 c, charoffset;
4028 const pcre_uint32 *other_cases;
4029 struct sljit_jump *jump = NULL;
4030 pcre_uchar *ccbegin;
4031 int compares, invertcmp, numberofcmps;
4032 #ifdef SUPPORT_UCP
4033 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4034 BOOL charsaved = FALSE;
4035 int typereg = TMP1, scriptreg = TMP1;
4036 pcre_int32 typeoffset;
4037 #endif
4038
4039 /* Although SUPPORT_UTF must be defined, we are
4040 not necessary in utf mode even in 8 bit mode. */
4041 detect_partial_match(common, backtracks);
4042 read_char(common);
4043
4044 if ((*cc++ & XCL_MAP) != 0)
4045 {
4046 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4047 #ifndef COMPILE_PCRE8
4048 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4049 #elif defined SUPPORT_UTF
4050 if (common->utf)
4051 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4052 #endif
4053
4054 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
4055 {
4056 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4057 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4058 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4059 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4060 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4061 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4062 }
4063
4064 #ifndef COMPILE_PCRE8
4065 JUMPHERE(jump);
4066 #elif defined SUPPORT_UTF
4067 if (common->utf)
4068 JUMPHERE(jump);
4069 #endif
4070 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4071 #ifdef SUPPORT_UCP
4072 charsaved = TRUE;
4073 #endif
4074 cc += 32 / sizeof(pcre_uchar);
4075 }
4076
4077 /* Scanning the necessary info. */
4078 ccbegin = cc;
4079 compares = 0;
4080 while (*cc != XCL_END)
4081 {
4082 compares++;
4083 if (*cc == XCL_SINGLE)
4084 {
4085 cc += 2;
4086 #ifdef SUPPORT_UTF
4087 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4088 #endif
4089 #ifdef SUPPORT_UCP
4090 needschar = TRUE;
4091 #endif
4092 }
4093 else if (*cc == XCL_RANGE)
4094 {
4095 cc += 2;
4096 #ifdef SUPPORT_UTF
4097 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4098 #endif
4099 cc++;
4100 #ifdef SUPPORT_UTF
4101 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4102 #endif
4103 #ifdef SUPPORT_UCP
4104 needschar = TRUE;
4105 #endif
4106 }
4107 #ifdef SUPPORT_UCP
4108 else
4109 {
4110 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4111 cc++;
4112 switch(*cc)
4113 {
4114 case PT_ANY:
4115 break;
4116
4117 case PT_LAMP:
4118 case PT_GC:
4119 case PT_PC:
4120 case PT_ALNUM:
4121 needstype = TRUE;
4122 break;
4123
4124 case PT_SC:
4125 needsscript = TRUE;
4126 break;
4127
4128 case PT_SPACE:
4129 case PT_PXSPACE:
4130 case PT_WORD:
4131 needstype = TRUE;
4132 needschar = TRUE;
4133 break;
4134
4135 case PT_CLIST:
4136 case PT_UCNC:
4137 needschar = TRUE;
4138 break;
4139
4140 default:
4141 SLJIT_ASSERT_STOP();
4142 break;
4143 }
4144 cc += 2;
4145 }
4146 #endif
4147 }
4148
4149 #ifdef SUPPORT_UCP
4150 /* Simple register allocation. TMP1 is preferred if possible. */
4151 if (needstype || needsscript)
4152 {
4153 if (needschar && !charsaved)
4154 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4155 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4156 if (needschar)
4157 {
4158 if (needstype)
4159 {
4160 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4161 typereg = RETURN_ADDR;
4162 }
4163
4164 if (needsscript)
4165 scriptreg = TMP3;
4166 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4167 }
4168 else if (needstype && needsscript)
4169 scriptreg = TMP3;
4170 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4171
4172 if (needsscript)
4173 {
4174 if (scriptreg == TMP1)
4175 {
4176 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4177 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4178 }
4179 else
4180 {
4181 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4182 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4183 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4184 }
4185 }
4186 }
4187 #endif
4188
4189 /* Generating code. */
4190 cc = ccbegin;
4191 charoffset = 0;
4192 numberofcmps = 0;
4193 #ifdef SUPPORT_UCP
4194 typeoffset = 0;
4195 #endif
4196
4197 while (*cc != XCL_END)
4198 {
4199 compares--;
4200 invertcmp = (compares == 0 && list != backtracks);
4201 jump = NULL;
4202
4203 if (*cc == XCL_SINGLE)
4204 {
4205 cc ++;
4206 #ifdef SUPPORT_UTF
4207 if (common->utf)
4208 {
4209 GETCHARINC(c, cc);
4210 }
4211 else
4212 #endif
4213 c = *cc++;
4214
4215 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4216 {
4217 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4218 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4219 numberofcmps++;
4220 }
4221 else if (numberofcmps > 0)
4222 {
4223 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4224 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4225 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4226 numberofcmps = 0;
4227 }
4228 else
4229 {
4230 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4231 numberofcmps = 0;
4232 }
4233 }
4234 else if (*cc == XCL_RANGE)
4235 {
4236 cc ++;
4237 #ifdef SUPPORT_UTF
4238 if (common->utf)
4239 {
4240 GETCHARINC(c, cc);
4241 }
4242 else
4243 #endif
4244 c = *cc++;
4245 SET_CHAR_OFFSET(c);
4246 #ifdef SUPPORT_UTF
4247 if (common->utf)
4248 {
4249 GETCHARINC(c, cc);
4250 }
4251 else
4252 #endif
4253 c = *cc++;
4254 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4255 {
4256 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4257 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4258 numberofcmps++;
4259 }
4260 else if (numberofcmps > 0)
4261 {
4262 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4263 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4264 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4265 numberofcmps = 0;
4266 }
4267 else
4268 {
4269 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4270 numberofcmps = 0;
4271 }
4272 }
4273 #ifdef SUPPORT_UCP
4274 else
4275 {
4276 if (*cc == XCL_NOTPROP)
4277 invertcmp ^= 0x1;
4278 cc++;
4279 switch(*cc)
4280 {
4281 case PT_ANY:
4282 if (list != backtracks)
4283 {
4284 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4285 continue;
4286 }
4287 else if (cc[-1] == XCL_NOTPROP)
4288 continue;
4289 jump = JUMP(SLJIT_JUMP);
4290 break;
4291
4292 case PT_LAMP:
4293 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4294 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4295 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4296 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4297 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4298 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4299 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4300 break;
4301
4302 case PT_GC:
4303 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4304 SET_TYPE_OFFSET(c);
4305 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4306 break;
4307
4308 case PT_PC:
4309 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4310 break;
4311
4312 case PT_SC:
4313 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4314 break;
4315
4316 case PT_SPACE:
4317 case PT_PXSPACE:
4318 if (*cc == PT_SPACE)
4319 {
4320 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4321 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4322 }
4323 SET_CHAR_OFFSET(9);
4324 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4325 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4326 if (*cc == PT_SPACE)
4327 JUMPHERE(jump);
4328
4329 SET_TYPE_OFFSET(ucp_Zl);
4330 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4331 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4332 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4333 break;
4334
4335 case PT_WORD:
4336 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4337 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4338 /* Fall through. */
4339
4340 case PT_ALNUM:
4341 SET_TYPE_OFFSET(ucp_Ll);
4342 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4343 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4344 SET_TYPE_OFFSET(ucp_Nd);
4345 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4346 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4347 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4348 break;
4349
4350 case PT_CLIST:
4351 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4352
4353 /* At least three characters are required.
4354 Otherwise this case would be handled by the normal code path. */
4355 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4356 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4357
4358 /* Optimizing character pairs, if their difference is power of 2. */
4359 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4360 {
4361 if (charoffset == 0)
4362 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4363 else
4364 {
4365 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4366 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4367 }
4368 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4369 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4370 other_cases += 2;
4371 }
4372 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4373 {
4374 if (charoffset == 0)
4375 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4376 else
4377 {
4378 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4379 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4380 }
4381 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4382 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4383
4384 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4385 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4386
4387 other_cases += 3;
4388 }
4389 else
4390 {
4391 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4392 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4393 }
4394
4395 while (*other_cases != NOTACHAR)
4396 {
4397 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4398 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4399 }
4400 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4401 break;
4402
4403 case PT_UCNC:
4404 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4405 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4406 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4407 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4408 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4409 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4410
4411 SET_CHAR_OFFSET(0xa0);
4412 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4413 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4414 SET_CHAR_OFFSET(0);
4415 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4416 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4417 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4418 break;
4419 }
4420 cc += 2;
4421 }
4422 #endif
4423
4424 if (jump != NULL)
4425 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4426 }
4427
4428 if (found != NULL)
4429 set_jumps(found, LABEL());
4430 }
4431
4432 #undef SET_TYPE_OFFSET
4433 #undef SET_CHAR_OFFSET
4434
4435 #endif
4436
/* Emits the matching-path code for one "simple" opcode: start/end anchors,
   character-type escapes, single literal characters, character classes and
   OP_REVERSE.

   type        the opcode to compile.
   cc          points at the operand data of the opcode (for instance the
               literal of OP_CHAR or the 32 byte bitmap of OP_CLASS).
   backtracks  list that receives every jump taken when the match fails.

   Returns cc advanced past the operands consumed by this opcode; opcodes
   without operands return cc unchanged. An opcode that is not handled by the
   switch ends in SLJIT_ASSERT_STOP(). */
4437 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4438 {
4439 DEFINE_COMPILER;
4440 int length;
4441 unsigned int c, oc, bit;
4442 compare_context context;
4443 struct sljit_jump *jump[4];
4444 jump_list *end_list;
4445 #ifdef SUPPORT_UTF
4446 struct sljit_label *label;
4447 #ifdef SUPPORT_UCP
4448 pcre_uchar propdata[5];
4449 #endif
4450 #endif
4451
4452 switch(type)
4453 {
/* Fails unless STR_PTR equals jit_arguments.begin. */
4454 case OP_SOD:
4455 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4456 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4457 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4458 return cc;
4459
/* Fails unless STR_PTR equals jit_arguments.str. */
4460 case OP_SOM:
4461 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4462 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4463 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4464 return cc;
4465
/* The shared wordboundary helper is called and its flag result decides:
   "not zero" rejects OP_NOT_WORD_BOUNDARY, "zero" rejects OP_WORD_BOUNDARY. */
4466 case OP_NOT_WORD_BOUNDARY:
4467 case OP_WORD_BOUNDARY:
4468 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4469 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4470 return cc;
4471
4472 case OP_NOT_DIGIT:
4473 case OP_DIGIT:
4474 /* Digits are usually 0-9, so it is worth optimizing them with range checks.
   The range list is computed on first use (digits[0] == -2 marks it as
   not yet computed). */
4475 if (common->digits[0] == -2)
4476 get_ctype_ranges(common, ctype_digit, common->digits);
4477 detect_partial_match(common, backtracks);
4478 /* Flip the starting bit in the negative case. */
4479 if (type == OP_NOT_DIGIT)
4480 common->digits[1] ^= 1;
/* Fall back to the ctype table test when check_ranges() emits nothing. */
4481 if (!check_ranges(common, common->digits, backtracks, TRUE))
4482 {
4483 read_char8_type(common);
4484 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4485 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4486 }
/* Restore the cached range list for later users. */
4487 if (type == OP_NOT_DIGIT)
4488 common->digits[1] ^= 1;
4489 return cc;
4490
/* Tests the ctype_space bit of the character type read by read_char8_type(). */
4491 case OP_NOT_WHITESPACE:
4492 case OP_WHITESPACE:
4493 detect_partial_match(common, backtracks);
4494 read_char8_type(common);
4495 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4496 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4497 return cc;
4498
/* Tests the ctype_word bit of the character type read by read_char8_type(). */
4499 case OP_NOT_WORDCHAR:
4500 case OP_WORDCHAR:
4501 detect_partial_match(common, backtracks);
4502 read_char8_type(common);
4503 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4504 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4505 return cc;
4506
/* Reads one character and fails if it starts a newline. A fixed newline
   with a value above 255 is treated as a two unit sequence: the character
   just read is compared with the high byte and the next unit with the low
   byte. Running out of input after the first unit is not a failure. */
4507 case OP_ANY:
4508 detect_partial_match(common, backtracks);
4509 read_char(common);
4510 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4511 {
4512 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4513 end_list = NULL;
4514 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4515 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4516 else
4517 check_str_end(common, &end_list);
4518
4519 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4520 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4521 set_jumps(end_list, LABEL());
4522 JUMPHERE(jump[0]);
4523 }
4524 else
4525 check_newlinechar(common, common->nltype, backtracks, TRUE);
4526 return cc;
4527
/* Skips one character without looking at its value. In UTF mode the first
   unit is loaded only to find out how far STR_PTR has to be advanced. */
4528 case OP_ALLANY:
4529 detect_partial_match(common, backtracks);
4530 #ifdef SUPPORT_UTF
4531 if (common->utf)
4532 {
4533 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4534 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4535 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4536 #if defined COMPILE_PCRE8
/* First bytes of 0xc0 and above index utf8_table4; the loaded value is
   added to STR_PTR. */
4537 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4538 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4539 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4540 #elif defined COMPILE_PCRE16
/* (unit & 0xfc00) == 0xd800 yields 1, which is shifted to 2 and added to
   STR_PTR; any other unit adds 0. */
4541 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4542 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4543 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4544 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4545 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4546 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4547 #endif
4548 JUMPHERE(jump[0]);
4549 #endif /* COMPILE_PCRE[8|16] */
4550 return cc;
4551 }
4552 #endif
4553 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4554 return cc;
4555
/* Always advances by exactly one code unit. */
4556 case OP_ANYBYTE:
4557 detect_partial_match(common, backtracks);
4558 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4559 return cc;
4560
4561 #ifdef SUPPORT_UTF
4562 #ifdef SUPPORT_UCP
/* A single property test is compiled as a one item extended class:
   flags (0), XCL_PROP / XCL_NOTPROP, the two operand units, XCL_END. */
4563 case OP_NOTPROP:
4564 case OP_PROP:
4565 propdata[0] = 0;
4566 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4567 propdata[2] = cc[0];
4568 propdata[3] = cc[1];
4569 propdata[4] = XCL_END;
4570 compile_xclass_matchingpath(common, propdata, backtracks);
4571 return cc + 2;
4572 #endif
4573 #endif
4574
/* A CR followed by NL is consumed as one unit; a CR alone (or at the end
   of the subject) is accepted as well. Every other character is checked
   with check_newlinechar() against bsr_nltype. */
4575 case OP_ANYNL:
4576 detect_partial_match(common, backtracks);
4577 read_char(common);
4578 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4579 /* We don't need to handle soft partial matching case. */
4580 end_list = NULL;
4581 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4582 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4583 else
4584 check_str_end(common, &end_list);
4585 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4586 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4587 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4588 jump[2] = JUMP(SLJIT_JUMP);
4589 JUMPHERE(jump[0]);
4590 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4591 set_jumps(end_list, LABEL());
4592 JUMPHERE(jump[1]);
4593 JUMPHERE(jump[2]);
4594 return cc;
4595
/* The shared hspace helper is called on the character that was read;
   its flag result is used like in the word boundary case. */
4596 case OP_NOT_HSPACE:
4597 case OP_HSPACE:
4598 detect_partial_match(common, backtracks);
4599 read_char(common);
4600 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4601 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4602 return cc;
4603
/* Same scheme as above, using the shared vspace helper. */
4604 case OP_NOT_VSPACE:
4605 case OP_VSPACE:
4606 detect_partial_match(common, backtracks);
4607 read_char(common);
4608 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4609 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4610 return cc;
4611
4612 #ifdef SUPPORT_UCP
/* Consumes one character and then keeps consuming characters for as long
   as ucp_gbtable[previous gbprop] has the bit of the next character's
   gbprop set. STACK_TOP is borrowed to hold the previous gbprop; its value
   is parked in LOCALS0 and restored afterwards. */
4613 case OP_EXTUNI:
4614 detect_partial_match(common, backtracks);
4615 read_char(common);
4616 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4617 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4618 /* Optimize register allocation: use a real register. */
4619 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4620 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4621
4622 label = LABEL();
4623 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* Remember the position so a rejected character can be given back. */
4624 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4625 read_char(common);
4626 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4627 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4628 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4629
/* TMP1 = ucp_gbtable[previous gbprop] (32 bit entries, hence the shift
   by 2); the new gbprop becomes the "previous" one for the next round. */
4630 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4631 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4632 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4633 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4634 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4635 JUMPTO(SLJIT_C_NOT_ZERO, label);
4636
4637 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4638 JUMPHERE(jump[0]);
4639 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4640
4641 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4642 {
4643 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4644 /* Since we successfully read a char above, partial matching must occur. */
4645 check_partial(common, TRUE);
4646 JUMPHERE(jump[0]);
4647 }
4648 return cc;
4649 #endif
4650
/* Succeeds at the end of the subject (jump[0]) or when only a newline is
   left before the end. Ends with check_partial(common, FALSE). */
4651 case OP_EODN:
4652 /* Requires rather complex checks. */
4653 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4654 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4655 {
/* Two unit fixed newline: exactly two units must remain and both must
   match the high and low byte of common->newline. */
4656 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4657 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4658 if (common->mode == JIT_COMPILE)
4659 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4660 else
4661 {
/* Partial modes: with fewer than two units left and the first one equal
   to the first newline unit, check_partial() is called before
   backtracking; every other mismatch backtracks directly. */
4662 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4663 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4664 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4665 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4666 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4667 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4668 check_partial(common, TRUE);
4669 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4670 JUMPHERE(jump[1]);
4671 }
4672 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4673 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4674 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4675 }
4676 else if (common->nltype == NLTYPE_FIXED)
4677 {
/* One unit fixed newline: it must be the very last unit. */
4678 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4679 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4680 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4681 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4682 }
4683 else
4684 {
/* Non-fixed newline types: a leading CR is handled first. A CR that is
   the last unit succeeds (jump[2]); with two units left the second must
   be NL (jump[3]); with more units left the match fails. */
4685 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4686 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4687 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4688 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4689 jump[2] = JUMP(SLJIT_C_GREATER);
4690 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4691 /* Equal: exactly two code units remain. */
4692 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4693 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4694 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4695
4696 JUMPHERE(jump[1]);
4697 if (common->nltype == NLTYPE_ANYCRLF)
4698 {
/* Not CR: the only remaining unit must be NL. */
4699 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4700 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4701 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4702 }
4703 else
4704 {
/* Read one whole character; it must end exactly at STR_END and be
   accepted by the anynewline helper. STR_PTR is saved in LOCALS1 around
   the read and restored afterwards. */
4705 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4706 read_char(common);
4707 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4708 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4709 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4710 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4711 }
4712 JUMPHERE(jump[2]);
4713 JUMPHERE(jump[3]);
4714 }
4715 JUMPHERE(jump[0]);
4716 check_partial(common, FALSE);
4717 return cc;
4718
/* Fails while any input is left. */
4719 case OP_EOD:
4720 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4721 check_partial(common, FALSE);
4722 return cc;
4723
/* Fails if STR_PTR is past jit_arguments.begin or if the notbol flag of
   the arguments is non-zero. */
4724 case OP_CIRC:
4725 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4726 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4727 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4728 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4729 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4730 return cc;
4731
/* At jit_arguments.begin only the notbol flag is checked. Anywhere else
   the position must not be the end of the subject and must be preceded by
   a newline. */
4732 case OP_CIRCM:
4733 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4734 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4735 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4736 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4737 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4738 jump[0] = JUMP(SLJIT_JUMP);
4739 JUMPHERE(jump[1]);
4740
4741 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4742 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4743 {
/* Two unit newline: both preceding units must exist (not before begin)
   and match. */
4744 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4745 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4746 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4747 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4748 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4749 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4750 }
4751 else
4752 {
/* Step back one character and read it again, so STR_PTR ends up where
   it started and TMP1 holds the previous character. */
4753 skip_char_back(common);
4754 read_char(common);
4755 check_newlinechar(common, common->nltype, backtracks, FALSE);
4756 }
4757 JUMPHERE(jump[0]);
4758 return cc;
4759
/* Fails when the noteol flag is non-zero. Otherwise behaves like OP_EODN,
   or like a plain end-of-subject test when common->endonly is set. */
4760 case OP_DOLL:
4761 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4762 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4763 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4764
4765 if (!common->endonly)
4766 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4767 else
4768 {
4769 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4770 check_partial(common, FALSE);
4771 }
4772 return cc;
4773
/* At the end of the subject only the noteol flag is checked. Anywhere else
   the following character(s) must form a newline; nothing is consumed. */
4774 case OP_DOLLM:
4775 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4776 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4777 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4778 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4779 check_partial(common, FALSE);
4780 jump[0] = JUMP(SLJIT_JUMP);
4781 JUMPHERE(jump[1]);
4782
4783 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4784 {
4785 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4786 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4787 if (common->mode == JIT_COMPILE)
4788 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4789 else
4790 {
/* Partial modes: only one unit is left here. If it equals the first
   newline unit, check_partial() is called before backtracking. */
4791 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4792 /* STR_PTR = STR_END - IN_UCHARS(1) */
4793 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4794 check_partial(common, TRUE);
4795 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4796 JUMPHERE(jump[1]);
4797 }
4798
4799 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4800 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4801 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4802 }
4803 else
4804 {
4805 peek_char(common);
4806 check_newlinechar(common, common->nltype, backtracks, FALSE);
4807 }
4808 JUMPHERE(jump[0]);
4809 return cc;
4810
/* One literal character. length is the number of code units it occupies
   in the pattern. */
4811 case OP_CHAR:
4812 case OP_CHARI:
4813 length = 1;
4814 #ifdef SUPPORT_UTF
4815 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4816 #endif
/* Fast path (non-partial mode only): advance STR_PTR first, check the
   end once, then let byte_sequence_compare() compare the raw code units.
   It is usable for a caseless character only when it has no other case or
   char_get_othercase_bit() returns a non-zero value. */
4817 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4818 {
4819 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4820 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4821
4822 context.length = IN_UCHARS(length);
4823 context.sourcereg = -1;
4824 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4825 context.ucharptr = 0;
4826 #endif
4827 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4828 }
/* Generic path: read the subject character and compare its value. */
4829 detect_partial_match(common, backtracks);
4830 read_char(common);
4831 #ifdef SUPPORT_UTF
4832 if (common->utf)
4833 {
4834 GETCHAR(c, cc);
4835 }
4836 else
4837 #endif
4838 c = *cc;
4839 if (type == OP_CHAR || !char_has_othercase(common, cc))
4840 {
4841 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4842 return cc + length;
4843 }
4844 oc = char_othercase(common, c);
4845 bit = c ^ oc;
/* When both cases differ in a single bit, one OR plus one compare tests
   both of them. */
4846 if (is_powerof2(bit))
4847 {
4848 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4849 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4850 return cc + length;
4851 }
/* Otherwise: TMP2 = (ch == c) | (ch == oc); fail when it is zero. */
4852 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4853 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4854 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4855 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4856 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4857 return cc + length;
4858
/* Any single character except the given literal (and, for OP_NOTI, its
   other case). */
4859 case OP_NOT:
4860 case OP_NOTI:
4861 detect_partial_match(common, backtracks);
4862 length = 1;
4863 #ifdef SUPPORT_UTF
4864 if (common->utf)
4865 {
4866 #ifdef COMPILE_PCRE8
4867 c = *cc;
/* For a pattern character below 128 only the first subject byte is
   compared, and the rest of the subject character is skipped without
   being decoded. */
4868 if (c < 128)
4869 {
4870 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4871 if (type == OP_NOT || !char_has_othercase(common, cc))
4872 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4873 else
4874 {
4875 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4876 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4877 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4878 }
4879 /* Skip the variable-length character. */
4880 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4881 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4882 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4883 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4884 JUMPHERE(jump[0]);
4885 return cc + 1;
4886 }
4887 else
4888 #endif /* COMPILE_PCRE8 */
4889 {
4890 GETCHARLEN(c, cc, length);
4891 read_char(common);
4892 }
4893 }
4894 else
4895 #endif /* SUPPORT_UTF */
4896 {
4897 read_char(common);
4898 c = *cc;
4899 }
4900
/* TMP1 holds the subject character: fail when it equals c (or oc). */
4901 if (type == OP_NOT || !char_has_othercase(common, cc))
4902 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4903 else
4904 {
4905 oc = char_othercase(common, c);
4906 bit = c ^ oc;
4907 if (is_powerof2(bit))
4908 {
4909 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4910 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4911 }
4912 else
4913 {
4914 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4915 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4916 }
4917 }
4918 return cc + length;
4919
/* Bitmap class: cc points at a 32 byte bitmap. check_class_ranges() may
   emit the whole test by itself; otherwise a generic bit test follows. */
4920 case OP_CLASS:
4921 case OP_NCLASS:
4922 detect_partial_match(common, backtracks);
4923 read_char(common);
4924 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4925 return cc + 32 / sizeof(pcre_uchar);
4926
4927 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4928 jump[0] = NULL;
4929 #ifdef COMPILE_PCRE8
4930 /* This check only affects 8 bit mode. In other modes, we
4931 always need to compare the value with 255. */
4932 if (common->utf)
4933 #endif /* COMPILE_PCRE8 */
4934 {
/* Characters above 255 have no bit in the map: OP_CLASS rejects them,
   OP_NCLASS accepts them by jumping over the bit test. */
4935 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4936 if (type == OP_CLASS)
4937 {
4938 add_jump(compiler, backtracks, jump[0]);
4939 jump[0] = NULL;
4940 }
4941 }
4942 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Bit test: byte index is ch >> 3, bit index is ch & 7. */
4943 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4944 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4945 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4946 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4947 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4948 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4949 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4950 if (jump[0] != NULL)
4951 JUMPHERE(jump[0]);
4952 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4953 return cc + 32 / sizeof(pcre_uchar);
4954
4955 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended class: the class data starts after the LINK_SIZE length field,
   whose value (read with GET) gives the distance to the next opcode. */
4956 case OP_XCLASS:
4957 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4958 return cc + GET(cc, 0) - 1;
4959 #endif
4960
/* Moves STR_PTR back by the count stored after the opcode; fails if that
   would go before jit_arguments.begin. */
4961 case OP_REVERSE:
4962 length = GET(cc, 0);
4963 if (length == 0)
4964 return cc + LINK_SIZE;
4965 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4966 #ifdef SUPPORT_UTF
4967 if (common->utf)
4968 {
/* UTF mode: step back one whole character at a time, length times. */
4969 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4970 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4971 label = LABEL();
4972 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4973 skip_char_back(common);
4974 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4975 JUMPTO(SLJIT_C_NOT_ZERO, label);
4976 }
4977 else
4978 #endif
4979 {
/* Fixed width units: a single subtraction is enough. */
4980 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4981 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4982 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4983 }
4984 check_start_used_ptr(common);
4985 return cc + LINK_SIZE;
4986 }
/* Reached only for an opcode that has no case above. */
4987 SLJIT_ASSERT_STOP();
4988 return cc;
4989 }
4990
/* Emits the matching path for literal character items starting at cc.
   A run of consecutive OP_CHAR / OP_CHARI items (scanning stops at ccend) is
   measured first. If the run has a non-zero length, a single STR_END bound
   check is emitted for the whole run and byte_sequence_compare() generates
   the comparisons; otherwise the first item is compiled on its own by
   compile_char1_matchingpath(). Failure jumps are added to *backtracks.
   Returns the pattern pointer handed back by whichever helper did the work. */
4991 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4992 {
4993 /* This function consumes at least one input character. */
4994 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4995 DEFINE_COMPILER;
4996 pcre_uchar *ccbegin = cc;
4997 compare_context context;
4998 int size;
4999
/* Pass 1: measure how many pattern items can be compared as one block.
   "size" is the number of code units of the current character, or 0 when
   the item cannot be appended to the run. */
5000 context.length = 0;
5001 do
5002 {
5003 if (cc >= ccend)
5004 break;
5005
5006 if (*cc == OP_CHAR)
5007 {
5008 size = 1;
5009 #ifdef SUPPORT_UTF
5010 if (common->utf && HAS_EXTRALEN(cc[1]))
5011 size += GET_EXTRALEN(cc[1]);
5012 #endif
5013 }
5014 else if (*cc == OP_CHARI)
5015 {
5016 size = 1;
5017 #ifdef SUPPORT_UTF
5018 if (common->utf)
5019 {
/* A caseless character that has another case but yields a zero othercase
   bit ends the run. NOTE(review): presumably a zero result means the two
   cases cannot be matched with a single-bit mask - confirm against
   char_get_othercase_bit(). */
5020 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5021 size = 0;
5022 else if (HAS_EXTRALEN(cc[1]))
5023 size += GET_EXTRALEN(cc[1]);
5024 }
5025 else
5026 #endif
5027 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5028 size = 0;
5029 }
5030 else
5031 size = 0;
5032
5033 cc += 1 + size;
5034 context.length += IN_UCHARS(size);
5035 }
/* Stop at the first item that cannot be appended, or once context.length
   has grown beyond 128. */
5036 while (size > 0 && context.length <= 128);
5037
/* Pass 2: rewind to the first item and emit code for the measured run. */
5038 cc = ccbegin;
5039 if (context.length > 0)
5040 {
5041 /* We have a fixed-length byte sequence. */
/* Advance STR_PTR over the whole run first, so that one comparison against
   STR_END covers every character of the run. */
5042 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5043 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5044
5045 context.sourcereg = -1;
5046 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5047 context.ucharptr = 0;
5048 #endif
/* One call per pattern item; repeat until context.length has dropped to zero. */
5049 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5050 return cc;
5051 }
5052
5053 /* A non-fixed length character will be checked if length == 0. */
5054 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5055 }
5056
/* Emits the preliminary checks for the back reference at cc and returns a
   jump for the caller to resolve.
   The two OVECTOR slots used are selected by the 2-unit operand at cc + 1
   (presumably the capture group number - TODO confirm), doubled.
   - jscript_compat set: only the returned jump is emitted; it is taken when
     OVECTOR(offset) equals OVECTOR(offset + 1).
   - otherwise, backtracks != NULL: a jump is added to *backtracks for the
     case OVECTOR(offset) == OVECTOR(1), then the same jump as above is
     returned.
   - otherwise, backtracks == NULL: both comparisons are folded into one flag
     and the returned jump is taken when either of them holds.
   TMP1 holds OVECTOR(offset) on exit; TMP2 is overwritten in the
   backtracks == NULL case. */
5057 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5058 {
5059 DEFINE_COMPILER;
5060 int offset = GET2(cc, 1) << 1;
5061
5062 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5063 if (!common->jscript_compat)
5064 {
5065 if (backtracks == NULL)
5066 {
5067 /* OVECTOR(1) contains the "string begin - 1" constant. */
/* TMP2 = (TMP1 == OVECTOR(1)) | (TMP1 == OVECTOR(offset + 1)); the final OR
   also sets the flags tested by the returned jump. */
5068 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5069 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5070 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5071 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5072 return JUMP(SLJIT_C_NOT_ZERO);
5073 }
5074 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5075 }
5076 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5077 }
5078
5079 /* Forward definitions. */
5080 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5081 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5082
/* Allocates a zero-filled backtrack record of "size" bytes from the compiler,
   links it in front of parent->top and stores "ccstart" in its cc field.
   The expansion relies on the variables "compiler", "backtrack" and "parent"
   being in scope at the point of use. If the compiler reports an error after
   the allocation, the ENCLOSING function returns "error" immediately. */
5083 #define PUSH_BACKTRACK(size, ccstart, error) \
5084 do \
5085 { \
5086 backtrack = sljit_alloc_memory(compiler, (size)); \
5087 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5088 return error; \
5089 memset(backtrack, 0, size); \
5090 backtrack->prev = parent->top; \
5091 backtrack->cc = (ccstart); \
5092 parent->top = backtrack; \
5093 } \
5094 while (0)
5095
/* Variant of PUSH_BACKTRACK for functions returning void: performs the same
   allocation, zero-fill and linking through "compiler", "backtrack" and
   "parent", but on a compiler error the enclosing function returns without
   a value. */
5096 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5097 do \
5098 { \
5099 backtrack = sljit_alloc_memory(compiler, (size)); \
5100 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5101 return; \
5102 memset(backtrack, 0, size); \
5103 backtrack->prev = parent->top; \
5104 backtrack->cc = (ccstart); \
5105 parent->top = backtrack; \
5106 } \
5107 while (0)
5108
/* Views the in-scope "backtrack" pointer as a pointer to the given record type. */
5109 #define BACKTRACK_AS(type) ((type *)backtrack)
5110
/* Emits the matching path for a single occurrence of the back reference at
   cc (OP_REF, or OP_REFI for the caseless form).
   backtracks: list receiving the jumps taken when the match fails.
   withchecks: when TRUE, also emit the OVECTOR(1) test on the group start
               (skipped if jscript_compat is set) and a test for an empty
               captured string.
   emptyfail:  only meaningful with withchecks; TRUE turns the "empty
               captured string" jump into a backtrack, FALSE makes it land
               right after the emitted code.
   Returns cc + 1 + IMM2_SIZE. */
5111 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5112 {
5113 DEFINE_COMPILER;
5114 int offset = GET2(cc, 1) << 1;
5115 struct sljit_jump *jump = NULL;
5116 struct sljit_jump *partial;
5117 struct sljit_jump *nopartial;
5118
5119 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5120 /* OVECTOR(1) contains the "string begin - 1" constant. */
5121 if (withchecks && !common->jscript_compat)
5122 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5123
5124 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5125 if (common->utf && *cc == OP_REFI)
5126 {
/* Caseless comparison in UTF mode is delegated to the C helper
   do_utf_caselesscmp(). The assert below pins the register assignment the
   three-argument call depends on. */
5127 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5128 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5129 if (withchecks)
5130 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5131
5132 /* Needed to save important temporary registers. */
5133 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5134 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5135 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5136 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5137 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are treated as failures; in the partial matching
   modes a result of 1 additionally goes through check_partial(). Any other
   result becomes the new STR_PTR. */
5138 if (common->mode == JIT_COMPILE)
5139 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5140 else
5141 {
5142 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5143 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5144 check_partial(common, FALSE);
5145 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5146 JUMPHERE(nopartial);
5147 }
5148 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5149 }
5150 else
5151 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5152 {
/* TMP2 = OVECTOR(offset + 1) - OVECTOR(offset); the flags of this
   subtraction feed the "empty captured string" jump below. */
5153 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5154 if (withchecks)
5155 jump = JUMP(SLJIT_C_ZERO);
5156
/* Advance STR_PTR by that length; running past STR_END is a plain failure
   in JIT_COMPILE mode and is handled separately in the other modes. */
5157 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5158 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5159 if (common->mode == JIT_COMPILE)
5160 add_jump(compiler, backtracks, partial);
5161
/* Fast call into the shared caseful / caseless comparison routine; a
   non-zero TMP2 afterwards is treated as a mismatch. */
5162 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5163 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5164
5165 if (common->mode != JIT_COMPILE)
5166 {
/* Partial matching modes: the reference does not fit before STR_END, so
   only the part that fits is compared (skipped when that part is empty),
   then check_partial() runs and the path backtracks. */
5167 nopartial = JUMP(SLJIT_JUMP);
5168 JUMPHERE(partial);
5169 /* TMP2 -= STR_END - STR_PTR */
5170 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5171 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5172 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5173 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5174 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5175 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5176 JUMPHERE(partial);
5177 check_partial(common, FALSE);
5178 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5179 JUMPHERE(nopartial);
5180 }
5181 }
5182
/* Resolve the "empty captured string" jump created when withchecks is set. */
5183 if (jump != NULL)
5184 {
5185 if (emptyfail)
5186 add_jump(compiler, backtracks, jump);
5187 else
5188 JUMPHERE(jump);
5189 }
5190 return cc + 1 + IMM2_SIZE;
5191 }
5192
/* Emits the matching path for a back reference that is followed by a repeat
   quantifier; the quantifier opcode is read from cc[1 + IMM2_SIZE].
   An iterator_backtrack record is pushed onto parent first (NULL is returned
   if the compiler reports an error at that point). Both the greedy and the
   minimizing form are handled; in either case the label stored in the
   record's matchingpath field is set here. Returns the pattern position
   following the quantifier. */
5193 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5194 {
5195 DEFINE_COMPILER;
5196 backtrack_common *backtrack;
5197 pcre_uchar type;
5198 struct sljit_label *label;
5199 struct sljit_jump *zerolength;
5200 struct sljit_jump *jump = NULL;
5201 pcre_uchar *ccbegin = cc;
5202 int min = 0, max = 0;
5203 BOOL minimize;
5204
5205 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5206
/* The low bit of the quantifier opcode selects the minimizing form. */
5207 type = cc[1 + IMM2_SIZE];
5208 minimize = (type & 0x1) != 0;
/* Decode the repeat limits; max == 0 is used below as "no upper limit". */
5209 switch(type)
5210 {
5211 case OP_CRSTAR:
5212 case OP_CRMINSTAR:
5213 min = 0;
5214 max = 0;
5215 cc += 1 + IMM2_SIZE + 1;
5216 break;
5217 case OP_CRPLUS:
5218 case OP_CRMINPLUS:
5219 min = 1;
5220 max = 0;
5221 cc += 1 + IMM2_SIZE + 1;
5222 break;
5223 case OP_CRQUERY:
5224 case OP_CRMINQUERY:
5225 min = 0;
5226 max = 1;
5227 cc += 1 + IMM2_SIZE + 1;
5228 break;
5229 case OP_CRRANGE:
5230 case OP_CRMINRANGE:
5231 min = GET2(cc, 1 + IMM2_SIZE + 1);
5232 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5233 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5234 break;
5235 default:
5236 SLJIT_ASSERT_STOP();
5237 break;
5238 }
5239
/* Greedy form: match as many occurrences as allowed, pushing STR_PTR after
   each occurrence that may later be given back. */
5240 if (!minimize)
5241 {
5242 if (min == 0)
5243 {
/* Zero occurrences are acceptable: push the current STR_PTR together with
   a 0 entry before anything is matched. */
5244 allocate_stack(common, 2);
5245 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5247 /* Temporary release of STR_PTR. */
5248 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5249 zerolength = compile_ref_checks(common, ccbegin, NULL);
5250 /* Restore if not zero length. */
5251 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5252 }
5253 else
5254 {
5255 allocate_stack(common, 1);
5256 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5257 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5258 }
5259
/* An explicit counter is only needed when a limit above 1 exists; it is
   kept in the POSSESSIVE0 local. */
5260 if (min > 1 || max > 1)
5261 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5262
/* Loop head: match one occurrence of the reference. */
5263 label = LABEL();
5264 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5265
5266 if (min > 1 || max > 1)
5267 {
5268 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5269 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5270 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below min: loop again without pushing a backtrack position. */
5271 if (min > 1)
5272 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5273 if (max > 1)
5274 {
/* Bounded repeat: leave the loop once max is reached, otherwise push
   STR_PTR and try another occurrence. */
5275 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5276 allocate_stack(common, 1);
5277 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5278 JUMPTO(SLJIT_JUMP, label);
5279 JUMPHERE(jump);
5280 }
5281 }
5282
5283 if (max == 0)
5284 {
5285 /* Includes min > 1 case as well. */
5286 allocate_stack(common, 1);
5287 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5288 JUMPTO(SLJIT_JUMP, label);
5289 }
5290
5291 JUMPHERE(zerolength);
5292 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5293
5294 decrease_call_count(common);
5295 return cc;
5296 }
5297
/* Minimizing form: two stack slots are reserved. STACK(0) receives STR_PTR
   and STACK(1) is used as the occurrence counter in the code below. */
5298 allocate_stack(common, 2);
5299 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5300 if (type != OP_CRMINSTAR)
5301 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5302
5303 if (min == 0)
5304 {
/* With min == 0 the first attempt skips the reference entirely: record
   STR_PTR and jump past the code that matches one occurrence. */
5305 zerolength = compile_ref_checks(common, ccbegin, NULL);
5306 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5307 jump = JUMP(SLJIT_JUMP);
5308 }
5309 else
5310 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5311
/* The code from this label on matches one more occurrence. */
5312 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5313 if (max > 0)
5314 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5315
5316 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5317 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5318
5319 if (min > 1)
5320 {
/* Keep matching until the mandatory minimum has been reached. */
5321 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5322 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5323 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5324 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5325 }
5326 else if (max > 0)
5327 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5328
5329 if (jump != NULL)
5330 JUMPHERE(jump);
5331 JUMPHERE(zerolength);
5332
5333 decrease_call_count(common);
5334 return cc;
5335 }
5336
/* Emits the matching path for the recursion item at cc, whose target is the
   pattern offset read with GET(cc, 1).
   A recurse_backtrack record is pushed onto parent first. If get_framesize()
   reports no_stack for the target, its body is compiled in place and the
   record is flagged as inlined_pattern. Otherwise the target is looked up in
   (or appended to) the common->entries list and a fast call to it is
   emitted, preceded by saving OVECTOR(0) and/or the mark pointer on the
   stack when the pattern uses them.
   Returns cc + 1 + LINK_SIZE, or NULL when the compiler reports an error
   after an allocation. */
5337 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5338 {
5339 DEFINE_COMPILER;
5340 backtrack_common *backtrack;
5341 recurse_entry *entry = common->entries;
5342 recurse_entry *prev = NULL;
5343 sljit_sw start = GET(cc, 1);
5344 pcre_uchar *start_cc;
5345 BOOL needs_control_head;
5346
5347 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5348
5349 /* Inlining simple patterns. */
5350 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5351 {
5352 start_cc = common->start + start;
5353 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5354 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5355 return cc + 1 + LINK_SIZE;
5356 }
5357
/* Search the list of known recursion targets; "prev" ends up as the last
   node so that a new entry can be appended. */
5358 while (entry != NULL)
5359 {
5360 if (entry->start == start)
5361 break;
5362 prev = entry;
5363 entry = entry->next;
5364 }
5365
5366 if (entry == NULL)
5367 {
5368 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5369 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5370 return NULL;
5371 entry->next = NULL;
5372 entry->entry = NULL;
5373 entry->calls = NULL;
5374 entry->start = start;
5375
5376 if (prev != NULL)
5377 prev->next = entry;
5378 else
5379 common->entries = entry;
5380 }
5381
/* Save OVECTOR(0) and/or the mark pointer before the call: two stack slots
   when both are in use, one slot when only one of them is. */
5382 if (common->has_set_som && common->mark_ptr != 0)
5383 {
5384 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5385 allocate_stack(common, 2);
5386 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5387 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5388 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5389 }
5390 else if (common->has_set_som || common->mark_ptr != 0)
5391 {
5392 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5393 allocate_stack(common, 1);
5394 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5395 }
5396
/* If the target's label is not known yet, the call is queued on
   entry->calls; otherwise it is bound to entry->entry directly. */
5397 if (entry->entry == NULL)
5398 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5399 else
5400 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5401 /* Leave if the match is failed. */
/* Failure is detected by TMP1 being 0 after the call. */
5402 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5403 return cc + 1 + LINK_SIZE;
5404 }
5405
5406 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5407 {
5408 const pcre_uchar *begin = arguments->begin;
5409 int *offset_vector = arguments->offsets;
5410 int offset_count = arguments->offset_count;
5411 int i;
5412
5413 if (PUBL(callout) == NULL)
5414 return 0;
5415
5416 callout_block->version = 2;
5417 callout_block->callout_data = arguments->callout_data;
5418
5419 /* Offsets in subject. */
5420 callout_block->subject_length = arguments->end - arguments->begin;
5421 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5422 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5423 #if defined COMPILE_PCRE8
5424 callout_block->subject = (PCRE_SPTR)begin;
5425 #elif defined COMPILE_PCRE16
5426 callout_block->subject = (PCRE_SPTR16)begin;
5427 #elif defined COMPILE_PCRE32
5428 callout_block->subject = (PCRE_SPTR32)begin;
5429 #endif
5430
5431 /* Convert and copy the JIT offset vector to the offset_vector array. */
5432 callout_block->capture_top = 0;
5433 callout_block->offset_vector = offset_vector;
5434 for (i = 2; i < offset_count; i += 2)
5435 {
5436 offset_vector[i] = jit_ovector[i] - begin;
5437 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5438 if (jit_ovector[i] >= begin)
5439 callout_block->capture_top = i;
5440 }
5441
5442 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5443 if (offset_count > 0)
5444 offset_vector[0] = -1;
5445 if (offset_count > 1)
5446 offset_vector[1] = -1;
5447 return (*PUBL(callout))(callout_block);
5448 }
5449
5450 /* Size of a callout block, rounded up to a multiple of 8 bytes. */
5451 #define CALLOUT_ARG_SIZE \
5452 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5453
/* Offset of the field "arg" inside a callout block that begins
   CALLOUT_ARG_SIZE bytes below the base address the offset is applied to. */
5454 #define CALLOUT_ARG_OFFSET(arg) \
5455 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5456
/* Emits the matching path for the callout item at cc.
   A backtrack_common record is pushed onto parent, stack space for one
   callout block is reserved, the fields known at this point are filled in,
   and do_callout() is called with three arguments. After the call the space
   is released and the (sign-extended) return value is examined: a positive
   value backtracks, a negative value jumps to the forced quit path, and
   zero continues matching.
   Returns cc + 2 + 2 * LINK_SIZE, or NULL when the compiler reports an
   error while the record is allocated. */
5457 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5458 {
5459 DEFINE_COMPILER;
5460 backtrack_common *backtrack;
5461
5462 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5463
5464 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5465
5466 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5467 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5468 SLJIT_ASSERT(common->capture_last_ptr != 0);
/* The callout number is the code unit at cc[1]. */
5469 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5470 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5471
5472 /* These pointer sized fields temporarily store internal variables. */
/* STR_PTR is parked in offset_vector and OVECTOR(0) in subject; do_callout()
   converts them to offsets before giving the fields their real values. */
5473 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5476
/* The mark field is loaded from the jit_arguments block when the pattern
   uses marks, otherwise it is set to the immediate 0. */
5477 if (common->mark_ptr != 0)
5478 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5479 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5480 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5481 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5482
5483 /* Needed to save important temporary registers. */
/* STACK_TOP is kept in LOCALS0 across the call. The second argument is the
   address of the callout block and the third is the address of the JIT
   offset vector; TMP1 still holds ARGUMENTS from above. NOTE(review): TMP1
   is presumably the first argument register here - confirm. */
5484 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5485 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5486 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5487 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5488 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5489 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5490 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5491
5492 /* Check return value. */
/* Signed comparison against 0: greater backtracks, less forces a quit. */
5493 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5494 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5495 if (common->forced_quit_label == NULL)
5496 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5497 else
5498 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5499 return cc + 2 + 2 * LINK_SIZE;
5500 }
5501
5502 #undef CALLOUT_ARG_SIZE
5503 #undef CALLOUT_ARG_OFFSET
5504
5505 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5506 {
5507 DEFINE_COMPILER;
5508 int framesize;
5509 int extrasize;
5510 BOOL needs_control_head;
5511 int private_data_ptr;
5512 backtrack_common altbacktrack;
5513 pcre_uchar *ccbegin;
5514 pcre_uchar opcode;
5515 pcre_uchar bra = OP_BRA;
5516 jump_list *tmp = NULL;
5517 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5518 jump_list **found;
5519 /* Saving previous accept variables. */
5520 BOOL save_local_exit = common->local_exit;
5521 BOOL save_positive_assert = common->positive_assert;
5522 then_trap_backtrack *save_then_trap = common->then_trap;
5523 struct sljit_label *save_quit_label = common->quit_label;
5524 struct sljit_label *save_accept_label = common->accept_label;
5525 jump_list *save_quit = common->quit;
5526 jump_list *save_positive_assert_quit = common->positive_assert_quit;
5527 jump_list *save_accept = common->accept;
5528 struct sljit_jump *jump;
5529 struct sljit_jump *brajump = NULL;
5530
5531 /* Assert captures then. */
5532 common->then_trap = NULL;
5533
5534 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5535 {
5536 SLJIT_ASSERT(!conditional);
5537 bra = *cc;
5538 cc++;
5539 }
5540 private_data_ptr = PRIVATE_DATA(cc);
5541 SLJIT_ASSERT(private_data_ptr != 0);
5542 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5543 backtrack->framesize = framesize;
5544 backtrack->private_data_ptr = private_data_ptr;
5545 opcode = *cc;
5546 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
5547 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5548 ccbegin = cc;
5549 cc += GET(cc, 1);
5550
5551 if (bra == OP_BRAMINZERO)
5552 {
5553 /* This is a braminzero backtrack path. */
5554 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5555 free_stack(common, 1);
5556 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5557 }
5558
5559 if (framesize < 0)
5560 {
5561 extrasize = needs_control_head ? 2 : 1;
5562 if (framesize == no_frame)
5563 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5564 allocate_stack(common, extrasize);
5565 if (needs_control_head)
5566 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5567 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5568 if (needs_control_head)
5569 {
5570 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5571 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5572 }
5573 }
5574 else
5575 {
5576 extrasize = needs_control_head ? 3 : 2;
5577 allocate_stack(common, framesize + extrasize);
5578 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5579 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5580 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5581 if (needs_control_head)
5582 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5583 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5584 if (needs_control_head)
5585 {
5586 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5587 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5588 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5589 }
5590 else
5591 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5592 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
5593 }
5594
5595 memset(&altbacktrack, 0, sizeof(backtrack_common));
5596 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5597 {
5598 /* Negative assert is stronger than positive assert. */
5599 common->local_exit = TRUE;
5600 common->quit_label = NULL;
5601 common->quit = NULL;
5602 common->positive_assert = FALSE;
5603 }
5604 else
5605 common->positive_assert = TRUE;
5606 common->positive_assert_quit = NULL;
5607
5608 while (1)
5609 {
5610 common->accept_label = NULL;
5611 common->accept = NULL;
5612 altbacktrack.top = NULL;
5613 altbacktrack.topbacktracks = NULL;
5614
5615 if (*ccbegin == OP_ALT)
5616 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5617
5618 altbacktrack.cc = ccbegin;
5619 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5620 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5621 {
5622 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5623 {
5624 common->local_exit = save_local_exit;
5625 common->quit_label = save_quit_label;
5626 common->quit = save_quit;
5627 }
5628 common->positive_assert = save_positive_assert;
5629 common->then_trap = save_then_trap;
5630 common->accept_label = save_accept_label;
5631 common->positive_assert_quit = save_positive_assert_quit;
5632 common->accept = save_accept;
5633 return NULL;
5634 }
5635 common->accept_label = LABEL();
5636 if (common->accept != NULL)
5637 set_jumps(common->accept, common->accept_label);
5638
5639 /* Reset stack. */
5640 if (framesize < 0)
5641 {
5642 if (framesize == no_frame)
5643 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5644 else
5645 free_stack(common, extrasize);
5646 if (needs_control_head)
5647 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5648 }
5649 else
5650 {
5651 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5652 {
5653 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5654 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5655 if (needs_control_head)
5656 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5657 }
5658 else
5659 {
5660 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5661 if (needs_control_head)
5662 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5663 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5664 }
5665 }
5666
5667 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5668 {
5669 /* We know that STR_PTR was stored on the top of the stack. */
5670 if (conditional)
5671 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5672 else if (bra == OP_BRAZERO)
5673 {
5674 if (framesize < 0)
5675 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5676 else
5677 {
5678 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5679 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5680 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5681 }
5682 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5683 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5684 }
5685 else if (framesize >= 0)
5686 {
5687 /* For OP_BRA and OP_BRAMINZERO. */
5688 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5689 }
5690 }
5691 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5692
5693 compile_backtrackingpath(common, altbacktrack.top);
5694 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5695 {
5696 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5697 {
5698 common->local_exit = save_local_exit;
5699 common->quit_label = save_quit_label;
5700 common->quit = save_quit;
5701 }
5702 common->positive_assert = save_positive_assert;
5703 common->then_trap = save_then_trap;
5704 common->accept_label = save_accept_label;
5705 common->positive_assert_quit = save_positive_assert_quit;
5706 common->accept = save_accept;
5707 return NULL;
5708 }
5709 set_jumps(altbacktrack.topbacktracks, LABEL());
5710
5711 if (*cc != OP_ALT)
5712 break;
5713
5714 ccbegin = cc;
5715 cc += GET(cc, 1);
5716 }
5717
5718 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5719 {
5720 SLJIT_ASSERT(common->positive_assert_quit == NULL);
5721 /* Makes the check less complicated below. */
5722 common->positive_assert_quit = common->quit;
5723 }
5724
5725 /* None of them matched. */
5726 if (common->positive_assert_quit != NULL)
5727 {
5728 jump = JUMP(SLJIT_JUMP);
5729 set_jumps(common->positive_assert_quit, LABEL());
5730 SLJIT_ASSERT(framesize != no_stack);
5731 if (framesize < 0)
5732 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5733 else
5734 {
5735 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5736 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5737 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5738 }
5739 JUMPHERE(jump);
5740 }
5741
5742 if (needs_control_head)
5743 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5744
5745 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5746 {
5747 /* Assert is failed. */
5748 if (conditional || bra == OP_BRAZERO)
5749 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5750
5751 if (framesize < 0)
5752 {
5753 /* The topmost item should be 0. */
5754 if (bra == OP_BRAZERO)
5755 {
5756 if (extrasize == 2)
5757 free_stack(common, 1);
5758 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5759 }
5760 else
5761 free_stack(common, extrasize);
5762 }
5763 else
5764 {
5765 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5766 /* The topmost item should be 0. */
5767 if (bra == OP_BRAZERO)
5768 {
5769 free_stack(common, framesize + extrasize - 1);
5770 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5771 }
5772 else
5773 free_stack(common, framesize + extrasize);
5774 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5775 }
5776 jump = JUMP(SLJIT_JUMP);
5777 if (bra != OP_BRAZERO)
5778 add_jump(compiler, target, jump);
5779
5780 /* Assert is successful. */
5781 set_jumps(tmp, LABEL());
5782 if (framesize < 0)
5783 {
5784 /* We know that STR_PTR was stored on the top of the stack. */
5785 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5786 /* Keep the STR_PTR on the top of the stack. */
5787 if (bra == OP_BRAZERO)
5788 {
5789 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5790 if (extrasize == 2)
5791 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5792 }
5793 else if (bra == OP_BRAMINZERO)
5794 {
5795 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5796 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5797 }
5798 }
5799 else
5800 {
5801 if (bra == OP_BRA)
5802 {
5803 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5804 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5805 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5806 }
5807 else
5808 {
5809 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5810 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5811 if (extrasize == 2)
5812 {
5813 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5814 if (bra == OP_BRAMINZERO)
5815 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5816 }
5817 else
5818 {
5819 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5820 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5821 }
5822 }
5823 }
5824
5825 if (bra == OP_BRAZERO)
5826 {
5827 backtrack->matchingpath = LABEL();
5828 SET_LABEL(jump, backtrack->matchingpath);
5829 }
5830 else if (bra == OP_BRAMINZERO)
5831 {
5832 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5833 JUMPHERE(brajump);
5834 if (framesize >= 0)
5835 {
5836 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5837 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5838 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5839 }
5840 set_jumps(backtrack->common.topbacktracks, LABEL());
5841 }
5842 }
5843 else
5844 {
5845 /* AssertNot is successful. */
5846 if (framesize < 0)
5847 {
5848 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5849 if (bra != OP_BRA)
5850 {
5851 if (extrasize == 2)
5852 free_stack(common, 1);
5853 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5854 }
5855 else
5856 free_stack(common, extrasize);
5857 }
5858 else
5859 {
5860 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5861 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5862 /* The topmost item should be 0. */
5863 if (bra != OP_BRA)
5864 {
5865 free_stack(common, framesize + extrasize - 1);
5866 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5867 }
5868 else
5869 free_stack(common, framesize + extrasize);
5870 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5871 }
5872
5873 if (bra == OP_BRAZERO)
5874 backtrack->matchingpath = LABEL();
5875 else if (bra == OP_BRAMINZERO)
5876 {
5877 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5878 JUMPHERE(brajump);
5879 }
5880
5881 if (bra != OP_BRA)
5882 {
5883 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5884 set_jumps(backtrack->common.topbacktracks, LABEL());
5885 backtrack->common.topbacktracks = NULL;
5886 }
5887 }
5888
5889 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5890 {
5891 common->local_exit = save_local_exit;
5892 common->quit_label = save_quit_label;
5893 common->quit = save_quit;
5894 }
5895 common->positive_assert = save_positive_assert;
5896 common->then_trap = save_then_trap;
5897 common->accept_label = save_accept_label;
5898 common->positive_assert_quit = save_positive_assert_quit;
5899 common->accept = save_accept;
5900 return cc + 1 + LINK_SIZE;
5901 }
5902
5903 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5904 {
5905 int condition = FALSE;
5906 pcre_uchar *slotA = name_table;
5907 pcre_uchar *slotB;
5908 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5909 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5910 sljit_sw no_capture;
5911 int i;
5912
5913 locals += refno & 0xff;
5914 refno >>= 8;
5915 no_capture = locals[1];
5916
5917 for (i = 0; i < name_count; i++)
5918 {
5919 if (GET2(slotA, 0) == refno) break;
5920 slotA += name_entry_size;
5921 }
5922
5923 if (i < name_count)
5924 {
5925 /* Found a name for the number - there can be only one; duplicate names
5926 for different numbers are allowed, but not vice versa. First scan down
5927 for duplicates. */
5928
5929 slotB = slotA;
5930 while (slotB > name_table)
5931 {
5932 slotB -= name_entry_size;
5933 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5934 {
5935 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5936 if (condition) break;
5937 }
5938 else break;
5939 }
5940
5941 /* Scan up for duplicates */
5942 if (!condition)
5943 {
5944 slotB = slotA;
5945 for (i++; i < name_count; i++)
5946 {
5947 slotB += name_entry_size;
5948 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5949 {
5950 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5951 if (condition) break;
5952 }
5953 else break;
5954 }
5955 }
5956 }
5957 return condition;
5958 }
5959
5960 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5961 {
5962 int condition = FALSE;
5963 pcre_uchar *slotA = name_table;
5964 pcre_uchar *slotB;
5965 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5966 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5967 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5968 sljit_uw i;
5969
5970 for (i = 0; i < name_count; i++)
5971 {
5972 if (GET2(slotA, 0) == recno) break;
5973 slotA += name_entry_size;
5974 }
5975
5976 if (i < name_count)
5977 {
5978 /* Found a name for the number - there can be only one; duplicate
5979 names for different numbers are allowed, but not vice versa. First
5980 scan down for duplicates. */
5981
5982 slotB = slotA;
5983 while (slotB > name_table)
5984 {
5985 slotB -= name_entry_size;
5986 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5987 {
5988 condition = GET2(slotB, 0) == group_num;
5989 if (condition) break;
5990 }
5991 else break;
5992 }
5993
5994 /* Scan up for duplicates */
5995 if (!condition)
5996 {
5997 slotB = slotA;
5998 for (i++; i < name_count; i++)
5999 {
6000 slotB += name_entry_size;
6001 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
6002 {
6003 condition = GET2(slotB, 0) == group_num;
6004 if (condition) break;
6005 }
6006 else break;
6007 }
6008 }
6009 }
6010 return condition;
6011 }
6012
6013 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6014 {
6015 DEFINE_COMPILER;
6016 int stacksize;
6017
6018 if (framesize < 0)
6019 {
6020 if (framesize == no_frame)
6021 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6022 else
6023 {
6024 stacksize = needs_control_head ? 1 : 0;
6025 if (ket != OP_KET || has_alternatives)
6026 stacksize++;
6027 free_stack(common, stacksize);
6028 }
6029
6030 if (needs_control_head)
6031 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6032
6033 /* TMP2 which is set here used by OP_KETRMAX below. */
6034 if (ket == OP_KETRMAX)
6035 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6036 else if (ket == OP_KETRMIN)
6037 {
6038 /* Move the STR_PTR to the private_data_ptr. */
6039 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6040 }
6041 }
6042 else
6043 {
6044 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6045 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6046 if (needs_control_head)
6047 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6048
6049 if (ket == OP_KETRMAX)
6050 {
6051 /* TMP2 which is set here used by OP_KETRMAX below. */
6052 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6053 }
6054 }
6055 if (needs_control_head)
6056 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6057 }
6058
6059 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6060 {
6061 DEFINE_COMPILER;
6062
6063 if (common->capture_last_ptr != 0)
6064 {
6065 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6066 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6067 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6068 stacksize++;
6069 }
6070 if (common->optimized_cbracket[offset >> 1] == 0)
6071 {
6072 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6073 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6074 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6075 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6076 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6077 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6079 stacksize += 2;
6080 }
6081 return stacksize;
6082 }
6083
6084 /*
6085 Handling bracketed expressions is probably the most complex part.
6086
6087 Stack layout naming characters:
6088 S - Push the current STR_PTR
6089 0 - Push a 0 (NULL)
6090 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6091 before the next alternative. Not pushed if there are no alternatives.
6092 M - Any values pushed by the current alternative. Can be empty, or anything.
6093 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6094 L - Push the previous local (pointed by private_data_ptr) to the stack
6095 () - optional values stored on the stack
6096 ()* - optional, can be stored multiple times
6097
6098 The following list shows the regular expression templates, their PCRE byte codes
6099 and stack layout supported by pcre-sljit.
6100
6101 (?:) OP_BRA | OP_KET A M
6102 () OP_CBRA | OP_KET C M
6103 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6104 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6105 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6106 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6107 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6108 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6109 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6110 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6111 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6112 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6113 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6114 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6115 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6116 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6117 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6118 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6119 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6120 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6121 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6122 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6123
6124
6125 Stack layout naming characters:
6126 A - Push the alternative index (starting from 0) on the stack.
6127 Not pushed if there are no alternatives.
6128 M - Any values pushed by the current alternative. Can be empty, or anything.
6129
6130 The next list shows the possible content of a bracket:
6131 (|) OP_*BRA | OP_ALT ... M A
6132 (?()|) OP_*COND | OP_ALT M A
6133 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6134 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6135 Or nothing, if trace is unnecessary
6136 */
6137
/* Emits the matching path of one bracketed group, together with an optional
OP_BRAZERO / OP_BRAMINZERO in front of it.

   common - compiler state
   cc     - points to the opcode of the group, or to the OP_BRAZERO /
            OP_BRAMINZERO opcode which precedes it
   parent - parent backtrack_common; its top is reset when the group is
            dropped, and it is read as a braminzero_backtrack when the
            group is preceded by OP_BRAMINZERO

Returns the address which follows the closing ket of the group (advanced by
repeat_length as well when the ket carries private repeat data), or NULL when
sljit reports a compiler error. */
6138 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6139 {
6140 DEFINE_COMPILER;
6141 backtrack_common *backtrack;
6142 pcre_uchar opcode;
6143 int private_data_ptr = 0;
6144 int offset = 0;
6145 int stacksize;
6146 int repeat_ptr = 0, repeat_length = 0;
6147 int repeat_type = 0, repeat_count = 0;
6148 pcre_uchar *ccbegin;
6149 pcre_uchar *matchingpath;
6150 pcre_uchar bra = OP_BRA;
6151 pcre_uchar ket;
6152 assert_backtrack *assert;
6153 BOOL has_alternatives;
6154 BOOL needs_control_head = FALSE;
6155 struct sljit_jump *jump;
6156 struct sljit_jump *skip;
6157 struct sljit_label *rmax_label = NULL;
6158 struct sljit_jump *braminzero = NULL;
6159
6160 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6161
/* Remember an OP_BRAZERO / OP_BRAMINZERO prefix in bra and step over it. */
6162 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6163 {
6164 bra = *cc;
6165 cc++;
/* NOTE(review): redundant store, opcode is assigned again right after this block. */
6166 opcode = *cc;
6167 }
6168
6169 opcode = *cc;
6170 ccbegin = cc;
/* matchingpath temporarily addresses the closing ket opcode of the group. */
6171 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6172 ket = *matchingpath;
/* A plain OP_KET with private data describes a counted repeat. Four
consecutive private data entries are read: the local slot of the counter, the
length added to the returned address, the repeat type and the repeat count.
OP_UPTO and OP_MINUPTO are compiled like OP_KETRMAX and OP_KETRMIN. */
6173 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6174 {
6175 repeat_ptr = PRIVATE_DATA(matchingpath);
6176 repeat_length = PRIVATE_DATA(matchingpath + 1);
6177 repeat_type = PRIVATE_DATA(matchingpath + 2);
6178 repeat_count = PRIVATE_DATA(matchingpath + 3);
6179 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6180 if (repeat_type == OP_UPTO)
6181 ket = OP_KETRMAX;
6182 if (repeat_type == OP_MINUPTO)
6183 ket = OP_KETRMIN;
6184 }
6185
/* A conditional group whose condition is OP_DEF: no code is emitted for it. */
6186 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6187 {
6188 /* Drop this bracket_backtrack. */
6189 parent->top = backtrack->prev;
6190 return matchingpath + 1 + LINK_SIZE + repeat_length;
6191 }
6192
/* From here matchingpath is the start of the first alternative, and cc is
advanced by the link stored after the opcode (presumably to the first OP_ALT
or to the ket, see has_alternatives below). */
6193 matchingpath = ccbegin + 1 + LINK_SIZE;
6194 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6195 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6196 cc += GET(cc, 1);
6197
6198 has_alternatives = *cc == OP_ALT;
/* For conditional groups has_alternatives tells whether the condition is
checked by the generated code (TRUE) or decided here at compile time (FALSE). */
6199 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6200 {
6201 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
6202 if (*matchingpath == OP_NRREF)
6203 {
6204 stacksize = GET2(matchingpath, 1);
6205 if (common->currententry == NULL || stacksize == RREF_ANY)
6206 has_alternatives = FALSE;
6207 else if (common->currententry->start == 0)
6208 has_alternatives = stacksize != 0;
6209 else
6210 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6211 }
6212 }
6213
/* A repeated OP_COND is compiled as OP_SCOND, and OP_ONCE_NC as OP_ONCE. */
6214 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6215 opcode = OP_SCOND;
6216 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6217 opcode = OP_ONCE;
6218
6219 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6220 {
6221 /* Capturing brackets has a pre-allocated space. */
6222 offset = GET2(ccbegin, 1 + LINK_SIZE);
6223 if (common->optimized_cbracket[offset] == 0)
6224 {
6225 private_data_ptr = OVECTOR_PRIV(offset);
6226 offset <<= 1;
6227 }
6228 else
6229 {
6230 offset <<= 1;
6231 private_data_ptr = OVECTOR(offset);
6232 }
6233 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6234 matchingpath += IMM2_SIZE;
6235 }
6236 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6237 {
6238 /* Other brackets simply allocate the next entry. */
6239 private_data_ptr = PRIVATE_DATA(ccbegin);
6240 SLJIT_ASSERT(private_data_ptr != 0);
6241 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6242 if (opcode == OP_ONCE)
6243 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6244 }
6245
6246 /* Instructions before the first alternative. */
/* The slots are counted first, allocated at once, and then filled in the
same order. */
6247 stacksize = 0;
6248 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6249 stacksize++;
6250 if (bra == OP_BRAZERO)
6251 stacksize++;
6252
6253 if (stacksize > 0)
6254 allocate_stack(common, stacksize);
6255
6256 stacksize = 0;
6257 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6258 {
6259 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6260 stacksize++;
6261 }
6262
6263 if (bra == OP_BRAZERO)
6264 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6265
6266 if (bra == OP_BRAMINZERO)
6267 {
6268 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6269 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6270 if (ket != OP_KETRMIN)
6271 {
6272 free_stack(common, 1);
6273 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6274 }
6275 else
6276 {
6277 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6278 {
6279 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6280 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6281 /* Nothing stored during the first run. */
6282 skip = JUMP(SLJIT_JUMP);
6283 JUMPHERE(jump);
6284 /* Checking zero-length iteration. */
6285 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6286 {
6287 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6288 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6289 }
6290 else
6291 {
6292 /* Except when the whole stack frame must be saved. */
6293 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6294 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6295 }
6296 JUMPHERE(skip);
6297 }
6298 else
6299 {
6300 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6301 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6302 JUMPHERE(jump);
6303 }
6304 }
6305 }
6306
/* Counted repeat: the generated code stores the initial count into the
repeat_ptr local. For OP_EXACT the loop label is placed right after it. */
6307 if (repeat_type != 0)
6308 {
6309 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6310 if (repeat_type == OP_EXACT)
6311 rmax_label = LABEL();
6312 }
6313
6314 if (ket == OP_KETRMIN)
6315 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6316
6317 if (ket == OP_KETRMAX)
6318 {
6319 rmax_label = LABEL();
6320 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6321 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
6322 }
6323
6324 /* Handling capturing brackets and alternatives. */
6325 if (opcode == OP_ONCE)
6326 {
6327 stacksize = 0;
6328 if (needs_control_head)
6329 {
6330 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6331 stacksize++;
6332 }
6333
6334 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6335 {
6336 /* Neither capturing brackets nor recursions are found in the block. */
6337 if (ket == OP_KETRMIN)
6338 {
6339 stacksize += 2;
6340 if (!needs_control_head)
6341 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6342 }
6343 else
6344 {
6345 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6346 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6347 if (ket == OP_KETRMAX || has_alternatives)
6348 stacksize++;
6349 }
6350
6351 if (stacksize > 0)
6352 allocate_stack(common, stacksize);
6353
6354 stacksize = 0;
6355 if (needs_control_head)
6356 {
6357 stacksize++;
6358 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6359 }
6360
6361 if (ket == OP_KETRMIN)
6362 {
6363 if (needs_control_head)
6364 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6365 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6366 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6367 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6368 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6369 }
6370 else if (ket == OP_KETRMAX || has_alternatives)
6371 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6372 }
6373 else
6374 {
/* A frame is needed: room is allocated for the frame together with the
extra slots, and the frame itself is written by init_frame below. */
6375 if (ket != OP_KET || has_alternatives)
6376 stacksize++;
6377
6378 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6379 allocate_stack(common, stacksize);
6380
6381 if (needs_control_head)
6382 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6383
6384 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6385 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6386
6387 stacksize = needs_control_head ? 1 : 0;
6388 if (ket != OP_KET || has_alternatives)
6389 {
6390 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6391 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6392 stacksize++;
6393 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6394 }
6395 else
6396 {
6397 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6398 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6399 }
6400 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6401 }
6402 }
6403 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6404 {
6405 /* Saving the previous values. */
6406 if (common->optimized_cbracket[offset >> 1] != 0)
6407 {
6408 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6409 allocate_stack(common, 2);
6410 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6411 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6412 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6413 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6414 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6415 }
6416 else
6417 {
6418 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6419 allocate_stack(common, 1);
6420 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6421 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6422 }
6423 }
6424 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6425 {
6426 /* Saving the previous value. */
6427 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6428 allocate_stack(common, 1);
6429 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6430 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6431 }
6432 else if (has_alternatives)
6433 {
6434 /* Pushing the starting string pointer. */
6435 allocate_stack(common, 1);
6436 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6437 }
6438
6439 /* Generating code for the first alternative. */
/* For conditional groups the condition is handled first. stacksize is reused
as a scratch variable in this part. */
6440 if (opcode == OP_COND || opcode == OP_SCOND)
6441 {
6442 if (*matchingpath == OP_CREF)
6443 {
6444 SLJIT_ASSERT(has_alternatives);
6445 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6446 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6447 matchingpath += 1 + IMM2_SIZE;
6448 }
6449 else if (*matchingpath == OP_NCREF)
6450 {
6451 SLJIT_ASSERT(has_alternatives);
6452 stacksize = GET2(matchingpath, 1);
6453 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6454
/* The group itself is unset: do_searchovector is called at runtime.
STACK_TOP is saved in POSSESSIVE1 around the call, and the first argument
packs the group number above the 8 bit ovector index. */
6455 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6456 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6457 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6458 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6459 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6460 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6461 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6462 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6463 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6464
6465 JUMPHERE(jump);
6466 matchingpath += 1 + IMM2_SIZE;
6467 }
6468 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6469 {
6470 /* Never has other case. */
6471 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6472
/* stacksize becomes non-zero when the condition is known to be true at
compile time. */
6473 stacksize = GET2(matchingpath, 1);
6474 if (common->currententry == NULL)
6475 stacksize = 0;
6476 else if (stacksize == RREF_ANY)
6477 stacksize = 1;
6478 else if (common->currententry->start == 0)
6479 stacksize = stacksize == 0;
6480 else
6481 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6482
6483 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6484 {
6485 SLJIT_ASSERT(!has_alternatives);
6486 if (stacksize != 0)
6487 matchingpath += 1 + IMM2_SIZE;
6488 else
6489 {
/* The condition is false: only the second alternative is compiled, or
nothing when there is no OP_ALT. */
6490 if (*cc == OP_ALT)
6491 {
6492 matchingpath = cc + 1 + LINK_SIZE;
6493 cc += GET(cc, 1);
6494 }
6495 else
6496 matchingpath = cc;
6497 }
6498 }
6499 else
6500 {
6501 SLJIT_ASSERT(has_alternatives);
6502
/* Decided at runtime by do_searchgroups, called the same way as
do_searchovector above; POSSESSIVE0 carries the group number read from the
current entry. */
6503 stacksize = GET2(matchingpath, 1);
6504 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6505 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6506 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6508 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6509 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6510 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6511 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6512 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6513 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6514 matchingpath += 1 + IMM2_SIZE;
6515 }
6516 }
6517 else
6518 {
6519 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6520 /* Similar code as PUSH_BACKTRACK macro. */
6521 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6522 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6523 return NULL;
6524 memset(assert, 0, sizeof(assert_backtrack));
6525 assert->common.cc = matchingpath;
6526 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6527 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6528 }
6529 }
6530
6531 compile_matchingpath(common, matchingpath, cc, backtrack);
6532 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6533 return NULL;
6534
6535 if (opcode == OP_ONCE)
6536 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6537
/* Values pushed after the first alternative matched: counted first,
allocated at once, then filled in the same order. */
6538 stacksize = 0;
6539 if (repeat_type == OP_MINUPTO)
6540 {
6541 /* We need to preserve the counter. TMP2 will be used below. */
6542 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6543 stacksize++;
6544 }
6545 if (ket != OP_KET || bra != OP_BRA)
6546 stacksize++;
6547 if (offset != 0)
6548 {
6549 if (common->capture_last_ptr != 0)
6550 stacksize++;
6551 if (common->optimized_cbracket[offset >> 1] == 0)
6552 stacksize += 2;
6553 }
6554 if (has_alternatives && opcode != OP_ONCE)
6555 stacksize++;
6556
6557 if (stacksize > 0)
6558 allocate_stack(common, stacksize);
6559
6560 stacksize = 0;
6561 if (repeat_type == OP_MINUPTO)
6562 {
6563 /* TMP2 was set above. */
6564 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
6565 stacksize++;
6566 }
6567
6568 if (ket != OP_KET || bra != OP_BRA)
6569 {
6570 if (ket != OP_KET)
6571 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6572 else
6573 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6574 stacksize++;
6575 }
6576
6577 if (offset != 0)
6578 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6579
6580 if (has_alternatives)
6581 {
/* The value 0 is pushed for the first alternative (the alternative index
in the layout description before this function). */
6582 if (opcode != OP_ONCE)
6583 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6584 if (ket != OP_KETRMAX)
6585 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6586 }
6587
6588 /* Must be after the matchingpath label. */
6589 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6590 {
6591 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6592 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6593 }
6594
6595 if (ket == OP_KETRMAX)
6596 {
6597 if (repeat_type != 0)
6598 {
6599 if (has_alternatives)
6600 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6601 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6602 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6603 /* Drop STR_PTR for greedy plus quantifier. */
6604 if (opcode != OP_ONCE)
6605 free_stack(common, 1);
6606 }
6607 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
6608 {
6609 if (has_alternatives)
6610 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6611 /* Checking zero-length iteration. */
6612 if (opcode != OP_ONCE)
6613 {
6614 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label);
6615 /* Drop STR_PTR for greedy plus quantifier. */
6616 if (bra != OP_BRAZERO)
6617 free_stack(common, 1);
6618 }
6619 else
6620 /* TMP2 must contain the starting STR_PTR. */
6621 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
6622 }
6623 else
6624 JUMPTO(SLJIT_JUMP, rmax_label);
6625 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6626 }
6627
/* OP_EXACT loops back until the counter reaches zero. For OP_UPTO the current
counter value is pushed onto the stack. */
6628 if (repeat_type == OP_EXACT)
6629 {
6630 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6631 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6632 }
6633 else if (repeat_type == OP_UPTO)
6634 {
6635 /* We need to preserve the counter. */
6636 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6637 allocate_stack(common, 1);
6638 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6639 }
6640
6641 if (bra == OP_BRAZERO)
6642 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6643
6644 if (bra == OP_BRAMINZERO)
6645 {
6646 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6647 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6648 if (braminzero != NULL)
6649 {
6650 JUMPHERE(braminzero);
6651 /* We need to release the end pointer to perform the
6652 backtrack for the zero-length iteration. When
6653 framesize is < 0, OP_ONCE will do the release itself. */
6654 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6655 {
6656 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6657 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6658 }
6659 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6660 free_stack(common, 1);
6661 }
6662 /* Continue to the normal backtrack. */
6663 }
6664
6665 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6666 decrease_call_count(common);
6667
6668 /* Skip the other alternatives. */
6669 while (*cc == OP_ALT)
6670 cc += GET(cc, 1);
6671 cc += 1 + LINK_SIZE;
6672
6673 /* Temporarily encoding the needs_control_head in framesize. */
/* The lowest bit holds the flag, the framesize is shifted one bit to the left. */
6674 if (opcode == OP_ONCE)
6675 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6676 return cc + repeat_length;
6677 }
6678
6679 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6680 {
6681 DEFINE_COMPILER;
6682 backtrack_common *backtrack;
6683 pcre_uchar opcode;
6684 int private_data_ptr;
6685 int cbraprivptr = 0;
6686 BOOL needs_control_head;
6687 int framesize;
6688 int stacksize;
6689 int offset = 0;
6690 BOOL zero = FALSE;
6691 pcre_uchar *ccbegin = NULL;
6692 int stack; /* Also contains the offset of control head. */
6693 struct sljit_label *loop = NULL;
6694 struct jump_list *emptymatch = NULL;
6695
6696 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6697 if (*cc == OP_BRAPOSZERO)
6698 {
6699 zero = TRUE;
6700 cc++;
6701 }
6702
6703 opcode = *cc;
6704 private_data_ptr = PRIVATE_DATA(cc);
6705 SLJIT_ASSERT(private_data_ptr != 0);
6706 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6707 switch(opcode)
6708 {
6709 case OP_BRAPOS:
6710 case OP_SBRAPOS:
6711 ccbegin = cc + 1 + LINK_SIZE;
6712 break;
6713
6714 case OP_CBRAPOS:
6715 case OP_SCBRAPOS:
6716 offset = GET2(cc, 1 + LINK_SIZE);
6717 /* This case cannot be optimized in the same way as
6718 normal capturing brackets. */
6719 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6720 cbraprivptr = OVECTOR_PRIV(offset);
6721 offset <<= 1;
6722 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6723 break;
6724
6725 default:
6726 SLJIT_ASSERT_STOP();
6727 break;
6728 }
6729
6730 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6731 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6732 if (framesize < 0)
6733 {
6734 if (offset != 0)
6735 {
6736 stacksize = 2;
6737 if (common->capture_last_ptr != 0)
6738 stacksize++;