/[pcre]/code/tags/pcre-8.38/pcre_jit_compile.c
ViewVC logotype

Contents of /code/tags/pcre-8.38/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1306 - (show annotations)
Mon Apr 1 17:04:17 2013 UTC (6 years, 7 months ago) by zherczeg
Original Path: code/trunk/pcre_jit_compile.c
File MIME type: text/plain
File size: 306017 byte(s)
Auto-detect and optimize limited repetitions in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 int real_offset_count;
172 int offset_count;
173 int call_limit;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
186 } executable_functions;
187
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;   /* The jump stored in this node. */
  struct jump_list *next;    /* Following node of the list. */
} jump_list;
192
/* Node of a singly linked list of stubs; each stub pairs a jump (start)
   with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
198
/* Negative marker values for frame sizes. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
203
/* Kinds of control entries. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
208
209 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
210
211 /* The following structure is the key data type for the recursive
212 code generator. It is allocated by compile_matchingpath, and contains
213 the arguments for compile_backtrackingpath. Must be the first member
214 of its descendants. */
215 typedef struct backtrack_common {
216 /* Concatenation stack. */
217 struct backtrack_common *prev;
218 jump_list *nextbacktracks;
219 /* Internal stack (for component operators). */
220 struct backtrack_common *top;
221 jump_list *topbacktracks;
222 /* Opcode pointer. */
223 pcre_uchar *cc;
224 } backtrack_common;
225
226 typedef struct assert_backtrack {
227 backtrack_common common;
228 jump_list *condfailed;
229 /* Less than 0 if a frame is not needed. */
230 int framesize;
231 /* Points to our private memory word on the stack. */
232 int private_data_ptr;
233 /* For iterators. */
234 struct sljit_label *matchingpath;
235 } assert_backtrack;
236
237 typedef struct bracket_backtrack {
238 backtrack_common common;
239 /* Where to coninue if an alternative is successfully matched. */
240 struct sljit_label *alternative_matchingpath;
241 /* For rmin and rmax iterators. */
242 struct sljit_label *recursive_matchingpath;
243 /* For greedy ? operator. */
244 struct sljit_label *zero_matchingpath;
245 /* Contains the branches of a failed condition. */
246 union {
247 /* Both for OP_COND, OP_SCOND. */
248 jump_list *condfailed;
249 assert_backtrack *assert;
250 /* For OP_ONCE. Less than 0 if not needed. */
251 int framesize;
252 } u;
253 /* Points to our private memory word on the stack. */
254 int private_data_ptr;
255 } bracket_backtrack;
256
257 typedef struct bracketpos_backtrack {
258 backtrack_common common;
259 /* Points to our private memory word on the stack. */
260 int private_data_ptr;
261 /* Reverting stack is needed. */
262 int framesize;
263 /* Allocated stack size. */
264 int stacksize;
265 } bracketpos_backtrack;
266
267 typedef struct braminzero_backtrack {
268 backtrack_common common;
269 struct sljit_label *matchingpath;
270 } braminzero_backtrack;
271
272 typedef struct iterator_backtrack {
273 backtrack_common common;
274 /* Next iteration. */
275 struct sljit_label *matchingpath;
276 } iterator_backtrack;
277
278 typedef struct recurse_entry {
279 struct recurse_entry *next;
280 /* Contains the function entry. */
281 struct sljit_label *entry;
282 /* Collects the calls until the function is not created. */
283 jump_list *calls;
284 /* Points to the starting opcode. */
285 sljit_sw start;
286 } recurse_entry;
287
288 typedef struct recurse_backtrack {
289 backtrack_common common;
290 BOOL inlined_pattern;
291 } recurse_backtrack;
292
293 #define OP_THEN_TRAP OP_TABLE_LENGTH
294
295 typedef struct then_trap_backtrack {
296 backtrack_common common;
297 /* If then_trap is not NULL, this structure contains the real
298 then_trap for the backtracking path. */
299 struct then_trap_backtrack *then_trap;
300 /* Points to the starting opcode. */
301 sljit_sw start;
302 /* Exit point for the then opcodes of this alternative. */
303 jump_list *quit;
304 /* Frame size of the current alternative. */
305 int framesize;
306 } then_trap_backtrack;
307
308 #define MAX_RANGE_SIZE 6
309
310 typedef struct compiler_common {
311 /* The sljit ceneric compiler. */
312 struct sljit_compiler *compiler;
313 /* First byte code. */
314 pcre_uchar *start;
315 /* Maps private data offset to each opcode. */
316 sljit_si *private_data_ptrs;
317 /* Tells whether the capturing bracket is optimized. */
318 pcre_uint8 *optimized_cbracket;
319 /* Tells whether the starting offset is a target of then. */
320 pcre_uint8 *then_offsets;
321 /* Current position where a THEN must jump. */
322 then_trap_backtrack *then_trap;
323 /* Starting offset of private data for capturing brackets. */
324 int cbra_ptr;
325 /* Output vector starting point. Must be divisible by 2. */
326 int ovector_start;
327 /* Last known position of the requested byte. */
328 int req_char_ptr;
329 /* Head of the last recursion. */
330 int recursive_head_ptr;
331 /* First inspected character for partial matching. */
332 int start_used_ptr;
333 /* Starting pointer for partial soft matches. */
334 int hit_start;
335 /* End pointer of the first line. */
336 int first_line_end;
337 /* Points to the marked string. */
338 int mark_ptr;
339 /* Recursive control verb management chain. */
340 int control_head_ptr;
341 /* Points to the last matched capture block index. */
342 int capture_last_ptr;
343 /* Points to the starting position of the current match. */
344 int start_ptr;
345
346 /* Flipped and lower case tables. */
347 const pcre_uint8 *fcc;
348 sljit_sw lcc;
349 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
350 int mode;
351 /* \K is found in the pattern. */
352 BOOL has_set_som;
353 /* (*SKIP:arg) is found in the pattern. */
354 BOOL has_skip_arg;
355 /* (*THEN) is found in the pattern. */
356 BOOL has_then;
357 /* Needs to know the start position anytime. */
358 BOOL needs_start_ptr;
359 /* Currently in recurse or negative assert. */
360 BOOL local_exit;
361 /* Currently in a positive assert. */
362 BOOL positive_assert;
363 /* Newline control. */
364 int nltype;
365 int newline;
366 int bsr_nltype;
367 /* Dollar endonly. */
368 int endonly;
369 /* Tables. */
370 sljit_sw ctypes;
371 int digits[2 + MAX_RANGE_SIZE];
372 /* Named capturing brackets. */
373 sljit_uw name_table;
374 sljit_sw name_count;
375 sljit_sw name_entry_size;
376
377 /* Labels and jump lists. */
378 struct sljit_label *partialmatchlabel;
379 struct sljit_label *quit_label;
380 struct sljit_label *forced_quit_label;
381 struct sljit_label *accept_label;
382 stub_list *stubs;
383 recurse_entry *entries;
384 recurse_entry *currententry;
385 jump_list *partialmatch;
386 jump_list *quit;
387 jump_list *positive_assert_quit;
388 jump_list *forced_quit;
389 jump_list *accept;
390 jump_list *calllimit;
391 jump_list *stackalloc;
392 jump_list *revertframes;
393 jump_list *wordboundary;
394 jump_list *anynewline;
395 jump_list *hspace;
396 jump_list *vspace;
397 jump_list *casefulcmp;
398 jump_list *caselesscmp;
399 jump_list *reset_match;
400 BOOL jscript_compat;
401 #ifdef SUPPORT_UTF
402 BOOL utf;
403 #ifdef SUPPORT_UCP
404 BOOL use_ucp;
405 #endif
406 #ifndef COMPILE_PCRE32
407 jump_list *utfreadchar;
408 #endif
409 #ifdef COMPILE_PCRE8
410 jump_list *utfreadtype8;
411 #endif
412 #endif /* SUPPORT_UTF */
413 #ifdef SUPPORT_UCP
414 jump_list *getucd;
415 #endif
416 } compiler_common;
417
/* Context used by byte_sequence_compare. The two unions (c and oc) have
   identical layouts and exist only when SLJIT_UNALIGNED is enabled. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
451
/* Drop the sljit CMP macro; this file defines its own CMP further down. */
#undef CMP

/* Byte offset of stack element i: element 0 sits one sljit_sw below the
   stack pointer value, element 1 two below, and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Register assignment used by the generated code. */
#define TMP1          SLJIT_SCRATCH_REG1
#define TMP2          SLJIT_SCRATCH_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_SCRATCH_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
468
/* Layout of the local space, in units of sljit_sw. */

/* Two scratch words available to the opcode being compiled. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two words reserved for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Maximum number of recursions. */
#define CALL_LIMIT       (4 * sizeof(sljit_sw))
/* The output vector lives on the stack and holds character pointers. It is
   split into two groups: the first holds the start / end pointers, the second
   holds start pointers of capturing groups whose end has not been reached. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
486
487 #if defined COMPILE_PCRE8
488 #define MOV_UCHAR SLJIT_MOV_UB
489 #define MOVU_UCHAR SLJIT_MOVU_UB
490 #elif defined COMPILE_PCRE16
491 #define MOV_UCHAR SLJIT_MOV_UH
492 #define MOVU_UCHAR SLJIT_MOVU_UH
493 #elif defined COMPILE_PCRE32
494 #define MOV_UCHAR SLJIT_MOV_UI
495 #define MOVU_UCHAR SLJIT_MOVU_UI
496 #else
497 #error Unsupported compiling mode
498 #endif
499
/* Shorthand wrappers around the sljit emitters. Every wrapper except
   DEFINE_COMPILER expects a local variable named "compiler" to be in scope;
   DEFINE_COMPILER declares it from common->compiler. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler

/* Emit a one- or two-operand instruction. */
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))

/* Emit a label or a jump. */
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))

/* Emit a jump bound to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))

/* Bind an existing jump to a freshly emitted label, or to a given one. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))

/* Emit a compare; CMPTO also binds the result to a label. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
525
526 static pcre_uchar* bracketend(pcre_uchar* cc)
527 {
528 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
529 do cc += GET(cc, 1); while (*cc == OP_ALT);
530 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
531 cc += 1 + LINK_SIZE;
532 return cc;
533 }
534
535 /* Functions which might need modification for all new supported opcodes:
536 next_opcode
537 check_opcode_types
538 set_private_data_ptrs
539 get_framesize
540 init_frame
541 get_private_data_copy_length
542 copy_private_data
543 compile_matchingpath
544 compile_backtrackingpath
545 */
546
547 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
548 {
549 SLJIT_UNUSED_ARG(common);
550 switch(*cc)
551 {
552 case OP_SOD:
553 case OP_SOM:
554 case OP_SET_SOM:
555 case OP_NOT_WORD_BOUNDARY:
556 case OP_WORD_BOUNDARY:
557 case OP_NOT_DIGIT:
558 case OP_DIGIT:
559 case OP_NOT_WHITESPACE:
560 case OP_WHITESPACE:
561 case OP_NOT_WORDCHAR:
562 case OP_WORDCHAR:
563 case OP_ANY:
564 case OP_ALLANY:
565 case OP_NOTPROP:
566 case OP_PROP:
567 case OP_ANYNL:
568 case OP_NOT_HSPACE:
569 case OP_HSPACE:
570 case OP_NOT_VSPACE:
571 case OP_VSPACE:
572 case OP_EXTUNI:
573 case OP_EODN:
574 case OP_EOD:
575 case OP_CIRC:
576 case OP_CIRCM:
577 case OP_DOLL:
578 case OP_DOLLM:
579 case OP_CRSTAR:
580 case OP_CRMINSTAR:
581 case OP_CRPLUS:
582 case OP_CRMINPLUS:
583 case OP_CRQUERY:
584 case OP_CRMINQUERY:
585 case OP_CRRANGE:
586 case OP_CRMINRANGE:
587 case OP_CLASS:
588 case OP_NCLASS:
589 case OP_REF:
590 case OP_REFI:
591 case OP_RECURSE:
592 case OP_CALLOUT:
593 case OP_ALT:
594 case OP_KET:
595 case OP_KETRMAX:
596 case OP_KETRMIN:
597 case OP_KETRPOS:
598 case OP_REVERSE:
599 case OP_ASSERT:
600 case OP_ASSERT_NOT:
601 case OP_ASSERTBACK:
602 case OP_ASSERTBACK_NOT:
603 case OP_ONCE:
604 case OP_ONCE_NC:
605 case OP_BRA:
606 case OP_BRAPOS:
607 case OP_CBRA:
608 case OP_CBRAPOS:
609 case OP_COND:
610 case OP_SBRA:
611 case OP_SBRAPOS:
612 case OP_SCBRA:
613 case OP_SCBRAPOS:
614 case OP_SCOND:
615 case OP_CREF:
616 case OP_NCREF:
617 case OP_RREF:
618 case OP_NRREF:
619 case OP_DEF:
620 case OP_BRAZERO:
621 case OP_BRAMINZERO:
622 case OP_BRAPOSZERO:
623 case OP_PRUNE:
624 case OP_SKIP:
625 case OP_THEN:
626 case OP_COMMIT:
627 case OP_FAIL:
628 case OP_ACCEPT:
629 case OP_ASSERT_ACCEPT:
630 case OP_CLOSE:
631 case OP_SKIPZERO:
632 return cc + PRIV(OP_lengths)[*cc];
633
634 case OP_CHAR:
635 case OP_CHARI:
636 case OP_NOT:
637 case OP_NOTI:
638 case OP_STAR:
639 case OP_MINSTAR:
640 case OP_PLUS:
641 case OP_MINPLUS:
642 case OP_QUERY:
643 case OP_MINQUERY:
644 case OP_UPTO:
645 case OP_MINUPTO:
646 case OP_EXACT:
647 case OP_POSSTAR:
648 case OP_POSPLUS:
649 case OP_POSQUERY:
650 case OP_POSUPTO:
651 case OP_STARI:
652 case OP_MINSTARI:
653 case OP_PLUSI:
654 case OP_MINPLUSI:
655 case OP_QUERYI:
656 case OP_MINQUERYI:
657 case OP_UPTOI:
658 case OP_MINUPTOI:
659 case OP_EXACTI:
660 case OP_POSSTARI:
661 case OP_POSPLUSI:
662 case OP_POSQUERYI:
663 case OP_POSUPTOI:
664 case OP_NOTSTAR:
665 case OP_NOTMINSTAR:
666 case OP_NOTPLUS:
667 case OP_NOTMINPLUS:
668 case OP_NOTQUERY:
669 case OP_NOTMINQUERY:
670 case OP_NOTUPTO:
671 case OP_NOTMINUPTO:
672 case OP_NOTEXACT:
673 case OP_NOTPOSSTAR:
674 case OP_NOTPOSPLUS:
675 case OP_NOTPOSQUERY:
676 case OP_NOTPOSUPTO:
677 case OP_NOTSTARI:
678 case OP_NOTMINSTARI:
679 case OP_NOTPLUSI:
680 case OP_NOTMINPLUSI:
681 case OP_NOTQUERYI:
682 case OP_NOTMINQUERYI:
683 case OP_NOTUPTOI:
684 case OP_NOTMINUPTOI:
685 case OP_NOTEXACTI:
686 case OP_NOTPOSSTARI:
687 case OP_NOTPOSPLUSI:
688 case OP_NOTPOSQUERYI:
689 case OP_NOTPOSUPTOI:
690 cc += PRIV(OP_lengths)[*cc];
691 #ifdef SUPPORT_UTF
692 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
693 #endif
694 return cc;
695
696 /* Special cases. */
697 case OP_TYPESTAR:
698 case OP_TYPEMINSTAR:
699 case OP_TYPEPLUS:
700 case OP_TYPEMINPLUS:
701 case OP_TYPEQUERY:
702 case OP_TYPEMINQUERY:
703 case OP_TYPEUPTO:
704 case OP_TYPEMINUPTO:
705 case OP_TYPEEXACT:
706 case OP_TYPEPOSSTAR:
707 case OP_TYPEPOSPLUS:
708 case OP_TYPEPOSQUERY:
709 case OP_TYPEPOSUPTO:
710 return cc + PRIV(OP_lengths)[*cc] - 1;
711
712 case OP_ANYBYTE:
713 #ifdef SUPPORT_UTF
714 if (common->utf) return NULL;
715 #endif
716 return cc + 1;
717
718 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
719 case OP_XCLASS:
720 return cc + GET(cc, 1);
721 #endif
722
723 case OP_MARK:
724 case OP_PRUNE_ARG:
725 case OP_SKIP_ARG:
726 case OP_THEN_ARG:
727 return cc + 1 + 2 + cc[1];
728
729 default:
730 /* All opcodes are supported now! */
731 SLJIT_ASSERT_STOP();
732 return NULL;
733 }
734 }
735
736 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
737 {
738 pcre_uchar *name;
739 pcre_uchar *name2;
740 int i, cbra_index;
741
742 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
743 while (cc < ccend)
744 {
745 switch(*cc)
746 {
747 case OP_SET_SOM:
748 common->has_set_som = TRUE;
749 cc += 1;
750 break;
751
752 case OP_REF:
753 case OP_REFI:
754 common->optimized_cbracket[GET2(cc, 1)] = 0;
755 cc += 1 + IMM2_SIZE;
756 break;
757
758 case OP_CBRAPOS:
759 case OP_SCBRAPOS:
760 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
761 cc += 1 + LINK_SIZE + IMM2_SIZE;
762 break;
763
764 case OP_COND:
765 case OP_SCOND:
766 /* Only AUTO_CALLOUT can insert this opcode. We do
767 not intend to support this case. */
768 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
769 return FALSE;
770 cc += 1 + LINK_SIZE;
771 break;
772
773 case OP_CREF:
774 i = GET2(cc, 1);
775 common->optimized_cbracket[i] = 0;
776 cc += 1 + IMM2_SIZE;
777 break;
778
779 case OP_NCREF:
780 cbra_index = GET2(cc, 1);
781 name = (pcre_uchar *)common->name_table;
782 name2 = name;
783 for (i = 0; i < common->name_count; i++)
784 {
785 if (GET2(name, 0) == cbra_index) break;
786 name += common->name_entry_size;
787 }
788 SLJIT_ASSERT(i != common->name_count);
789
790 for (i = 0; i < common->name_count; i++)
791 {
792 if (STRCMP_UC_UC(name2 + IMM2_SIZE, name + IMM2_SIZE) == 0)
793 common->optimized_cbracket[GET2(name2, 0)] = 0;
794 name2 += common->name_entry_size;
795 }
796 cc += 1 + IMM2_SIZE;
797 break;
798
799 case OP_RECURSE:
800 /* Set its value only once. */
801 if (common->recursive_head_ptr == 0)
802 {
803 common->recursive_head_ptr = common->ovector_start;
804 common->ovector_start += sizeof(sljit_sw);
805 }
806 cc += 1 + LINK_SIZE;
807 break;
808
809 case OP_CALLOUT:
810 if (common->capture_last_ptr == 0)
811 {
812 common->capture_last_ptr = common->ovector_start;
813 common->ovector_start += sizeof(sljit_sw);
814 }
815 cc += 2 + 2 * LINK_SIZE;
816 break;
817
818 case OP_THEN_ARG:
819 common->has_then = TRUE;
820 common->control_head_ptr = 1;
821 /* Fall through. */
822
823 case OP_PRUNE_ARG:
824 common->needs_start_ptr = TRUE;
825 /* Fall through. */
826
827 case OP_MARK:
828 if (common->mark_ptr == 0)
829 {
830 common->mark_ptr = common->ovector_start;
831 common->ovector_start += sizeof(sljit_sw);
832 }
833 cc += 1 + 2 + cc[1];
834 break;
835
836 case OP_THEN:
837 common->has_then = TRUE;
838 common->control_head_ptr = 1;
839 /* Fall through. */
840
841 case OP_PRUNE:
842 case OP_SKIP:
843 common->needs_start_ptr = TRUE;
844 cc += 1;
845 break;
846
847 case OP_SKIP_ARG:
848 common->control_head_ptr = 1;
849 common->has_skip_arg = TRUE;
850 cc += 1 + 2 + cc[1];
851 break;
852
853 default:
854 cc = next_opcode(common, cc);
855 if (cc == NULL)
856 return FALSE;
857 break;
858 }
859 }
860 return TRUE;
861 }
862
863 static int get_class_iterator_size(pcre_uchar *cc)
864 {
865 switch(*cc)
866 {
867 case OP_CRSTAR:
868 case OP_CRPLUS:
869 return 2;
870
871 case OP_CRMINSTAR:
872 case OP_CRMINPLUS:
873 case OP_CRQUERY:
874 case OP_CRMINQUERY:
875 return 1;
876
877 case OP_CRRANGE:
878 case OP_CRMINRANGE:
879 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
880 return 0;
881 return 2;
882
883 default:
884 return 0;
885 }
886 }
887
888 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
889 {
890 pcre_uchar *end = bracketend(begin);
891 pcre_uchar *next;
892 pcre_uchar *next_end;
893 pcre_uchar *max_end;
894 pcre_uchar type;
895 sljit_uw length = end - begin;
896 int min, max, i;
897
898 /* Detect fixed iterations first. */
899 if (end[-(1 + LINK_SIZE)] != OP_KET)
900 return FALSE;
901
902 /* Already detected repeat. */
903 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
904 return TRUE;
905
906 next = end;
907 min = 1;
908 while (1)
909 {
910 if (*next != *begin)
911 break;
912 next_end = bracketend(next);
913 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
914 break;
915 next = next_end;
916 min++;
917 }
918
919 if (min == 2)
920 return FALSE;
921
922 max = 0;
923 max_end = next;
924 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
925 {
926 type = *next;
927 while (1)
928 {
929 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
930 break;
931 next_end = bracketend(next + 2 + LINK_SIZE);
932 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
933 break;
934 next = next_end;
935 max++;
936 }
937
938 if (next[0] == type && next[1] == *begin && max >= 1)
939 {
940 next_end = bracketend(next + 1);
941 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
942 {
943 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
944 if (*next_end != OP_KET)
945 break;
946
947 if (i == max)
948 {
949 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
950 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
951 /* +2 the original and the last. */
952 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
953 if (min == 1)
954 return TRUE;
955 min--;
956 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
957 }
958 }
959 }
960 }
961
962 if (min >= 3)
963 {
964 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
965 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
966 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
967 return TRUE;
968 }
969
970 return FALSE;
971 }
972
/* Case-label groups for character iterators. Each macro expands to a run of
   "case" labels and is meant to be placed directly inside a switch. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR:     case OP_MINPLUS: \
    case OP_QUERY:       case OP_MINQUERY: \
    case OP_MINSTARI:    case OP_MINPLUSI: \
    case OP_QUERYI:      case OP_MINQUERYI: \
    case OP_NOTMINSTAR:  case OP_NOTMINPLUS: \
    case OP_NOTQUERY:    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI:   case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR:     case OP_PLUS: \
    case OP_STARI:    case OP_PLUSI: \
    case OP_NOTSTAR:  case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO:     case OP_MINUPTO: \
    case OP_UPTOI:    case OP_MINUPTOI: \
    case OP_NOTUPTO:  case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY:   case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:

1024
1025 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1026 {
1027 pcre_uchar *cc = common->start;
1028 pcre_uchar *alternative;
1029 pcre_uchar *end = NULL;
1030 int private_data_ptr = *private_data_start;
1031 int space, size, bracketlen;
1032
1033 while (cc < ccend)
1034 {
1035 space = 0;
1036 size = 0;
1037 bracketlen = 0;
1038 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1039 return;
1040
1041 if (*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_ONCE || *cc == OP_ONCE_NC)
1042 if (detect_repeat(common, cc))
1043 {
1044 /* These brackets are converted to repeats, so no global
1045 based single character repeat is allowed. */
1046 if (cc >= end)
1047 end = bracketend(cc);
1048 }
1049
1050 switch(*cc)
1051 {
1052 case OP_KET:
1053 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1054 {
1055 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1056 private_data_ptr += sizeof(sljit_sw);
1057 cc += common->private_data_ptrs[cc + 1 - common->start];
1058 }
1059 cc += 1 + LINK_SIZE;
1060 break;
1061
1062 case OP_ASSERT:
1063 case OP_ASSERT_NOT:
1064 case OP_ASSERTBACK:
1065 case OP_ASSERTBACK_NOT:
1066 case OP_ONCE:
1067 case OP_ONCE_NC:
1068 case OP_BRAPOS:
1069 case OP_SBRA:
1070 case OP_SBRAPOS:
1071 case OP_SCOND:
1072 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1073 private_data_ptr += sizeof(sljit_sw);
1074 bracketlen = 1 + LINK_SIZE;
1075 break;
1076
1077 case OP_CBRAPOS:
1078 case OP_SCBRAPOS:
1079 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1080 private_data_ptr += sizeof(sljit_sw);
1081 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1082 break;
1083
1084 case OP_COND:
1085 /* Might be a hidden SCOND. */
1086 alternative = cc + GET(cc, 1);
1087 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1088 {
1089 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1090 private_data_ptr += sizeof(sljit_sw);
1091 }
1092 bracketlen = 1 + LINK_SIZE;
1093 break;
1094
1095 case OP_BRA:
1096 bracketlen = 1 + LINK_SIZE;
1097 break;
1098
1099 case OP_CBRA:
1100 case OP_SCBRA:
1101 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1102 break;
1103
1104 CASE_ITERATOR_PRIVATE_DATA_1
1105 space = 1;
1106 size = -2;
1107 break;
1108
1109 CASE_ITERATOR_PRIVATE_DATA_2A
1110 space = 2;
1111 size = -2;
1112 break;
1113
1114 CASE_ITERATOR_PRIVATE_DATA_2B
1115 space = 2;
1116 size = -(2 + IMM2_SIZE);
1117 break;
1118
1119 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1120 space = 1;
1121 size = 1;
1122 break;
1123
1124 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1125 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1126 space = 2;
1127 size = 1;
1128 break;
1129
1130 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1131 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1132 space = 2;
1133 size = 1 + IMM2_SIZE;
1134 break;
1135
1136 case OP_CLASS:
1137 case OP_NCLASS:
1138 size += 1 + 32 / sizeof(pcre_uchar);
1139 space = get_class_iterator_size(cc + size);
1140 break;
1141
1142 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1143 case OP_XCLASS:
1144 size = GET(cc, 1);
1145 space = get_class_iterator_size(cc + size);
1146 break;
1147 #endif
1148
1149 default:
1150 cc = next_opcode(common, cc);
1151 SLJIT_ASSERT(cc != NULL);
1152 break;
1153 }
1154
1155 /* Character iterators, which are not inside a repeated bracket,
1156 gets a private slot instead of allocating it on the stack. */
1157 if (space > 0 && cc >= end)
1158 {
1159 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1160 private_data_ptr += sizeof(sljit_sw) * space;
1161 }
1162
1163 if (size != 0)
1164 {
1165 if (size < 0)
1166 {
1167 cc += -size;
1168 #ifdef SUPPORT_UTF
1169 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1170 #endif
1171 }
1172 else
1173 cc += size;
1174 }
1175
1176 if (bracketlen > 0)
1177 {
1178 if (cc >= end)
1179 {
1180 end = bracketend(cc);
1181 if (end[-1 - LINK_SIZE] == OP_KET)
1182 end = NULL;
1183 }
1184 cc += bracketlen;
1185 }
1186 }
1187 *private_data_start = private_data_ptr;
1188 }
1189
/* Calculates the size of the frame for the byte code range [cc, ccend).
When ccend is NULL, the range is the inside of the bracket starting at cc.
  recursive            when TRUE, OP_SET_SOM and the mark are treated as
                       already found, so they are not counted here
  needs_control_head   output; set to TRUE when OP_MARK, OP_PRUNE_ARG or
                       OP_THEN_ARG is found and common->control_head_ptr is
                       in use (or always, if DEBUG_FORCE_CONTROL_HEAD is set)
Returns length + 1 when frame items were found. Otherwise the result is
no_frame if any opcode requiring stack restore was seen, else no_stack. */
1190 /* Returns with a frame_types (always < 0) if no need for frame. */
1191 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1192 {
1193 int length = 0;
1194 int possessive = 0;
1195 BOOL stack_restore = FALSE;
1196 BOOL setsom_found = recursive;
1197 BOOL setmark_found = recursive;
1198 /* The last capture is a local variable even for recursions. */
1199 BOOL capture_last_found = FALSE;
1200
1201 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1202 SLJIT_ASSERT(common->control_head_ptr != 0);
1203 *needs_control_head = TRUE;
1204 #else
1205 *needs_control_head = FALSE;
1206 #endif
1207
/* No explicit end: scan the bracket which starts at cc. */
1208 if (ccend == NULL)
1209 {
1210 ccend = bracketend(cc) - (1 + LINK_SIZE);
1211 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1212 {
/* Words reserved for the possessive capturing bracket itself. If nothing
else is added to length, no frame is needed (see the check after the loop). */
1213 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1214 /* This is correct regardless of common->capture_last_ptr. */
1215 capture_last_found = TRUE;
1216 }
1217 cc = next_opcode(common, cc);
1218 }
1219
1220 SLJIT_ASSERT(cc != NULL);
1221 while (cc < ccend)
1222 switch(*cc)
1223 {
/* Two words, counted only once. */
1224 case OP_SET_SOM:
1225 SLJIT_ASSERT(common->has_set_som);
1226 stack_restore = TRUE;
1227 if (!setsom_found)
1228 {
1229 length += 2;
1230 setsom_found = TRUE;
1231 }
1232 cc += 1;
1233 break;
1234
/* Two words for the mark, counted only once. */
1235 case OP_MARK:
1236 case OP_PRUNE_ARG:
1237 case OP_THEN_ARG:
1238 SLJIT_ASSERT(common->mark_ptr != 0);
1239 stack_restore = TRUE;
1240 if (!setmark_found)
1241 {
1242 length += 2;
1243 setmark_found = TRUE;
1244 }
1245 if (common->control_head_ptr != 0)
1246 *needs_control_head = TRUE;
1247 cc += 1 + 2 + cc[1];
1248 break;
1249
/* A recursion counts every item which is in use and not counted yet. */
1250 case OP_RECURSE:
1251 stack_restore = TRUE;
1252 if (common->has_set_som && !setsom_found)
1253 {
1254 length += 2;
1255 setsom_found = TRUE;
1256 }
1257 if (common->mark_ptr != 0 && !setmark_found)
1258 {
1259 length += 2;
1260 setmark_found = TRUE;
1261 }
1262 if (common->capture_last_ptr != 0 && !capture_last_found)
1263 {
1264 length += 2;
1265 capture_last_found = TRUE;
1266 }
1267 cc += 1 + LINK_SIZE;
1268 break;
1269
/* Three words for each capturing bracket, plus the last capture (once). */
1270 case OP_CBRA:
1271 case OP_CBRAPOS:
1272 case OP_SCBRA:
1273 case OP_SCBRAPOS:
1274 stack_restore = TRUE;
1275 if (common->capture_last_ptr != 0 && !capture_last_found)
1276 {
1277 length += 2;
1278 capture_last_found = TRUE;
1279 }
1280 length += 3;
1281 cc += 1 + LINK_SIZE + IMM2_SIZE;
1282 break;
1283
/* Any opcode which is not listed in this switch sets stack_restore. The
opcodes listed after the default label are only skipped. */
1284 default:
1285 stack_restore = TRUE;
1286 /* Fall through. */
1287
1288 case OP_NOT_WORD_BOUNDARY:
1289 case OP_WORD_BOUNDARY:
1290 case OP_NOT_DIGIT:
1291 case OP_DIGIT:
1292 case OP_NOT_WHITESPACE:
1293 case OP_WHITESPACE:
1294 case OP_NOT_WORDCHAR:
1295 case OP_WORDCHAR:
1296 case OP_ANY:
1297 case OP_ALLANY:
1298 case OP_ANYBYTE:
1299 case OP_NOTPROP:
1300 case OP_PROP:
1301 case OP_ANYNL:
1302 case OP_NOT_HSPACE:
1303 case OP_HSPACE:
1304 case OP_NOT_VSPACE:
1305 case OP_VSPACE:
1306 case OP_EXTUNI:
1307 case OP_EODN:
1308 case OP_EOD:
1309 case OP_CIRC:
1310 case OP_CIRCM:
1311 case OP_DOLL:
1312 case OP_DOLLM:
1313 case OP_CHAR:
1314 case OP_CHARI:
1315 case OP_NOT:
1316 case OP_NOTI:
1317
1318 case OP_EXACT:
1319 case OP_POSSTAR:
1320 case OP_POSPLUS:
1321 case OP_POSQUERY:
1322 case OP_POSUPTO:
1323
1324 case OP_EXACTI:
1325 case OP_POSSTARI:
1326 case OP_POSPLUSI:
1327 case OP_POSQUERYI:
1328 case OP_POSUPTOI:
1329
1330 case OP_NOTEXACT:
1331 case OP_NOTPOSSTAR:
1332 case OP_NOTPOSPLUS:
1333 case OP_NOTPOSQUERY:
1334 case OP_NOTPOSUPTO:
1335
1336 case OP_NOTEXACTI:
1337 case OP_NOTPOSSTARI:
1338 case OP_NOTPOSPLUSI:
1339 case OP_NOTPOSQUERYI:
1340 case OP_NOTPOSUPTOI:
1341
1342 case OP_TYPEEXACT:
1343 case OP_TYPEPOSSTAR:
1344 case OP_TYPEPOSPLUS:
1345 case OP_TYPEPOSQUERY:
1346 case OP_TYPEPOSUPTO:
1347
1348 case OP_CLASS:
1349 case OP_NCLASS:
1350 case OP_XCLASS:
1351
1352 cc = next_opcode(common, cc);
1353 SLJIT_ASSERT(cc != NULL);
1354 break;
1355 }
1356
1357 /* Possessive quantifiers can use a special case. */
1358 if (SLJIT_UNLIKELY(possessive == length))
1359 return stack_restore ? no_frame : no_stack;
1360
/* The extra word is the zero terminator stored by init_frame. */
1361 if (length > 0)
1362 return length + 1;
1363 return stack_restore ? no_frame : no_stack;
1364 }
1365
1366 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1367 {
1368 DEFINE_COMPILER;
1369 BOOL setsom_found = recursive;
1370 BOOL setmark_found = recursive;
1371 /* The last capture is a local variable even for recursions. */
1372 BOOL capture_last_found = FALSE;
1373 int offset;
1374
1375 /* >= 1 + shortest item size (2) */
1376 SLJIT_UNUSED_ARG(stacktop);
1377 SLJIT_ASSERT(stackpos >= stacktop + 2);
1378
1379 stackpos = STACK(stackpos);
1380 if (ccend == NULL)
1381 {
1382 ccend = bracketend(cc) - (1 + LINK_SIZE);
1383 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1384 cc = next_opcode(common, cc);
1385 }
1386
1387 SLJIT_ASSERT(cc != NULL);
1388 while (cc < ccend)
1389 switch(*cc)
1390 {
1391 case OP_SET_SOM:
1392 SLJIT_ASSERT(common->has_set_som);
1393 if (!setsom_found)
1394 {
1395 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1396 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1397 stackpos += (int)sizeof(sljit_sw);
1398 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1399 stackpos += (int)sizeof(sljit_sw);
1400 setsom_found = TRUE;
1401 }
1402 cc += 1;
1403 break;
1404
1405 case OP_MARK:
1406 case OP_PRUNE_ARG:
1407 case OP_THEN_ARG:
1408 SLJIT_ASSERT(common->mark_ptr != 0);
1409 if (!setmark_found)
1410 {
1411 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1412 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1413 stackpos += (int)sizeof(sljit_sw);
1414 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1415 stackpos += (int)sizeof(sljit_sw);
1416 setmark_found = TRUE;
1417 }
1418 cc += 1 + 2 + cc[1];
1419 break;
1420
1421 case OP_RECURSE:
1422 if (common->has_set_som && !setsom_found)
1423 {
1424 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1425 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1426 stackpos += (int)sizeof(sljit_sw);
1427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1428 stackpos += (int)sizeof(sljit_sw);
1429 setsom_found = TRUE;
1430 }
1431 if (common->mark_ptr != 0 && !setmark_found)
1432 {
1433 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1435 stackpos += (int)sizeof(sljit_sw);
1436 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1437 stackpos += (int)sizeof(sljit_sw);
1438 setmark_found = TRUE;
1439 }
1440 if (common->capture_last_ptr != 0 && !capture_last_found)
1441 {
1442 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1444 stackpos += (int)sizeof(sljit_sw);
1445 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1446 stackpos += (int)sizeof(sljit_sw);
1447 capture_last_found = TRUE;
1448 }
1449 cc += 1 + LINK_SIZE;
1450 break;
1451
1452 case OP_CBRA:
1453 case OP_CBRAPOS:
1454 case OP_SCBRA:
1455 case OP_SCBRAPOS:
1456 if (common->capture_last_ptr != 0 && !capture_last_found)
1457 {
1458 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1459 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1460 stackpos += (int)sizeof(sljit_sw);
1461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1462 stackpos += (int)sizeof(sljit_sw);
1463 capture_last_found = TRUE;
1464 }
1465 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1467 stackpos += (int)sizeof(sljit_sw);
1468 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1469 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1470 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1471 stackpos += (int)sizeof(sljit_sw);
1472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1473 stackpos += (int)sizeof(sljit_sw);
1474
1475 cc += 1 + LINK_SIZE + IMM2_SIZE;
1476 break;
1477
1478 default:
1479 cc = next_opcode(common, cc);
1480 SLJIT_ASSERT(cc != NULL);
1481 break;
1482 }
1483
1484 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1485 SLJIT_ASSERT(stackpos == STACK(stacktop));
1486 }
1487
1488 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1489 {
1490 int private_data_length = needs_control_head ? 3 : 2;
1491 int size;
1492 pcre_uchar *alternative;
1493 /* Calculate the sum of the private machine words. */
1494 while (cc < ccend)
1495 {
1496 size = 0;
1497 switch(*cc)
1498 {
1499 case OP_ASSERT:
1500 case OP_ASSERT_NOT:
1501 case OP_ASSERTBACK:
1502 case OP_ASSERTBACK_NOT:
1503 case OP_ONCE:
1504 case OP_ONCE_NC:
1505 case OP_BRAPOS:
1506 case OP_SBRA:
1507 case OP_SBRAPOS:
1508 case OP_SCOND:
1509 private_data_length++;
1510 cc += 1 + LINK_SIZE;
1511 break;
1512
1513 case OP_CBRA:
1514 case OP_SCBRA:
1515 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1516 private_data_length++;
1517 cc += 1 + LINK_SIZE + IMM2_SIZE;
1518 break;
1519
1520 case OP_CBRAPOS:
1521 case OP_SCBRAPOS:
1522 private_data_length += 2;
1523 cc += 1 + LINK_SIZE + IMM2_SIZE;
1524 break;
1525
1526 case OP_COND:
1527 /* Might be a hidden SCOND. */
1528 alternative = cc + GET(cc, 1);
1529 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1530 private_data_length++;
1531 cc += 1 + LINK_SIZE;
1532 break;
1533
1534 CASE_ITERATOR_PRIVATE_DATA_1
1535 if (PRIVATE_DATA(cc))
1536 private_data_length++;
1537 cc += 2;
1538 #ifdef SUPPORT_UTF
1539 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1540 #endif
1541 break;
1542
1543 CASE_ITERATOR_PRIVATE_DATA_2A
1544 if (PRIVATE_DATA(cc))
1545 private_data_length += 2;
1546 cc += 2;
1547 #ifdef SUPPORT_UTF
1548 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1549 #endif
1550 break;
1551
1552 CASE_ITERATOR_PRIVATE_DATA_2B
1553 if (PRIVATE_DATA(cc))
1554 private_data_length += 2;
1555 cc += 2 + IMM2_SIZE;
1556 #ifdef SUPPORT_UTF
1557 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1558 #endif
1559 break;
1560
1561 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1562 if (PRIVATE_DATA(cc))
1563 private_data_length++;
1564 cc += 1;
1565 break;
1566
1567 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1568 if (PRIVATE_DATA(cc))
1569 private_data_length += 2;
1570 cc += 1;
1571 break;
1572
1573 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1574 if (PRIVATE_DATA(cc))
1575 private_data_length += 2;
1576 cc += 1 + IMM2_SIZE;
1577 break;
1578
1579 case OP_CLASS:
1580 case OP_NCLASS:
1581 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1582 case OP_XCLASS:
1583 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1584 #else
1585 size = 1 + 32 / (int)sizeof(pcre_uchar);
1586 #endif
1587 if (PRIVATE_DATA(cc))
1588 private_data_length += get_class_iterator_size(cc + size);
1589 cc += size;
1590 break;
1591
1592 default:
1593 cc = next_opcode(common, cc);
1594 SLJIT_ASSERT(cc != NULL);
1595 break;
1596 }
1597 }
1598 SLJIT_ASSERT(cc == ccend);
1599 return private_data_length;
1600 }
1601
/* Emits the code which copies the private data of the byte code range
[cc, ccend) between the local area (SLJIT_LOCALS_REG based) and the stack
(STACK_TOP based).
  save                 TRUE: the locals are stored on the stack, starting with
                       common->recursive_head_ptr (and the control head when
                       needs_control_head is set)
                       FALSE: the locals are loaded back from the stack
  stackptr, stacktop   stack slot indexes, converted by STACK(); the final
                       assert checks that exactly this range was processed
The values travel through TMP1 and TMP2 alternately (tmp1next selects the
register which is used next), so the load of a word and the store of the
previous word are interleaved.

NOTE(review): set_private_data_ptrs may assign a private data slot to OP_KET,
but OP_KET has no case here (nor in get_private_data_copy_length) - confirm
whether that slot needs to be saved and restored as well. */
1602 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1603 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1604 {
1605 DEFINE_COMPILER;
1606 int srcw[2];
1607 int count, size;
1608 BOOL tmp1next = TRUE;
1609 BOOL tmp1empty = TRUE;
1610 BOOL tmp2empty = TRUE;
1611 pcre_uchar *alternative;
1612 enum {
1613 start,
1614 loop,
1615 end
1616 } status;
1617
/* The start state (head pointers) is only processed when saving. */
1618 status = save ? start : loop;
1619 stackptr = STACK(stackptr - 2);
1620 stacktop = STACK(stacktop - 1);
1621
1622 if (!save)
1623 {
/* Skip the slot(s) of the head pointer(s), then preload up to two words. */
1624 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1625 if (stackptr < stacktop)
1626 {
1627 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1628 stackptr += sizeof(sljit_sw);
1629 tmp1empty = FALSE;
1630 }
1631 if (stackptr < stacktop)
1632 {
1633 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1634 stackptr += sizeof(sljit_sw);
1635 tmp2empty = FALSE;
1636 }
1637 /* The tmp1next must be TRUE in either way. */
1638 }
1639
/* Each iteration collects at most two local offsets in srcw (count tells how
many), which are then copied by the inner while loop in reverse order. */
1640 do
1641 {
1642 count = 0;
1643 switch(status)
1644 {
1645 case start:
1646 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1647 count = 1;
1648 srcw[0] = common->recursive_head_ptr;
1649 if (needs_control_head)
1650 {
1651 SLJIT_ASSERT(common->control_head_ptr != 0);
1652 count = 2;
1653 srcw[1] = common->control_head_ptr;
1654 }
1655 status = loop;
1656 break;
1657
1658 case loop:
1659 if (cc >= ccend)
1660 {
1661 status = end;
1662 break;
1663 }
1664
/* The cases below mirror get_private_data_copy_length. */
1665 switch(*cc)
1666 {
1667 case OP_ASSERT:
1668 case OP_ASSERT_NOT:
1669 case OP_ASSERTBACK:
1670 case OP_ASSERTBACK_NOT:
1671 case OP_ONCE:
1672 case OP_ONCE_NC:
1673 case OP_BRAPOS:
1674 case OP_SBRA:
1675 case OP_SBRAPOS:
1676 case OP_SCOND:
1677 count = 1;
1678 srcw[0] = PRIVATE_DATA(cc);
1679 SLJIT_ASSERT(srcw[0] != 0);
1680 cc += 1 + LINK_SIZE;
1681 break;
1682
1683 case OP_CBRA:
1684 case OP_SCBRA:
1685 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1686 {
1687 count = 1;
1688 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1689 }
1690 cc += 1 + LINK_SIZE + IMM2_SIZE;
1691 break;
1692
1693 case OP_CBRAPOS:
1694 case OP_SCBRAPOS:
1695 count = 2;
1696 srcw[0] = PRIVATE_DATA(cc);
1697 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1698 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1699 cc += 1 + LINK_SIZE + IMM2_SIZE;
1700 break;
1701
1702 case OP_COND:
1703 /* Might be a hidden SCOND. */
1704 alternative = cc + GET(cc, 1);
1705 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1706 {
1707 count = 1;
1708 srcw[0] = PRIVATE_DATA(cc);
1709 SLJIT_ASSERT(srcw[0] != 0);
1710 }
1711 cc += 1 + LINK_SIZE;
1712 break;
1713
1714 CASE_ITERATOR_PRIVATE_DATA_1
1715 if (PRIVATE_DATA(cc))
1716 {
1717 count = 1;
1718 srcw[0] = PRIVATE_DATA(cc);
1719 }
1720 cc += 2;
1721 #ifdef SUPPORT_UTF
1722 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1723 #endif
1724 break;
1725
1726 CASE_ITERATOR_PRIVATE_DATA_2A
1727 if (PRIVATE_DATA(cc))
1728 {
1729 count = 2;
1730 srcw[0] = PRIVATE_DATA(cc);
1731 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1732 }
1733 cc += 2;
1734 #ifdef SUPPORT_UTF
1735 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1736 #endif
1737 break;
1738
1739 CASE_ITERATOR_PRIVATE_DATA_2B
1740 if (PRIVATE_DATA(cc))
1741 {
1742 count = 2;
1743 srcw[0] = PRIVATE_DATA(cc);
1744 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1745 }
1746 cc += 2 + IMM2_SIZE;
1747 #ifdef SUPPORT_UTF
1748 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1749 #endif
1750 break;
1751
1752 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1753 if (PRIVATE_DATA(cc))
1754 {
1755 count = 1;
1756 srcw[0] = PRIVATE_DATA(cc);
1757 }
1758 cc += 1;
1759 break;
1760
1761 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1762 if (PRIVATE_DATA(cc))
1763 {
1764 count = 2;
1765 srcw[0] = PRIVATE_DATA(cc);
1766 srcw[1] = srcw[0] + sizeof(sljit_sw);
1767 }
1768 cc += 1;
1769 break;
1770
1771 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1772 if (PRIVATE_DATA(cc))
1773 {
1774 count = 2;
1775 srcw[0] = PRIVATE_DATA(cc);
1776 srcw[1] = srcw[0] + sizeof(sljit_sw);
1777 }
1778 cc += 1 + IMM2_SIZE;
1779 break;
1780
1781 case OP_CLASS:
1782 case OP_NCLASS:
1783 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1784 case OP_XCLASS:
1785 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1786 #else
1787 size = 1 + 32 / (int)sizeof(pcre_uchar);
1788 #endif
1789 if (PRIVATE_DATA(cc))
1790 switch(get_class_iterator_size(cc + size))
1791 {
1792 case 1:
1793 count = 1;
1794 srcw[0] = PRIVATE_DATA(cc);
1795 break;
1796
1797 case 2:
1798 count = 2;
1799 srcw[0] = PRIVATE_DATA(cc);
1800 srcw[1] = srcw[0] + sizeof(sljit_sw);
1801 break;
1802
1803 default:
1804 SLJIT_ASSERT_STOP();
1805 break;
1806 }
1807 cc += size;
1808 break;
1809
1810 default:
1811 cc = next_opcode(common, cc);
1812 SLJIT_ASSERT(cc != NULL);
1813 break;
1814 }
1815 break;
1816
1817 case end:
1818 SLJIT_ASSERT_STOP();
1819 break;
1820 }
1821
1822 while (count > 0)
1823 {
1824 count--;
1825 if (save)
1826 {
/* Saving: flush the previous content of the selected register to the stack
(if any), then load the next local into it. */
1827 if (tmp1next)
1828 {
1829 if (!tmp1empty)
1830 {
1831 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1832 stackptr += sizeof(sljit_sw);
1833 }
1834 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1835 tmp1empty = FALSE;
1836 tmp1next = FALSE;
1837 }
1838 else
1839 {
1840 if (!tmp2empty)
1841 {
1842 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1843 stackptr += sizeof(sljit_sw);
1844 }
1845 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1846 tmp2empty = FALSE;
1847 tmp1next = TRUE;
1848 }
1849 }
1850 else
1851 {
/* Restoring: write the preloaded register to the local, then refill the
register from the stack while there are words left. */
1852 if (tmp1next)
1853 {
1854 SLJIT_ASSERT(!tmp1empty);
1855 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1856 tmp1empty = stackptr >= stacktop;
1857 if (!tmp1empty)
1858 {
1859 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1860 stackptr += sizeof(sljit_sw);
1861 }
1862 tmp1next = FALSE;
1863 }
1864 else
1865 {
1866 SLJIT_ASSERT(!tmp2empty);
1867 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1868 tmp2empty = stackptr >= stacktop;
1869 if (!tmp2empty)
1870 {
1871 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1872 stackptr += sizeof(sljit_sw);
1873 }
1874 tmp1next = TRUE;
1875 }
1876 }
1877 }
1878 }
1879 while (status != end);
1880
/* Saving: flush the words which are still in the registers. The register
selected by tmp1next holds the older word, so it is stored first. */
1881 if (save)
1882 {
1883 if (tmp1next)
1884 {
1885 if (!tmp1empty)
1886 {
1887 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1888 stackptr += sizeof(sljit_sw);
1889 }
1890 if (!tmp2empty)
1891 {
1892 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1893 stackptr += sizeof(sljit_sw);
1894 }
1895 }
1896 else
1897 {
1898 if (!tmp2empty)
1899 {
1900 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1901 stackptr += sizeof(sljit_sw);
1902 }
1903 if (!tmp1empty)
1904 {
1905 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1906 stackptr += sizeof(sljit_sw);
1907 }
1908 }
1909 }
1910 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1911 }
1912
1913 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1914 {
1915 pcre_uchar *end = bracketend(cc);
1916 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1917
1918 /* Assert captures then. */
1919 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1920 current_offset = NULL;
1921 /* Conditional block does not. */
1922 if (*cc == OP_COND || *cc == OP_SCOND)
1923 has_alternatives = FALSE;
1924
1925 cc = next_opcode(common, cc);
1926 if (has_alternatives)
1927 current_offset = common->then_offsets + (cc - common->start);
1928
1929 while (cc < end)
1930 {
1931 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1932 cc = set_then_offsets(common, cc, current_offset);
1933 else
1934 {
1935 if (*cc == OP_ALT && has_alternatives)
1936 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1937 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1938 *current_offset = 1;
1939 cc = next_opcode(common, cc);
1940 }
1941 }
1942
1943 return end;
1944 }
1945
/* The case label helper macros are not available after this point. */
1946 #undef CASE_ITERATOR_PRIVATE_DATA_1
1947 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1948 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1949 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1950 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1951 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1952
1953 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1954 {
1955 return (value & (value - 1)) == 0;
1956 }
1957
1958 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1959 {
1960 while (list)
1961 {
1962 /* sljit_set_label is clever enough to do nothing
1963 if either the jump or the label is NULL. */
1964 SET_LABEL(list->jump, label);
1965 list = list->next;
1966 }
1967 }
1968
1969 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1970 {
1971 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1972 if (list_item)
1973 {
1974 list_item->next = *list;
1975 list_item->jump = jump;
1976 *list = list_item;
1977 }
1978 }
1979
1980 static void add_stub(compiler_common *common, struct sljit_jump *start)
1981 {
1982 DEFINE_COMPILER;
1983 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1984
1985 if (list_item)
1986 {
1987 list_item->start = start;
1988 list_item->quit = LABEL();
1989 list_item->next = common->stubs;
1990 common->stubs = list_item;
1991 }
1992 }
1993
1994 static void flush_stubs(compiler_common *common)
1995 {
1996 DEFINE_COMPILER;
1997 stub_list* list_item = common->stubs;
1998
1999 while (list_item)
2000 {
2001 JUMPHERE(list_item->start);
2002 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2003 JUMPTO(SLJIT_JUMP, list_item->quit);
2004 list_item = list_item->next;
2005 }
2006 common->stubs = NULL;
2007 }
2008
2009 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
2010 {
2011 DEFINE_COMPILER;
2012
2013 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
2014 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2015 }
2016
2017 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2018 {
2019 /* May destroy all locals and registers except TMP2. */
2020 DEFINE_COMPILER;
2021
2022 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2023 #ifdef DESTROY_REGISTERS
2024 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2025 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2026 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2027 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2028 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2029 #endif
2030 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2031 }
2032
2033 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2034 {
2035 DEFINE_COMPILER;
2036 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2037 }
2038
2039 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2040 {
2041 DEFINE_COMPILER;
2042 struct sljit_label *loop;
2043 int i;
2044
2045 /* At this point we can freely use all temporary registers. */
2046 SLJIT_ASSERT(length > 1);
2047 /* TMP1 returns with begin - 1. */
2048 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2049 if (length < 8)
2050 {
2051 for (i = 1; i < length; i++)
2052 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2053 }
2054 else
2055 {
2056 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2057 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2058 loop = LABEL();
2059 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2060 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2061 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2062 }
2063 }
2064
2065 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2066 {
2067 DEFINE_COMPILER;
2068 struct sljit_label *loop;
2069 int i;
2070
2071 SLJIT_ASSERT(length > 1);
2072 /* OVECTOR(1) contains the "string begin - 1" constant. */
2073 if (length > 2)
2074 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2075 if (length < 8)
2076 {
2077 for (i = 2; i < length; i++)
2078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2079 }
2080 else
2081 {
2082 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2083 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2084 loop = LABEL();
2085 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2086 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2087 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2088 }
2089
2090 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2091 if (common->mark_ptr != 0)
2092 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2093 if (common->control_head_ptr != 0)
2094 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2095 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2096 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2097 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2098 }
2099
2100 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2101 {
2102 while (current != NULL)
2103 {
2104 switch (current[-2])
2105 {
2106 case type_then_trap:
2107 break;
2108
2109 case type_mark:
2110 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2111 return current[-4];
2112 break;
2113
2114 default:
2115 SLJIT_ASSERT_STOP();
2116 break;
2117 }
2118 current = (sljit_sw*)current[-1];
2119 }
2120 return -1;
2121 }
2122
/* Emits the code which is executed after a successful match: OVECTOR(1) is
set to STR_PTR, the mark (if it is in use) is stored in the arguments, the
first offset_count items of the local ovector are copied to the offsets
array of the arguments, and the return value is computed.
  topbracket   the bracket index from which the search for the return value
               starts (see the loop at the end) */
2123 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2124 {
2125 DEFINE_COMPILER;
2126 struct sljit_label *loop;
2127 struct sljit_jump *early_quit;
2128
2129 /* At this point we can freely use all registers. */
/* SLJIT_SAVED_REG3 keeps the previous content of OVECTOR(1); the return
value search below compares the ovector items to it. */
2130 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2132
2133 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2134 if (common->mark_ptr != 0)
2135 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2136 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2137 if (common->mark_ptr != 0)
2138 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* The destination pointer starts one int before the offsets array, because
the store in the loop moves the pointer first. */
2139 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2140 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2141 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2142 /* Unlikely, but possible */
2143 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
/* Copy loop: each local item is converted to a value relative to begin (in
character units for the 16 and 32 bit libraries), and stored as an int. */
2144 loop = LABEL();
2145 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2146 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2147 /* Copy the integer value to the output buffer */
2148 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2149 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2150 #endif
2151 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2152 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2153 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2154 JUMPHERE(early_quit);
2155
2156 /* Calculate the return value, which is the maximum ovector value. */
2157 if (topbracket > 1)
2158 {
/* Walks backwards over the ovector pairs starting from topbracket, and
decreases the counter while the loaded item equals SLJIT_SAVED_REG3. */
2159 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2160 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2161
2162 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2163 loop = LABEL();
2164 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2165 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2166 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2167 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2168 }
2169 else
2170 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2171 }
2172
/* Emits the code which returns with PCRE_ERROR_PARTIAL, and jumps to quit.
If real_offset_count is less than 2, no offsets are stored. Otherwise:
  offsets[0]   start_used_ptr (hard partial mode) or hit_start (soft partial
               mode), relative to begin
  offsets[1]   STR_END relative to begin
  offsets[2]   only if real_offset_count is at least 3: start_ptr (hard mode)
               or the word after hit_start (soft mode), relative to begin
The values are converted to character units for the 16 and 32 bit libraries. */
2173 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2174 {
2175 DEFINE_COMPILER;
2176 struct sljit_jump *jump;
2177
/* SLJIT_SAVED_REG2 is overwritten below after STR_END is read. */
2178 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2179 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2180 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2181
2182 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2183 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2184 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Not enough space for a begin / end pair: return immediately. */
2185 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2186
2187 /* Store match begin and end. */
2188 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2189 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2190
/* offsets[2] is stored only if there is room for a third item. */
2191 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2192 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2193 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2194 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2195 #endif
2196 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2197 JUMPHERE(jump);
2198
/* offsets[1]: end of the subject relative to begin. */
2199 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2200 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2201 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2202 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2203 #endif
2204 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2205
/* offsets[0]: the value loaded into SLJIT_SCRATCH_REG3 above, relative to
begin. */
2206 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2207 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2208 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2209 #endif
2210 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2211
2212 JUMPTO(SLJIT_JUMP, quit);
2213 }
2214
2215 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2216 {
2217 /* May destroy TMP1. */
2218 DEFINE_COMPILER;
2219 struct sljit_jump *jump;
2220
2221 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2222 {
2223 /* The value of -1 must be kept for start_used_ptr! */
2224 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2225 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2226 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2227 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2228 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2229 JUMPHERE(jump);
2230 }
2231 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2232 {
2233 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2234 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2235 JUMPHERE(jump);
2236 }
2237 }
2238
2239 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2240 {
2241 /* Detects if the character has an othercase. */
2242 unsigned int c;
2243
2244 #ifdef SUPPORT_UTF
2245 if (common->utf)
2246 {
2247 GETCHAR(c, cc);
2248 if (c > 127)
2249 {
2250 #ifdef SUPPORT_UCP
2251 return c != UCD_OTHERCASE(c);
2252 #else
2253 return FALSE;
2254 #endif
2255 }
2256 #ifndef COMPILE_PCRE8
2257 return common->fcc[c] != c;
2258 #endif
2259 }
2260 else
2261 #endif
2262 c = *cc;
2263 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2264 }
2265
2266 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2267 {
2268 /* Returns with the othercase. */
2269 #ifdef SUPPORT_UTF
2270 if (common->utf && c > 127)
2271 {
2272 #ifdef SUPPORT_UCP
2273 return UCD_OTHERCASE(c);
2274 #else
2275 return c;
2276 #endif
2277 }
2278 #endif
2279 return TABLE_GET(c, common->fcc, c);
2280 }
2281
2282 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2283 {
2284 /* Detects if the character and its othercase has only 1 bit difference. */
2285 unsigned int c, oc, bit;
2286 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2287 int n;
2288 #endif
2289
2290 #ifdef SUPPORT_UTF
2291 if (common->utf)
2292 {
2293 GETCHAR(c, cc);
2294 if (c <= 127)
2295 oc = common->fcc[c];
2296 else
2297 {
2298 #ifdef SUPPORT_UCP
2299 oc = UCD_OTHERCASE(c);
2300 #else
2301 oc = c;
2302 #endif
2303 }
2304 }
2305 else
2306 {
2307 c = *cc;
2308 oc = TABLE_GET(c, common->fcc, c);
2309 }
2310 #else
2311 c = *cc;
2312 oc = TABLE_GET(c, common->fcc, c);
2313 #endif
2314
2315 SLJIT_ASSERT(c != oc);
2316
2317 bit = c ^ oc;
2318 /* Optimized for English alphabet. */
2319 if (c <= 127 && bit == 0x20)
2320 return (0 << 8) | 0x20;
2321
2322 /* Since c != oc, they must have at least 1 bit difference. */
2323 if (!is_powerof2(bit))
2324 return 0;
2325
2326 #if defined COMPILE_PCRE8
2327
2328 #ifdef SUPPORT_UTF
2329 if (common->utf && c > 127)
2330 {
2331 n = GET_EXTRALEN(*cc);
2332 while ((bit & 0x3f) == 0)
2333 {
2334 n--;
2335 bit >>= 6;
2336 }
2337 return (n << 8) | bit;
2338 }
2339 #endif /* SUPPORT_UTF */
2340 return (0 << 8) | bit;
2341
2342 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2343
2344 #ifdef SUPPORT_UTF
2345 if (common->utf && c > 65535)
2346 {
2347 if (bit >= (1 << 10))
2348 bit >>= 10;
2349 else
2350 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2351 }
2352 #endif /* SUPPORT_UTF */
2353 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2354
2355 #endif /* COMPILE_PCRE[8|16|32] */
2356 }
2357
2358 static void check_partial(compiler_common *common, BOOL force)
2359 {
2360 /* Checks whether a partial matching is occured. Does not modify registers. */
2361 DEFINE_COMPILER;
2362 struct sljit_jump *jump = NULL;
2363
2364 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2365
2366 if (common->mode == JIT_COMPILE)
2367 return;
2368
2369 if (!force)
2370 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2371 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2372 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2373
2374 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2375 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2376 else
2377 {
2378 if (common->partialmatchlabel != NULL)
2379 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2380 else
2381 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2382 }
2383
2384 if (jump != NULL)
2385 JUMPHERE(jump);
2386 }
2387
2388 static void check_str_end(compiler_common *common, jump_list **end_reached)
2389 {
2390 /* Does not affect registers. Usually used in a tight spot. */
2391 DEFINE_COMPILER;
2392 struct sljit_jump *jump;
2393
2394 if (common->mode == JIT_COMPILE)
2395 {
2396 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2397 return;
2398 }
2399
2400 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2401 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2402 {
2403 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2404 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2405 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2406 }
2407 else
2408 {
2409 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2410 if (common->partialmatchlabel != NULL)
2411 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2412 else
2413 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2414 }
2415 JUMPHERE(jump);
2416 }
2417
2418 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2419 {
2420 DEFINE_COMPILER;
2421 struct sljit_jump *jump;
2422
2423 if (common->mode == JIT_COMPILE)
2424 {
2425 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2426 return;
2427 }
2428
2429 /* Partial matching mode. */
2430 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2431 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2432 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2433 {
2434 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2435 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2436 }
2437 else
2438 {
2439 if (common->partialmatchlabel != NULL)
2440 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2441 else
2442 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2443 }
2444 JUMPHERE(jump);
2445 }
2446
2447 static void read_char(compiler_common *common)
2448 {
2449 /* Reads the character into TMP1, updates STR_PTR.
2450 Does not check STR_END. TMP2 Destroyed. */
2451 DEFINE_COMPILER;
2452 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2453 struct sljit_jump *jump;
2454 #endif
2455
2456 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2457 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2458 if (common->utf)
2459 {
2460 #if defined COMPILE_PCRE8
2461 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2462 #elif defined COMPILE_PCRE16
2463 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2464 #endif /* COMPILE_PCRE[8|16] */
2465 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2466 JUMPHERE(jump);
2467 }
2468 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2469 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2470 }
2471
2472 static void peek_char(compiler_common *common)
2473 {
2474 /* Reads the character into TMP1, keeps STR_PTR.
2475 Does not check STR_END. TMP2 Destroyed. */
2476 DEFINE_COMPILER;
2477 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2478 struct sljit_jump *jump;
2479 #endif
2480
2481 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2482 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2483 if (common->utf)
2484 {
2485 #if defined COMPILE_PCRE8
2486 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2487 #elif defined COMPILE_PCRE16
2488 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2489 #endif /* COMPILE_PCRE[8|16] */
2490 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2491 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2492 JUMPHERE(jump);
2493 }
2494 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2495 }
2496
2497 static void read_char8_type(compiler_common *common)
2498 {
2499 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2500 DEFINE_COMPILER;
2501 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2502 struct sljit_jump *jump;
2503 #endif
2504
2505 #ifdef SUPPORT_UTF
2506 if (common->utf)
2507 {
2508 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2509 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2510 #if defined COMPILE_PCRE8
2511 /* This can be an extra read in some situations, but hopefully
2512 it is needed in most cases. */
2513 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2514 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2515 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2516 JUMPHERE(jump);
2517 #elif defined COMPILE_PCRE16
2518 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2519 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2520 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2521 JUMPHERE(jump);
2522 /* Skip low surrogate if necessary. */
2523 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2524 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2525 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2526 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2527 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2528 #elif defined COMPILE_PCRE32
2529 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2530 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2531 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2532 JUMPHERE(jump);
2533 #endif /* COMPILE_PCRE[8|16|32] */
2534 return;
2535 }
2536 #endif /* SUPPORT_UTF */
2537 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2538 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2539 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2540 /* The ctypes array contains only 256 values. */
2541 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2542 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2543 #endif
2544 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2545 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2546 JUMPHERE(jump);
2547 #endif
2548 }
2549
2550 static void skip_char_back(compiler_common *common)
2551 {
2552 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2553 DEFINE_COMPILER;
2554 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2555 #if defined COMPILE_PCRE8
2556 struct sljit_label *label;
2557
2558 if (common->utf)
2559 {
2560 label = LABEL();
2561 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2562 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2563 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2564 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2565 return;
2566 }
2567 #elif defined COMPILE_PCRE16
2568 if (common->utf)
2569 {
2570 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2571 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2572 /* Skip low surrogate if necessary. */
2573 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2574 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2575 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2576 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2577 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2578 return;
2579 }
2580 #endif /* COMPILE_PCRE[8|16] */
2581 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2582 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2583 }
2584
2585 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2586 {
2587 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2588 DEFINE_COMPILER;
2589
2590 if (nltype == NLTYPE_ANY)
2591 {
2592 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2593 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2594 }
2595 else if (nltype == NLTYPE_ANYCRLF)
2596 {
2597 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2598 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2599 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2600 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2601 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2602 }
2603 else
2604 {
2605 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2606 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2607 }
2608 }
2609
2610 #ifdef SUPPORT_UTF
2611
2612 #if defined COMPILE_PCRE8
2613 static void do_utfreadchar(compiler_common *common)
2614 {
2615 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2616 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2617 DEFINE_COMPILER;
2618 struct sljit_jump *jump;
2619
2620 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2621 /* Searching for the first zero. */
2622 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2623 jump = JUMP(SLJIT_C_NOT_ZERO);
2624 /* Two byte sequence. */
2625 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2626 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2627 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2628 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2629 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2630 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2631 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2632 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2633 JUMPHERE(jump);
2634
2635 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2636 jump = JUMP(SLJIT_C_NOT_ZERO);
2637 /* Three byte sequence. */
2638 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2639 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2640 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2641 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2642 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2643 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2644 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2645 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2646 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2647 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2648 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2649 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2650 JUMPHERE(jump);
2651
2652 /* Four byte sequence. */
2653 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2654 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2655 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2656 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2657 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2658 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2659 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2660 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2661 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2662 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2663 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2664 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2665 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2666 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2667 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2668 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2669 }
2670
2671 static void do_utfreadtype8(compiler_common *common)
2672 {
2673 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2674 of the character (>= 0xc0). Return value in TMP1. */
2675 DEFINE_COMPILER;
2676 struct sljit_jump *jump;
2677 struct sljit_jump *compare;
2678
2679 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2680
2681 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2682 jump = JUMP(SLJIT_C_NOT_ZERO);
2683 /* Two byte sequence. */
2684 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2685 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2686 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2687 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2688 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2689 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2690 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2691 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2692 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2693
2694 JUMPHERE(compare);
2695 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2696 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2697 JUMPHERE(jump);
2698
2699 /* We only have types for characters less than 256. */
2700 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2701 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2702 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2703 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2704 }
2705
2706 #elif defined COMPILE_PCRE16
2707
2708 static void do_utfreadchar(compiler_common *common)
2709 {
2710 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2711 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2712 DEFINE_COMPILER;
2713 struct sljit_jump *jump;
2714
2715 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2716 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2717 /* Do nothing, only return. */
2718 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2719
2720 JUMPHERE(jump);
2721 /* Combine two 16 bit characters. */
2722 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2723 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2724 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2725 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2726 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2727 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2728 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2729 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2730 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2731 }
2732
2733 #endif /* COMPILE_PCRE[8|16] */
2734
2735 #endif /* SUPPORT_UTF */
2736
2737 #ifdef SUPPORT_UCP
2738
2739 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2740 #define UCD_BLOCK_MASK 127
2741 #define UCD_BLOCK_SHIFT 7
2742
2743 static void do_getucd(compiler_common *common)
2744 {
2745 /* Search the UCD record for the character comes in TMP1.
2746 Returns chartype in TMP1 and UCD offset in TMP2. */
2747 DEFINE_COMPILER;
2748
2749 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2750
2751 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2752 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2753 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2754 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2755 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2756 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2757 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2758 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2759 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2760 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2761 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2762 }
2763 #endif
2764
2765 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2766 {
2767 DEFINE_COMPILER;
2768 struct sljit_label *mainloop;
2769 struct sljit_label *newlinelabel = NULL;
2770 struct sljit_jump *start;
2771 struct sljit_jump *end = NULL;
2772 struct sljit_jump *nl = NULL;
2773 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2774 struct sljit_jump *singlechar;
2775 #endif
2776 jump_list *newline = NULL;
2777 BOOL newlinecheck = FALSE;
2778 BOOL readuchar = FALSE;
2779
2780 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2781 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2782 newlinecheck = TRUE;
2783
2784 if (firstline)
2785 {
2786 /* Search for the end of the first line. */
2787 SLJIT_ASSERT(common->first_line_end != 0);
2788 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2789
2790 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2791 {
2792 mainloop = LABEL();
2793 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2794 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2795 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2796 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2797 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2798 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2799 JUMPHERE(end);
2800 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2801 }
2802 else
2803 {
2804 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2805 mainloop = LABEL();
2806 /* Continual stores does not cause data dependency. */
2807 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2808 read_char(common);
2809 check_newlinechar(common, common->nltype, &newline, TRUE);
2810 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2811 JUMPHERE(end);
2812 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2813 set_jumps(newline, LABEL());
2814 }
2815
2816 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2817 }
2818
2819 start = JUMP(SLJIT_JUMP);
2820
2821 if (newlinecheck)
2822 {
2823 newlinelabel = LABEL();
2824 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2825 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2826 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2827 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2828 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2829 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2830 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2831 #endif
2832 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2833 nl = JUMP(SLJIT_JUMP);
2834 }
2835
2836 mainloop = LABEL();
2837
2838 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2839 #ifdef SUPPORT_UTF
2840 if (common->utf) readuchar = TRUE;
2841 #endif
2842 if (newlinecheck) readuchar = TRUE;
2843
2844 if (readuchar)
2845 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2846
2847 if (newlinecheck)
2848 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2849
2850 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2851 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2852 #if defined COMPILE_PCRE8
2853 if (common->utf)
2854 {
2855 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2856 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2857 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2858 JUMPHERE(singlechar);
2859 }
2860 #elif defined COMPILE_PCRE16
2861 if (common->utf)
2862 {
2863 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2864 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2865 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2866 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2867 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2868 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2869 JUMPHERE(singlechar);
2870 }
2871 #endif /* COMPILE_PCRE[8|16] */
2872 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2873 JUMPHERE(start);
2874
2875 if (newlinecheck)
2876 {
2877 JUMPHERE(end);
2878 JUMPHERE(nl);
2879 }
2880
2881 return mainloop;
2882 }
2883
2884 #define MAX_N_CHARS 3
2885
2886 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2887 {
2888 DEFINE_COMPILER;
2889 struct sljit_label *start;
2890 struct sljit_jump *quit;
2891 pcre_uint32 chars[MAX_N_CHARS * 2];
2892 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2893 int location = 0;
2894 pcre_int32 len, c, bit, caseless;
2895 int must_stop;
2896
2897 /* We do not support alternatives now. */
2898 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2899 return FALSE;
2900
2901 while (TRUE)
2902 {
2903 caseless = 0;
2904 must_stop = 1;
2905 switch(*cc)
2906 {
2907 case OP_CHAR:
2908 must_stop = 0;
2909 cc++;
2910 break;
2911
2912 case OP_CHARI:
2913 caseless = 1;
2914 must_stop = 0;
2915 cc++;
2916 break;
2917
2918 case OP_SOD:
2919 case OP_SOM:
2920 case OP_SET_SOM:
2921 case OP_NOT_WORD_BOUNDARY:
2922 case OP_WORD_BOUNDARY:
2923 case OP_EODN:
2924 case OP_EOD:
2925 case OP_CIRC:
2926 case OP_CIRCM:
2927 case OP_DOLL:
2928 case OP_DOLLM:
2929 /* Zero width assertions. */
2930 cc++;
2931 continue;
2932
2933 case OP_PLUS:
2934 case OP_MINPLUS:
2935 case OP_POSPLUS:
2936 cc++;
2937 break;
2938
2939 case OP_EXACT:
2940 cc += 1 + IMM2_SIZE;
2941 break;
2942
2943 case OP_PLUSI:
2944 case OP_MINPLUSI:
2945 case OP_POSPLUSI:
2946 caseless = 1;
2947 cc++;
2948 break;
2949
2950 case OP_EXACTI:
2951 caseless = 1;
2952 cc += 1 + IMM2_SIZE;
2953 break;
2954
2955 default:
2956 must_stop = 2;
2957 break;
2958 }
2959
2960 if (must_stop == 2)
2961 break;
2962
2963 len = 1;
2964 #ifdef SUPPORT_UTF
2965 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2966 #endif
2967
2968 if (caseless && char_has_othercase(common, cc))
2969 {
2970 caseless = char_get_othercase_bit(common, cc);
2971 if (caseless == 0)
2972 return FALSE;
2973 #ifdef COMPILE_PCRE8
2974 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2975 #else
2976 if ((caseless & 0x100) != 0)
2977 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2978 else
2979 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2980 #endif
2981 }
2982 else
2983 caseless = 0;
2984
2985 while (len > 0 && location < MAX_N_CHARS * 2)
2986 {
2987 c = *cc;
2988 bit = 0;
2989 if (len == (caseless & 0xff))
2990 {
2991 bit = caseless >> 8;
2992 c |= bit;
2993 }
2994
2995 chars[location] = c;
2996 chars[location + 1] = bit;
2997
2998 len--;
2999 location += 2;
3000 cc++;
3001 }
3002
3003 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3004 break;
3005 }
3006
3007 /* At least two characters are required. */
3008 if (location < 2 * 2)
3009 return FALSE;
3010
3011 if (firstline)
3012 {
3013 SLJIT_ASSERT(common->first_line_end != 0);
3014 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3015 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3016 }
3017 else
3018 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3019
3020 start = LABEL();
3021 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3022
3023 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3024 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3025 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3026 if (chars[1] != 0)
3027 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3028 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3029 if (location > 2 * 2)
3030 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3031 if (chars[3] != 0)
3032 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3033 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3034 if (location > 2 * 2)
3035 {
3036 if (chars[5] != 0)
3037 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3038 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3039 }
3040 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3041
3042 JUMPHERE(quit);
3043
3044 if (firstline)
3045 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3046 else
3047 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3048 return TRUE;
3049 }
3050
3051 #undef MAX_N_CHARS
3052
3053 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3054 {
3055 DEFINE_COMPILER;
3056 struct sljit_label *start;
3057 struct sljit_jump *quit;
3058 struct sljit_jump *found;
3059 pcre_uchar oc, bit;
3060
3061 if (firstline)
3062 {
3063 SLJIT_ASSERT(common->first_line_end != 0);
3064 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3065 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3066 }
3067
3068 start = LABEL();
3069 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3070 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3071
3072 oc = first_char;
3073 if (caseless)
3074 {
3075 oc = TABLE_GET(first_char, common->fcc, first_char);
3076 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3077 if (first_char > 127 && common->utf)
3078 oc = UCD_OTHERCASE(first_char);
3079 #endif
3080 }
3081 if (first_char == oc)
3082 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3083 else
3084 {
3085 bit = first_char ^ oc;
3086 if (is_powerof2(bit))
3087 {
3088 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3089 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3090 }
3091 else
3092 {
3093 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3094 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3095 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3096 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3097 found = JUMP(SLJIT_C_NOT_ZERO);
3098 }
3099 }
3100
3101 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3102 JUMPTO(SLJIT_JUMP, start);
3103 JUMPHERE(found);
3104 JUMPHERE(quit);
3105
3106 if (firstline)
3107 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3108 }
3109
3110 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3111 {
     /* Emits code that fast-forwards STR_PTR until it stands just after a
     newline sequence, or until STR_END is reached. Nothing is skipped when
     STR_PTR is at or before jit_arguments.str. If firstline is set, STR_END is
     replaced for the duration of the scan by the value stored at
     common->first_line_end; the real STR_END is parked in TMP3 and restored on
     both exit paths. TMP1 and TMP2 are used as scratch registers. */
3112 DEFINE_COMPILER;
3113 struct sljit_label *loop;
3114 struct sljit_jump *lastchar;
3115 struct sljit_jump *firstchar;
3116 struct sljit_jump *quit;
3117 struct sljit_jump *foundcr = NULL;
3118 struct sljit_jump *notfoundnl;
3119 jump_list *newline = NULL;
3120
3121 if (firstline)
3122 {
3123 SLJIT_ASSERT(common->first_line_end != 0);
     /* Save the real end of subject in TMP3 and limit the scan to the first line. */
3124 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3125 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3126 }
3127
     /* Fixed two-character newline: the first character is kept in bits 8-15
     of common->newline and the second one in bits 0-7. */
3128 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3129 {
3130 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3131 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3132 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3133 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3134 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3135
     /* Step STR_PTR back by one character when it is at least two characters
     past jit_arguments.begin, so that the first loop iteration tests the two
     characters immediately before the original position. */
3136 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3137 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3138 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3139 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3140 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3141 #endif
3142 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3143
     /* Advance one character at a time until the two characters just before
     STR_PTR form the newline, or STR_END is reached. */
3144 loop = LABEL();
3145 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3146 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3147 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3148 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3149 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3150 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3151
3152 JUMPHERE(quit);
3153 JUMPHERE(firstchar);
3154 JUMPHERE(lastchar);
3155
     /* Restore the real STR_END from TMP3, which is where it was saved on
     entry; this must mirror the restore at the end of the function. */
3156 if (firstline)
3157 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3158 return;
3159 }
3160
     /* All other newline types: no scan is needed at or before jit_arguments.str. */
3161 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3162 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3163 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
     /* Re-read the character before the current position first. */
3164 skip_char_back(common);
3165
3166 loop = LABEL();
3167 read_char(common);
3168 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
     /* CR is intercepted before the generic test so that a following NL can
     be consumed as part of the same newline. */
3169 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3170 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
     /* The jumps collected in 'newline' are bound to 'loop', i.e. they
     continue the scan; falling through ends it. */
3171 check_newlinechar(common, common->nltype, &newline, FALSE);
3172 set_jumps(newline, loop);
3173
3174 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3175 {
3176 quit = JUMP(SLJIT_JUMP);
3177 JUMPHERE(foundcr);
     /* After a CR: if the next character exists and is NL, step over it
     (TMP1 becomes 0 or one character size). */
3178 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3179 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3180 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3181 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3182 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3183 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3184 #endif
3185 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3186 JUMPHERE(notfoundnl);
3187 JUMPHERE(quit);
3188 }
3189 JUMPHERE(lastchar);
3190 JUMPHERE(firstchar);
3191
     /* Restore the real STR_END saved in TMP3 on entry. */
3192 if (firstline)
3193 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3194 }
3195
3196 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks); /* Forward declaration: defined further down, needed by fast_forward_start_bits. */
3197
3198 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3199 {
     /* Emits a loop that advances STR_PTR until the character at STR_PTR has
     its bit set in the 256-bit map whose address is passed in start_bits, or
     until STR_END is reached. If firstline is set, STR_END is replaced during
     the scan by the value stored at common->first_line_end; the real STR_END
     is parked in RETURN_ADDR and restored at the end. TMP1 and TMP2 are
     scratch; TMP3 keeps a copy of the character in UTF mode. */
3200 DEFINE_COMPILER;
3201 struct sljit_label *start;
3202 struct sljit_jump *quit;
3203 struct sljit_jump *found = NULL;
3204 jump_list *matches = NULL;
3205 pcre_uint8 inverted_start_bits[32];
3206 int i;
3207 #ifndef COMPILE_PCRE8
3208 struct sljit_jump *jump;
3209 #endif
3210
     /* check_class_ranges() is handed the complement of the map, so the
     jumps it collects in 'matches' are bound to the exit label below. */
3211 for (i = 0; i < 32; ++i)
3212 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3213
3214 if (firstline)
3215 {
3216 SLJIT_ASSERT(common->first_line_end != 0);
3217 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3218 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3219 }
3220
3221 start = LABEL();
3222 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3223 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3224 #ifdef SUPPORT_UTF
     /* Keep the code unit: TMP1 is overwritten by the tests below but is
     needed again to work out how far to advance. */
3225 if (common->utf)
3226 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3227 #endif
3228
     /* Prefer a few range compares; fall back to a bit-table lookup when the
     map cannot be described by check_class_ranges(). */
3229 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3230 {
3231 #ifndef COMPILE_PCRE8
     /* Values of 255 and above all use the table entry for 255. */
3232 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3233 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3234 JUMPHERE(jump);
3235 #endif
     /* TMP1 = start_bits[c >> 3], TMP2 = 1 << (c & 7); non-zero AND means found. */
3236 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3237 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3238 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3239 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3240 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3241 found = JUMP(SLJIT_C_NOT_ZERO);
3242 }
3243
     /* Not a start character: move past it and try again. */
3244 #ifdef SUPPORT_UTF
3245 if (common->utf)
3246 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3247 #endif
3248 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3249 #ifdef SUPPORT_UTF
3250 #if defined COMPILE_PCRE8
3251 if (common->utf)
3252 {
     /* For a lead byte of 0xc0 or above, add the extra length looked up in
     utf8_table4 (indexed by lead byte - 0xc0). */
3253 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3254 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3255 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3256 }
3257 #elif defined COMPILE_PCRE16
3258 if (common->utf)
3259 {
     /* If (c & 0xfc00) == 0xd800, skip one more 16-bit unit (2 bytes). */
3260 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3261 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3262 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3263 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3264 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3265 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3266 }
3267 #endif /* COMPILE_PCRE[8|16] */
3268 #endif /* SUPPORT_UTF */
3269 JUMPTO(SLJIT_JUMP, start);
     /* Exit point: reached on a hit (either test) or at the end of the subject. */
3270 if (found != NULL)
3271 JUMPHERE(found);
3272 if (matches != NULL)
3273 set_jumps(matches, LABEL());
3274 JUMPHERE(quit);
3275
3276 if (firstline)
3277 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3278 }
3279
3280 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3281 {
     /* Emits a forward scan for req_char (or, when caseless, its other case)
     that starts at STR_PTR, or one character later when has_firstchar is set.
     The position of a hit is stored in the locals slot common->req_char_ptr.
     The scan is bypassed when STR_PTR + REQ_BYTE_MAX is below STR_END, or when
     STR_PTR is below the position already stored in that slot.
     Returns the still-unbound jump that is taken when the scan reaches
     STR_END without a hit. TMP1 and TMP2 are used as scratch. */
3282 DEFINE_COMPILER;
3283 struct sljit_label *loop;
3284 struct sljit_jump *toolong;
3285 struct sljit_jump *alreadyfound;
3286 struct sljit_jump *found;
3287 struct sljit_jump *foundoc = NULL;
3288 struct sljit_jump *notfound;
3289 pcre_uint32 oc, bit;
3290
3291 SLJIT_ASSERT(common->req_char_ptr != 0);
3292 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3293 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3294 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3295 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3296
     /* TMP1 is the scan pointer. */
3297 if (has_firstchar)
3298 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3299 else
3300 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3301
3302 loop = LABEL();
3303 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3304
3305 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
     /* Work out the other-case character at compile time. */
3306 oc = req_char;
3307 if (caseless)
3308 {
3309 oc = TABLE_GET(req_char, common->fcc, req_char);
3310 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3311 if (req_char > 127 && common->utf)
3312 oc = UCD_OTHERCASE(req_char);
3313 #endif
3314 }
3315 if (req_char == oc)
3316 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3317 else
3318 {
3319 bit = req_char ^ oc;
     /* When the two cases differ in a single bit, force that bit on and
     compare once; otherwise compare against each case separately. */
3320 if (is_powerof2(bit))
3321 {
3322 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3323 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3324 }
3325 else
3326 {
3327 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3328 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3329 }
3330 }
3331 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3332 JUMPTO(SLJIT_JUMP, loop);
3333
3334 JUMPHERE(found);
3335 if (foundoc)
3336 JUMPHERE(foundoc);
     /* Remember where the character was seen. */
3337 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3338 JUMPHERE(alreadyfound);
3339 JUMPHERE(toolong);
3340 return notfound;
3341 }
3342
3343 static void do_revertframes(compiler_common *common)
3344 {
     /* Emits a fast-call routine (return address kept in RETURN_ADDR) that
     walks records starting at STACK_TOP and copies saved values back into the
     locals area. TMP1 is the read cursor, TMP3 holds the local base and TMP2
     the first word of the current record:
       > 0 : offset into the locals area; the next two words are written
             there, and the cursor moves on by three words;
       = 0 : end of the records, the routine returns;
       < 0 : negated offset into the locals area; the next word is written
             there, and the cursor moves on by two words. */
3345 DEFINE_COMPILER;
3346 struct sljit_jump *jump;
3347 struct sljit_label *mainloop;
3348
3349 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3350 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3351 GET_LOCAL_BASE(TMP3, 0, 0);
3352
3353 /* Drop frames until we reach STACK_TOP. */
3354 mainloop = LABEL();
3355 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
     /* Signed compare with zero; the flags are reused by the second jump below. */
3356 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3357 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3358
3359 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3360 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3361 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3362 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3363 JUMPTO(SLJIT_JUMP, mainloop);
3364
3365 JUMPHERE(jump);
3366 jump = JUMP(SLJIT_C_SIG_LESS);
3367 /* End of dropping frames. */
3368 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3369
3370 JUMPHERE(jump);
3371 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3372 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3373 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3374 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3375 JUMPTO(SLJIT_JUMP, mainloop);
3376 }
3377
3378 static void check_wordboundary(compiler_common *common)
3379 {
     /* Emits a fast-call routine (return address kept in LOCALS0) that tests
     for a word boundary at STR_PTR. LOCALS1 receives 1 if the previous
     character is a word character and 0 otherwise (also 0 when STR_PTR is at
     or before jit_arguments.begin); TMP2 receives the same for the character
     at STR_PTR (0 on the paths collected by check_str_end). The final XOR sets
     the flags: the result is non-zero when the two differ. */
3380 DEFINE_COMPILER;
3381 struct sljit_jump *skipread;
3382 jump_list *skipread_list = NULL;
3383 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3384 struct sljit_jump *jump;
3385 #endif
3386
     /* The non-UCP paths below shift the ctypes entry right by 4 to reach this bit. */
3387 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3388
3389 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3390 /* Get type of the previous char, and put it to LOCALS1. */
3391 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3392 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3393 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3394 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3395 skip_char_back(common);
3396 check_start_used_ptr(common);
3397 read_char(common);
3398
3399 /* Testing char type. */
3400 #ifdef SUPPORT_UCP
3401 if (common->use_ucp)
3402 {
     /* Underscore counts as a word character (TMP2 is preset to 1).
     Otherwise the getucd helper is called (presumably leaving the character
     type in TMP1) and the type is tested against the ranges
     ucp_Ll..ucp_Lu and ucp_Nd..ucp_No. */
3403 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3404 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3405 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3406 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3407 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3408 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3409 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3410 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3411 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3412 JUMPHERE(jump);
3413 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3414 }
3415 else
3416 #endif
3417 {
     /* Table lookup: characters above 255 skip the lookup, leaving LOCALS1 at 0. */
3418 #ifndef COMPILE_PCRE8
3419 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3420 #elif defined SUPPORT_UTF
3421 /* Here LOCALS1 has already been zeroed. */
3422 jump = NULL;
3423 if (common->utf)
3424 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3425 #endif /* COMPILE_PCRE8 */
3426 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3427 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3428 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3429 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3430 #ifndef COMPILE_PCRE8
3431 JUMPHERE(jump);
3432 #elif defined SUPPORT_UTF
3433 if (jump != NULL)
3434 JUMPHERE(jump);
3435 #endif /* COMPILE_PCRE8 */
3436 }
3437 JUMPHERE(skipread);
3438
     /* Now classify the character at STR_PTR into TMP2. */
3439 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3440 check_str_end(common, &skipread_list);
3441 peek_char(common);
3442
3443 /* Testing char type. This is a code duplication. */
3444 #ifdef SUPPORT_UCP
3445 if (common->use_ucp)
3446 {
3447 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3448 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3449 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3450 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3451 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3452 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3453 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3454 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3455 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3456 JUMPHERE(jump);
3457 }
3458 else
3459 #endif
3460 {
3461 #ifndef COMPILE_PCRE8
3462 /* TMP2 may be destroyed by peek_char. */
3463 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3464 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3465 #elif defined SUPPORT_UTF
3466 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3467 jump = NULL;
3468 if (common->utf)
3469 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3470 #endif
3471 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3472 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3473 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3474 #ifndef COMPILE_PCRE8
3475 JUMPHERE(jump);
3476 #elif defined SUPPORT_UTF
3477 if (jump != NULL)
3478 JUMPHERE(jump);
3479 #endif /* COMPILE_PCRE8 */
3480 }
3481 set_jumps(skipread_list, LABEL());
3482
     /* Boundary <=> the two word flags differ. */
3483 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3484 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3485 }
3486
3487 /*
3488 range format:
3489
3490 ranges[0] = number of bit-change positions stored (max MAX_RANGE_SIZE, -1 means invalid range).
3491 ranges[1] = first bit (0 or 1), i.e. the bit of character 0
3492 ranges[2] .. ranges[1 + ranges[0]] = positions of the bit changes (where the current bit is not equal to the previous one)
3493 */
3494
3495 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3496 {
     /* Tries to emit a short compare sequence for the character in TMP1,
     using a description with 1, 2 or 4 bit-change positions (ranges[0] holds
     the count, ranges[1] the bit of character 0, ranges[2..] the positions).
     The emitted jumps are added to 'backtracks'; they are taken when the
     character lies in an interval whose bit is 0. When readch is set,
     read_char() is emitted first. Returns TRUE if code was emitted and FALSE
     if the shape is not supported (nothing is emitted in that case).
     Note that TMP1 is modified by the sequences that rebase it. */
3497 DEFINE_COMPILER;
3498 struct sljit_jump *jump;
3499
3500 if (ranges[0] < 0)
3501 return FALSE;
3502
3503 switch(ranges[0])
3504 {
     /* One change: a single threshold compare. */
3505 case 1:
3506 if (readch)
3507 read_char(common);
3508 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3509 return TRUE;
3510
     /* Two changes: one interval [ranges[2], ranges[3]); rebase TMP1 to its
     start and compare once against its width. */
3511 case 2:
3512 if (readch)
3513 read_char(common);
3514 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3515 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3516 return TRUE;
3517
3518 case 4:
     /* Two single characters: compare against each of them. */
3519 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3520 {
3521 if (readch)
3522 read_char(common);
3523 if (ranges[1] != 0)
3524 {
3525 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3526 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3527 }
3528 else
3529 {
3530 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3531 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3532 JUMPHERE(jump);
3533 }
3534 return TRUE;
3535 }
     /* Two intervals of equal width whose starts are a power of two apart:
     OR in that distance, rebase to the start of the second interval and
     compare once against the width. */
3536 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3537 {
3538 if (readch)
3539 read_char(common);
3540 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3541 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3542 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3543 return TRUE;
3544 }
3545 return FALSE;
3546
3547 default:
3548 return FALSE;
3549 }
3550 }
3551
3552 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3553 {
     /* Builds, in 'ranges', the bit-change description of the 256-entry
     table at common->ctypes with respect to 'flag': ranges[1] is 1 if entry 0
     has the flag and 0 otherwise, and ranges[2..] lists every index at which
     (entry & flag) changes. ranges[0] is set to the number of positions
     stored, or left at -1 when more than MAX_RANGE_SIZE would be needed. */
3554 int i, bit, length;
3555 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3556
3557 bit = ctypes[0] & flag;
     /* Marked invalid until the scan completes successfully. */
3558 ranges[0] = -1;
3559 ranges[1] = bit != 0 ? 1 : 0;
3560 length = 0;
3561
3562 for (i = 1; i < 256; i++)
3563 if ((ctypes[i] & flag) != bit)
3564 {
3565 if (length >= MAX_RANGE_SIZE)
3566 return;
3567 ranges[2 + length] = i;
3568 length++;
3569 bit ^= flag;
3570 }
3571
     /* If the last entry still has the flag, close the run at 256. */
3572 if (bit != 0)
3573 {
3574 if (length >= MAX_RANGE_SIZE)
3575 return;
3576 ranges[2 + length] = 256;
3577 length++;
3578 }
3579 ranges[0] = length;
3580 }
3581
3582 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3583 {
     /* Converts the 256-bit map 'bits' (32 bytes, bit i of the map is bit
     (i & 7) of byte (i >> 3)) into a bit-change description and hands it to
     check_ranges() with readch == FALSE. A final change at 256 is appended
     when the last bit is 0 and nclass is set, or the last bit is 1 and nclass
     is clear. Returns FALSE if more than MAX_RANGE_SIZE positions would be
     needed, otherwise the result of check_ranges(). */
3584 int ranges[2 + MAX_RANGE_SIZE];
3585 pcre_uint8 bit, cbit, all;
3586 int i, byte, length = 0;
3587
3588 bit = bits[0] & 0x1;
3589 ranges[1] = bit;
3590 /* Can be 0 or 255. */
3591 all = -bit;
3592
3593 for (i = 0; i < 256; )
3594 {
3595 byte = i >> 3;
     /* A byte whose eight bits all equal the current bit holds no change
     and is skipped in one step. */
3596 if ((i & 0x7) == 0 && bits[byte] == all)
3597 i += 8;
3598 else
3599 {
3600 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3601 if (cbit != bit)
3602 {
3603 if (length >= MAX_RANGE_SIZE)
3604 return FALSE;
3605 ranges[2 + length] = i;
3606 length++;
3607 bit = cbit;
3608 all = -cbit;
3609 }
3610 i++;
3611 }
3612 }
3613
3614 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3615 {
3616 if (length >= MAX_RANGE_SIZE)
3617 return FALSE;
3618 ranges[2 + length] = 256;
3619 length++;
3620 }
3621 ranges[0] = length;
3622
3623 return check_ranges(common, ranges, backtracks, FALSE);
3624 }
3625
3626 static void check_anynewline(compiler_common *common)
3627 {
3628 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
     /* Emitted as a fast-call routine (return address in RETURN_ADDR).
     Characters accepted: 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in the
     16/32-bit builds and in 8-bit UTF mode. TMP2 ends up non-zero on a match
     and the final OP_FLAGS also sets the flags from it. TMP1 is modified as
     well (it is rebased by -0x0a). */
3629 DEFINE_COMPILER;
3630
3631 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3632
3633 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3634 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3635 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3636 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3637 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3638 #ifdef COMPILE_PCRE8
3639 if (common->utf)
3640 {
3641 #endif
     /* Setting the lowest bit lets one compare cover both 0x2028 and 0x2029. */
3642 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3643 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3644 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3645 #ifdef COMPILE_PCRE8
3646 }
3647 #endif
3648 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3649 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3650 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3651 }
3652
3653 static void check_hspace(compiler_common *common)
3654 {
3655 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
     /* Emitted as a fast-call routine (return address in RETURN_ADDR).
     Characters accepted: 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
     0x2000..0x200A, 0x202f, 0x205f and 0x3000 in the 16/32-bit builds and in
     8-bit UTF mode. TMP2 ends up non-zero on a match and the final OP_FLAGS
     also sets the flags from it. On the wide path TMP1 is rebased by -0x2000. */
3656 DEFINE_COMPILER;
3657
3658 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3659
3660 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3661 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3662 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3663 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3665 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3666 #ifdef COMPILE_PCRE8
3667 if (common->utf)
3668 {
3669 #endif
3670 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3671 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3672 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3673 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3674 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
     /* From here on TMP1 holds (character - 0x2000). */
3675 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3676 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3677 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3678 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3679 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3680 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3681 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3682 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3683 #ifdef COMPILE_PCRE8
3684 }
3685 #endif
3686 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3687 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3688
3689 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3690 }
3691
3692 static void check_vspace(compiler_common *common)
3693 {
3694 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
     /* Emitted as a fast-call routine (return address in RETURN_ADDR).
     Characters accepted: 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in the
     16/32-bit builds and in 8-bit UTF mode. TMP2 ends up non-zero on a match
     and the final OP_FLAGS also sets the flags from it. TMP1 is modified as
     well (it is rebased by -0x0a). */
3695 DEFINE_COMPILER;
3696
3697 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3698
3699 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3700 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3701 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3702 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3703 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3704 #ifdef COMPILE_PCRE8
3705 if (common->utf)
3706 {
3707 #endif
     /* Setting the lowest bit lets one compare cover both 0x2028 and 0x2029. */
3708 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3709 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3710 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3711 #ifdef COMPILE_PCRE8
3712 }
3713 #endif
3714 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3715 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3716
3717 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3718 }
3719
3720 #define CHAR1 STR_END /* Scratch alias for the compare routines below, which save and restore the underlying register. */
3721 #define CHAR2 STACK_TOP /* Second scratch alias, likewise saved and restored around its use. */
3722
3723 static void do_casefulcmp(compiler_common *common)
3724 {
     /* Emits a fast-call routine (return address in RETURN_ADDR) that
     compares two character sequences code unit by code unit. On entry TMP1
     points to the first sequence, TMP2 holds the length in bytes (it is
     decremented by IN_UCHARS(1) per step) and STR_PTR is TMP2 bytes past the
     start of the second sequence. The loop stops at the first difference or
     when TMP2 reaches zero. CHAR1 and CHAR2 are borrowed registers: their
     previous contents are kept in TMP3 and LOCALS0 and restored on exit. */
3725 DEFINE_COMPILER;
3726 struct sljit_jump *jump;
3727 struct sljit_label *label;
3728
3729 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3730 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3731 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3732 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
     /* Both pointers are moved one character back here and forward again
     after the loop; MOVU_UCHAR presumably updates its base register by the
     given offset before loading - confirm against the sljit documentation. */
3733 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3734 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3735
3736 label = LABEL();
3737 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3738 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3739 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3740 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3741 JUMPTO(SLJIT_C_NOT_ZERO, label);
3742
3743 JUMPHERE(jump);
3744 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3745 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3746 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3747 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3748 }
3749
3750 #define LCC_TABLE STACK_LIMIT /* Register borrowed by do_caselesscmp to hold common->lcc; its old value is kept in TMP3 and restored. */
3751
3752 static void do_caselesscmp(compiler_common *common)
3753 {
     /* Emits a fast-call routine (return address in RETURN_ADDR) that
     compares two character sequences after mapping each code unit through the
     table at common->lcc. On entry TMP1 points to the first sequence, TMP2
     holds the length in bytes (decremented by IN_UCHARS(1) per step) and
     STR_PTR is TMP2 bytes past the start of the second sequence. In the
     16/32-bit builds, code units above 255 are compared unmapped. The loop
     stops at the first difference or when TMP2 reaches zero. LCC_TABLE, CHAR1
     and CHAR2 are borrowed registers, saved in TMP3, LOCALS0 and LOCALS1 and
     restored on exit. */
3754 DEFINE_COMPILER;
3755 struct sljit_jump *jump;
3756 struct sljit_label *label;
3757
3758 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3759 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3760
3761 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3762 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3763 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3764 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
     /* Both pointers are moved one character back here and forward again
     after the loop; MOVU_UCHAR presumably updates its base register by the
     given offset before loading - confirm against the sljit documentation. */
3765 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3766 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3767
3768 label = LABEL();
3769 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3770 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3771 #ifndef COMPILE_PCRE8
     /* The table lookup is skipped for values above 255. */
3772 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3773 #endif
3774 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3775 #ifndef COMPILE_PCRE8
3776 JUMPHERE(jump);
3777 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3778 #endif
3779 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3780 #ifndef COMPILE_PCRE8
3781 JUMPHERE(jump);
3782 #endif
3783 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3784 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3785 JUMPTO(SLJIT_C_NOT_ZERO, label);
3786
3787 JUMPHERE(jump);
3788 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3789 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3790 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3791 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3792 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3793 }
3794
3795 #undef LCC_TABLE /* The register aliases are only meant for the compare routines above. */
3796 #undef CHAR1
3797 #undef CHAR2
3798
3799 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3800
3801 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3802 {
3803 /* This function would be inefficient to implement at the JIT level. */
     /* Caseless comparison of the characters in [src1, end1) with the text
     that starts at args->uchar_ptr and is bounded by args->end.
     Returns:
       - the position in the second text just after the compared characters
         when every character matches;
       - NULL on the first character that does not match;
       - (pcre_uchar*)1 when the second text runs out before end1 is reached. */
3804 pcre_uint32 c1, c2;
3805 const pcre_uchar *src2 = args->uchar_ptr;
3806 const pcre_uchar *end2 = args->end;
3807 const ucd_record *ur;
3808 const pcre_uint32 *pp;
3809
3810 while (src1 < end1)
3811 {
3812 if (src2 >= end2)
3813 return (pcre_uchar*)1;
3814 GETCHARINC(c1, src1);
3815 GETCHARINC(c2, src2);
3816 ur = GET_UCD(c2);
     /* Equal, or equal to the simple other case of c2? Otherwise c1 must
     appear in the caseless set of c2. */
3817 if (c1 != c2 && c1 != c2 + ur->other_case)
3818 {
3819 pp = PRIV(ucd_caseless_sets) + ur->caseset;
     /* The scan stops as soon as an entry exceeds c1, so it relies on
     each set being in ascending order and ending with a value larger than
     any character - presumably guaranteed by the table generator. */
3820 for (;;)
3821 {
3822 if (c1 < *pp) return NULL;
3823 if (c1 == *pp++) break;
3824 }
3825 }
3826 }
3827 return src2;
3828 }
3829
3830 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3831
3832 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3833 compare_context* context, jump_list **backtracks)
3834 {
     /* Emits the comparison of one pattern character at cc (all of its code
     units in UTF mode) with the subject text at STR_PTR - context->length.
     Mismatch jumps are added to 'backtracks'. 'context' carries state between
     successive calls: the number of bytes still to compare (length), the
     register holding the most recently loaded subject data (sourcereg, -1
     before the first call) and, when unaligned reads are available in the
     8/16-bit builds, the code units gathered so far (c, oc, ucharptr) so that
     several of them can be checked with one wider compare.
     Returns cc advanced past the character. */
3835 DEFINE_COMPILER;
3836 unsigned int othercasebit = 0;
3837 pcre_uchar *othercasechar = NULL;
3838 #ifdef SUPPORT_UTF
3839 int utflength;
3840 #endif
3841
3842 if (caseless && char_has_othercase(common, cc))
3843 {
3844 othercasebit = char_get_othercase_bit(common, cc);
3845 SLJIT_ASSERT(othercasebit);
3846 /* Extracting bit difference info. */
     /* The upper part of the value is the offset of the code unit that
     differs, the low 8 bits are the differing bit; in the 16/32-bit builds
     bit 0x100 says that the bit belongs to the second byte of that unit. */
3847 #if defined COMPILE_PCRE8
3848 othercasechar = cc + (othercasebit >> 8);
3849 othercasebit &= 0xff;
3850 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3851 /* Note that this code only handles characters in the BMP. If there
3852 ever are characters outside the BMP whose othercase differs in only one
3853 bit from itself (there currently are none), this code will need to be
3854 revised for COMPILE_PCRE32. */
3855 othercasechar = cc + (othercasebit >> 9);
3856 if ((othercasebit & 0x100) != 0)
3857 othercasebit = (othercasebit & 0xff) << 8;
3858 else
3859 othercasebit &= 0xff;
3860 #endif /* COMPILE_PCRE[8|16|32] */
3861 }
3862
     /* First call for this sequence: load the first chunk into TMP1, using
     the widest load that the remaining length allows. */
3863 if (context->sourcereg == -1)
3864 {
3865 #if defined COMPILE_PCRE8
3866 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3867 if (context->length >= 4)
3868 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3869 else if (context->length >= 2)
3870 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3871 else
3872 #endif
3873 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3874 #elif defined COMPILE_PCRE16
3875 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3876 if (context->length >= 4)
3877 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3878 else
3879 #endif
3880 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3881 #elif defined COMPILE_PCRE32
3882 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3883 #endif /* COMPILE_PCRE[8|16|32] */
3884 context->sourcereg = TMP2;
3885 }
3886
3887 #ifdef SUPPORT_UTF
3888 utflength = 1;
3889 if (common->utf && HAS_EXTRALEN(*cc))
3890 utflength += GET_EXTRALEN(*cc);
3891
3892 do
3893 {
3894 #endif
3895
3896 context->length -= IN_UCHARS(1);
3897 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3898
3899 /* Unaligned read is supported. */
     /* Gather the expected code unit (with the case bit forced on, if any)
     and the matching OR mask. */
3900 if (othercasebit != 0 && othercasechar == cc)
3901 {
3902 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3903 context->oc.asuchars[context->ucharptr] = othercasebit;
3904 }
3905 else
3906 {
3907 context->c.asuchars[context->ucharptr] = *cc;
3908 context->oc.asuchars[context->ucharptr] = 0;
3909 }
3910 context->ucharptr++;
3911
     /* Flush once a full chunk has been gathered or nothing is left. */
3912 #if defined COMPILE_PCRE8
3913 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3914 #else
3915 if (context->ucharptr >= 2 || context->length == 0)
3916 #endif
3917 {
     /* Load the next chunk into the idle register, then switch to the
     register loaded earlier and compare that one. */
3918 if (context->length >= 4)
3919 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3920 else if (context->length >= 2)
3921 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3922 #if defined COMPILE_PCRE8
3923 else if (context->length >= 1)
3924 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3925 #endif /* COMPILE_PCRE8 */
3926 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3927
3928 switch(context->ucharptr)
3929 {
3930 case 4 / sizeof(pcre_uchar):
3931 if (context->oc.asint != 0)
3932 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3933 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3934 break;
3935
3936 case 2 / sizeof(pcre_uchar):
3937 if (context->oc.asushort != 0)
3938 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3939 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3940 break;
3941
3942 #ifdef COMPILE_PCRE8
3943 case 1:
3944 if (context->oc.asbyte != 0)
3945 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3946 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3947 break;
3948 #endif
3949
3950 default:
3951 SLJIT_ASSERT_STOP();
3952 break;
3953 }
3954 context->ucharptr = 0;
3955 }
3956
3957 #else
3958
3959 /* Unaligned read is unsupported or in 32 bit mode. */
     /* One code unit per compare: load the next unit into the idle
     register, then switch registers and test the unit loaded earlier. */
3960 if (context->length >= 1)
3961 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3962
3963 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3964
3965 if (othercasebit != 0 && othercasechar == cc)
3966 {
3967 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3968 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3969 }
3970 else
3971 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3972
3973 #endif
3974
3975 cc++;
3976 #ifdef SUPPORT_UTF
3977 utflength--;
3978 }
3979 while (utflength > 0);
3980 #endif
3981
3982 return cc;
3983 }
3984
3985 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3986
3987 #define SET_TYPE_OFFSET(value) \
3988 if ((value) != typeoffset) \
3989 { \
3990 if ((value) > typeoffset) \
3991 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3992 else \
3993 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3994 } \
3995 typeoffset = (value);
     /* SET_TYPE_OFFSET(value): emits the SUB or ADD that moves typereg from
     the bias recorded in 'typeoffset' to the bias 'value', then records the
     new bias. Expects 'typereg' and 'typeoffset' to be in scope; 'value' is
     evaluated several times, and the expansion is more than one statement. */
3996
3997 #define SET_CHAR_OFFSET(value) \
3998 if ((value) != charoffset) \
3999 { \
4000 if ((value) > charoffset) \
4001 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
4002 else \
4003 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
4004 } \
4005 charoffset = (value);
     /* SET_CHAR_OFFSET(value): emits the SUB or ADD that moves TMP1 from the
     bias recorded in 'charoffset' to the bias 'value', then records the new
     bias. Expects 'charoffset' to be in scope; 'value' is evaluated several
     times, and the expansion is more than one statement. */
4006
4007 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4008 {
4009 DEFINE_COMPILER;
4010 jump_list *found = NULL;
4011 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
4012 pcre_int32 c, charoffset;
4013 const pcre_uint32 *other_cases;
4014 struct sljit_jump *jump = NULL;
4015 pcre_uchar *ccbegin;
4016 int compares, invertcmp, numberofcmps;
4017 #ifdef SUPPORT_UCP
4018 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4019 BOOL charsaved = FALSE;
4020 int typereg = TMP1, scriptreg = TMP1;
4021 pcre_int32 typeoffset;
4022 #endif
4023
4024 /* Although SUPPORT_UTF must be defined, we are
4025 not necessary in utf mode even in 8 bit mode. */
4026 detect_partial_match(common, backtracks);
4027 read_char(common);
4028
4029 if ((*cc++ & XCL_MAP) != 0)
4030 {
4031 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4032 #ifndef COMPILE_PCRE8
4033 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4034 #elif defined SUPPORT_UTF
4035 if (common->utf)
4036 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4037 #endif
4038
4039 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
4040 {
4041 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4042 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4043 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4044 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4045 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4046 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4047 }
4048
4049 #ifndef COMPILE_PCRE8
4050 JUMPHERE(jump);
4051 #elif defined SUPPORT_UTF
4052 if (common->utf)
4053 JUMPHERE(jump);
4054 #endif
4055 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4056 #ifdef SUPPORT_UCP
4057 charsaved = TRUE;
4058 #endif
4059 cc += 32 / sizeof(pcre_uchar);
4060 }
4061
4062 /* Scanning the necessary info. */
4063 ccbegin = cc;
4064 compares = 0;
4065 while (*cc != XCL_END)
4066 {
4067 compares++;
4068 if (*cc == XCL_SINGLE)
4069 {
4070 cc += 2;
4071 #ifdef SUPPORT_UTF
4072 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4073 #endif
4074 #ifdef SUPPORT_UCP
4075 needschar = TRUE;
4076 #endif
4077 }
4078 else if (*cc == XCL_RANGE)
4079 {
4080 cc += 2;
4081 #ifdef SUPPORT_UTF
4082 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4083 #endif
4084 cc++;
4085 #ifdef SUPPORT_UTF
4086 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4087 #endif
4088 #ifdef SUPPORT_UCP
4089 needschar = TRUE;
4090 #endif
4091 }
4092 #ifdef SUPPORT_UCP
4093 else
4094 {
4095 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4096 cc++;
4097 switch(*cc)
4098 {
4099 case PT_ANY:
4100 break;
4101
4102 case PT_LAMP:
4103 case PT_GC:
4104 case PT_PC:
4105 case PT_ALNUM:
4106 needstype = TRUE;
4107 break;
4108
4109 case PT_SC:
4110 needsscript = TRUE;
4111 break;
4112
4113 case PT_SPACE:
4114 case PT_PXSPACE:
4115 case PT_WORD:
4116 needstype = TRUE;
4117 needschar = TRUE;
4118 break;
4119
4120 case PT_CLIST:
4121 case PT_UCNC:
4122 needschar = TRUE;
4123 break;
4124
4125 default:
4126 SLJIT_ASSERT_STOP();
4127 break;
4128 }
4129 cc += 2;
4130 }
4131 #endif
4132 }
4133
4134 #ifdef SUPPORT_UCP
4135 /* Simple register allocation. TMP1 is preferred if possible. */
4136 if (needstype || needsscript)
4137 {
4138 if (needschar && !charsaved)
4139 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4140 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4141 if (needschar)
4142 {
4143 if (needstype)
4144 {
4145 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4146 typereg = RETURN_ADDR;
4147 }
4148
4149 if (needsscript)
4150 scriptreg = TMP3;
4151 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4152 }
4153 else if (needstype && needsscript)
4154 scriptreg = TMP3;
4155 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4156
4157 if (needsscript)
4158 {
4159 if (scriptreg == TMP1)
4160 {
4161 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4162 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4163 }
4164 else
4165 {
4166 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4167 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4168 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4169 }
4170 }
4171 }
4172 #endif
4173
4174 /* Generating code. */
4175 cc = ccbegin;
4176 charoffset = 0;
4177 numberofcmps = 0;
4178 #ifdef SUPPORT_UCP
4179 typeoffset = 0;
4180 #endif
4181
4182 while (*cc != XCL_END)
4183 {
4184 compares--;
4185 invertcmp = (compares == 0 && list != backtracks);
4186 jump = NULL;
4187
4188 if (*cc == XCL_SINGLE)
4189 {
4190 cc ++;
4191 #ifdef SUPPORT_UTF
4192 if (common->utf)
4193 {
4194 GETCHARINC(c, cc);
4195 }
4196 else
4197 #endif
4198 c = *cc++;
4199
4200 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4201 {
4202 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4203 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4204 numberofcmps++;
4205 }
4206 else if (numberofcmps > 0)
4207 {
4208 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4209 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4210 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4211 numberofcmps = 0;
4212 }
4213 else
4214 {
4215 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4216 numberofcmps = 0;
4217 }
4218 }
4219 else if (*cc == XCL_RANGE)
4220 {
4221 cc ++;
4222 #ifdef SUPPORT_UTF
4223 if (common->utf)
4224 {
4225 GETCHARINC(c, cc);
4226 }
4227 else
4228 #endif
4229 c = *cc++;
4230 SET_CHAR_OFFSET(c);
4231 #ifdef SUPPORT_UTF
4232 if (common->utf)
4233 {
4234 GETCHARINC(c, cc);
4235 }
4236 else
4237 #endif
4238 c = *cc++;
4239 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4240 {
4241 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4242 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4243 numberofcmps++;
4244 }
4245 else if (numberofcmps > 0)
4246 {
4247 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4248 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4249 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4250 numberofcmps = 0;
4251 }
4252 else
4253 {
4254 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4255 numberofcmps = 0;
4256 }
4257 }
4258 #ifdef SUPPORT_UCP
4259 else
4260 {
4261 if (*cc == XCL_NOTPROP)
4262 invertcmp ^= 0x1;
4263 cc++;
4264 switch(*cc)
4265 {
4266 case PT_ANY:
4267 if (list != backtracks)
4268 {
4269 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4270 continue;
4271 }
4272 else if (cc[-1] == XCL_NOTPROP)
4273 continue;
4274 jump = JUMP(SLJIT_JUMP);
4275 break;
4276
4277 case PT_LAMP:
4278 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4279 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4280 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4281 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4282 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4283 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4284 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4285 break;
4286
4287 case PT_GC:
4288 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4289 SET_TYPE_OFFSET(c);
4290 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4291 break;
4292
4293 case PT_PC:
4294 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4295 break;
4296
4297 case PT_SC:
4298 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4299 break;
4300
4301 case PT_SPACE:
4302 case PT_PXSPACE:
4303 if (*cc == PT_SPACE)
4304 {
4305 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4306 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4307 }
4308 SET_CHAR_OFFSET(9);
4309 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4310 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4311 if (*cc == PT_SPACE)
4312 JUMPHERE(jump);
4313
4314 SET_TYPE_OFFSET(ucp_Zl);
4315 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4316 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4317 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4318 break;
4319
4320 case PT_WORD:
4321 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4322 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4323 /* Fall through. */
4324
4325 case PT_ALNUM:
4326 SET_TYPE_OFFSET(ucp_Ll);
4327 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4328 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4329 SET_TYPE_OFFSET(ucp_Nd);
4330 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4331 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4332 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4333 break;
4334
4335 case PT_CLIST:
4336 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4337
4338 /* At least three characters are required.
4339 Otherwise this case would be handled by the normal code path. */
4340 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4341 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4342
4343 /* Optimizing character pairs, if their difference is power of 2. */
4344 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4345 {
4346 if (charoffset == 0)
4347 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4348 else
4349 {
4350 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4351 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4352 }
4353 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4354 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4355 other_cases += 2;
4356 }
4357 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4358 {
4359 if (charoffset == 0)
4360 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4361 else
4362 {
4363 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4364 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4365 }
4366 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4367 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4368
4369 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4370 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4371
4372 other_cases += 3;
4373 }
4374 else
4375 {
4376 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4377 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4378 }
4379
4380 while (*other_cases != NOTACHAR)
4381 {
4382 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4383 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4384 }
4385 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4386 break;
4387
4388 case PT_UCNC:
4389 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4390 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4391 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4392 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4393 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4394 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4395
4396 SET_CHAR_OFFSET(0xa0);
4397 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4398 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4399 SET_CHAR_OFFSET(0);
4400 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4401 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4402 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4403 break;
4404 }
4405 cc += 2;
4406 }
4407 #endif
4408
4409 if (jump != NULL)
4410 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4411 }
4412
4413 if (found != NULL)
4414 set_jumps(found, LABEL());
4415 }
4416
4417 #undef SET_TYPE_OFFSET
4418 #undef SET_CHAR_OFFSET
4419
4420 #endif
4421
/* Emits the matching-path code for a single item whose opcode is "type".
   cc is used as the position of the operand data of the opcode (for example
   the character of OP_CHAR, or the property type / value of OP_PROP), so it
   presumably points just after the opcode itself - TODO confirm against the
   callers. Every jump that has to be taken when the item cannot match is
   appended to the backtracks list. The return value is cc advanced over the
   operand data consumed by the opcode (cc itself when there is none). An
   opcode without a case below runs into SLJIT_ASSERT_STOP(). */
4422 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4423 {
4424 DEFINE_COMPILER;
4425 int length;
4426 unsigned int c, oc, bit;
4427 compare_context context;
4428 struct sljit_jump *jump[4];
4429 jump_list *end_list;
4430 #ifdef SUPPORT_UTF
4431 struct sljit_label *label;
4432 #ifdef SUPPORT_UCP
4433 pcre_uchar propdata[5];
4434 #endif
4435 #endif
4436
4437 switch(type)
4438 {
/* Backtrack unless STR_PTR equals the "begin" field of jit_arguments. */
4439 case OP_SOD:
4440 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4441 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4442 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4443 return cc;
4444
/* Backtrack unless STR_PTR equals the "str" field of jit_arguments. */
4445 case OP_SOM:
4446 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4447 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4448 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4449 return cc;
4450
/* The test itself is done by the shared routine collected in
   common->wordboundary; only the sense of the flag check differs. */
4451 case OP_NOT_WORD_BOUNDARY:
4452 case OP_WORD_BOUNDARY:
4453 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4454 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4455 return cc;
4456
4457 case OP_NOT_DIGIT:
4458 case OP_DIGIT:
4459 /* Digits are usually 0-9, so it is worth to optimize them. */
/* A value of -2 in digits[0] marks the range table as not yet computed. */
4460 if (common->digits[0] == -2)
4461 get_ctype_ranges(common, ctype_digit, common->digits);
4462 detect_partial_match(common, backtracks);
4463 /* Flip the starting bit in the negative case. */
4464 if (type == OP_NOT_DIGIT)
4465 common->digits[1] ^= 1;
/* Fall back to the ctype table test when no range check was generated. */
4466 if (!check_ranges(common, common->digits, backtracks, TRUE))
4467 {
4468 read_char8_type(common);
4469 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4470 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4471 }
/* Restore the shared range table to its original state. */
4472 if (type == OP_NOT_DIGIT)
4473 common->digits[1] ^= 1;
4474 return cc;
4475
/* Test the ctype_space bit of the value produced by read_char8_type(). */
4476 case OP_NOT_WHITESPACE:
4477 case OP_WHITESPACE:
4478 detect_partial_match(common, backtracks);
4479 read_char8_type(common);
4480 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4481 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4482 return cc;
4483
/* Test the ctype_word bit of the value produced by read_char8_type(). */
4484 case OP_NOT_WORDCHAR:
4485 case OP_WORDCHAR:
4486 detect_partial_match(common, backtracks);
4487 read_char8_type(common);
4488 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4489 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4490 return cc;
4491
4492 case OP_ANY:
4493 detect_partial_match(common, backtracks);
4494 read_char(common);
/* Fixed newline with a value above 255 is handled as a two character
   sequence: backtrack only when the character just read equals the high
   byte and the next subject character equals the low byte. Reaching the end
   of the subject after the first character is not a newline. */
4495 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4496 {
4497 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4498 end_list = NULL;
4499 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4500 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4501 else
4502 check_str_end(common, &end_list);
4503
4504 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4505 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4506 set_jumps(end_list, LABEL());
4507 JUMPHERE(jump[0]);
4508 }
4509 else
4510 check_newlinechar(common, common->nltype, backtracks, TRUE);
4511 return cc;
4512
4513 case OP_ALLANY:
4514 detect_partial_match(common, backtracks);
4515 #ifdef SUPPORT_UTF
4516 if (common->utf)
4517 {
/* Advance over one code unit, then over the rest of a multi-unit character. */
4518 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4519 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4520 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4521 #if defined COMPILE_PCRE8
/* Units of 0xc0 and above: the extra length is loaded from utf8_table4. */
4522 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4523 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4524 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4525 #elif defined COMPILE_PCRE16
/* Units whose top six bits equal 0xd800 add one more unit (two bytes). */
4526 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4527 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4528 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4529 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4530 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4531 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4532 #endif
4533 JUMPHERE(jump[0]);
4534 #endif /* COMPILE_PCRE[8|16] */
4535 return cc;
4536 }
4537 #endif
4538 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4539 return cc;
4540
/* Always advances by exactly one code unit. */
4541 case OP_ANYBYTE:
4542 detect_partial_match(common, backtracks);
4543 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4544 return cc;
4545
4546 #ifdef SUPPORT_UTF
4547 #ifdef SUPPORT_UCP
/* A single property item is wrapped into a temporary extended class
   (flag byte 0, one XCL_PROP / XCL_NOTPROP item, XCL_END) and compiled by
   compile_xclass_matchingpath(). */
4548 case OP_NOTPROP:
4549 case OP_PROP:
4550 propdata[0] = 0;
4551 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4552 propdata[2] = cc[0];
4553 propdata[3] = cc[1];
4554 propdata[4] = XCL_END;
4555 compile_xclass_matchingpath(common, propdata, backtracks);
4556 return cc + 2;
4557 #endif
4558 #endif
4559
4560 case OP_ANYNL:
4561 detect_partial_match(common, backtracks);
4562 read_char(common);
/* After CR, a following NL is consumed as well when it is present;
   any other first character goes through check_newlinechar(). */
4563 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4564 /* We don't need to handle soft partial matching case. */
4565 end_list = NULL;
4566 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4567 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4568 else
4569 check_str_end(common, &end_list);
4570 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4571 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4572 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4573 jump[2] = JUMP(SLJIT_JUMP);
4574 JUMPHERE(jump[0]);
4575 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4576 set_jumps(end_list, LABEL());
4577 JUMPHERE(jump[1]);
4578 JUMPHERE(jump[2]);
4579 return cc;
4580
/* The test is done by the shared routine collected in common->hspace. */
4581 case OP_NOT_HSPACE:
4582 case OP_HSPACE:
4583 detect_partial_match(common, backtracks);
4584 read_char(common);
4585 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4586 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4587 return cc;
4588
/* The test is done by the shared routine collected in common->vspace. */
4589 case OP_NOT_VSPACE:
4590 case OP_VSPACE:
4591 detect_partial_match(common, backtracks);
4592 read_char(common);
4593 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4594 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4595 return cc;
4596
4597 #ifdef SUPPORT_UCP
/* Consumes one character and then keeps consuming characters for as long as
   the ucp_gbtable entry of the previous character's gbprop value has the bit
   of the next character's gbprop value set. */
4598 case OP_EXTUNI:
4599 detect_partial_match(common, backtracks);
4600 read_char(common);
4601 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4602 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4603 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and reused to hold the previous gbprop. */
4604 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4605 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4606
4607 label = LABEL();
4608 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* Remember the position before the next character, so that it can be
   restored when that character does not extend the sequence. */
4609 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4610 read_char(common);
4611 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4612 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4613 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4614
4615 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4616 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4617 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4618 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4619 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4620 JUMPTO(SLJIT_C_NOT_ZERO, label);
4621
4622 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4623 JUMPHERE(jump[0]);
4624 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4625
4626 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4627 {
4628 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4629 /* Since we successfully read a char above, partial matching must occur. */
4630 check_partial(common, TRUE);
4631 JUMPHERE(jump[0]);
4632 }
4633 return cc;
4634 #endif
4635
/* Succeeds at the end of the subject (jump[0]) or when exactly one newline
   remains before the end. */
4636 case OP_EODN:
4637 /* Requires rather complex checks. */
4638 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4639 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4640 {
/* Two character fixed newline: exactly two code units must remain. */
4641 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4642 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4643 if (common->mode == JIT_COMPILE)
4644 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4645 else
4646 {
/* Partial modes: when STR_PTR + 2 is not STR_END, backtrack if more than
   two units remain or the first unit is not the first newline character;
   otherwise only the first newline character is present at the very end,
   which is reported through check_partial() before backtracking. */
4647 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4648 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4649 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4650 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4651 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4652 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4653 check_partial(common, TRUE);
4654 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4655 JUMPHERE(jump[1]);
4656 }
4657 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4658 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4659 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4660 }
4661 else if (common->nltype == NLTYPE_FIXED)
4662 {
/* One character fixed newline: it must be the last code unit. */
4663 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4664 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4665 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4666 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4667 }
4668 else
4669 {
/* Other newline types: a CR is accepted when it is the last unit (jump[2])
   or when it is followed by NL as the last two units (jump[3]). */
4670 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4671 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4672 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4673 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4674 jump[2] = JUMP(SLJIT_C_GREATER);
4675 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4676 /* Equal. */
4677 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4678 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4679 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4680
4681 JUMPHERE(jump[1]);
4682 if (common->nltype == NLTYPE_ANYCRLF)
4683 {
4684 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4685 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4686 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4687 }
4688 else
4689 {
/* The character is read only to test it: STR_PTR is saved in LOCALS1
   and restored afterwards. */
4690 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4691 read_char(common);
4692 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4693 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4694 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4695 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4696 }
4697 JUMPHERE(jump[2]);
4698 JUMPHERE(jump[3]);
4699 }
4700 JUMPHERE(jump[0]);
4701 check_partial(common, FALSE);
4702 return cc;
4703
/* Backtrack unless STR_PTR has reached STR_END. */
4704 case OP_EOD:
4705 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4706 check_partial(common, FALSE);
4707 return cc;
4708
/* Backtrack when STR_PTR is after "begin" or the notbol argument is set. */
4709 case OP_CIRC:
4710 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4711 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4712 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4713 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4714 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4715 return cc;
4716
/* At "begin" the match depends on notbol only; elsewhere STR_PTR must be
   before STR_END and must be preceded by a newline. */
4717 case OP_CIRCM:
4718 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4719 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4720 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4721 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4722 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4723 jump[0] = JUMP(SLJIT_JUMP);
4724 JUMPHERE(jump[1]);
4725
4726 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4727 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4728 {
/* Both characters of the newline must lie between "begin" and STR_PTR. */
4729 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4730 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4731 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4732 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4733 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4734 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4735 }
4736 else
4737 {
/* Step back one character and read it again, so it can be tested
   (STR_PTR presumably ends up where it started - TODO confirm). */
4738 skip_char_back(common);
4739 read_char(common);
4740 check_newlinechar(common, common->nltype, backtracks, FALSE);
4741 }
4742 JUMPHERE(jump[0]);
4743 return cc;
4744
/* Backtrack when the noteol argument is set; otherwise behave like OP_EODN,
   or like a plain end-of-subject test when common->endonly is set. */
4745 case OP_DOLL:
4746 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4747 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4748 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4749
4750 if (!common->endonly)
4751 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4752 else
4753 {
4754 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4755 check_partial(common, FALSE);
4756 }
4757 return cc;
4758
/* At STR_END the match depends on noteol only; elsewhere a newline must
   start at STR_PTR. */
4759 case OP_DOLLM:
4760 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4761 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4762 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4763 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4764 check_partial(common, FALSE);
4765 jump[0] = JUMP(SLJIT_JUMP);
4766 JUMPHERE(jump[1]);
4767
4768 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4769 {
4770 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4771 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4772 if (common->mode == JIT_COMPILE)
4773 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4774 else
4775 {
4776 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4777 /* STR_PTR = STR_END - IN_UCHARS(1) */
4778 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4779 check_partial(common, TRUE);
4780 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4781 JUMPHERE(jump[1]);
4782 }
4783
4784 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4785 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4786 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4787 }
4788 else
4789 {
4790 peek_char(common);
4791 check_newlinechar(common, common->nltype, backtracks, FALSE);
4792 }
4793 JUMPHERE(jump[0]);
4794 return cc;
4795
4796 case OP_CHAR:
4797 case OP_CHARI:
/* length is the number of code units of the pattern character. */
4798 length = 1;
4799 #ifdef SUPPORT_UTF
4800 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4801 #endif
/* In normal (non-partial) mode, when the character is caseful, has no other
   case, or char_get_othercase_bit() returns non-zero for it, the code units
   are compared directly by byte_sequence_compare(). */
4802 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4803 {
4804 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4805 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4806
4807 context.length = IN_UCHARS(length);
4808 context.sourcereg = -1;
4809 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4810 context.ucharptr = 0;
4811 #endif
4812 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4813 }
/* Generic path: read the whole character and compare its value. */
4814 detect_partial_match(common, backtracks);
4815 read_char(common);
4816 #ifdef SUPPORT_UTF
4817 if (common->utf)
4818 {
4819 GETCHAR(c, cc);
4820 }
4821 else
4822 #endif
4823 c = *cc;
4824 if (type == OP_CHAR || !char_has_othercase(common, cc))
4825 {
4826 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4827 return cc + length;
4828 }
4829 oc = char_othercase(common, c);
4830 bit = c ^ oc;
/* When the two cases differ in a single bit, one OR and one compare do. */
4831 if (is_powerof2(bit))
4832 {
4833 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4834 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4835 return cc + length;
4836 }
/* Otherwise test both cases and backtrack when neither of them is equal. */
4837 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4838 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4839 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4840 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4841 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4842 return cc + length;
4843
/* Negated single character: backtrack when the subject character equals
   the pattern character (or its other case for OP_NOTI). */
4844 case OP_NOT:
4845 case OP_NOTI:
4846 detect_partial_match(common, backtracks);
4847 length = 1;
4848 #ifdef SUPPORT_UTF
4849 if (common->utf)
4850 {
4851 #ifdef COMPILE_PCRE8
4852 c = *cc;
/* Pattern characters below 128 can be rejected by looking at the first
   code unit only, without decoding the whole subject character. */
4853 if (c < 128)
4854 {
4855 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4856 if (type == OP_NOT || !char_has_othercase(common, cc))
4857 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4858 else
4859 {
4860 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4861 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4862 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4863 }
4864 /* Skip the variable-length character. */
4865 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4866 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4867 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4868 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4869 JUMPHERE(jump[0]);
4870 return cc + 1;
4871 }
4872 else
4873 #endif /* COMPILE_PCRE8 */
4874 {
4875 GETCHARLEN(c, cc, length);
4876 read_char(common);
4877 }
4878 }
4879 else
4880 #endif /* SUPPORT_UTF */
4881 {
4882 read_char(common);
4883 c = *cc;
4884 }
4885
4886 if (type == OP_NOT || !char_has_othercase(common, cc))
4887 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4888 else
4889 {
4890 oc = char_othercase(common, c);
4891 bit = c ^ oc;
4892 if (is_powerof2(bit))
4893 {
4894 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4895 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4896 }
4897 else
4898 {
4899 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4900 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4901 }
4902 }
4903 return cc + length;
4904
/* Bitmap character class: cc points to the 32 byte bitmap. */
4905 case OP_CLASS:
4906 case OP_NCLASS:
4907 detect_partial_match(common, backtracks);
4908 read_char(common);
/* Done when check_class_ranges() was able to generate range checks. */
4909 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4910 return cc + 32 / sizeof(pcre_uchar);
4911
4912 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4913 jump[0] = NULL;
4914 #ifdef COMPILE_PCRE8
4915 /* This check only affects 8 bit mode. In other modes, we
4916 always need to compare the value with 255. */
4917 if (common->utf)
4918 #endif /* COMPILE_PCRE8 */
4919 {
/* Characters above 255 are not covered by the bitmap: they fail for
   OP_CLASS and skip the bitmap test (i.e. match) for OP_NCLASS. */
4920 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4921 if (type == OP_CLASS)
4922 {
4923 add_jump(compiler, backtracks, jump[0]);
4924 jump[0] = NULL;
4925 }
4926 }
4927 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Bitmap lookup: byte index is (char >> 3), bit is (char & 7). */
4928 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4929 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4930 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4931 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4932 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4933 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4934 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4935 if (jump[0] != NULL)
4936 JUMPHERE(jump[0]);
4937 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4938 return cc + 32 / sizeof(pcre_uchar);
4939
4940 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended class: the class data starts LINK_SIZE units after cc, and
   GET(cc, 0) is used as the total length to skip (less one unit). */
4941 case OP_XCLASS:
4942 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4943 return cc + GET(cc, 0) - 1;
4944 #endif
4945
/* Moves STR_PTR backwards by GET(cc, 0) characters and backtracks when this
   would pass the "begin" field of jit_arguments. */
4946 case OP_REVERSE:
4947 length = GET(cc, 0);
4948 if (length == 0)
4949 return cc + LINK_SIZE;
4950 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4951 #ifdef SUPPORT_UTF
4952 if (common->utf)
4953 {
/* Characters may have different lengths: step back one at a time. */
4954 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4955 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4956 label = LABEL();
4957 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4958 skip_char_back(common);
4959 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4960 JUMPTO(SLJIT_C_NOT_ZERO, label);
4961 }
4962 else
4963 #endif
4964 {
4965 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4966 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4967 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4968 }
4969 check_start_used_ptr(common);
4970 return cc + LINK_SIZE;
4971 }
/* Every opcode handled above returns from inside the switch. */
4972 SLJIT_ASSERT_STOP();
4973 return cc;
4974 }
4975
4976 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4977 {
4978 /* This function consumes at least one input character. */
4979 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4980 DEFINE_COMPILER;
4981 pcre_uchar *ccbegin = cc;
4982 compare_context context;
4983 int size;
4984
4985 context.length = 0;
4986 do
4987 {
4988 if (cc >= ccend)
4989 break;
4990
4991 if (*cc == OP_CHAR)
4992 {
4993 size = 1;
4994 #ifdef SUPPORT_UTF
4995 if (common->utf && HAS_EXTRALEN(cc[1]))
4996 size += GET_EXTRALEN(cc[1]);
4997 #endif
4998 }
4999 else if (*cc == OP_CHARI)
5000 {
5001 size = 1;
5002 #ifdef SUPPORT_UTF
5003 if (common->utf)
5004 {
5005 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5006 size = 0;
5007 else if (HAS_EXTRALEN(cc[1]))
5008 size += GET_EXTRALEN(cc[1]);
5009 }
5010 else
5011 #endif
5012 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5013 size = 0;
5014 }
5015 else
5016 size = 0;
5017
5018 cc += 1 + size;
5019 context.length += IN_UCHARS(size);
5020 }
5021 while (size > 0 && context.length <= 128);
5022
5023 cc = ccbegin;
5024 if (context.length > 0)
5025 {
5026 /* We have a fixed-length byte sequence. */
5027 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5028 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5029
5030 context.sourcereg = -1;
5031 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5032 context.ucharptr = 0;
5033 #endif
5034 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5035 return cc;
5036 }
5037
5038 /* A non-fixed length character will be checked if length == 0. */
5039 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5040 }
5041
5042 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5043 {
5044 DEFINE_COMPILER;
5045 int offset = GET2(cc, 1) << 1;
5046
5047 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5048 if (!common->jscript_compat)
5049 {
5050 if (backtracks == NULL)
5051 {
5052 /* OVECTOR(1) contains the "string begin - 1" constant. */
5053 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5054 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5055 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5056 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5057 return JUMP(SLJIT_C_NOT_ZERO);
5058 }
5059 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5060 }
5061 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5062 }
5063
5064 /* Forward definitions. */
5065 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5066 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5067
/* Allocates a zero-filled backtrack record of `size` bytes from the SLJIT
compiler, stores it in the local variable `backtrack`, records `ccstart` as
its opcode position and pushes it on top of `parent`'s backtrack stack.

The expansion relies on `compiler`, `backtrack` and `parent` being in scope.
If the compiler reports an error after the allocation, the enclosing
function returns `error`. `size` is expanded twice, so it must be free of
side effects. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for enclosing functions that return void. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the local `backtrack` pointer as a more specific backtrack type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5095
5096 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5097 {
5098 DEFINE_COMPILER;
5099 int offset = GET2(cc, 1) << 1;
5100 struct sljit_jump *jump = NULL;
5101 struct sljit_jump *partial;
5102 struct sljit_jump *nopartial;
5103
5104 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5105 /* OVECTOR(1) contains the "string begin - 1" constant. */
5106 if (withchecks && !common->jscript_compat)
5107 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5108
5109 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5110 if (common->utf && *cc == OP_REFI)
5111 {
5112 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5113 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5114 if (withchecks)
5115 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5116
5117 /* Needed to save important temporary registers. */
5118 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5119 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5120 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5121 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5122 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5123 if (common->mode == JIT_COMPILE)
5124 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5125 else
5126 {
5127 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5128 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5129 check_partial(common, FALSE);
5130 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5131 JUMPHERE(nopartial);
5132 }
5133 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5134 }
5135 else
5136 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5137 {
5138 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5139 if (withchecks)
5140 jump = JUMP(SLJIT_C_ZERO);
5141
5142 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5143 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5144 if (common->mode == JIT_COMPILE)
5145 add_jump(compiler, backtracks, partial);
5146
5147 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5148 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5149
5150 if (common->mode != JIT_COMPILE)
5151 {
5152 nopartial = JUMP(SLJIT_JUMP);
5153 JUMPHERE(partial);
5154 /* TMP2 -= STR_END - STR_PTR */
5155 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5156 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5157 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5158 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5159 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5160 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5161 JUMPHERE(partial);
5162 check_partial(common, FALSE);
5163 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5164 JUMPHERE(nopartial);
5165 }
5166 }
5167
5168 if (jump != NULL)
5169 {
5170 if (emptyfail)
5171 add_jump(compiler, backtracks, jump);
5172 else
5173 JUMPHERE(jump);
5174 }
5175 return cc + 1 + IMM2_SIZE;
5176 }
5177
5178 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5179 {
5180 DEFINE_COMPILER;
5181 backtrack_common *backtrack;
5182 pcre_uchar type;
5183 struct sljit_label *label;
5184 struct sljit_jump *zerolength;
5185 struct sljit_jump *jump = NULL;
5186 pcre_uchar *ccbegin = cc;
5187 int min = 0, max = 0;
5188 BOOL minimize;
5189
5190 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5191
5192 type = cc[1 + IMM2_SIZE];
5193 minimize = (type & 0x1) != 0;
5194 switch(type)
5195 {
5196 case OP_CRSTAR:
5197 case OP_CRMINSTAR:
5198 min = 0;
5199 max = 0;
5200 cc += 1 + IMM2_SIZE + 1;
5201 break;
5202 case OP_CRPLUS:
5203 case OP_CRMINPLUS:
5204 min = 1;
5205 max = 0;
5206 cc += 1 + IMM2_SIZE + 1;
5207 break;
5208 case OP_CRQUERY:
5209 case OP_CRMINQUERY:
5210 min = 0;
5211 max = 1;
5212 cc += 1 + IMM2_SIZE + 1;
5213 break;
5214 case OP_CRRANGE:
5215 case OP_CRMINRANGE:
5216 min = GET2(cc, 1 + IMM2_SIZE + 1);
5217 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5218 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5219 break;
5220 default:
5221 SLJIT_ASSERT_STOP();
5222 break;
5223 }
5224
5225 if (!minimize)
5226 {
5227 if (min == 0)
5228 {
5229 allocate_stack(common, 2);
5230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5231 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5232 /* Temporary release of STR_PTR. */
5233 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5234 zerolength = compile_ref_checks(common, ccbegin, NULL);
5235 /* Restore if not zero length. */
5236 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5237 }
5238 else
5239 {
5240 allocate_stack(common, 1);
5241 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5242 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5243 }
5244
5245 if (min > 1 || max > 1)
5246 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5247
5248 label = LABEL();
5249 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5250
5251 if (min > 1 || max > 1)
5252 {
5253 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5254 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5255 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5256 if (min > 1)
5257 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5258 if (max > 1)
5259 {
5260 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5261 allocate_stack(common, 1);
5262 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5263 JUMPTO(SLJIT_JUMP, label);
5264 JUMPHERE(jump);
5265 }
5266 }
5267
5268 if (max == 0)
5269 {
5270 /* Includes min > 1 case as well. */
5271 allocate_stack(common, 1);
5272 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5273 JUMPTO(SLJIT_JUMP, label);
5274 }
5275
5276 JUMPHERE(zerolength);
5277 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5278
5279 decrease_call_count(common);
5280 return cc;
5281 }
5282
5283 allocate_stack(common, 2);
5284 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5285 if (type != OP_CRMINSTAR)
5286 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5287
5288 if (min == 0)
5289 {
5290 zerolength = compile_ref_checks(common, ccbegin, NULL);
5291 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5292 jump = JUMP(SLJIT_JUMP);
5293 }
5294 else
5295 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5296
5297 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5298 if (max > 0)
5299 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5300
5301 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5302 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5303
5304 if (min > 1)
5305 {
5306 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5307 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5308 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5309 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5310 }
5311 else if (max > 0)
5312 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5313
5314 if (jump != NULL)
5315 JUMPHERE(jump);
5316 JUMPHERE(zerolength);
5317
5318 decrease_call_count(common);
5319 return cc;
5320 }
5321
5322 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5323 {
5324 DEFINE_COMPILER;
5325 backtrack_common *backtrack;
5326 recurse_entry *entry = common->entries;
5327 recurse_entry *prev = NULL;
5328 sljit_sw start = GET(cc, 1);
5329 pcre_uchar *start_cc;
5330 BOOL needs_control_head;
5331
5332 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5333
5334 /* Inlining simple patterns. */
5335 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5336 {
5337 start_cc = common->start + start;
5338 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5339 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5340 return cc + 1 + LINK_SIZE;
5341 }
5342
5343 while (entry != NULL)
5344 {
5345 if (entry->start == start)
5346 break;
5347 prev = entry;
5348 entry = entry->next;
5349 }
5350
5351 if (entry == NULL)
5352 {
5353 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5354 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5355 return NULL;
5356 entry->next = NULL;
5357 entry->entry = NULL;
5358 entry->calls = NULL;
5359 entry->start = start;
5360
5361 if (prev != NULL)
5362 prev->next = entry;
5363 else
5364 common->entries = entry;
5365 }
5366
5367 if (common->has_set_som && common->mark_ptr != 0)
5368 {
5369 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5370 allocate_stack(common, 2);
5371 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5372 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5373 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5374 }
5375 else if (common->has_set_som || common->mark_ptr != 0)
5376 {
5377 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5378 allocate_stack(common, 1);
5379 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5380 }
5381
5382 if (entry->entry == NULL)
5383 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5384 else
5385 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5386 /* Leave if the match is failed. */
5387 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5388 return cc + 1 + LINK_SIZE;
5389 }
5390
5391 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5392 {
5393 const pcre_uchar *begin = arguments->begin;
5394 int *offset_vector = arguments->offsets;
5395 int offset_count = arguments->offset_count;
5396 int i;
5397
5398 if (PUBL(callout) == NULL)
5399 return 0;
5400
5401 callout_block->version = 2;
5402 callout_block->callout_data = arguments->callout_data;
5403
5404 /* Offsets in subject. */
5405 callout_block->subject_length = arguments->end - arguments->begin;
5406 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5407 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5408 #if defined COMPILE_PCRE8
5409 callout_block->subject = (PCRE_SPTR)begin;
5410 #elif defined COMPILE_PCRE16
5411 callout_block->subject = (PCRE_SPTR16)begin;
5412 #elif defined COMPILE_PCRE32
5413 callout_block->subject = (PCRE_SPTR32)begin;
5414 #endif
5415
5416 /* Convert and copy the JIT offset vector to the offset_vector array. */
5417 callout_block->capture_top = 0;
5418 callout_block->offset_vector = offset_vector;
5419 for (i = 2; i < offset_count; i += 2)
5420 {
5421 offset_vector[i] = jit_ovector[i] - begin;
5422 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5423 if (jit_ovector[i] >= begin)
5424 callout_block->capture_top = i;
5425 }
5426
5427 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5428 if (offset_count > 0)
5429 offset_vector[0] = -1;
5430 if (offset_count > 1)
5431 offset_vector[1] = -1;
5432 return (*PUBL(callout))(callout_block);
5433 }
5434
5435 /* Aligning to 8 byte. */
5436 #define CALLOUT_ARG_SIZE \
5437 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5438
5439 #define CALLOUT_ARG_OFFSET(arg) \
5440 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5441
5442 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5443 {
5444 DEFINE_COMPILER;
5445 backtrack_common *backtrack;
5446
5447 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5448
5449 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5450
5451 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5452 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5453 SLJIT_ASSERT(common->capture_last_ptr != 0);
5454 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5455 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5456
5457 /* These pointer sized fields temporarly stores internal variables. */
5458 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5459 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5461
5462 if (common->mark_ptr != 0)
5463 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5464 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5465 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5467
5468 /* Needed to save important temporary registers. */
5469 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5470 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5471 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5472 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5473 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5474 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5475 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5476
5477 /* Check return value. */
5478 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5479 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5480 if (common->forced_quit_label == NULL)
5481 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5482 else
5483 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5484 return cc + 2 + 2 * LINK_SIZE;
5485 }
5486
5487 #undef CALLOUT_ARG_SIZE
5488 #undef CALLOUT_ARG_OFFSET
5489
5490 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5491 {
5492 DEFINE_COMPILER;
5493 int framesize;
5494 int extrasize;
5495 BOOL needs_control_head;
5496 int private_data_ptr;
5497 backtrack_common altbacktrack;
5498 pcre_uchar *ccbegin;
5499 pcre_uchar opcode;
5500 pcre_uchar bra = OP_BRA;
5501 jump_list *tmp = NULL;
5502 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5503 jump_list **found;
5504 /* Saving previous accept variables. */
5505 BOOL save_local_exit = common->local_exit;
5506 BOOL save_positive_assert = common->positive_assert;
5507 then_trap_backtrack *save_then_trap = common->then_trap;
5508 struct sljit_label *save_quit_label = common->quit_label;
5509 struct sljit_label *save_accept_label = common->accept_label;
5510 jump_list *save_quit = common->quit;
5511 jump_list *save_positive_assert_quit = common->positive_assert_quit;
5512 jump_list *save_accept = common->accept;
5513 struct sljit_jump *jump;
5514 struct sljit_jump *brajump = NULL;
5515
5516 /* Assert captures then. */
5517 common->then_trap = NULL;
5518
5519 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5520 {
5521 SLJIT_ASSERT(!conditional);
5522 bra = *cc;
5523 cc++;
5524 }
5525 private_data_ptr = PRIVATE_DATA(cc);
5526 SLJIT_ASSERT(private_data_ptr != 0);
5527 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5528 backtrack->framesize = framesize;
5529 backtrack->private_data_ptr = private_data_ptr;
5530 opcode = *cc;
5531 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
5532 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5533 ccbegin = cc;
5534 cc += GET(cc, 1);
5535
5536 if (bra == OP_BRAMINZERO)
5537 {
5538 /* This is a braminzero backtrack path. */
5539 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5540 free_stack(common, 1);
5541 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5542 }
5543
5544 if (framesize < 0)
5545 {
5546 extrasize = needs_control_head ? 2 : 1;
5547 if (framesize == no_frame)
5548 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5549 allocate_stack(common, extrasize);
5550 if (needs_control_head)
5551 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5552 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5553 if (needs_control_head)
5554 {
5555 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5556 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5557 }
5558 }
5559 else
5560 {
5561 extrasize = needs_control_head ? 3 : 2;
5562 allocate_stack(common, framesize + extrasize);
5563 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5564 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5565 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5566 if (needs_control_head)
5567 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5568 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5569 if (needs_control_head)
5570 {
5571 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5572 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5573 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5574 }
5575 else
5576 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5577 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
5578 }
5579
5580 memset(&altbacktrack, 0, sizeof(backtrack_common));
5581 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5582 {
5583 /* Negative assert is stronger than positive assert. */
5584 common->local_exit = TRUE;
5585 common->quit_label = NULL;
5586 common->quit = NULL;
5587 common->positive_assert = FALSE;
5588 }
5589 else
5590 common->positive_assert = TRUE;
5591 common->positive_assert_quit = NULL;
5592
5593 while (1)
5594 {
5595 common->accept_label = NULL;
5596 common->accept = NULL;
5597 altbacktrack.top = NULL;
5598 altbacktrack.topbacktracks = NULL;
5599
5600 if (*ccbegin == OP_ALT)
5601 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5602
5603 altbacktrack.cc = ccbegin;
5604 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5605 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5606 {
5607 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5608 {
5609 common->local_exit = save_local_exit;
5610 common->quit_label = save_quit_label;
5611 common->quit = save_quit;
5612 }
5613 common->positive_assert = save_positive_assert;
5614 common->then_trap = save_then_trap;
5615 common->accept_label = save_accept_label;
5616 common->positive_assert_quit = save_positive_assert_quit;
5617 common->accept = save_accept;
5618 return NULL;
5619 }
5620 common->accept_label = LABEL();
5621 if (common->accept != NULL)
5622 set_jumps(common->accept, common->accept_label);
5623
5624 /* Reset stack. */
5625 if (framesize < 0)
5626 {
5627 if (framesize == no_frame)
5628 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5629 else
5630 free_stack(common, extrasize);
5631 if (needs_control_head)
5632 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5633 }
5634 else
5635 {
5636 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5637 {
5638 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5639 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5640 if (needs_control_head)
5641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5642 }
5643 else
5644 {
5645 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5646 if (needs_control_head)
5647 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5648 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5649 }
5650 }
5651
5652 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5653 {
5654 /* We know that STR_PTR was stored on the top of the stack. */
5655 if (conditional)
5656 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5657 else if (bra == OP_BRAZERO)
5658 {
5659 if (framesize < 0)
5660 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5661 else
5662 {
5663 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5664 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5665 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5666 }
5667 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5668 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5669 }
5670 else if (framesize >= 0)
5671 {
5672 /* For OP_BRA and OP_BRAMINZERO. */
5673 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5674 }
5675 }
5676 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5677
5678 compile_backtrackingpath(common, altbacktrack.top);
5679 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5680 {
5681 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5682 {
5683 common->local_exit = save_local_exit;
5684 common->quit_label = save_quit_label;
5685 common->quit = save_quit;
5686 }
5687 common->positive_assert = save_positive_assert;
5688 common->then_trap = save_then_trap;
5689 common->accept_label = save_accept_label;
5690 common->positive_assert_quit = save_positive_assert_quit;
5691 common->accept = save_accept;
5692 return NULL;
5693 }
5694 set_jumps(altbacktrack.topbacktracks, LABEL());
5695
5696 if (*cc != OP_ALT)
5697 break;
5698
5699 ccbegin = cc;
5700 cc += GET(cc, 1);
5701 }
5702
5703 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5704 {
5705 SLJIT_ASSERT(common->positive_assert_quit == NULL);
5706 /* Makes the check less complicated below. */
5707 common->positive_assert_quit = common->quit;
5708 }
5709
5710 /* None of them matched. */
5711 if (common->positive_assert_quit != NULL)
5712 {
5713 jump = JUMP(SLJIT_JUMP);
5714 set_jumps(common->positive_assert_quit, LABEL());
5715 SLJIT_ASSERT(framesize != no_stack);
5716 if (framesize < 0)
5717 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5718 else
5719 {
5720 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5721 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5722 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5723 }
5724 JUMPHERE(jump);
5725 }
5726
5727 if (needs_control_head)
5728 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5729
5730 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5731 {
5732 /* Assert is failed. */
5733 if (conditional || bra == OP_BRAZERO)
5734 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5735
5736 if (framesize < 0)
5737 {
5738 /* The topmost item should be 0. */
5739 if (bra == OP_BRAZERO)
5740 {
5741 if (extrasize == 2)
5742 free_stack(common, 1);
5743 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5744 }
5745 else
5746 free_stack(common, extrasize);
5747 }
5748 else
5749 {
5750 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5751 /* The topmost item should be 0. */
5752 if (bra == OP_BRAZERO)
5753 {
5754 free_stack(common, framesize + extrasize - 1);
5755 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5756 }
5757 else
5758 free_stack(common, framesize + extrasize);
5759 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5760 }
5761 jump = JUMP(SLJIT_JUMP);
5762 if (bra != OP_BRAZERO)
5763 add_jump(compiler, target, jump);
5764
5765 /* Assert is successful. */
5766 set_jumps(tmp, LABEL());
5767 if (framesize < 0)
5768 {
5769 /* We know that STR_PTR was stored on the top of the stack. */
5770 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5771 /* Keep the STR_PTR on the top of the stack. */
5772 if (bra == OP_BRAZERO)
5773 {
5774 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5775 if (extrasize == 2)
5776 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5777 }
5778 else if (bra == OP_BRAMINZERO)
5779 {
5780 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5781 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5782 }
5783 }
5784 else
5785 {
5786 if (bra == OP_BRA)
5787 {
5788 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5789 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5790 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5791 }
5792 else
5793 {
5794 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5795 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5796 if (extrasize == 2)
5797 {
5798 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5799 if (bra == OP_BRAMINZERO)
5800 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5801 }
5802 else
5803 {
5804 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5805 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5806 }
5807 }
5808 }
5809
5810 if (bra == OP_BRAZERO)
5811 {
5812 backtrack->matchingpath = LABEL();
5813 SET_LABEL(jump, backtrack->matchingpath);
5814 }
5815 else if (bra == OP_BRAMINZERO)
5816 {
5817 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5818 JUMPHERE(brajump);
5819 if (framesize >= 0)
5820 {
5821 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5822 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5823 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5824 }
5825 set_jumps(backtrack->common.topbacktracks, LABEL());
5826 }
5827 }
5828 else
5829 {
5830 /* AssertNot is successful. */
5831 if (framesize < 0)
5832 {
5833 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5834 if (bra != OP_BRA)
5835 {
5836 if (extrasize == 2)
5837 free_stack(common, 1);
5838 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5839 }
5840 else
5841 free_stack(common, extrasize);
5842 }
5843 else
5844 {
5845 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5846 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5847 /* The topmost item should be 0. */
5848 if (bra != OP_BRA)
5849 {
5850 free_stack(common, framesize + extrasize - 1);
5851 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5852 }
5853 else
5854 free_stack(common, framesize + extrasize);
5855 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5856 }
5857
5858 if (bra == OP_BRAZERO)
5859 backtrack->matchingpath = LABEL();
5860 else if (bra == OP_BRAMINZERO)
5861 {
5862 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5863 JUMPHERE(brajump);
5864 }
5865
5866 if (bra != OP_BRA)
5867 {
5868 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5869 set_jumps(backtrack->common.topbacktracks, LABEL());
5870 backtrack->common.topbacktracks = NULL;
5871 }
5872 }
5873
5874 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5875 {
5876 common->local_exit = save_local_exit;
5877 common->quit_label = save_quit_label;
5878 common->quit = save_quit;
5879 }
5880 common->positive_assert = save_positive_assert;
5881 common->then_trap = save_then_trap;
5882 common->accept_label = save_accept_label;
5883 common->positive_assert_quit = save_positive_assert_quit;
5884 common->accept = save_accept;
5885 return cc + 1 + LINK_SIZE;
5886 }
5887
5888 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5889 {
5890 int condition = FALSE;
5891 pcre_uchar *slotA = name_table;
5892 pcre_uchar *slotB;
5893 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5894 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5895 sljit_sw no_capture;
5896 int i;
5897
5898 locals += refno & 0xff;
5899 refno >>= 8;
5900 no_capture = locals[1];
5901
5902 for (i = 0; i < name_count; i++)
5903 {
5904 if (GET2(slotA, 0) == refno) break;
5905 slotA += name_entry_size;
5906 }
5907
5908 if (i < name_count)
5909 {
5910 /* Found a name for the number - there can be only one; duplicate names
5911 for different numbers are allowed, but not vice versa. First scan down
5912 for duplicates. */
5913
5914 slotB = slotA;
5915 while (slotB > name_table)
5916 {
5917 slotB -= name_entry_size;
5918 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5919 {
5920 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5921 if (condition) break;
5922 }
5923 else break;
5924 }
5925
5926 /* Scan up for duplicates */
5927 if (!condition)
5928 {
5929 slotB = slotA;
5930 for (i++; i < name_count; i++)
5931 {
5932 slotB += name_entry_size;
5933 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5934 {
5935 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5936 if (condition) break;
5937 }
5938 else break;
5939 }
5940 }
5941 }
5942 return condition;
5943 }
5944
5945 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5946 {
5947 int condition = FALSE;
5948 pcre_uchar *slotA = name_table;
5949 pcre_uchar *slotB;
5950 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5951 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5952 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5953 sljit_uw i;
5954
5955 for (i = 0; i < name_count; i++)
5956 {
5957 if (GET2(slotA, 0) == recno) break;
5958 slotA += name_entry_size;
5959 }
5960
5961 if (i < name_count)
5962 {
5963 /* Found a name for the number - there can be only one; duplicate
5964 names for different numbers are allowed, but not vice versa. First
5965 scan down for duplicates. */
5966
5967 slotB = slotA;
5968 while (slotB > name_table)
5969 {
5970 slotB -= name_entry_size;
5971 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5972 {
5973 condition = GET2(slotB, 0) == group_num;
5974 if (condition) break;
5975 }
5976 else break;
5977 }
5978
5979 /* Scan up for duplicates */
5980 if (!condition)
5981 {
5982 slotB = slotA;
5983 for (i++; i < name_count; i++)
5984 {
5985 slotB += name_entry_size;
5986 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5987 {
5988 condition = GET2(slotB, 0) == group_num;
5989 if (condition) break;
5990 }
5991 else break;
5992 }
5993 }
5994 }
5995 return condition;
5996 }
5997
5998 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
5999 {
6000 DEFINE_COMPILER;
6001 int stacksize;
6002
6003 if (framesize < 0)
6004 {
6005 if (framesize == no_frame)
6006 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6007 else
6008 {
6009 stacksize = needs_control_head ? 1 : 0;
6010 if (ket != OP_KET || has_alternatives)
6011 stacksize++;
6012 free_stack(common, stacksize);
6013 }
6014
6015 if (needs_control_head)
6016 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6017
6018 /* TMP2 which is set here used by OP_KETRMAX below. */
6019 if (ket == OP_KETRMAX)
6020 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6021 else if (ket == OP_KETRMIN)
6022 {
6023 /* Move the STR_PTR to the private_data_ptr. */
6024 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6025 }
6026 }
6027 else
6028 {
6029 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6030 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6031 if (needs_control_head)
6032 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6033
6034 if (ket == OP_KETRMAX)
6035 {
6036 /* TMP2 which is set here used by OP_KETRMAX below. */
6037 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6038 }
6039 }
6040 if (needs_control_head)
6041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6042 }
6043
6044 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6045 {
6046 DEFINE_COMPILER;
6047
6048 if (common->capture_last_ptr != 0)
6049 {
6050 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6052 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6053 stacksize++;
6054 }
6055 if (common->optimized_cbracket[offset >> 1] == 0)
6056 {
6057 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6058 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6059 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6060 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6061 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6062 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6063 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6064 stacksize += 2;
6065 }
6066 return stacksize;
6067 }
6068
6069 /*
6070 Handling bracketed expressions is probably the most complex part.
6071
6072 Stack layout naming characters:
6073 S - Push the current STR_PTR
6074 0 - Push a 0 (NULL)
6075 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6076 before the next alternative. Not pushed if there are no alternatives.
6077 M - Any values pushed by the current alternative. Can be empty, or anything.
6078 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6079 L - Push the previous local (pointed by localptr) to the stack
6080 () - optional values stored on the stack
6081 ()* - optional, can be stored multiple times
6082
6083 The following list shows the regular expression templates, their PCRE byte codes
6084 and stack layout supported by pcre-sljit.
6085
6086 (?:) OP_BRA | OP_KET A M
6087 () OP_CBRA | OP_KET C M
6088 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6089 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6090 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6091 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6092 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6093 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6094 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6095 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6096 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6097 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6098 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6099 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6100 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6101 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6102 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6103 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6104 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6105 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6106 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6107 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6108
6109
6110 Stack layout naming characters:
6111 A - Push the alternative index (starting from 0) on the stack.
6112 Not pushed if there are no alternatives.
6113 M - Any values pushed by the current alternative. Can be empty, or anything.
6114
6115 The next list shows the possible content of a bracket:
6116 (|) OP_*BRA | OP_ALT ... M A
6117 (?()|) OP_*COND | OP_ALT M A
6118 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6119 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6120 Or nothing, if trace is unnecessary
6121 */
6122
/* Generates the matching path for a bracketed group.
cc points at the group's opening opcode, or at an OP_BRAZERO / OP_BRAMINZERO
that precedes it. parent is the backtrack node the new bracket_backtrack is
attached to (presumably by PUSH_BACKTRACK - the macro is defined elsewhere).
Only the first alternative is compiled here (through compile_matchingpath);
the remaining OP_ALT branches are skipped over at the end.
Returns the position after the closing ket plus repeat_length, or NULL when
the sljit compiler reports an error. For OP_COND / OP_SCOND whose condition is
OP_DEF no code is emitted and the bracket_backtrack is dropped again. */
6123 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6124 {
6125 DEFINE_COMPILER;
6126 backtrack_common *backtrack;
6127 pcre_uchar opcode;
6128 int private_data_ptr = 0;
6129 int offset = 0;
6130 int stacksize;
6131 int repeat_ptr = 0, repeat_length = 0;
6132 int repeat_type = 0, repeat_count = 0;
6133 pcre_uchar *ccbegin;
6134 pcre_uchar *matchingpath;
6135 pcre_uchar bra = OP_BRA;
6136 pcre_uchar ket;
6137 assert_backtrack *assert;
6138 BOOL has_alternatives;
6139 BOOL needs_control_head = FALSE;
6140 struct sljit_jump *jump;
6141 struct sljit_jump *skip;
6142 struct sljit_label *rmaxlabel = NULL;
6143 struct sljit_jump *braminzerojump = NULL;
6144
6145 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6146
/* An optional OP_BRAZERO / OP_BRAMINZERO prefix is remembered in bra and skipped. */
6147 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6148 {
6149 bra = *cc;
6150 cc++;
/* NOTE(review): opcode is assigned again unconditionally right after this
block, so the store below is redundant. */
6151 opcode = *cc;
6152 }
6153
6154 opcode = *cc;
6155 ccbegin = cc;
/* matchingpath temporarily addresses the opcode that is read as ket. */
6156 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6157 ket = *matchingpath;
/* A plain OP_KET with non-zero private data carries repeat information in
four consecutive private data entries. OP_UPTO and OP_MINUPTO repeats are
then compiled as OP_KETRMAX and OP_KETRMIN loops respectively. */
6158 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6159 {
6160 repeat_ptr = PRIVATE_DATA(matchingpath);
6161 repeat_length = PRIVATE_DATA(matchingpath + 1);
6162 repeat_type = PRIVATE_DATA(matchingpath + 2);
6163 repeat_count = PRIVATE_DATA(matchingpath + 3);
6164 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6165 if (repeat_type == OP_UPTO)
6166 ket = OP_KETRMAX;
6167 if (repeat_type == OP_MINUPTO)
6168 ket = OP_KETRMIN;
6169 }
6170
6171 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6172 {
6173 /* Drop this bracket_backtrack. */
6174 parent->top = backtrack->prev;
6175 return matchingpath + 1 + LINK_SIZE + repeat_length;
6176 }
6177
/* From here on matchingpath addresses the code that follows the opening
opcode and its link field. */
6178 matchingpath = ccbegin + 1 + LINK_SIZE;
6179 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6180 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Advance cc by the link value; *cc is OP_ALT when more alternatives follow. */
6181 cc += GET(cc, 1);
6182
6183 has_alternatives = *cc == OP_ALT;
6184 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6185 {
6186 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
6187 if (*matchingpath == OP_NRREF)
6188 {
/* stacksize is used as a scratch variable for the reference number here. */
6189 stacksize = GET2(matchingpath, 1);
6190 if (common->currententry == NULL || stacksize == RREF_ANY)
6191 has_alternatives = FALSE;
6192 else if (common->currententry->start == 0)
6193 has_alternatives = stacksize != 0;
6194 else
6195 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6196 }
6197 }
6198
/* A repeated OP_COND is compiled as OP_SCOND, and OP_ONCE_NC as OP_ONCE. */
6199 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6200 opcode = OP_SCOND;
6201 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6202 opcode = OP_ONCE;
6203
6204 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6205 {
6206 /* Capturing brackets has a pre-allocated space. */
6207 offset = GET2(ccbegin, 1 + LINK_SIZE);
6208 if (common->optimized_cbracket[offset] == 0)
6209 {
6210 private_data_ptr = OVECTOR_PRIV(offset);
6211 offset <<= 1;
6212 }
6213 else
6214 {
6215 offset <<= 1;
6216 private_data_ptr = OVECTOR(offset);
6217 }
6218 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6219 matchingpath += IMM2_SIZE;
6220 }
6221 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6222 {
6223 /* Other brackets simply allocate the next entry. */
6224 private_data_ptr = PRIVATE_DATA(ccbegin);
6225 SLJIT_ASSERT(private_data_ptr != 0);
6226 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6227 if (opcode == OP_ONCE)
6228 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6229 }
6230
6231 /* Instructions before the first alternative. */
/* The slots are counted first, allocated in one step, and then filled in the
same order. */
6232 stacksize = 0;
6233 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6234 stacksize++;
6235 if (bra == OP_BRAZERO)
6236 stacksize++;
6237
6238 if (stacksize > 0)
6239 allocate_stack(common, stacksize);
6240
6241 stacksize = 0;
6242 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6243 {
6244 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6245 stacksize++;
6246 }
6247
6248 if (bra == OP_BRAZERO)
6249 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6250
6251 if (bra == OP_BRAMINZERO)
6252 {
6253 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6254 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6255 if (ket != OP_KETRMIN)
6256 {
6257 free_stack(common, 1);
6258 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6259 }
6260 else
6261 {
6262 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6263 {
6264 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6265 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6266 /* Nothing stored during the first run. */
6267 skip = JUMP(SLJIT_JUMP);
6268 JUMPHERE(jump);
6269 /* Checking zero-length iteration. */
6270 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6271 {
6272 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6273 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6274 }
6275 else
6276 {
6277 /* Except when the whole stack frame must be saved. */
6278 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6279 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6280 }
6281 JUMPHERE(skip);
6282 }
6283 else
6284 {
6285 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6286 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6287 JUMPHERE(jump);
6288 }
6289 }
6290 }
6291
/* Limited repetition: the counter slot is initialised with repeat_count. For
OP_EXACT the loop label is placed here. */
6292 if (repeat_type != 0)
6293 {
6294 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6295 if (repeat_type == OP_EXACT)
6296 rmaxlabel = LABEL();
6297 }
6298
6299 if (ket == OP_KETRMIN)
6300 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6301
6302 if (ket == OP_KETRMAX)
6303 {
6304 rmaxlabel = LABEL();
6305 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6306 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
6307 }
6308
6309 /* Handling capturing brackets and alternatives. */
6310 if (opcode == OP_ONCE)
6311 {
6312 stacksize = 0;
6313 if (needs_control_head)
6314 {
6315 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6316 stacksize++;
6317 }
6318
6319 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6320 {
6321 /* Neither capturing brackets nor recursions are found in the block. */
6322 if (ket == OP_KETRMIN)
6323 {
6324 stacksize += 2;
6325 if (!needs_control_head)
6326 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6327 }
6328 else
6329 {
6330 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6331 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6332 if (ket == OP_KETRMAX || has_alternatives)
6333 stacksize++;
6334 }
6335
6336 if (stacksize > 0)
6337 allocate_stack(common, stacksize);
6338
6339 stacksize = 0;
6340 if (needs_control_head)
6341 {
6342 stacksize++;
6343 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6344 }
6345
6346 if (ket == OP_KETRMIN)
6347 {
6348 if (needs_control_head)
6349 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6350 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6351 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6352 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6353 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6354 }
6355 else if (ket == OP_KETRMAX || has_alternatives)
6356 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6357 }
6358 else
6359 {
6360 if (ket != OP_KET || has_alternatives)
6361 stacksize++;
6362
6363 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6364 allocate_stack(common, stacksize);
6365
6366 if (needs_control_head)
6367 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6368
6369 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6370 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6371
6372 stacksize = needs_control_head ? 1 : 0;
6373 if (ket != OP_KET || has_alternatives)
6374 {
6375 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6376 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6377 stacksize++;
6378 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6379 }
6380 else
6381 {
6382 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6383 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6384 }
6385 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6386 }
6387 }
6388 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6389 {
6390 /* Saving the previous values. */
6391 if (common->optimized_cbracket[offset >> 1] != 0)
6392 {
6393 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6394 allocate_stack(common, 2);
6395 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6396 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6397 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6398 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6399 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6400 }
6401 else
6402 {
6403 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6404 allocate_stack(common, 1);
6405 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6406 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6407 }
6408 }
6409 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6410 {
6411 /* Saving the previous value. */
6412 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6413 allocate_stack(common, 1);
6414 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6416 }
6417 else if (has_alternatives)
6418 {
6419 /* Pushing the starting string pointer. */
6420 allocate_stack(common, 1);
6421 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6422 }
6423
6424 /* Generating code for the first alternative. */
6425 if (opcode == OP_COND || opcode == OP_SCOND)
6426 {
6427 if (*matchingpath == OP_CREF)
6428 {
/* Condition on a numbered group: it fails when the group's start entry
equals OVECTOR(1). */
6429 SLJIT_ASSERT(has_alternatives);
6430 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6431 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6432 matchingpath += 1 + IMM2_SIZE;
6433 }
6434 else if (*matchingpath == OP_NCREF)
6435 {
/* Condition on a named group: when the referenced group itself is unset,
do_searchovector is called to check the other groups with the same name.
STACK_TOP is kept in POSSESSIVE1 across the call. */
6436 SLJIT_ASSERT(has_alternatives);
6437 stacksize = GET2(matchingpath, 1);
6438 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6439
6440 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6441 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6442 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6443 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6444 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6445 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6446 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6447 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6448 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6449
6450 JUMPHERE(jump);
6451 matchingpath += 1 + IMM2_SIZE;
6452 }
6453 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6454 {
6455 /* Never has other case. */
6456 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6457
/* stacksize is reused as a flag: non-zero when the recursion condition is
known to hold at compile time. */
6458 stacksize = GET2(matchingpath, 1);
6459 if (common->currententry == NULL)
6460 stacksize = 0;
6461 else if (stacksize == RREF_ANY)
6462 stacksize = 1;
6463 else if (common->currententry->start == 0)
6464 stacksize = stacksize == 0;
6465 else
6466 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6467
6468 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6469 {
6470 SLJIT_ASSERT(!has_alternatives);
6471 if (stacksize != 0)
6472 matchingpath += 1 + IMM2_SIZE;
6473 else
6474 {
6475 if (*cc == OP_ALT)
6476 {
6477 matchingpath = cc + 1 + LINK_SIZE;
6478 cc += GET(cc, 1);
6479 }
6480 else
6481 matchingpath = cc;
6482 }
6483 }
6484 else
6485 {
6486 SLJIT_ASSERT(has_alternatives);
6487
6488 stacksize = GET2(matchingpath, 1);
6489 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6491 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6492 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6493 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6494 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6495 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6496 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6497 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6498 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6499 matchingpath += 1 + IMM2_SIZE;
6500 }
6501 }
6502 else
6503 {
6504 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6505 /* Similar code as PUSH_BACKTRACK macro. */
6506 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6507 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6508 return NULL;
6509 memset(assert, 0, sizeof(assert_backtrack));
6510 assert->common.cc = matchingpath;
6511 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6512 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6513 }
6514 }
6515
6516 compile_matchingpath(common, matchingpath, cc, backtrack);
6517 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6518 return NULL;
6519
6520 if (opcode == OP_ONCE)
6521 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6522
/* Slots pushed after the first alternative: counted here, allocated once,
then filled below in the same order. */
6523 stacksize = 0;
6524 if (repeat_type == OP_MINUPTO)
6525 {
6526 /* We need to preserve the counter. TMP2 will be used below. */
6527 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6528 stacksize++;
6529 }
6530 if (ket != OP_KET || bra != OP_BRA)
6531 stacksize++;
6532 if (offset != 0)
6533 {
6534 if (common->capture_last_ptr != 0)
6535 stacksize++;
6536 if (common->optimized_cbracket[offset >> 1] == 0)
6537 stacksize += 2;
6538 }
6539 if (has_alternatives && opcode != OP_ONCE)
6540 stacksize++;
6541
6542 if (stacksize > 0)
6543 allocate_stack(common, stacksize);
6544
6545 stacksize = 0;
6546 if (repeat_type == OP_MINUPTO)
6547 {
6548 /* TMP2 was set above. */
6549 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
6550 stacksize++;
6551 }
6552
6553 if (ket != OP_KET || bra != OP_BRA)
6554 {
6555 if (ket != OP_KET)
6556 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6557 else
6558 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6559 stacksize++;
6560 }
6561
6562 if (offset != 0)
6563 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6564
6565 if (has_alternatives)
6566 {
6567 if (opcode != OP_ONCE)
6568 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6569 if (ket != OP_KETRMAX)
6570 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6571 }
6572
6573 /* Must be after the matchingpath label. */
6574 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6575 {
6576 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6577 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6578 }
6579
6580 if (ket == OP_KETRMAX)
6581 {
6582 if (repeat_type != 0)
6583 {
/* Counted greedy repeat: decrement the counter and loop while it is non-zero. */
6584 if (has_alternatives)
6585 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6586 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6587 JUMPTO(SLJIT_C_NOT_ZERO, rmaxlabel);
6588 /* Drop STR_PTR for greedy plus quantifier. */
6589 if (opcode != OP_ONCE)
6590 free_stack(common, 1);
6591 }
6592 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
6593 {
6594 if (has_alternatives)
6595 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6596 /* Checking zero-length iteration. */
6597 if (opcode != OP_ONCE)
6598 {
6599 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6600 /* Drop STR_PTR for greedy plus quantifier. */
6601 if (bra != OP_BRAZERO)
6602 free_stack(common, 1);
6603 }
6604 else
6605 /* TMP2 must contain the starting STR_PTR. */
6606 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6607 }
6608 else
6609 JUMPTO(SLJIT_JUMP, rmaxlabel);
6610 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6611 }
6612
6613 if (repeat_type == OP_EXACT)
6614 {
6615 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6616 JUMPTO(SLJIT_C_NOT_ZERO, rmaxlabel);
6617 }
6618 else if (repeat_type == OP_UPTO)
6619 {
6620 /* We need to preserve the counter. */
6621 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6622 allocate_stack(common, 1);
6623 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6624 }
6625
6626 if (bra == OP_BRAZERO)
6627 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6628
6629 if (bra == OP_BRAMINZERO)
6630 {
6631 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6632 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6633 if (braminzerojump != NULL)
6634 {
6635 JUMPHERE(braminzerojump);
6636 /* We need to release the end pointer to perform the
6637 backtrack for the zero-length iteration. When
6638 framesize is < 0, OP_ONCE will do the release itself. */
6639 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6640 {
6641 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6642 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6643 }
6644 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6645 free_stack(common, 1);
6646 }
6647 /* Continue to the normal backtrack. */
6648 }
6649
6650 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6651 decrease_call_count(common);
6652
6653 /* Skip the other alternatives. */
6654 while (*cc == OP_ALT)
6655 cc += GET(cc, 1);
/* Step over the closing ket and its link field. */
6656 cc += 1 + LINK_SIZE;
6657
6658 /* Temporarily encoding the needs_control_head in framesize. */
6659 if (opcode == OP_ONCE)
6660 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6661 return cc + repeat_length;
6662 }
6663
6664 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6665 {
6666 DEFINE_COMPILER;
6667 backtrack_common *backtrack;
6668 pcre_uchar opcode;
6669 int private_data_ptr;
6670 int cbraprivptr = 0;
6671 BOOL needs_control_head;
6672 int framesize;
6673 int stacksize;
6674 int offset = 0;
6675 BOOL zero = FALSE;
6676 pcre_uchar *ccbegin = NULL;
6677 int stack; /* Also contains the offset of control head. */
6678 struct sljit_label *loop = NULL;
6679 struct jump_list *emptymatch = NULL;
6680
6681 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6682 if (*cc == OP_BRAPOSZERO)
6683 {
6684 zero = TRUE;
6685 cc++;
6686 }
6687
6688 opcode = *cc;
6689 private_data_ptr = PRIVATE_DATA(cc);
6690 SLJIT_ASSERT(private_data_ptr != 0);
6691 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6692 switch(opcode)
6693 {
6694 case OP_BRAPOS:
6695 case OP_SBRAPOS:
6696 ccbegin = cc + 1 + LINK_SIZE;
6697 break;
6698
6699 case OP_CBRAPOS:
6700 case OP_SCBRAPOS:
6701 offset = GET2(cc, 1 + LINK_SIZE);
6702 /* This case cannot be optimized in the same way as
6703 normal capturing brackets. */
6704 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6705 cbraprivptr = OVECTOR_PRIV(offset);
6706 offset <<= 1;
6707 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6708 break;
6709
6710 default:
6711 SLJIT_ASSERT_STOP();
6712 break;
6713 }
6714
6715 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6716 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6717 if (framesize < 0)
6718 {
6719 if (offset != 0)
6720 {
6721 stacksize = 2;
6722 if (common->capture_last_ptr != 0)
6723 stacksize++;
6724 }
6725 else
6726 stacksize = 1;
6727
6728 if (needs_control_head)
6729 stacksize++;
6730 if (!zero)
6731 stacksize++;
6732
6733 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6734 allocate_stack(common, stacksize);
6735 if (framesize == no_frame)
6736