/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1325 - (show annotations)
Fri May 10 14:03:18 2013 UTC (6 years, 6 months ago) by ph10
File MIME type: text/plain
File size: 306686 byte(s)
Error occurred while calculating annotation data.
Fix spelling mistakes in comments.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
/* Allocation requests made by sljit are forwarded to PUBL(malloc) and
PUBL(free). */
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
/* NOTE(review): the exact meaning of the two configuration switches below is
defined by sljit itself - confirm against the sljit configuration headers. */
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
/* Both sljit diagnostics switches are set to 0 here. */
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited in size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for the stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is non-zero; SLJIT_DEBUG is set to 0 above. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary of the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block for the compiled code: the jit_function type declared
further down takes a single pointer to this structure.
NOTE(review): the field order may be relied upon by generated code that
addresses members by offset - do not reorder without checking the users. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
/* NOTE(review): str / begin / end presumably delimit the subject string -
confirm against the caller that fills this structure. */
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
/* One byte per option flag. */
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Holds one executable function pointer and one code size for each of the
JIT_NUMBER_OF_COMPILE_MODES compile modes (the two arrays are indexed the same
way), together with a callback, its user data and two 32 bit values. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 pcre_uint32 limit_match;
186 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187 } executable_functions;
188
/* Node of a singly linked list of sljit jumps. */
189 typedef struct jump_list {
190 struct sljit_jump *jump;
191 struct jump_list *next;
192 } jump_list;
193
/* Node of a singly linked list of stubs; each node pairs a jump (start)
with a label (quit). */
194 typedef struct stub_list {
195 struct sljit_jump *start;
196 struct sljit_label *quit;
197 struct stub_list *next;
198 } stub_list;
199
/* Negative marker values. NOTE(review): presumably stored in the framesize
fields below, which are documented as "less than 0" when no frame is needed -
confirm against the users. */
200 enum frame_types {
201 no_frame = -1,
202 no_stack = -2
203 };
204
/* Kinds of control entries. NOTE(review): presumably the tags used in the
chain referenced by compiler_common.control_head_ptr - confirm. */
205 enum control_types {
206 type_mark = 0,
207 type_then_trap = 1
208 };
209
/* Pointer to a function using the SLJIT_CALL calling convention that takes
a jit_arguments block and returns an int. */
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type for the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the arguments for compile_backtrackingpath. Must be the first member
215 of its descendants. */
/* The *_backtrack structures declared after it all start with a field of
this type named "common". */
216 typedef struct backtrack_common {
217 /* Concatenation stack. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
/* backtrack_common extended with the extra state listed below.
NOTE(review): judging by the name it is used for assertion opcodes; the
code that fills it is outside this part of the file. */
227 typedef struct assert_backtrack {
228 backtrack_common common;
229 jump_list *condfailed;
230 /* Less than 0 if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
/* backtrack_common extended with the labels and per-opcode data needed for
brackets. */
238 typedef struct bracket_backtrack {
239 backtrack_common common;
240 /* Where to continue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. */
/* The members share storage, so only one of them is valid at a time.
NOTE(review): which one presumably depends on the bracket opcode - confirm. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. Less than 0 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
/* backtrack_common extended with three int values describing the stack
usage of the bracket. NOTE(review): by its name, used for the *POS bracket
opcodes - confirm. */
258 typedef struct bracketpos_backtrack {
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
/* backtrack_common plus one label. NOTE(review): by its name, used for
OP_BRAMINZERO - confirm. */
268 typedef struct braminzero_backtrack {
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
/* backtrack_common plus the label where the next iteration starts. */
273 typedef struct iterator_backtrack {
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
/* Node of a singly linked list that describes one recursion target. */
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls made while the function has not been created yet. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 sljit_sw start;
287 } recurse_entry;
288
/* backtrack_common plus a flag telling whether the pattern was inlined.
NOTE(review): by its name, used for OP_RECURSE - confirm. */
289 typedef struct recurse_backtrack {
290 backtrack_common common;
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
/* Extra opcode value local to this file, equal to OP_TABLE_LENGTH.
NOTE(review): presumably chosen so that it cannot collide with a real
opcode - confirm. */
294 #define OP_THEN_TRAP OP_TABLE_LENGTH
295
/* backtrack_common extended with the data of one "then trap". */
296 typedef struct then_trap_backtrack {
297 backtrack_common common;
298 /* If then_trap is not NULL, this structure contains the real
299 then_trap for the backtracking path. */
300 struct then_trap_backtrack *then_trap;
301 /* Points to the starting opcode. */
302 sljit_sw start;
303 /* Exit point for the then opcodes of this alternative. */
304 jump_list *quit;
305 /* Frame size of the current alternative. */
306 int framesize;
307 } then_trap_backtrack;
308
/* Used below to size compiler_common.digits (2 + MAX_RANGE_SIZE entries). */
309 #define MAX_RANGE_SIZE 6
310
/* State shared by the code generator helpers; the functions below receive it
as their "common" argument. */
311 typedef struct compiler_common {
312 /* The sljit generic compiler. */
313 struct sljit_compiler *compiler;
314 /* First byte code. */
315 pcre_uchar *start;
316 /* Maps private data offset to each opcode. */
/* Indexed by the distance of the opcode from start (see PRIVATE_DATA). */
317 sljit_si *private_data_ptrs;
318 /* Tells whether the capturing bracket is optimized. */
319 pcre_uint8 *optimized_cbracket;
320 /* Tells whether the starting offset is a target of then. */
321 pcre_uint8 *then_offsets;
322 /* Current position where a THEN must jump. */
323 then_trap_backtrack *then_trap;
324 /* Starting offset of private data for capturing brackets. */
325 int cbra_ptr;
326 /* Output vector starting point. Must be divisible by 2. */
327 int ovector_start;
328 /* Last known position of the requested byte. */
329 int req_char_ptr;
330 /* Head of the last recursion. */
/* check_opcode_types sets recursive_head_ptr, capture_last_ptr and mark_ptr
to the current ovector_start the first time they are needed and then moves
ovector_start forward by sizeof(sljit_sw); 0 means "not allocated yet". */
331 int recursive_head_ptr;
332 /* First inspected character for partial matching. */
333 int start_used_ptr;
334 /* Starting pointer for partial soft matches. */
335 int hit_start;
336 /* End pointer of the first line. */
337 int first_line_end;
338 /* Points to the marked string. */
339 int mark_ptr;
340 /* Recursive control verb management chain. */
/* check_opcode_types only stores 1 here as a "needed" marker. */
341 int control_head_ptr;
342 /* Points to the last matched capture block index. */
343 int capture_last_ptr;
344 /* Points to the starting position of the current match. */
345 int start_ptr;
346
347 /* Flipped and lower case tables. */
348 const pcre_uint8 *fcc;
349 sljit_sw lcc;
350 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351 int mode;
352 /* \K is found in the pattern. */
353 BOOL has_set_som;
354 /* (*SKIP:arg) is found in the pattern. */
355 BOOL has_skip_arg;
356 /* (*THEN) is found in the pattern. */
357 BOOL has_then;
358 /* Needs to know the start position anytime. */
359 BOOL needs_start_ptr;
360 /* Currently in recurse or negative assert. */
361 BOOL local_exit;
362 /* Currently in a positive assert. */
363 BOOL positive_assert;
364 /* Newline control. */
365 int nltype;
366 int newline;
367 int bsr_nltype;
368 /* Dollar endonly. */
369 int endonly;
370 /* Tables. */
371 sljit_sw ctypes;
372 int digits[2 + MAX_RANGE_SIZE];
373 /* Named capturing brackets. */
/* name_table holds the address of the table; check_opcode_types casts it to
pcre_uchar * and steps through name_count entries of name_entry_size units. */
374 sljit_uw name_table;
375 sljit_sw name_count;
376 sljit_sw name_entry_size;
377
378 /* Labels and jump lists. */
379 struct sljit_label *partialmatchlabel;
380 struct sljit_label *quit_label;
381 struct sljit_label *forced_quit_label;
382 struct sljit_label *accept_label;
383 stub_list *stubs;
384 recurse_entry *entries;
385 recurse_entry *currententry;
386 jump_list *partialmatch;
387 jump_list *quit;
388 jump_list *positive_assert_quit;
389 jump_list *forced_quit;
390 jump_list *accept;
391 jump_list *calllimit;
392 jump_list *stackalloc;
393 jump_list *revertframes;
394 jump_list *wordboundary;
395 jump_list *anynewline;
396 jump_list *hspace;
397 jump_list *vspace;
398 jump_list *casefulcmp;
399 jump_list *caselesscmp;
400 jump_list *reset_match;
401 BOOL jscript_compat;
/* The remaining members exist only in builds with the matching
SUPPORT_UTF / SUPPORT_UCP / COMPILE_PCRE* configuration. */
402 #ifdef SUPPORT_UTF
403 BOOL utf;
404 #ifdef SUPPORT_UCP
405 BOOL use_ucp;
406 #endif
407 #ifndef COMPILE_PCRE32
408 jump_list *utfreadchar;
409 #endif
410 #ifdef COMPILE_PCRE8
411 jump_list *utfreadtype8;
412 #endif
413 #endif /* SUPPORT_UTF */
414 #ifdef SUPPORT_UCP
415 jump_list *getucd;
416 #endif
417 } compiler_common;
418
419 /* For byte_sequence_compare. */
420
/* Only length and sourcereg exist in every build. ucharptr and the two
unions c and oc are present only when SLJIT_UNALIGNED is defined and
non-zero. Each union overlays the same storage as an int, a 16 bit value
and an array of code units whose element type follows the COMPILE_PCRE8 /
COMPILE_PCRE16 / COMPILE_PCRE32 build mode.
NOTE(review): c and oc presumably hold a character chunk and its other-case
counterpart - confirm in byte_sequence_compare. */
421 typedef struct compare_context {
422 int length;
423 int sourcereg;
424 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
425 int ucharptr;
426 union {
427 sljit_si asint;
428 sljit_uh asushort;
429 #if defined COMPILE_PCRE8
430 sljit_ub asbyte;
431 sljit_ub asuchars[4];
432 #elif defined COMPILE_PCRE16
433 sljit_uh asuchars[2];
434 #elif defined COMPILE_PCRE32
435 sljit_ui asuchars[1];
436 #endif
437 } c;
438 union {
439 sljit_si asint;
440 sljit_uh asushort;
441 #if defined COMPILE_PCRE8
442 sljit_ub asbyte;
443 sljit_ub asuchars[4];
444 #elif defined COMPILE_PCRE16
445 sljit_uh asuchars[2];
446 #elif defined COMPILE_PCRE32
447 sljit_ui asuchars[1];
448 #endif
449 } oc;
450 #endif
451 } compare_context;
452
453 /* Undefine sljit macros. */
/* CMP is defined again further down as a shortcut for sljit_emit_cmp. */
454 #undef CMP
455
456 /* Used for accessing the elements of the stack. */
/* Yields a negative byte offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is
-2 * sizeof(sljit_sw), and so on. */
457 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
458
/* Symbolic names for the sljit registers used in this file. */
459 #define TMP1 SLJIT_SCRATCH_REG1
460 #define TMP2 SLJIT_SCRATCH_REG3
461 #define TMP3 SLJIT_TEMPORARY_EREG2
462 #define STR_PTR SLJIT_SAVED_REG1
463 #define STR_END SLJIT_SAVED_REG2
464 #define STACK_TOP SLJIT_SCRATCH_REG2
465 #define STACK_LIMIT SLJIT_SAVED_REG3
466 #define ARGUMENTS SLJIT_SAVED_EREG1
467 #define COUNT_MATCH SLJIT_SAVED_EREG2
468 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
469
470 /* Local space layout. */
/* LOCALS0 .. LIMIT_MATCH are fixed byte offsets covering the first five
sljit_sw sized words. */
471 /* These two locals can be used by the current opcode. */
472 #define LOCALS0 (0 * sizeof(sljit_sw))
473 #define LOCALS1 (1 * sizeof(sljit_sw))
474 /* Two local variables for possessive quantifiers (char1 cannot use them). */
475 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
476 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
477 /* Max limit of recursions. */
478 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
479 /* The output vector is stored on the stack, and contains pointers
480 to characters. The vector data is divided into two groups: the first
481 group contains the start / end character pointers, and the second is
482 the start pointers when the end of the capturing group has not yet been reached. */
/* The four macros below expand to expressions that read a variable named
"common", so they can only be used where such a variable is in scope. */
483 #define OVECTOR_START (common->ovector_start)
484 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
485 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))
486 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
487
/* Selects the sljit move opcodes (plain and MOVU variant) for one pattern
code unit according to the COMPILE_PCRE8 / 16 / 32 build mode; any other
configuration is a compile error. */
488 #if defined COMPILE_PCRE8
489 #define MOV_UCHAR SLJIT_MOV_UB
490 #define MOVU_UCHAR SLJIT_MOVU_UB
491 #elif defined COMPILE_PCRE16
492 #define MOV_UCHAR SLJIT_MOV_UH
493 #define MOVU_UCHAR SLJIT_MOVU_UH
494 #elif defined COMPILE_PCRE32
495 #define MOV_UCHAR SLJIT_MOV_UI
496 #define MOVU_UCHAR SLJIT_MOVU_UI
497 #else
498 #error Unsupported compiling mode
499 #endif
500
501 /* Shortcuts. */
/* DEFINE_COMPILER declares a local variable "compiler" initialised from
common->compiler. Every other shortcut passes that "compiler" variable to the
corresponding sljit_* function, so DEFINE_COMPILER (or an equivalent
declaration) must be in scope where they are used. */
502 #define DEFINE_COMPILER \
503 struct sljit_compiler *compiler = common->compiler
504 #define OP1(op, dst, dstw, src, srcw) \
505 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
506 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
507 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
508 #define LABEL() \
509 sljit_emit_label(compiler)
510 #define JUMP(type) \
511 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
512 #define JUMPTO(type, label) \
513 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds the jump to it. */
514 #define JUMPHERE(jump) \
515 sljit_set_label((jump), sljit_emit_label(compiler))
516 #define SET_LABEL(jump, label) \
517 sljit_set_label((jump), (label))
518 #define CMP(type, src1, src1w, src2, src2w) \
519 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
520 #define CMPTO(type, src1, src1w, src2, src2w, label) \
521 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
522 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
523 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
524 #define GET_LOCAL_BASE(dst, dstw, offset) \
525 sljit_get_local_base(compiler, (dst), (dstw), (offset))
526
527 static pcre_uchar* bracketend(pcre_uchar* cc)
528 {
529 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
530 do cc += GET(cc, 1); while (*cc == OP_ALT);
531 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
532 cc += 1 + LINK_SIZE;
533 return cc;
534 }
535
536 /* Functions which might need modification for every newly supported opcode:
537 next_opcode
538 check_opcode_types
539 set_private_data_ptrs
540 get_framesize
541 init_frame
542 get_private_data_copy_length
543 copy_private_data
544 compile_matchingpath
545 compile_backtrackingpath
546 */
547
/* Returns a pointer to the opcode that follows the one at cc, or NULL when
the opcode cannot be handled (OP_ANYBYTE while common->utf is set, or an
opcode that has no case label in this function).

The step size depends on the opcode group:
 - first group: the fixed length taken from the PRIV(OP_lengths) table;
 - character opcodes: the table length plus, when SUPPORT_UTF is compiled in,
   common->utf is set and HAS_EXTRALEN says so, GET_EXTRALEN of the last
   code unit already counted;
 - OP_TYPE* repeats: the table length minus one;
 - OP_XCLASS: the length stored in the opcode itself (GET(cc, 1));
 - OP_MARK and the *_ARG verbs: 1 + 2 + cc[1]. */
548 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
549 {
/* common is only read in SUPPORT_UTF builds. */
550 SLJIT_UNUSED_ARG(common);
551 switch(*cc)
552 {
553 case OP_SOD:
554 case OP_SOM:
555 case OP_SET_SOM:
556 case OP_NOT_WORD_BOUNDARY:
557 case OP_WORD_BOUNDARY:
558 case OP_NOT_DIGIT:
559 case OP_DIGIT:
560 case OP_NOT_WHITESPACE:
561 case OP_WHITESPACE:
562 case OP_NOT_WORDCHAR:
563 case OP_WORDCHAR:
564 case OP_ANY:
565 case OP_ALLANY:
566 case OP_NOTPROP:
567 case OP_PROP:
568 case OP_ANYNL:
569 case OP_NOT_HSPACE:
570 case OP_HSPACE:
571 case OP_NOT_VSPACE:
572 case OP_VSPACE:
573 case OP_EXTUNI:
574 case OP_EODN:
575 case OP_EOD:
576 case OP_CIRC:
577 case OP_CIRCM:
578 case OP_DOLL:
579 case OP_DOLLM:
580 case OP_CRSTAR:
581 case OP_CRMINSTAR:
582 case OP_CRPLUS:
583 case OP_CRMINPLUS:
584 case OP_CRQUERY:
585 case OP_CRMINQUERY:
586 case OP_CRRANGE:
587 case OP_CRMINRANGE:
588 case OP_CLASS:
589 case OP_NCLASS:
590 case OP_REF:
591 case OP_REFI:
592 case OP_RECURSE:
593 case OP_CALLOUT:
594 case OP_ALT:
595 case OP_KET:
596 case OP_KETRMAX:
597 case OP_KETRMIN:
598 case OP_KETRPOS:
599 case OP_REVERSE:
600 case OP_ASSERT:
601 case OP_ASSERT_NOT:
602 case OP_ASSERTBACK:
603 case OP_ASSERTBACK_NOT:
604 case OP_ONCE:
605 case OP_ONCE_NC:
606 case OP_BRA:
607 case OP_BRAPOS:
608 case OP_CBRA:
609 case OP_CBRAPOS:
610 case OP_COND:
611 case OP_SBRA:
612 case OP_SBRAPOS:
613 case OP_SCBRA:
614 case OP_SCBRAPOS:
615 case OP_SCOND:
616 case OP_CREF:
617 case OP_NCREF:
618 case OP_RREF:
619 case OP_NRREF:
620 case OP_DEF:
621 case OP_BRAZERO:
622 case OP_BRAMINZERO:
623 case OP_BRAPOSZERO:
624 case OP_PRUNE:
625 case OP_SKIP:
626 case OP_THEN:
627 case OP_COMMIT:
628 case OP_FAIL:
629 case OP_ACCEPT:
630 case OP_ASSERT_ACCEPT:
631 case OP_CLOSE:
632 case OP_SKIPZERO:
/* Fixed length opcodes: the table value is the whole step. */
633 return cc + PRIV(OP_lengths)[*cc];
634
635 case OP_CHAR:
636 case OP_CHARI:
637 case OP_NOT:
638 case OP_NOTI:
639 case OP_STAR:
640 case OP_MINSTAR:
641 case OP_PLUS:
642 case OP_MINPLUS:
643 case OP_QUERY:
644 case OP_MINQUERY:
645 case OP_UPTO:
646 case OP_MINUPTO:
647 case OP_EXACT:
648 case OP_POSSTAR:
649 case OP_POSPLUS:
650 case OP_POSQUERY:
651 case OP_POSUPTO:
652 case OP_STARI:
653 case OP_MINSTARI:
654 case OP_PLUSI:
655 case OP_MINPLUSI:
656 case OP_QUERYI:
657 case OP_MINQUERYI:
658 case OP_UPTOI:
659 case OP_MINUPTOI:
660 case OP_EXACTI:
661 case OP_POSSTARI:
662 case OP_POSPLUSI:
663 case OP_POSQUERYI:
664 case OP_POSUPTOI:
665 case OP_NOTSTAR:
666 case OP_NOTMINSTAR:
667 case OP_NOTPLUS:
668 case OP_NOTMINPLUS:
669 case OP_NOTQUERY:
670 case OP_NOTMINQUERY:
671 case OP_NOTUPTO:
672 case OP_NOTMINUPTO:
673 case OP_NOTEXACT:
674 case OP_NOTPOSSTAR:
675 case OP_NOTPOSPLUS:
676 case OP_NOTPOSQUERY:
677 case OP_NOTPOSUPTO:
678 case OP_NOTSTARI:
679 case OP_NOTMINSTARI:
680 case OP_NOTPLUSI:
681 case OP_NOTMINPLUSI:
682 case OP_NOTQUERYI:
683 case OP_NOTMINQUERYI:
684 case OP_NOTUPTOI:
685 case OP_NOTMINUPTOI:
686 case OP_NOTEXACTI:
687 case OP_NOTPOSSTARI:
688 case OP_NOTPOSPLUSI:
689 case OP_NOTPOSQUERYI:
690 case OP_NOTPOSUPTOI:
/* Opcodes that carry a character: after the table length, the last code
unit counted (cc[-1]) tells whether more units of the same character follow. */
691 cc += PRIV(OP_lengths)[*cc];
692 #ifdef SUPPORT_UTF
693 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
694 #endif
695 return cc;
696
697 /* Special cases. */
/* NOTE(review): the "- 1" presumably leaves the result on the character
type opcode that follows the repeat, so that it is visited as the next
opcode - confirm. */
698 case OP_TYPESTAR:
699 case OP_TYPEMINSTAR:
700 case OP_TYPEPLUS:
701 case OP_TYPEMINPLUS:
702 case OP_TYPEQUERY:
703 case OP_TYPEMINQUERY:
704 case OP_TYPEUPTO:
705 case OP_TYPEMINUPTO:
706 case OP_TYPEEXACT:
707 case OP_TYPEPOSSTAR:
708 case OP_TYPEPOSPLUS:
709 case OP_TYPEPOSQUERY:
710 case OP_TYPEPOSUPTO:
711 return cc + PRIV(OP_lengths)[*cc] - 1;
712
713 case OP_ANYBYTE:
/* Rejected (NULL) in UTF mode, a single unit step otherwise. */
714 #ifdef SUPPORT_UTF
715 if (common->utf) return NULL;
716 #endif
717 return cc + 1;
718
719 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
720 case OP_XCLASS:
/* The total length is stored right after the opcode. */
721 return cc + GET(cc, 1);
722 #endif
723
724 case OP_MARK:
725 case OP_PRUNE_ARG:
726 case OP_SKIP_ARG:
727 case OP_THEN_ARG:
/* NOTE(review): cc[1] is presumably the length of the verb argument and the
constant 2 covers the length unit plus a terminator - confirm. */
728 return cc + 1 + 2 + cc[1];
729
730 default:
731 /* All opcodes are supported now! */
732 SLJIT_ASSERT_STOP();
733 return NULL;
734 }
735 }
736
737 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
738 {
739 pcre_uchar *name;
740 pcre_uchar *name2;
741 unsigned int cbra_index;
742 int i;
743
744 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
745 while (cc < ccend)
746 {
747 switch(*cc)
748 {
749 case OP_SET_SOM:
750 common->has_set_som = TRUE;
751 cc += 1;
752 break;
753
754 case OP_REF:
755 case OP_REFI:
756 common->optimized_cbracket[GET2(cc, 1)] = 0;
757 cc += 1 + IMM2_SIZE;
758 break;
759
760 case OP_CBRAPOS:
761 case OP_SCBRAPOS:
762 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
763 cc += 1 + LINK_SIZE + IMM2_SIZE;
764 break;
765
766 case OP_COND:
767 case OP_SCOND:
768 /* Only AUTO_CALLOUT can insert this opcode. We do
769 not intend to support this case. */
770 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
771 return FALSE;
772 cc += 1 + LINK_SIZE;
773 break;
774
775 case OP_CREF:
776 i = GET2(cc, 1);
777 common->optimized_cbracket[i] = 0;
778 cc += 1 + IMM2_SIZE;
779 break;
780
781 case OP_NCREF:
782 cbra_index = GET2(cc, 1);
783 name = (pcre_uchar *)common->name_table;
784 name2 = name;
785 for (i = 0; i < common->name_count; i++)
786 {
787 if (GET2(name, 0) == cbra_index) break;
788 name += common->name_entry_size;
789 }
790 SLJIT_ASSERT(i != common->name_count);
791
792 for (i = 0; i < common->name_count; i++)
793 {
794 if (STRCMP_UC_UC(name2 + IMM2_SIZE, name + IMM2_SIZE) == 0)
795 common->optimized_cbracket[GET2(name2, 0)] = 0;
796 name2 += common->name_entry_size;
797 }
798 cc += 1 + IMM2_SIZE;
799 break;
800
801 case OP_RECURSE:
802 /* Set its value only once. */
803 if (common->recursive_head_ptr == 0)
804 {
805 common->recursive_head_ptr = common->ovector_start;
806 common->ovector_start += sizeof(sljit_sw);
807 }
808 cc += 1 + LINK_SIZE;
809 break;
810
811 case OP_CALLOUT:
812 if (common->capture_last_ptr == 0)
813 {
814 common->capture_last_ptr = common->ovector_start;
815 common->ovector_start += sizeof(sljit_sw);
816 }
817 cc += 2 + 2 * LINK_SIZE;
818 break;
819
820 case OP_THEN_ARG:
821 common->has_then = TRUE;
822 common->control_head_ptr = 1;
823 /* Fall through. */
824
825 case OP_PRUNE_ARG:
826 common->needs_start_ptr = TRUE;
827 /* Fall through. */
828
829 case OP_MARK:
830 if (common->mark_ptr == 0)
831 {
832 common->mark_ptr = common->ovector_start;
833 common->ovector_start += sizeof(sljit_sw);
834 }
835 cc += 1 + 2 + cc[1];
836 break;
837
838 case OP_THEN:
839 common->has_then = TRUE;
840 common->control_head_ptr = 1;
841 /* Fall through. */
842
843 case OP_PRUNE:
844 case OP_SKIP:
845 common->needs_start_ptr = TRUE;
846 cc += 1;
847 break;
848
849 case OP_SKIP_ARG:
850 common->control_head_ptr = 1;
851 common->has_skip_arg = TRUE;
852 cc += 1 + 2 + cc[1];
853 break;
854
855 default:
856 cc = next_opcode(common, cc);
857 if (cc == NULL)
858 return FALSE;
859 break;
860 }
861 }
862 return TRUE;
863 }
864
865 static int get_class_iterator_size(pcre_uchar *cc)
866 {
867 switch(*cc)
868 {
869 case OP_CRSTAR:
870 case OP_CRPLUS:
871 return 2;
872
873 case OP_CRMINSTAR:
874 case OP_CRMINPLUS:
875 case OP_CRQUERY:
876 case OP_CRMINQUERY:
877 return 1;
878
879 case OP_CRRANGE:
880 case OP_CRMINRANGE:
881 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
882 return 0;
883 return 2;
884
885 default:
886 return 0;
887 }
888 }
889
890 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
891 {
892 pcre_uchar *end = bracketend(begin);
893 pcre_uchar *next;
894 pcre_uchar *next_end;
895 pcre_uchar *max_end;
896 pcre_uchar type;
897 sljit_sw length = end - begin;
898 int min, max, i;
899
900 /* Detect fixed iterations first. */
901 if (end[-(1 + LINK_SIZE)] != OP_KET)
902 return FALSE;
903
904 /* Already detected repeat. */
905 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
906 return TRUE;
907
908 next = end;
909 min = 1;
910 while (1)
911 {
912 if (*next != *begin)
913 break;
914 next_end = bracketend(next);
915 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
916 break;
917 next = next_end;
918 min++;
919 }
920
921 if (min == 2)
922 return FALSE;
923
924 max = 0;
925 max_end = next;
926 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
927 {
928 type = *next;
929 while (1)
930 {
931 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
932 break;
933 next_end = bracketend(next + 2 + LINK_SIZE);
934 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
935 break;
936 next = next_end;
937 max++;
938 }
939
940 if (next[0] == type && next[1] == *begin && max >= 1)
941 {
942 next_end = bracketend(next + 1);
943 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
944 {
945 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
946 if (*next_end != OP_KET)
947 break;
948
949 if (i == max)
950 {
951 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
952 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
953 /* +2 the original and the last. */
954 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
955 if (min == 1)
956 return TRUE;
957 min--;
958 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
959 }
960 }
961 }
962 }
963
964 if (min >= 3)
965 {
966 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
967 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
968 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
969 return TRUE;
970 }
971
972 return FALSE;
973 }
974
/* Each macro below expands to a run of "case" labels only (no statements),
to be placed in front of the statements of a switch branch.
set_private_data_ptrs uses them and assigns space = 1 for the _1 groups and
space = 2 for the _2A / _2B groups; for the TYPE variants of 2A / 2B it does
so only when the character type that follows is neither OP_ANYNL nor
OP_EXTUNI. The 2B groups are the UPTO style opcodes, for which it also skips
an IMM2_SIZE operand. */
975 #define CASE_ITERATOR_PRIVATE_DATA_1 \
976 case OP_MINSTAR: \
977 case OP_MINPLUS: \
978 case OP_QUERY: \
979 case OP_MINQUERY: \
980 case OP_MINSTARI: \
981 case OP_MINPLUSI: \
982 case OP_QUERYI: \
983 case OP_MINQUERYI: \
984 case OP_NOTMINSTAR: \
985 case OP_NOTMINPLUS: \
986 case OP_NOTQUERY: \
987 case OP_NOTMINQUERY: \
988 case OP_NOTMINSTARI: \
989 case OP_NOTMINPLUSI: \
990 case OP_NOTQUERYI: \
991 case OP_NOTMINQUERYI:
992
993 #define CASE_ITERATOR_PRIVATE_DATA_2A \
994 case OP_STAR: \
995 case OP_PLUS: \
996 case OP_STARI: \
997 case OP_PLUSI: \
998 case OP_NOTSTAR: \
999 case OP_NOTPLUS: \
1000 case OP_NOTSTARI: \
1001 case OP_NOTPLUSI:
1002
1003 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1004 case OP_UPTO: \
1005 case OP_MINUPTO: \
1006 case OP_UPTOI: \
1007 case OP_MINUPTOI: \
1008 case OP_NOTUPTO: \
1009 case OP_NOTMINUPTO: \
1010 case OP_NOTUPTOI: \
1011 case OP_NOTMINUPTOI:
1012
1013 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1014 case OP_TYPEMINSTAR: \
1015 case OP_TYPEMINPLUS: \
1016 case OP_TYPEQUERY: \
1017 case OP_TYPEMINQUERY:
1018
1019 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1020 case OP_TYPESTAR: \
1021 case OP_TYPEPLUS:
1022
1023 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1024 case OP_TYPEUPTO: \
1025 case OP_TYPEMINUPTO:
1026
1027 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1028 {
1029 pcre_uchar *cc = common->start;
1030 pcre_uchar *alternative;
1031 pcre_uchar *end = NULL;
1032 int private_data_ptr = *private_data_start;
1033 int space, size, bracketlen;
1034
1035 while (cc < ccend)
1036 {
1037 space = 0;
1038 size = 0;
1039 bracketlen = 0;
1040 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1041 return;
1042
1043 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1044 if (detect_repeat(common, cc))
1045 {
1046 /* These brackets are converted to repeats, so no global
1047 based single character repeat is allowed. */
1048 if (cc >= end)
1049 end = bracketend(cc);
1050 }
1051
1052 switch(*cc)
1053 {
1054 case OP_KET:
1055 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1056 {
1057 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1058 private_data_ptr += sizeof(sljit_sw);
1059 cc += common->private_data_ptrs[cc + 1 - common->start];
1060 }
1061 cc += 1 + LINK_SIZE;
1062 break;
1063
1064 case OP_ASSERT:
1065 case OP_ASSERT_NOT:
1066 case OP_ASSERTBACK:
1067 case OP_ASSERTBACK_NOT:
1068 case OP_ONCE:
1069 case OP_ONCE_NC:
1070 case OP_BRAPOS:
1071 case OP_SBRA:
1072 case OP_SBRAPOS:
1073 case OP_SCOND:
1074 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1075 private_data_ptr += sizeof(sljit_sw);
1076 bracketlen = 1 + LINK_SIZE;
1077 break;
1078
1079 case OP_CBRAPOS:
1080 case OP_SCBRAPOS:
1081 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1082 private_data_ptr += sizeof(sljit_sw);
1083 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1084 break;
1085
1086 case OP_COND:
1087 /* Might be a hidden SCOND. */
1088 alternative = cc + GET(cc, 1);
1089 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1090 {
1091 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1092 private_data_ptr += sizeof(sljit_sw);
1093 }
1094 bracketlen = 1 + LINK_SIZE;
1095 break;
1096
1097 case OP_BRA:
1098 bracketlen = 1 + LINK_SIZE;
1099 break;
1100
1101 case OP_CBRA:
1102 case OP_SCBRA:
1103 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1104 break;
1105
1106 CASE_ITERATOR_PRIVATE_DATA_1
1107 space = 1;
1108 size = -2;
1109 break;
1110
1111 CASE_ITERATOR_PRIVATE_DATA_2A
1112 space = 2;
1113 size = -2;
1114 break;
1115
1116 CASE_ITERATOR_PRIVATE_DATA_2B
1117 space = 2;
1118 size = -(2 + IMM2_SIZE);
1119 break;
1120
1121 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1122 space = 1;
1123 size = 1;
1124 break;
1125
1126 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1127 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1128 space = 2;
1129 size = 1;
1130 break;
1131
1132 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1133 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1134 space = 2;
1135 size = 1 + IMM2_SIZE;
1136 break;
1137
1138 case OP_CLASS:
1139 case OP_NCLASS:
1140 size += 1 + 32 / sizeof(pcre_uchar);
1141 space = get_class_iterator_size(cc + size);
1142 break;
1143
1144 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1145 case OP_XCLASS:
1146 size = GET(cc, 1);
1147 space = get_class_iterator_size(cc + size);
1148 break;
1149 #endif
1150
1151 default:
1152 cc = next_opcode(common, cc);
1153 SLJIT_ASSERT(cc != NULL);
1154 break;
1155 }
1156
1157 /* Character iterators, which are not inside a repeated bracket,
1158 gets a private slot instead of allocating it on the stack. */
1159 if (space > 0 && cc >= end)
1160 {
1161 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1162 private_data_ptr += sizeof(sljit_sw) * space;
1163 }
1164
1165 if (size != 0)
1166 {
1167 if (size < 0)
1168 {
1169 cc += -size;
1170 #ifdef SUPPORT_UTF
1171 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1172 #endif
1173 }
1174 else
1175 cc += size;
1176 }
1177
1178 if (bracketlen > 0)
1179 {
1180 if (cc >= end)
1181 {
1182 end = bracketend(cc);
1183 if (end[-1 - LINK_SIZE] == OP_KET)
1184 end = NULL;
1185 }
1186 cc += bracketlen;
1187 }
1188 }
1189 *private_data_start = private_data_ptr;
1190 }
1191
1192 /* Returns with a frame_types (always < 0) if no need for frame.
   Otherwise returns the number of words counted for the opcodes found in
   [cc, ccend) plus one.

   If ccend is NULL, the range is derived from the bracket starting at cc:
   its end is located with bracketend() and cc is advanced to the opcode
   following the bracket start. When recursive is set, OP_SET_SOM and the
   mark opcodes are treated as already found, so they add nothing to the
   length. *needs_control_head is set to TRUE when a mark-like opcode is
   found while common->control_head_ptr is non-zero (or unconditionally
   when DEBUG_FORCE_CONTROL_HEAD is enabled). */
1193 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1194 {
1195 int length = 0;
1196 int possessive = 0;
1197 BOOL stack_restore = FALSE;
1198 BOOL setsom_found = recursive;
1199 BOOL setmark_found = recursive;
1200 /* The last capture is a local variable even for recursions. */
1201 BOOL capture_last_found = FALSE;
1202
1203 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1204 SLJIT_ASSERT(common->control_head_ptr != 0);
1205 *needs_control_head = TRUE;
1206 #else
1207 *needs_control_head = FALSE;
1208 #endif
1209
1210 if (ccend == NULL)
1211 {
1212 ccend = bracketend(cc) - (1 + LINK_SIZE);
1213 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1214 {
/* The initial length is remembered in possessive, so the check after the
   loop can tell whether anything else was added to it. */
1215 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1216 /* This is correct regardless of common->capture_last_ptr. */
1217 capture_last_found = TRUE;
1218 }
1219 cc = next_opcode(common, cc);
1220 }
1221
1222 SLJIT_ASSERT(cc != NULL);
1223 while (cc < ccend)
1224 switch(*cc)
1225 {
/* Each of the start-of-match, mark and last-capture values is counted
   only once (two words each); init_frame() stores them as a pair. */
1226 case OP_SET_SOM:
1227 SLJIT_ASSERT(common->has_set_som);
1228 stack_restore = TRUE;
1229 if (!setsom_found)
1230 {
1231 length += 2;
1232 setsom_found = TRUE;
1233 }
1234 cc += 1;
1235 break;
1236
1237 case OP_MARK:
1238 case OP_PRUNE_ARG:
1239 case OP_THEN_ARG:
1240 SLJIT_ASSERT(common->mark_ptr != 0);
1241 stack_restore = TRUE;
1242 if (!setmark_found)
1243 {
1244 length += 2;
1245 setmark_found = TRUE;
1246 }
1247 if (common->control_head_ptr != 0)
1248 *needs_control_head = TRUE;
1249 cc += 1 + 2 + cc[1];
1250 break;
1251
/* A recursion counts every value which is in use by the pattern and has
   not been counted yet. */
1252 case OP_RECURSE:
1253 stack_restore = TRUE;
1254 if (common->has_set_som && !setsom_found)
1255 {
1256 length += 2;
1257 setsom_found = TRUE;
1258 }
1259 if (common->mark_ptr != 0 && !setmark_found)
1260 {
1261 length += 2;
1262 setmark_found = TRUE;
1263 }
1264 if (common->capture_last_ptr != 0 && !capture_last_found)
1265 {
1266 length += 2;
1267 capture_last_found = TRUE;
1268 }
1269 cc += 1 + LINK_SIZE;
1270 break;
1271
/* Every capturing bracket adds three words (init_frame() stores the
   ovector offset and the two ovector values). */
1272 case OP_CBRA:
1273 case OP_CBRAPOS:
1274 case OP_SCBRA:
1275 case OP_SCBRAPOS:
1276 stack_restore = TRUE;
1277 if (common->capture_last_ptr != 0 && !capture_last_found)
1278 {
1279 length += 2;
1280 capture_last_found = TRUE;
1281 }
1282 length += 3;
1283 cc += 1 + LINK_SIZE + IMM2_SIZE;
1284 break;
1285
/* Any opcode without its own case label sets stack_restore; the opcodes
   listed after the default label are skipped without setting it. */
1286 default:
1287 stack_restore = TRUE;
1288 /* Fall through. */
1289
1290 case OP_NOT_WORD_BOUNDARY:
1291 case OP_WORD_BOUNDARY:
1292 case OP_NOT_DIGIT:
1293 case OP_DIGIT:
1294 case OP_NOT_WHITESPACE:
1295 case OP_WHITESPACE:
1296 case OP_NOT_WORDCHAR:
1297 case OP_WORDCHAR:
1298 case OP_ANY:
1299 case OP_ALLANY:
1300 case OP_ANYBYTE:
1301 case OP_NOTPROP:
1302 case OP_PROP:
1303 case OP_ANYNL:
1304 case OP_NOT_HSPACE:
1305 case OP_HSPACE:
1306 case OP_NOT_VSPACE:
1307 case OP_VSPACE:
1308 case OP_EXTUNI:
1309 case OP_EODN:
1310 case OP_EOD:
1311 case OP_CIRC:
1312 case OP_CIRCM:
1313 case OP_DOLL:
1314 case OP_DOLLM:
1315 case OP_CHAR:
1316 case OP_CHARI:
1317 case OP_NOT:
1318 case OP_NOTI:
1319
1320 case OP_EXACT:
1321 case OP_POSSTAR:
1322 case OP_POSPLUS:
1323 case OP_POSQUERY:
1324 case OP_POSUPTO:
1325
1326 case OP_EXACTI:
1327 case OP_POSSTARI:
1328 case OP_POSPLUSI:
1329 case OP_POSQUERYI:
1330 case OP_POSUPTOI:
1331
1332 case OP_NOTEXACT:
1333 case OP_NOTPOSSTAR:
1334 case OP_NOTPOSPLUS:
1335 case OP_NOTPOSQUERY:
1336 case OP_NOTPOSUPTO:
1337
1338 case OP_NOTEXACTI:
1339 case OP_NOTPOSSTARI:
1340 case OP_NOTPOSPLUSI:
1341 case OP_NOTPOSQUERYI:
1342 case OP_NOTPOSUPTOI:
1343
1344 case OP_TYPEEXACT:
1345 case OP_TYPEPOSSTAR:
1346 case OP_TYPEPOSPLUS:
1347 case OP_TYPEPOSQUERY:
1348 case OP_TYPEPOSUPTO:
1349
1350 case OP_CLASS:
1351 case OP_NCLASS:
1352 case OP_XCLASS:
1353
1354 cc = next_opcode(common, cc);
1355 SLJIT_ASSERT(cc != NULL);
1356 break;
1357 }
1358
1359 /* Possessive quantifiers can use a special case. */
/* The length is still the initial one, so nothing was found inside the
   bracket. Note that this is also true when both values are 0. */
1360 if (SLJIT_UNLIKELY(possessive == length))
1361 return stack_restore ? no_frame : no_stack;
1362
/* One more word than the counted items (init_frame() stores a
   terminating 0 after them). */
1363 if (length > 0)
1364 return length + 1;
1365 return stack_restore ? no_frame : no_stack;
1366 }
1367
/* Emits the code which stores a frame on the stack for the opcodes in
   [cc, ccend), starting at word position stackpos. The opcodes are
   processed the same way as in get_framesize():

   - the start-of-match, mark and last-capture values are stored once, as a
     pair of words: the negated offset of the local followed by its
     current value
   - each capturing bracket is stored as three words: OVECTOR(offset)
     followed by the two ovector values of the bracket
   - a 0 word terminates the frame

   If ccend is NULL, the range is derived from the bracket starting at cc.
   The final assertion checks that exactly the words between stackpos and
   stacktop were filled. */
1368 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1369 {
1370 DEFINE_COMPILER;
1371 BOOL setsom_found = recursive;
1372 BOOL setmark_found = recursive;
1373 /* The last capture is a local variable even for recursions. */
1374 BOOL capture_last_found = FALSE;
1375 int offset;
1376
1377 /* >= 1 + shortest item size (2) */
1378 SLJIT_UNUSED_ARG(stacktop);
1379 SLJIT_ASSERT(stackpos >= stacktop + 2);
1380
/* From here stackpos is the value produced by STACK() and is advanced by
   sizeof(sljit_sw) after each stored word. */
1381 stackpos = STACK(stackpos);
1382 if (ccend == NULL)
1383 {
1384 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive (S)CBRAPOS is not skipped, so it is processed by the
   capturing bracket case of the loop below. */
1385 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1386 cc = next_opcode(common, cc);
1387 }
1388
1389 SLJIT_ASSERT(cc != NULL);
1390 while (cc < ccend)
1391 switch(*cc)
1392 {
1393 case OP_SET_SOM:
1394 SLJIT_ASSERT(common->has_set_som);
1395 if (!setsom_found)
1396 {
1397 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1398 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1399 stackpos += (int)sizeof(sljit_sw);
1400 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1401 stackpos += (int)sizeof(sljit_sw);
1402 setsom_found = TRUE;
1403 }
1404 cc += 1;
1405 break;
1406
1407 case OP_MARK:
1408 case OP_PRUNE_ARG:
1409 case OP_THEN_ARG:
1410 SLJIT_ASSERT(common->mark_ptr != 0);
1411 if (!setmark_found)
1412 {
1413 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1414 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1415 stackpos += (int)sizeof(sljit_sw);
1416 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1417 stackpos += (int)sizeof(sljit_sw);
1418 setmark_found = TRUE;
1419 }
1420 cc += 1 + 2 + cc[1];
1421 break;
1422
/* A recursion stores every value which is in use by the pattern and has
   not been stored yet. */
1423 case OP_RECURSE:
1424 if (common->has_set_som && !setsom_found)
1425 {
1426 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1428 stackpos += (int)sizeof(sljit_sw);
1429 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1430 stackpos += (int)sizeof(sljit_sw);
1431 setsom_found = TRUE;
1432 }
1433 if (common->mark_ptr != 0 && !setmark_found)
1434 {
1435 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1436 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1437 stackpos += (int)sizeof(sljit_sw);
1438 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1439 stackpos += (int)sizeof(sljit_sw);
1440 setmark_found = TRUE;
1441 }
1442 if (common->capture_last_ptr != 0 && !capture_last_found)
1443 {
1444 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1445 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1446 stackpos += (int)sizeof(sljit_sw);
1447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1448 stackpos += (int)sizeof(sljit_sw);
1449 capture_last_found = TRUE;
1450 }
1451 cc += 1 + LINK_SIZE;
1452 break;
1453
1454 case OP_CBRA:
1455 case OP_CBRAPOS:
1456 case OP_SCBRA:
1457 case OP_SCBRAPOS:
1458 if (common->capture_last_ptr != 0 && !capture_last_found)
1459 {
1460 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1461 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1462 stackpos += (int)sizeof(sljit_sw);
1463 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1464 stackpos += (int)sizeof(sljit_sw);
1465 capture_last_found = TRUE;
1466 }
/* The bracket number is doubled to get the index of the first ovector
   entry of the bracket. Unlike the pairs above, the offset is stored
   without negation. */
1467 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1469 stackpos += (int)sizeof(sljit_sw);
1470 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1471 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1472 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1473 stackpos += (int)sizeof(sljit_sw);
1474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1475 stackpos += (int)sizeof(sljit_sw);
1476
1477 cc += 1 + LINK_SIZE + IMM2_SIZE;
1478 break;
1479
1480 default:
1481 cc = next_opcode(common, cc);
1482 SLJIT_ASSERT(cc != NULL);
1483 break;
1484 }
1485
/* Terminating 0 word of the frame. */
1486 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1487 SLJIT_ASSERT(stackpos == STACK(stacktop));
1488 }
1489
/* Returns the number of machine words which belong to the private data of
   the opcodes in [cc, ccend). The opcodes are walked the same way as in
   copy_private_data(), and each case adds as many words as that function
   copies for the same opcode.

   The count starts at 2, or at 3 when needs_control_head is set.
   NOTE(review): copy_private_data() saves recursive_head_ptr and, when
   needed, control_head_ptr before the per-opcode words; the purpose of the
   remaining initial word is not visible here - confirm against the caller.

   The range must end exactly on an opcode boundary (asserted at the end). */
1490 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1491 {
1492 int private_data_length = needs_control_head ? 3 : 2;
1493 int size;
1494 pcre_uchar *alternative;
1495 /* Calculate the sum of the private machine words. */
1496 while (cc < ccend)
1497 {
1498 size = 0;
1499 switch(*cc)
1500 {
/* A KET is counted only when a private data slot was assigned to it. */
1501 case OP_KET:
1502 if (PRIVATE_DATA(cc) != 0)
1503 private_data_length++;
1504 cc += 1 + LINK_SIZE;
1505 break;
1506
1507 case OP_ASSERT:
1508 case OP_ASSERT_NOT:
1509 case OP_ASSERTBACK:
1510 case OP_ASSERTBACK_NOT:
1511 case OP_ONCE:
1512 case OP_ONCE_NC:
1513 case OP_BRAPOS:
1514 case OP_SBRA:
1515 case OP_SBRAPOS:
1516 case OP_SCOND:
1517 private_data_length++;
1518 cc += 1 + LINK_SIZE;
1519 break;
1520
/* Capturing brackets are counted unless their optimized_cbracket entry
   is non-zero. */
1521 case OP_CBRA:
1522 case OP_SCBRA:
1523 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1524 private_data_length++;
1525 cc += 1 + LINK_SIZE + IMM2_SIZE;
1526 break;
1527
1528 case OP_CBRAPOS:
1529 case OP_SCBRAPOS:
1530 private_data_length += 2;
1531 cc += 1 + LINK_SIZE + IMM2_SIZE;
1532 break;
1533
1534 case OP_COND:
1535 /* Might be a hidden SCOND. */
1536 alternative = cc + GET(cc, 1);
1537 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1538 private_data_length++;
1539 cc += 1 + LINK_SIZE;
1540 break;
1541
/* Iterators are counted only when a private data slot was assigned to
   them. In UTF mode the character operand may span several code units,
   which are skipped as well. */
1542 CASE_ITERATOR_PRIVATE_DATA_1
1543 if (PRIVATE_DATA(cc))
1544 private_data_length++;
1545 cc += 2;
1546 #ifdef SUPPORT_UTF
1547 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1548 #endif
1549 break;
1550
1551 CASE_ITERATOR_PRIVATE_DATA_2A
1552 if (PRIVATE_DATA(cc))
1553 private_data_length += 2;
1554 cc += 2;
1555 #ifdef SUPPORT_UTF
1556 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1557 #endif
1558 break;
1559
1560 CASE_ITERATOR_PRIVATE_DATA_2B
1561 if (PRIVATE_DATA(cc))
1562 private_data_length += 2;
1563 cc += 2 + IMM2_SIZE;
1564 #ifdef SUPPORT_UTF
1565 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1566 #endif
1567 break;
1568
/* Only the iterator opcode itself is skipped here; the type opcode which
   follows is handled by a later iteration of the loop. */
1569 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1570 if (PRIVATE_DATA(cc))
1571 private_data_length++;
1572 cc += 1;
1573 break;
1574
1575 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1576 if (PRIVATE_DATA(cc))
1577 private_data_length += 2;
1578 cc += 1;
1579 break;
1580
1581 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1582 if (PRIVATE_DATA(cc))
1583 private_data_length += 2;
1584 cc += 1 + IMM2_SIZE;
1585 break;
1586
/* The number of words of a class depends on the iterator which follows
   the class data (see get_class_iterator_size()). */
1587 case OP_CLASS:
1588 case OP_NCLASS:
1589 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1590 case OP_XCLASS:
1591 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1592 #else
1593 size = 1 + 32 / (int)sizeof(pcre_uchar);
1594 #endif
1595 if (PRIVATE_DATA(cc))
1596 private_data_length += get_class_iterator_size(cc + size);
1597 cc += size;
1598 break;
1599
1600 default:
1601 cc = next_opcode(common, cc);
1602 SLJIT_ASSERT(cc != NULL);
1603 break;
1604 }
1605 }
1606 SLJIT_ASSERT(cc == ccend);
1607 return private_data_length;
1608 }
1609
/* Emits the code which copies the private data words of the opcodes in
   [cc, ccend) between the local area and the stack.

   save = TRUE:  the locals are read and written to the stack; the first
                 words saved are common->recursive_head_ptr and, when
                 needs_control_head is set, common->control_head_ptr
   save = FALSE: the words are read back from the stack and written to the
                 locals; the head words are skipped, not restored

   The opcodes are walked the same way as in get_private_data_copy_length().
   For each opcode, srcw[] receives the local offsets of its (at most two)
   words and count their number. The words are moved through TMP1 and TMP2
   alternately (tmp1next selects the register used next), so the transfer
   of a word is completed only when the same register is needed again, or
   by the flush at the end.

   The final assertion checks that the whole range was processed and that
   exactly the stack area between stackptr and stacktop was used. */
1610 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1611 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1612 {
1613 DEFINE_COMPILER;
1614 int srcw[2];
1615 int count, size;
1616 BOOL tmp1next = TRUE;
1617 BOOL tmp1empty = TRUE;
1618 BOOL tmp2empty = TRUE;
1619 pcre_uchar *alternative;
/* start: emit the head words (save only); loop: walk the opcodes;
   end: the whole range has been processed. */
1620 enum {
1621 start,
1622 loop,
1623 end
1624 } status;
1625
1626 status = save ? start : loop;
1627 stackptr = STACK(stackptr - 2);
1628 stacktop = STACK(stacktop - 1);
1629
1630 if (!save)
1631 {
/* Skip the head words, then preload both registers from the stack (as
   far as there are words left). */
1632 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1633 if (stackptr < stacktop)
1634 {
1635 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1636 stackptr += sizeof(sljit_sw);
1637 tmp1empty = FALSE;
1638 }
1639 if (stackptr < stacktop)
1640 {
1641 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1642 stackptr += sizeof(sljit_sw);
1643 tmp2empty = FALSE;
1644 }
1645 /* The tmp1next must be TRUE in either way. */
1646 }
1647
1648 do
1649 {
1650 count = 0;
1651 switch(status)
1652 {
1653 case start:
1654 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1655 count = 1;
1656 srcw[0] = common->recursive_head_ptr;
1657 if (needs_control_head)
1658 {
1659 SLJIT_ASSERT(common->control_head_ptr != 0);
1660 count = 2;
1661 srcw[1] = common->control_head_ptr;
1662 }
1663 status = loop;
1664 break;
1665
1666 case loop:
1667 if (cc >= ccend)
1668 {
1669 status = end;
1670 break;
1671 }
1672
/* Select the words of the current opcode; count stays 0 when the opcode
   has no private data to copy. */
1673 switch(*cc)
1674 {
1675 case OP_KET:
1676 if (PRIVATE_DATA(cc) != 0)
1677 {
1678 count = 1;
1679 srcw[0] = PRIVATE_DATA(cc);
1680 }
1681 cc += 1 + LINK_SIZE;
1682 break;
1683
1684 case OP_ASSERT:
1685 case OP_ASSERT_NOT:
1686 case OP_ASSERTBACK:
1687 case OP_ASSERTBACK_NOT:
1688 case OP_ONCE:
1689 case OP_ONCE_NC:
1690 case OP_BRAPOS:
1691 case OP_SBRA:
1692 case OP_SBRAPOS:
1693 case OP_SCOND:
1694 count = 1;
1695 srcw[0] = PRIVATE_DATA(cc);
1696 SLJIT_ASSERT(srcw[0] != 0);
1697 cc += 1 + LINK_SIZE;
1698 break;
1699
/* The word of a capturing bracket is its OVECTOR_PRIV() entry, copied
   unless the optimized_cbracket entry of the bracket is non-zero. */
1700 case OP_CBRA:
1701 case OP_SCBRA:
1702 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1703 {
1704 count = 1;
1705 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1706 }
1707 cc += 1 + LINK_SIZE + IMM2_SIZE;
1708 break;
1709
1710 case OP_CBRAPOS:
1711 case OP_SCBRAPOS:
1712 count = 2;
1713 srcw[0] = PRIVATE_DATA(cc);
1714 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1715 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1716 cc += 1 + LINK_SIZE + IMM2_SIZE;
1717 break;
1718
1719 case OP_COND:
1720 /* Might be a hidden SCOND. */
1721 alternative = cc + GET(cc, 1);
1722 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1723 {
1724 count = 1;
1725 srcw[0] = PRIVATE_DATA(cc);
1726 SLJIT_ASSERT(srcw[0] != 0);
1727 }
1728 cc += 1 + LINK_SIZE;
1729 break;
1730
1731 CASE_ITERATOR_PRIVATE_DATA_1
1732 if (PRIVATE_DATA(cc))
1733 {
1734 count = 1;
1735 srcw[0] = PRIVATE_DATA(cc);
1736 }
1737 cc += 2;
1738 #ifdef SUPPORT_UTF
1739 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1740 #endif
1741 break;
1742
/* The two words of an iterator are adjacent in the local area. */
1743 CASE_ITERATOR_PRIVATE_DATA_2A
1744 if (PRIVATE_DATA(cc))
1745 {
1746 count = 2;
1747 srcw[0] = PRIVATE_DATA(cc);
1748 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1749 }
1750 cc += 2;
1751 #ifdef SUPPORT_UTF
1752 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1753 #endif
1754 break;
1755
1756 CASE_ITERATOR_PRIVATE_DATA_2B
1757 if (PRIVATE_DATA(cc))
1758 {
1759 count = 2;
1760 srcw[0] = PRIVATE_DATA(cc);
1761 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1762 }
1763 cc += 2 + IMM2_SIZE;
1764 #ifdef SUPPORT_UTF
1765 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1766 #endif
1767 break;
1768
1769 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1770 if (PRIVATE_DATA(cc))
1771 {
1772 count = 1;
1773 srcw[0] = PRIVATE_DATA(cc);
1774 }
1775 cc += 1;
1776 break;
1777
1778 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1779 if (PRIVATE_DATA(cc))
1780 {
1781 count = 2;
1782 srcw[0] = PRIVATE_DATA(cc);
1783 srcw[1] = srcw[0] + sizeof(sljit_sw);
1784 }
1785 cc += 1;
1786 break;
1787
1788 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1789 if (PRIVATE_DATA(cc))
1790 {
1791 count = 2;
1792 srcw[0] = PRIVATE_DATA(cc);
1793 srcw[1] = srcw[0] + sizeof(sljit_sw);
1794 }
1795 cc += 1 + IMM2_SIZE;
1796 break;
1797
1798 case OP_CLASS:
1799 case OP_NCLASS:
1800 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1801 case OP_XCLASS:
1802 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1803 #else
1804 size = 1 + 32 / (int)sizeof(pcre_uchar);
1805 #endif
1806 if (PRIVATE_DATA(cc))
1807 switch(get_class_iterator_size(cc + size))
1808 {
1809 case 1:
1810 count = 1;
1811 srcw[0] = PRIVATE_DATA(cc);
1812 break;
1813
1814 case 2:
1815 count = 2;
1816 srcw[0] = PRIVATE_DATA(cc);
1817 srcw[1] = srcw[0] + sizeof(sljit_sw);
1818 break;
1819
1820 default:
1821 SLJIT_ASSERT_STOP();
1822 break;
1823 }
1824 cc += size;
1825 break;
1826
1827 default:
1828 cc = next_opcode(common, cc);
1829 SLJIT_ASSERT(cc != NULL);
1830 break;
1831 }
1832 break;
1833
1834 case end:
1835 SLJIT_ASSERT_STOP();
1836 break;
1837 }
1838
/* Transfer the selected words. count is decremented before it is used
   as an index, so srcw[1] is handled before srcw[0]; this holds for both
   directions. */
1839 while (count > 0)
1840 {
1841 count--;
1842 if (save)
1843 {
/* Saving: the previous content of the register (if any) is written to
   the stack before the register is loaded with the next local. */
1844 if (tmp1next)
1845 {
1846 if (!tmp1empty)
1847 {
1848 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1849 stackptr += sizeof(sljit_sw);
1850 }
1851 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1852 tmp1empty = FALSE;
1853 tmp1next = FALSE;
1854 }
1855 else
1856 {
1857 if (!tmp2empty)
1858 {
1859 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1860 stackptr += sizeof(sljit_sw);
1861 }
1862 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1863 tmp2empty = FALSE;
1864 tmp1next = TRUE;
1865 }
1866 }
1867 else
1868 {
/* Restoring: the register already holds the value of this local; after
   storing it, the register is refilled from the stack unless the end of
   the area (stacktop) has been reached. */
1869 if (tmp1next)
1870 {
1871 SLJIT_ASSERT(!tmp1empty);
1872 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1873 tmp1empty = stackptr >= stacktop;
1874 if (!tmp1empty)
1875 {
1876 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1877 stackptr += sizeof(sljit_sw);
1878 }
1879 tmp1next = FALSE;
1880 }
1881 else
1882 {
1883 SLJIT_ASSERT(!tmp2empty);
1884 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1885 tmp2empty = stackptr >= stacktop;
1886 if (!tmp2empty)
1887 {
1888 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1889 stackptr += sizeof(sljit_sw);
1890 }
1891 tmp1next = TRUE;
1892 }
1893 }
1894 }
1895 }
1896 while (status != end);
1897
1898 if (save)
1899 {
/* Flush the words still held in the registers. The register which would
   be used next holds the older word, so it is written first. */
1900 if (tmp1next)
1901 {
1902 if (!tmp1empty)
1903 {
1904 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1905 stackptr += sizeof(sljit_sw);
1906 }
1907 if (!tmp2empty)
1908 {
1909 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1910 stackptr += sizeof(sljit_sw);
1911 }
1912 }
1913 else
1914 {
1915 if (!tmp2empty)
1916 {
1917 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1918 stackptr += sizeof(sljit_sw);
1919 }
1920 if (!tmp1empty)
1921 {
1922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1923 stackptr += sizeof(sljit_sw);
1924 }
1925 }
1926 }
1927 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1928 }
1929
1930 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1931 {
1932 pcre_uchar *end = bracketend(cc);
1933 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1934
1935 /* Assert captures then. */
1936 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1937 current_offset = NULL;
1938 /* Conditional block does not. */
1939 if (*cc == OP_COND || *cc == OP_SCOND)
1940 has_alternatives = FALSE;
1941
1942 cc = next_opcode(common, cc);
1943 if (has_alternatives)
1944 current_offset = common->then_offsets + (cc - common->start);
1945
1946 while (cc < end)
1947 {
1948 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1949 cc = set_then_offsets(common, cc, current_offset);
1950 else
1951 {
1952 if (*cc == OP_ALT && has_alternatives)
1953 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1954 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1955 *current_offset = 1;
1956 cc = next_opcode(common, cc);
1957 }
1958 }
1959
1960 return end;
1961 }
1962
1963 #undef CASE_ITERATOR_PRIVATE_DATA_1
1964 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1965 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1966 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1967 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1968 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1969
1970 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1971 {
1972 return (value & (value - 1)) == 0;
1973 }
1974
1975 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1976 {
1977 while (list)
1978 {
1979 /* sljit_set_label is clever enough to do nothing
1980 if either the jump or the label is NULL. */
1981 SET_LABEL(list->jump, label);
1982 list = list->next;
1983 }
1984 }
1985
1986 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1987 {
1988 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1989 if (list_item)
1990 {
1991 list_item->next = *list;
1992 list_item->jump = jump;
1993 *list = list_item;
1994 }
1995 }
1996
1997 static void add_stub(compiler_common *common, struct sljit_jump *start)
1998 {
1999 DEFINE_COMPILER;
2000 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2001
2002 if (list_item)
2003 {
2004 list_item->start = start;
2005 list_item->quit = LABEL();
2006 list_item->next = common->stubs;
2007 common->stubs = list_item;
2008 }
2009 }
2010
2011 static void flush_stubs(compiler_common *common)
2012 {
2013 DEFINE_COMPILER;
2014 stub_list* list_item = common->stubs;
2015
2016 while (list_item)
2017 {
2018 JUMPHERE(list_item->start);
2019 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2020 JUMPTO(SLJIT_JUMP, list_item->quit);
2021 list_item = list_item->next;
2022 }
2023 common->stubs = NULL;
2024 }
2025
2026 static SLJIT_INLINE void count_match(compiler_common *common)
2027 {
2028 DEFINE_COMPILER;
2029
2030 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2031 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2032 }
2033
2034 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2035 {
2036 /* May destroy all locals and registers except TMP2. */
2037 DEFINE_COMPILER;
2038
2039 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2040 #ifdef DESTROY_REGISTERS
2041 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2042 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2043 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2044 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2046 #endif
2047 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2048 }
2049
2050 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2051 {
2052 DEFINE_COMPILER;
2053 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2054 }
2055
2056 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2057 {
2058 DEFINE_COMPILER;
2059 struct sljit_label *loop;
2060 int i;
2061
2062 /* At this point we can freely use all temporary registers. */
2063 SLJIT_ASSERT(length > 1);
2064 /* TMP1 returns with begin - 1. */
2065 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2066 if (length < 8)
2067 {
2068 for (i = 1; i < length; i++)
2069 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2070 }
2071 else
2072 {
2073 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2074 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2075 loop = LABEL();
2076 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2077 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2078 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2079 }
2080 }
2081
2082 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2083 {
2084 DEFINE_COMPILER;
2085 struct sljit_label *loop;
2086 int i;
2087
2088 SLJIT_ASSERT(length > 1);
2089 /* OVECTOR(1) contains the "string begin - 1" constant. */
2090 if (length > 2)
2091 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2092 if (length < 8)
2093 {
2094 for (i = 2; i < length; i++)
2095 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2096 }
2097 else
2098 {
2099 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2100 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2101 loop = LABEL();
2102 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2103 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2104 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2105 }
2106
2107 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2108 if (common->mark_ptr != 0)
2109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2110 if (common->control_head_ptr != 0)
2111 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2112 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2113 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2114 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2115 }
2116
2117 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2118 {
2119 while (current != NULL)
2120 {
2121 switch (current[-2])
2122 {
2123 case type_then_trap:
2124 break;
2125
2126 case type_mark:
2127 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2128 return current[-4];
2129 break;
2130
2131 default:
2132 SLJIT_ASSERT_STOP();
2133 break;
2134 }
2135 current = (sljit_sw*)current[-1];
2136 }
2137 return -1;
2138 }
2139
/* Emits the code which copies the ovector from the local area to the
   output buffer of the arguments, and computes the return value.

   - OVECTOR(1) is saved in SLJIT_SAVED_REG3 and replaced by STR_PTR
   - when marks are used, the mark local is copied to the mark_ptr member
     of the arguments
   - offset_count entries are converted from pointers to integer offsets
     (relative to begin) and stored in the offsets buffer
   - the return value is 1 when topbracket <= 1; otherwise the ovector pairs
     are scanned downwards from topbracket, and the scan stops at the first
     pair whose first word differs from the saved OVECTOR(1) value */
2140 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2141 {
2142 DEFINE_COMPILER;
2143 struct sljit_label *loop;
2144 struct sljit_jump *early_quit;
2145
2146 /* At this point we can freely use all registers. */
2147 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2148 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2149
2150 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2151 if (common->mark_ptr != 0)
2152 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2153 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2154 if (common->mark_ptr != 0)
2155 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* The output pointer starts one int before the buffer, because the
   store in the loop advances it by sizeof(int) before writing. */
2156 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2157 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2158 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2159 /* Unlikely, but possible */
2160 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2161 loop = LABEL();
2162 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2163 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2164 /* Copy the integer value to the output buffer */
2165 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2166 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2167 #endif
2168 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2169 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2170 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2171 JUMPHERE(early_quit);
2172
2173 /* Calculate the return value, which is the maximum ovector value. */
2174 if (topbracket > 1)
2175 {
/* The pointer starts one pair after the pair of topbracket and the
   counter at topbracket + 1; both are stepped down before the compare. */
2176 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2177 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2178
2179 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2180 loop = LABEL();
2181 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2182 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2183 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2184 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2185 }
2186 else
2187 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2188 }
2189
/* Emits the code which returns with PCRE_ERROR_PARTIAL, and jumps to quit.
   Depending on the real_offset_count member of the arguments:

   - less than 2: nothing is stored in the offsets buffer
   - at least 2:  offsets[0] and offsets[1] are set (the latter from STR_END)
   - at least 3:  offsets[2] is set as well

   All stored values are offsets relative to the begin member of the
   arguments. The locals they are read from depend on the partial matching
   mode (start_ptr / start_used_ptr in hard mode, hit_start in soft mode). */
2190 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2191 {
2192 DEFINE_COMPILER;
2193 struct sljit_jump *jump;
2194
/* STR_END shares its register with SLJIT_SAVED_REG2, which is overwritten
   below when offsets[1] is computed. */
2195 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2196 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2197 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2198
2199 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2200 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2201 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2202 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2203
2204 /* Store match begin and end. */
2205 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2206 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2207
/* offsets[2] is written only when the buffer has at least three items. */
2208 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2209 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2210 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2211 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2212 #endif
2213 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2214 JUMPHERE(jump);
2215
/* offsets[1] = STR_END - begin. The value for offsets[0] is loaded
   before STR_END is overwritten. */
2216 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2217 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2218 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2219 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2220 #endif
2221 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2222
/* offsets[0] = (start_used_ptr or hit_start local) - begin. */
2223 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2224 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2225 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2226 #endif
2227 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2228
2229 JUMPTO(SLJIT_JUMP, quit);
2230 }
2231
2232 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2233 {
2234 /* May destroy TMP1. */
2235 DEFINE_COMPILER;
2236 struct sljit_jump *jump;
2237
2238 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2239 {
2240 /* The value of -1 must be kept for start_used_ptr! */
2241 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2242 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2243 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2244 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2245 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2246 JUMPHERE(jump);
2247 }
2248 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2249 {
2250 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2251 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2252 JUMPHERE(jump);
2253 }
2254 }
2255
2256 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2257 {
2258 /* Detects if the character has an othercase. */
2259 unsigned int c;
2260
2261 #ifdef SUPPORT_UTF
2262 if (common->utf)
2263 {
2264 GETCHAR(c, cc);
2265 if (c > 127)
2266 {
2267 #ifdef SUPPORT_UCP
2268 return c != UCD_OTHERCASE(c);
2269 #else
2270 return FALSE;
2271 #endif
2272 }
2273 #ifndef COMPILE_PCRE8
2274 return common->fcc[c] != c;
2275 #endif
2276 }
2277 else
2278 #endif
2279 c = *cc;
2280 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2281 }
2282
2283 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2284 {
2285 /* Returns with the othercase. */
2286 #ifdef SUPPORT_UTF
2287 if (common->utf && c > 127)
2288 {
2289 #ifdef SUPPORT_UCP
2290 return UCD_OTHERCASE(c);
2291 #else
2292 return c;
2293 #endif
2294 }
2295 #endif
2296 return TABLE_GET(c, common->fcc, c);
2297 }
2298
2299 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2300 {
2301 /* Detects if the character and its othercase has only 1 bit difference. */
2302 unsigned int c, oc, bit;
2303 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2304 int n;
2305 #endif
2306
2307 #ifdef SUPPORT_UTF
2308 if (common->utf)
2309 {
2310 GETCHAR(c, cc);
2311 if (c <= 127)
2312 oc = common->fcc[c];
2313 else
2314 {
2315 #ifdef SUPPORT_UCP
2316 oc = UCD_OTHERCASE(c);
2317 #else
2318 oc = c;
2319 #endif
2320 }
2321 }
2322 else
2323 {
2324 c = *cc;
2325 oc = TABLE_GET(c, common->fcc, c);
2326 }
2327 #else
2328 c = *cc;
2329 oc = TABLE_GET(c, common->fcc, c);
2330 #endif
2331
2332 SLJIT_ASSERT(c != oc);
2333
2334 bit = c ^ oc;
2335 /* Optimized for English alphabet. */
2336 if (c <= 127 && bit == 0x20)
2337 return (0 << 8) | 0x20;
2338
2339 /* Since c != oc, they must have at least 1 bit difference. */
2340 if (!is_powerof2(bit))
2341 return 0;
2342
2343 #if defined COMPILE_PCRE8
2344
2345 #ifdef SUPPORT_UTF
2346 if (common->utf && c > 127)
2347 {
2348 n = GET_EXTRALEN(*cc);
2349 while ((bit & 0x3f) == 0)
2350 {
2351 n--;
2352 bit >>= 6;
2353 }
2354 return (n << 8) | bit;
2355 }
2356 #endif /* SUPPORT_UTF */
2357 return (0 << 8) | bit;
2358
2359 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2360
2361 #ifdef SUPPORT_UTF
2362 if (common->utf && c > 65535)
2363 {
2364 if (bit >= (1 << 10))
2365 bit >>= 10;
2366 else
2367 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2368 }
2369 #endif /* SUPPORT_UTF */
2370 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2371
2372 #endif /* COMPILE_PCRE[8|16|32] */
2373 }
2374
2375 static void check_partial(compiler_common *common, BOOL force)
2376 {
2377 /* Checks whether a partial matching is occurred. Does not modify registers. */
2378 DEFINE_COMPILER;
2379 struct sljit_jump *jump = NULL;
2380
2381 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2382
2383 if (common->mode == JIT_COMPILE)
2384 return;
2385
2386 if (!force)
2387 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2388 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2389 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2390
2391 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2392 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2393 else
2394 {
2395 if (common->partialmatchlabel != NULL)
2396 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2397 else
2398 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2399 }
2400
2401 if (jump != NULL)
2402 JUMPHERE(jump);
2403 }
2404
2405 static void check_str_end(compiler_common *common, jump_list **end_reached)
2406 {
2407 /* Does not affect registers. Usually used in a tight spot. */
2408 DEFINE_COMPILER;
2409 struct sljit_jump *jump;
2410
2411 if (common->mode == JIT_COMPILE)
2412 {
2413 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2414 return;
2415 }
2416
2417 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2418 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2419 {
2420 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2421 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2422 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2423 }
2424 else
2425 {
2426 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2427 if (common->partialmatchlabel != NULL)
2428 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2429 else
2430 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2431 }
2432 JUMPHERE(jump);
2433 }
2434
2435 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2436 {
2437 DEFINE_COMPILER;
2438 struct sljit_jump *jump;
2439
2440 if (common->mode == JIT_COMPILE)
2441 {
2442 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2443 return;
2444 }
2445
2446 /* Partial matching mode. */
2447 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2448 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2449 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2450 {
2451 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2452 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2453 }
2454 else
2455 {
2456 if (common->partialmatchlabel != NULL)
2457 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2458 else
2459 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2460 }
2461 JUMPHERE(jump);
2462 }
2463
2464 static void read_char(compiler_common *common)
2465 {
2466 /* Reads the character into TMP1, updates STR_PTR.
2467 Does not check STR_END. TMP2 Destroyed. */
2468 DEFINE_COMPILER;
2469 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2470 struct sljit_jump *jump;
2471 #endif
2472
2473 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2474 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2475 if (common->utf)
2476 {
2477 #if defined COMPILE_PCRE8
2478 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2479 #elif defined COMPILE_PCRE16
2480 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2481 #endif /* COMPILE_PCRE[8|16] */
2482 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2483 JUMPHERE(jump);
2484 }
2485 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2486 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2487 }
2488
2489 static void peek_char(compiler_common *common)
2490 {
2491 /* Reads the character into TMP1, keeps STR_PTR.
2492 Does not check STR_END. TMP2 Destroyed. */
2493 DEFINE_COMPILER;
2494 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2495 struct sljit_jump *jump;
2496 #endif
2497
2498 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2499 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2500 if (common->utf)
2501 {
2502 #if defined COMPILE_PCRE8
2503 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2504 #elif defined COMPILE_PCRE16
2505 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2506 #endif /* COMPILE_PCRE[8|16] */
2507 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2508 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2509 JUMPHERE(jump);
2510 }
2511 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2512 }
2513
2514 static void read_char8_type(compiler_common *common)
2515 {
2516 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2517 DEFINE_COMPILER;
2518 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2519 struct sljit_jump *jump;
2520 #endif
2521
2522 #ifdef SUPPORT_UTF
2523 if (common->utf)
2524 {
2525 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2526 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2527 #if defined COMPILE_PCRE8
2528 /* This can be an extra read in some situations, but hopefully
2529 it is needed in most cases. */
2530 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2531 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2532 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2533 JUMPHERE(jump);
2534 #elif defined COMPILE_PCRE16
2535 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2536 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2537 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2538 JUMPHERE(jump);
2539 /* Skip low surrogate if necessary. */
2540 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2541 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2542 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2543 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2544 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2545 #elif defined COMPILE_PCRE32
2546 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2547 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2548 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2549 JUMPHERE(jump);
2550 #endif /* COMPILE_PCRE[8|16|32] */
2551 return;
2552 }
2553 #endif /* SUPPORT_UTF */
2554 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2555 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2556 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2557 /* The ctypes array contains only 256 values. */
2558 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2559 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2560 #endif
2561 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2562 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2563 JUMPHERE(jump);
2564 #endif
2565 }
2566
2567 static void skip_char_back(compiler_common *common)
2568 {
2569 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2570 DEFINE_COMPILER;
2571 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2572 #if defined COMPILE_PCRE8
2573 struct sljit_label *label;
2574
2575 if (common->utf)
2576 {
2577 label = LABEL();
2578 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2579 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2580 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2581 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2582 return;
2583 }
2584 #elif defined COMPILE_PCRE16
2585 if (common->utf)
2586 {
2587 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2588 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2589 /* Skip low surrogate if necessary. */
2590 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2591 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2592 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2593 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2594 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2595 return;
2596 }
2597 #endif /* COMPILE_PCRE[8|16] */
2598 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2599 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2600 }
2601
2602 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2603 {
2604 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2605 DEFINE_COMPILER;
2606
2607 if (nltype == NLTYPE_ANY)
2608 {
2609 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2610 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2611 }
2612 else if (nltype == NLTYPE_ANYCRLF)
2613 {
2614 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2615 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2616 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2617 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2618 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2619 }
2620 else
2621 {
2622 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2623 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2624 }
2625 }
2626
2627 #ifdef SUPPORT_UTF
2628
2629 #if defined COMPILE_PCRE8
static void do_utfreadchar(compiler_common *common)
{
/* Emits the shared fast-call routine which decodes a multi-byte UTF-8
character. On entry TMP1 holds the first byte (>= 0xc0) and STR_PTR points
at that byte. On return TMP1 holds the character value, TMP2 holds
length - 1, and STR_PTR has been advanced by that same amount (the caller
steps over the remaining byte). Sequences of two, three and four bytes are
handled; anything that is not a two or three byte lead is treated as a four
byte lead. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Searching for the first zero bit in the lead byte: bit 0x20 is clear for
a two byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_C_NOT_ZERO);
/* Two byte sequence: 5 payload bits from the lead byte, 6 from the
following byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
JUMPHERE(jump);

/* Bit 0x10 is clear for a three byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
jump = JUMP(SLJIT_C_NOT_ZERO);
/* Three byte sequence: 4 + 6 + 6 payload bits. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
JUMPHERE(jump);

/* Four byte sequence: 3 + 6 + 6 + 6 payload bits. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
2687
static void do_utfreadtype8(compiler_common *common)
{
/* Emits the shared fast-call routine which determines the ctypes entry of
a multi-byte UTF-8 character. On entry TMP2 holds the first byte (>= 0xc0).
The byte at STR_PTR + 0 is read as the second byte of the character, i.e.
STR_PTR is expected to point just after the lead byte (read_char8_type
advances it before calling). On return TMP1 holds the type (0 for every
character above 255) and STR_PTR points after the whole character. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 is clear in the lead byte of a two byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_C_NOT_ZERO);
/* Two byte sequence: decode the value into TMP2, since only these
characters can be below 256. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte character above 255: no type information. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
JUMPHERE(jump);

/* We only have types for characters less than 256, so longer sequences
are skipped without decoding. The amount added to STR_PTR is read from
utf8_table4, indexed by (lead byte - 0xc0) -- presumably the number of
additional bytes of the sequence. */
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
2722
2723 #elif defined COMPILE_PCRE16
2724
2725 static void do_utfreadchar(compiler_common *common)
2726 {
2727 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2728 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2729 DEFINE_COMPILER;
2730 struct sljit_jump *jump;
2731
2732 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2733 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2734 /* Do nothing, only return. */
2735 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2736
2737 JUMPHERE(jump);
2738 /* Combine two 16 bit characters. */
2739 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2740 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2741 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2742 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2743 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2744 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2745 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2746 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2747 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2748 }
2749
2750 #endif /* COMPILE_PCRE[8|16] */
2751
2752 #endif /* SUPPORT_UTF */
2753
2754 #ifdef SUPPORT_UCP
2755
2756 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2757 #define UCD_BLOCK_MASK 127
2758 #define UCD_BLOCK_SHIFT 7
2759
static void do_getucd(compiler_common *common)
{
/* Emits the shared fast-call routine which looks up the UCD record of the
character in TMP1. Returns the chartype field of the record in TMP1 and the
record index (the value read from ucd_stage2) in TMP2. */
DEFINE_COMPILER;

/* The shifts and masks below hard-code these two properties. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Stage 1: TMP2 = ucd_stage1[chr >> UCD_BLOCK_SHIFT], the block number. */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = block * UCD_BLOCK_SIZE + (chr & UCD_BLOCK_MASK). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* Stage 2: 16 bit load from ucd_stage2, indexed by TMP1.
NOTE(review): the trailing 1 is presumably the index shift (2 byte
entries) -- confirm against the sljit SLJIT_MEM2 documentation. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Byte load of the chartype field of ucd_records, indexed by TMP2.
NOTE(review): the trailing 3 is presumably the index shift matching the
asserted 8 byte record size -- confirm. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
2780 #endif
2781
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
{
/* Emits the entry part of the main matching loop and returns the label of
the code which advances STR_PTR to the next character. The first pass jumps
over that advancing code (see 'start'); jumping to the returned label
performs the advance and then falls through to whatever is emitted after
this function.

When 'firstline' is set, code is emitted first which locates the end of the
first line and stores it in the first_line_end local. When neither
'hascrorlf' nor 'firstline' is set and the newline convention is ANY,
ANYCRLF or a two unit fixed sequence, the advancing code additionally steps
over the second unit of such a newline (newlinecheck). */
DEFINE_COMPILER;
struct sljit_label *mainloop;
struct sljit_label *newlinelabel = NULL;
struct sljit_jump *start;
struct sljit_jump *end = NULL;
struct sljit_jump *nl = NULL;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *singlechar;
#endif
jump_list *newline = NULL;
BOOL newlinecheck = FALSE;
BOOL readuchar = FALSE;

if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
    common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
  newlinecheck = TRUE;

if (firstline)
  {
  /* Search for the end of the first line. STR_PTR is saved in TMP3 and
  restored afterwards. */
  SLJIT_ASSERT(common->first_line_end != 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two unit fixed newline: scan for the pair (high byte of
    common->newline, low byte of common->newline) in adjacent units. */
    mainloop = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
    CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
    JUMPHERE(end);
    /* STR_PTR is one unit past the first newline unit (or at/after the
    end of the subject); store the position one unit earlier. */
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    mainloop = LABEL();
    /* The position is stored before each character is read, so when a
    newline is found first_line_end already points at its start.
    Continual stores do not cause a data dependency. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
    read_char(common);
    check_newlinechar(common, common->nltype, &newline, TRUE);
    CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
    JUMPHERE(end);
    /* No newline found: the first line ends where the scan stopped. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
    set_jumps(newline, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }

/* The first pass skips the advancing code; this jump lands at the end. */
start = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Reached from the main loop when the current unit equals the high
  byte of common->newline: step over it and, unless the end of the subject
  was reached, also over the next unit if that equals the low byte. */
  newlinelabel = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Convert the 0/1 flag into a byte offset. */
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  nl = JUMP(SLJIT_JUMP);
  }

mainloop = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
#ifdef SUPPORT_UTF
if (common->utf) readuchar = TRUE;
#endif
if (newlinecheck) readuchar = TRUE;

/* The current unit is needed either for the newline test or for finding
the length of a UTF character. */
if (readuchar)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

if (newlinecheck)
  CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
if (common->utf)
  {
  /* Multi-byte character: add the value stored in utf8_table4 for this
  lead byte to step over the rest of the sequence. */
  singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(singlechar);
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  /* Step over the low surrogate when the unit was a high surrogate:
  the 0/1 flag is doubled to get the byte offset. */
  singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(singlechar);
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
JUMPHERE(start);

if (newlinecheck)
  {
  /* Both exits of the newline handling code rejoin here. */
  JUMPHERE(end);
  JUMPHERE(nl);
  }

return mainloop;
}
2900
2901 #define MAX_N_CHARS 3
2902
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
{
/* Tries to emit a fast scan for the first two or three literal code units
of the pattern. The opcodes at the start of the pattern are examined; if at
least two code units can be collected, a loop is emitted which advances
STR_PTR until the subject matches them (or the end is reached), and TRUE is
returned. FALSE is returned, with nothing emitted, when the pattern has
alternatives, when a caseless character cannot be handled with a single bit
mask, or when fewer than two code units were found.

chars[] holds pairs: chars[2*i] is the expected value of the i-th unit and
chars[2*i + 1] is a bit which is OR-ed into the subject unit before the
comparison (0 when the unit is case sensitive). */
DEFINE_COMPILER;
struct sljit_label *start;
struct sljit_jump *quit;
pcre_uint32 chars[MAX_N_CHARS * 2];
pcre_uchar *cc = common->start + 1 + LINK_SIZE;
int location = 0;
pcre_int32 len, c, bit, caseless;
int must_stop;

/* We do not support alternatives now. */
if (*(common->start + GET(common->start, 1)) == OP_ALT)
  return FALSE;

/* must_stop: 0 = continue with the next opcode after this character,
1 = take this character and stop, 2 = stop without taking anything. */
while (TRUE)
  {
  caseless = 0;
  must_stop = 1;
  switch(*cc)
    {
    case OP_CHAR:
    must_stop = 0;
    cc++;
    break;

    case OP_CHARI:
    caseless = 1;
    must_stop = 0;
    cc++;
    break;

    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;

    /* The repeated forms below contribute their character once, and the
    scan stops after them (must_stop stays 1). */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    cc++;
    break;

    case OP_EXACT:
    cc += 1 + IMM2_SIZE;
    break;

    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    caseless = 1;
    cc++;
    break;

    case OP_EXACTI:
    caseless = 1;
    cc += 1 + IMM2_SIZE;
    break;

    default:
    must_stop = 2;
    break;
    }

  if (must_stop == 2)
    break;

  /* cc now points at the character; len is its length in code units. */
  len = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
#endif

  if (caseless && char_has_othercase(common, cc))
    {
    caseless = char_get_othercase_bit(common, cc);
    if (caseless == 0)
      return FALSE;
    /* Repack the result for the copy loop below: the low byte becomes the
    value of the 'len' countdown at which the mask applies, the bits above
    it become the mask to OR into that code unit. */
#ifdef COMPILE_PCRE8
    caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
#else
    if ((caseless & 0x100) != 0)
      caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
    else
      caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
#endif
    }
  else
    caseless = 0;

  /* Copy the code units of the character, until chars[] is full. */
  while (len > 0 && location < MAX_N_CHARS * 2)
    {
    c = *cc;
    bit = 0;
    if (len == (caseless & 0xff))
      {
      bit = caseless >> 8;
      c |= bit;
      }

    chars[location] = c;
    chars[location + 1] = bit;

    len--;
    location += 2;
    cc++;
    }

  if (location >= MAX_N_CHARS * 2 || must_stop != 0)
    break;
  }

/* At least two characters are required. */
if (location < 2 * 2)
  return FALSE;

/* STR_END is lowered by (number of units - 1) while the loop runs, so that
the look-ahead loads stay inside the subject (or the first line). With
'firstline' the original STR_END is kept in TMP3. */
if (firstline)
  {
  SLJIT_ASSERT(common->first_line_end != 0);
  OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
  OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
  }
else
  OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));

start = LABEL();
quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

/* Load the first two units, then advance. Any mismatch restarts the loop
at the already advanced position. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (chars[1] != 0)
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* The third unit (if any) is loaded into TMP1, which is free again. */
if (location > 2 * 2)
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
if (chars[3] != 0)
  OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
if (location > 2 * 2)
  {
  if (chars[5] != 0)
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
  CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
  }
/* Everything matched: move STR_PTR back to the first unit. */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

JUMPHERE(quit);

/* Restore the original STR_END. */
if (firstline)
  OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
else
  OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
return TRUE;
}
3067
3068 #undef MAX_N_CHARS
3069
3070 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3071 {
3072 DEFINE_COMPILER;
3073 struct sljit_label *start;
3074 struct sljit_jump *quit;
3075 struct sljit_jump *found;
3076 pcre_uchar oc, bit;
3077
3078 if (firstline)
3079 {
3080 SLJIT_ASSERT(common->first_line_end != 0);
3081 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3082 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3083 }
3084
3085 start = LABEL();
3086 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3087 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3088
3089 oc = first_char;
3090 if (caseless)
3091 {
3092 oc = TABLE_GET(first_char, common->fcc, first_char);
3093 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3094 if (first_char > 127 && common->utf)
3095 oc = UCD_OTHERCASE(first_char);
3096 #endif
3097 }
3098 if (first_char == oc)
3099 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3100 else
3101 {
3102 bit = first_char ^ oc;
3103 if (is_powerof2(bit))
3104 {
3105 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3106 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3107 }
3108 else
3109 {
3110 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3111 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3112 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3113 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3114 found = JUMP(SLJIT_C_NOT_ZERO);
3115 }
3116 }
3117
3118 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3119 JUMPTO(SLJIT_JUMP, start);
3120 JUMPHERE(found);
3121 JUMPHERE(quit);
3122
3123 if (firstline)
3124 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3125 }
3126
3127 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3128 {
3129 DEFINE_COMPILER;
3130 struct sljit_label *loop;
3131 struct sljit_jump *lastchar;
3132 struct sljit_jump *firstchar;
3133 struct sljit_jump *quit;
3134 struct sljit_jump *foundcr = NULL;
3135 struct sljit_jump *notfoundnl;
3136 jump_list *newline = NULL;
3137
3138 if (firstline)
3139 {
3140 SLJIT_ASSERT(common->first_line_end != 0);
3141 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3142 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3143 }
3144
3145 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3146 {
3147 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3148 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3149 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3150 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3151 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3152
3153 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3154 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3155 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3156 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3157 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3158 #endif
3159 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3160
3161 loop = LABEL();
3162 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3163 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3164 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3165 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3166 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3167 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3168
3169 JUMPHERE(quit);
3170 JUMPHERE(firstchar);
3171 JUMPHERE(lastchar);
3172
3173 if (firstline)
3174 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3175 return;
3176 }
3177
3178 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3179 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3180 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3181 skip_char_back(common);
3182
3183 loop = LABEL();
3184 read_char(common);
3185 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3186 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3187 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3188 check_newlinechar(common, common->nltype, &newline, FALSE);
3189 set_jumps(newline, loop);
3190
3191 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3192 {
3193 quit = JUMP(SLJIT_JUMP);
3194 JUMPHERE(foundcr);
3195 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3196 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3197 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3198 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3199 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3200 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3201 #endif
3202 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3203 JUMPHERE(notfoundnl);
3204 JUMPHERE(quit);
3205 }
3206 JUMPHERE(lastchar);
3207 JUMPHERE(firstchar);
3208
3209 if (firstline)
3210 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3211 }
3212
3213 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3214
3215 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3216 {
3217 DEFINE_COMPILER;
3218 struct sljit_label *start;
3219 struct sljit_jump *quit;
3220 struct sljit_jump *found = NULL;
3221 jump_list *matches = NULL;
3222 pcre_uint8 inverted_start_bits[32];
3223 int i;
3224 #ifndef COMPILE_PCRE8
3225 struct sljit_jump *jump;
3226 #endif
3227
3228 for (i = 0; i < 32; ++i)
3229 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3230
3231 if (firstline)
3232 {
3233 SLJIT_ASSERT(common->first_line_end != 0);
3234 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3235 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3236 }
3237
3238 start = LABEL();
3239 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3240 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3241 #ifdef SUPPORT_UTF
3242 if (common->utf)
3243 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3244 #endif
3245
3246 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3247 {
3248 #ifndef COMPILE_PCRE8
3249 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3250 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3251 JUMPHERE(jump);
3252 #endif
3253 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3254 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3255 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3256 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3257 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3258 found = JUMP(SLJIT_C_NOT_ZERO);
3259 }
3260
3261 #ifdef SUPPORT_UTF
3262 if (common->utf)
3263 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3264 #endif
3265 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3266 #ifdef SUPPORT_UTF
3267 #if defined COMPILE_PCRE8
3268 if (common->utf)
3269 {
3270 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3271 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3272 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3273 }
3274 #elif defined COMPILE_PCRE16
3275 if (common->utf)
3276 {
3277 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3278 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3279 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3280 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3281 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3282 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3283 }
3284 #endif /* COMPILE_PCRE[8|16] */
3285 #endif /* SUPPORT_UTF */
3286 JUMPTO(SLJIT_JUMP, start);
3287 if (found != NULL)
3288 JUMPHERE(found);
3289 if (matches != NULL)
3290 set_jumps(matches, LABEL());
3291 JUMPHERE(quit);
3292
3293 if (firstline)
3294 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3295 }
3296
3297 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3298 {
3299 DEFINE_COMPILER;
3300 struct sljit_label *loop;
3301 struct sljit_jump *toolong;
3302 struct sljit_jump *alreadyfound;
3303 struct sljit_jump *found;
3304 struct sljit_jump *foundoc = NULL;
3305 struct sljit_jump *notfound;
3306 pcre_uint32 oc, bit;
3307
3308 SLJIT_ASSERT(common->req_char_ptr != 0);
3309 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3310 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3311 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3312 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3313
3314 if (has_firstchar)
3315 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3316 else
3317 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3318
3319 loop = LABEL();
3320 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3321
3322 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3323 oc = req_char;
3324 if (caseless)
3325 {
3326 oc = TABLE_GET(req_char, common->fcc, req_char);
3327 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3328 if (req_char > 127 && common->utf)
3329 oc = UCD_OTHERCASE(req_char);
3330 #endif
3331 }
3332 if (req_char == oc)
3333 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3334 else
3335 {
3336 bit = req_char ^ oc;
3337 if (is_powerof2(bit))
3338 {
3339 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3340 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3341 }
3342 else
3343 {
3344 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3345 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3346 }
3347 }
3348 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3349 JUMPTO(SLJIT_JUMP, loop);
3350
3351 JUMPHERE(found);
3352 if (foundoc)
3353 JUMPHERE(foundoc);
3354 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3355 JUMPHERE(alreadyfound);
3356 JUMPHERE(toolong);
3357 return notfound;
3358 }
3359
/* Emits the shared routine that reverts saved frames. It is entered with a
fast call (the return address is kept in RETURN_ADDR) and walks the records
starting at STACK_TOP. The first word of a record selects its kind:

  > 0   offset from the local base: the next two words are copied to the two
        word sized slots at that offset (record size: 3 words)
  < 0   negated offset from the local base: the next word is copied to the
        slot at that offset (record size: 2 words)
  == 0  end marker, the routine returns

TMP1 (read pointer), TMP2 and TMP3 (local base) are overwritten. */

static void do_revertframes(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(TMP3, 0, 0);

/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Signed compare of the record header against zero. */
OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);

/* Positive header: restore two words. */
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);

JUMPHERE(jump);
/* The flags of the comparison above are still valid here: negative headers
branch away, a zero header falls through to the return. */
jump = JUMP(SLJIT_C_SIG_LESS);
/* End of dropping frames. */
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Negative header: restore a single word. */
OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
JUMPTO(SLJIT_JUMP, mainloop);
}
3394
/* Emits the shared word boundary test. The routine is entered with a fast
call; its return address is stored in LOCALS0. It computes a 0/1 "is a word
character" value for the character before STR_PTR (kept in LOCALS1, 0 when
STR_PTR is not beyond args->begin) and for the character at STR_PTR (kept in
TMP2, 0 when check_str_end() reports the end), and finishes with an XOR of the
two that sets the equality flag (SLJIT_SET_E). TMP1 and TMP2 are overwritten.

With use_ucp a character counts as a word character when it is an underscore
or when the type delivered by the getucd routine lies in ucp_Ll..ucp_Lu or in
ucp_Nd..ucp_No. Otherwise bit 4 (ctype_word) of the ctypes table entry is
used, and characters above 255 are never word characters. */

static void check_wordboundary(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
jump_list *skipread_list = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
struct sljit_jump *jump;
#endif

/* The table based tests below shift by 4 to isolate the ctype_word bit. */
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Get type of the previous char, and put it to LOCALS1. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
/* Default: no previous character, so it is not a word character. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
skip_char_back(common);
check_start_used_ptr(common);
read_char(common);

/* Testing char type. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  /* TMP2 is preset to 1 for the underscore shortcut. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  /* Two unsigned range checks on the type value: Ll..Lu, then Nd..No. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
  JUMPHERE(jump);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* The ctypes table has no entry above 255; LOCALS1 keeps its zero. */
  jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  /* Here LOCALS1 has already been zeroed. */
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
JUMPHERE(skipread);

/* Now the character at STR_PTR; TMP2 stays 0 when the end is reached. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
check_str_end(common, &skipread_list);
peek_char(common);

/* Testing char type. This is a code duplication. */
#ifdef SUPPORT_UCP
if (common->use_ucp)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
  jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
  JUMPHERE(jump);
  }
else
#endif
  {
#ifndef COMPILE_PCRE8
  /* TMP2 may be destroyed by peek_char. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  jump = NULL;
  if (common->utf)
    jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
  OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
  OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
  JUMPHERE(jump);
#elif defined SUPPORT_UTF
  if (jump != NULL)
    JUMPHERE(jump);
#endif /* COMPILE_PCRE8 */
  }
set_jumps(skipread_list, LABEL());

/* The two values differ exactly when the result of the XOR is not zero. */
OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
}
3503
3504 /*
3505 range format:
3506
3507 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3508 ranges[1] = first bit (0 or 1)
3509 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3510 */
3511
3512 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3513 {
3514 DEFINE_COMPILER;
3515 struct sljit_jump *jump;
3516
3517 if (ranges[0] < 0)
3518 return FALSE;
3519
3520 switch(ranges[0])
3521 {
3522 case 1:
3523 if (readch)
3524 read_char(common);
3525 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3526 return TRUE;
3527
3528 case 2:
3529 if (readch)
3530 read_char(common);
3531 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3532 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3533 return TRUE;
3534
3535 case 4:
3536 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3537 {
3538 if (readch)
3539 read_char(common);
3540 if (ranges[1] != 0)
3541 {
3542 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3543 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3544 }
3545 else
3546 {
3547 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3548 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3549 JUMPHERE(jump);
3550 }
3551 return TRUE;
3552 }
3553 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3554 {
3555 if (readch)
3556 read_char(common);
3557 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3558 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3559 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3560 return TRUE;
3561 }
3562 return FALSE;
3563
3564 default:
3565 return FALSE;
3566 }
3567 }
3568
3569 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3570 {
3571 int i, bit, length;
3572 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3573
3574 bit = ctypes[0] & flag;
3575 ranges[0] = -1;
3576 ranges[1] = bit != 0 ? 1 : 0;
3577 length = 0;
3578
3579 for (i = 1; i < 256; i++)
3580 if ((ctypes[i] & flag) != bit)
3581 {
3582 if (length >= MAX_RANGE_SIZE)
3583 return;
3584 ranges[2 + length] = i;
3585 length++;
3586 bit ^= flag;
3587 }
3588
3589 if (bit != 0)
3590 {
3591 if (length >= MAX_RANGE_SIZE)
3592 return;
3593 ranges[2 + length] = 256;
3594 length++;
3595 }
3596 ranges[0] = length;
3597 }
3598
3599 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3600 {
3601 int ranges[2 + MAX_RANGE_SIZE];
3602 pcre_uint8 bit, cbit, all;
3603 int i, byte, length = 0;
3604
3605 bit = bits[0] & 0x1;
3606 ranges[1] = bit;
3607 /* Can be 0 or 255. */
3608 all = -bit;
3609
3610 for (i = 0; i < 256; )
3611 {
3612 byte = i >> 3;
3613 if ((i & 0x7) == 0 && bits[byte] == all)
3614 i += 8;
3615 else
3616 {
3617 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3618 if (cbit != bit)
3619 {
3620 if (length >= MAX_RANGE_SIZE)
3621 return FALSE;
3622 ranges[2 + length] = i;
3623 length++;
3624 bit = cbit;
3625 all = -cbit;
3626 }
3627 i++;
3628 }
3629 }
3630
3631 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3632 {
3633 if (length >= MAX_RANGE_SIZE)
3634 return FALSE;
3635 ranges[2 + length] = 256;
3636 length++;
3637 }
3638 ranges[0] = length;
3639
3640 return check_ranges(common, ranges, backtracks, FALSE);
3641 }
3642
/* Emits the shared "any newline" test, entered with a fast call (the return
address is kept in RETURN_ADDR). The characters recognised are 0x0a-0x0d and
0x85, and additionally 0x2028 and 0x2029 in the 16 and 32 bit libraries and
in the 8 bit library when common->utf is set. The 0/1 result is accumulated
in TMP2 and the final OR is emitted with SLJIT_SET_E. */

static void check_anynewline(compiler_common *common)
{
/* Check whether TMP1 contains a newline character. TMP2 destroyed. TMP1 is
modified as well: 0x0a is subtracted from it first. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Unsigned range check for 0x0a-0x0d, then an equality check for 0x85. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  /* Setting the lowest bit of the offset value makes 0x2028 and 0x2029 give
  the same value, so a single comparison is enough for both. */
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison that was emitted. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3669
/* Emits the shared horizontal white space test, entered with a fast call (the
return address is kept in RETURN_ADDR). The characters recognised are 0x09,
0x20 and 0xa0, and additionally 0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f
and 0x3000 in the 16 and 32 bit libraries and in the 8 bit library when
common->utf is set. The 0/1 result is accumulated in TMP2 and the final OR is
emitted with SLJIT_SET_E. */

static void check_hspace(compiler_common *common)
{
/* Check whether TMP1 contains a horizontal white space character. TMP2
destroyed. In the wide character part 0x2000 is subtracted from TMP1. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Each comparison is merged into TMP2 by the OP_FLAGS that follows it. */
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  /* From here on TMP1 holds the character minus 0x2000. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
  OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
  OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison that was emitted. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3708
/* Emits the shared vertical white space test, entered with a fast call (the
return address is kept in RETURN_ADDR). The characters recognised are
0x0a-0x0d and 0x85, and additionally 0x2028 and 0x2029 in the 16 and 32 bit
libraries and in the 8 bit library when common->utf is set. The 0/1 result is
accumulated in TMP2 and the final OR is emitted with SLJIT_SET_E. */

static void check_vspace(compiler_common *common)
{
/* Check whether TMP1 contains a vertical white space character. TMP2
destroyed. TMP1 is modified as well: 0x0a is subtracted from it first. */
DEFINE_COMPILER;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Unsigned range check for 0x0a-0x0d, then an equality check for 0x85. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
if (common->utf)
  {
#endif
  /* Setting the lowest bit of the offset value makes 0x2028 and 0x2029 give
  the same value, so a single comparison is enough for both. */
  OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
  }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison that was emitted. */
OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);

sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3736
/* The compare routines below borrow STR_END and STACK_TOP as character
registers; the original values are saved on entry and restored on exit. */
#define CHAR1 STR_END
#define CHAR2 STACK_TOP

/* Emits the shared caseful compare routine, entered with a fast call (the
return address is kept in RETURN_ADDR). On entry TMP1 points to the first
sequence and TMP2 holds the length in bytes; STR_PTR is moved back by TMP2
first, so the second sequence is the one ending at the incoming STR_PTR. The
loop ends at the first difference or when TMP2 has been counted down to zero.
TMP3 and LOCALS0 are used to save CHAR1 and CHAR2. */

static void do_casefulcmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers start one unit early because the loads below use an offset
of one unit. NOTE(review): MOVU_UCHAR presumably is the load form which also
updates the base register - confirm against its definition. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
/* One more unit matched; continue while bytes remain. */
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_C_NOT_ZERO, label);

JUMPHERE(jump);
/* Undo the initial one unit bias of STR_PTR and restore the registers. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3766
/* STACK_LIMIT is borrowed to hold the address of the lcc table. */
#define LCC_TABLE STACK_LIMIT

/* Emits the shared caseless compare routine, entered with a fast call (the
return address is kept in RETURN_ADDR). It works like the caseful variant
(TMP1 points to the first sequence, TMP2 is the length in bytes, STR_PTR is
moved back by TMP2 first), but both code units are translated through the
common->lcc table before they are compared. In the 16 and 32 bit libraries
units above 255 are compared without translation. TMP3, LOCALS0 and LOCALS1
are used to save LCC_TABLE, CHAR1 and CHAR2. */

static void do_caselesscmp(compiler_common *common)
{
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *label;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

/* Save the borrowed registers and load the table address. */
OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers start one unit early because the loads below use an offset
of one unit. */
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

label = LABEL();
OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
#ifndef COMPILE_PCRE8
/* The table is only indexed with values up to 255. */
jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
#ifndef COMPILE_PCRE8
JUMPHERE(jump);
#endif
jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
/* One more unit matched; continue while bytes remain. */
OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_C_NOT_ZERO, label);

JUMPHERE(jump);
/* Undo the initial one unit bias of STR_PTR and restore the registers. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}

#undef LCC_TABLE
#undef CHAR1
#undef CHAR2
3815
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two character sequences using the Unicode tables.
It is an ordinary C function which is called from the generated code.

Arguments:
  src1   start of the first sequence
  args   the JIT arguments; args->uchar_ptr is the start of the second
         sequence and args->end is its limit
  end1   end of the first sequence

Returns: the position reached in the second sequence when every character of
         the first sequence had a caseless equivalent there; NULL at the
         first character without an equivalent; (pcre_uchar*)1 when the
         second sequence ran out before the first one */

static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
/* This function would be ineffective to do in JIT level. */
pcre_uint32 first_char, second_char;
const pcre_uchar *second = args->uchar_ptr;
const pcre_uchar *second_end = args->end;
const ucd_record *record;
const pcre_uint32 *caseset;

for (; src1 < end1; )
  {
  if (second >= second_end)
    return (pcre_uchar*)1;

  GETCHARINC(first_char, src1);
  GETCHARINC(second_char, second);
  record = GET_UCD(second_char);

  /* Same character, or the simple other case of the second one. */
  if (first_char == second_char || first_char == second_char + record->other_case)
    continue;

  /* Otherwise the first character has to be listed in the caseless set of
  the second one. The set is scanned in increasing order, so a larger entry
  ends the search. */
  caseset = PRIV(ucd_caseless_sets) + record->caseset;
  while (first_char != *caseset)
    {
    if (first_char < *caseset)
      return NULL;
    caseset++;
    }
  }

return second;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
3848
/* Emits the comparison of one pattern character against the subject. In UTF
mode all code units of the character are processed by a single call.

Arguments:
  common      the JIT compiler state
  caseless    TRUE when the other case of the character is acceptable too
  cc          points to the character in the pattern
  context     state shared by the calls which belong to one sequence:
                length     the bytes still to be compared; the data is read
                           from STR_PTR - length
                sourcereg  the register which receives the next load (-1
                           before the first call)
                ucharptr, c, oc  code units collected for a combined
                           comparison (unaligned 8 and 16 bit variant only)
  backtracks  receives the jumps which are taken on mismatch

Returns:      cc advanced past the code units which were consumed

The loads run one step ahead of the comparisons: every step loads the next
piece of data into context->sourcereg, then switches to the other register
(TMP1/TMP2) and compares the data which was loaded by the previous step. */

static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
    compare_context* context, jump_list **backtracks)
{
DEFINE_COMPILER;
unsigned int othercasebit = 0;
pcre_uchar *othercasechar = NULL;
#ifdef SUPPORT_UTF
int utflength;
#endif

if (caseless && char_has_othercase(common, cc))
  {
  othercasebit = char_get_othercase_bit(common, cc);
  SLJIT_ASSERT(othercasebit);
  /* Extracting bit difference info. */
#if defined COMPILE_PCRE8
  /* The low 8 bits are the differing bit, the rest is the index of the code
  unit which contains it. */
  othercasechar = cc + (othercasebit >> 8);
  othercasebit &= 0xff;
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  /* Note that this code only handles characters in the BMP. If there
  ever are characters outside the BMP whose othercase differs in only one
  bit from itself (there currently are none), this code will need to be
  revised for COMPILE_PCRE32. */
  /* Bit 0x100 tells that the differing bit is in the upper byte of the code
  unit; the index of the code unit starts at bit 9. */
  othercasechar = cc + (othercasebit >> 9);
  if ((othercasebit & 0x100) != 0)
    othercasebit = (othercasebit & 0xff) << 8;
  else
    othercasebit &= 0xff;
#endif /* COMPILE_PCRE[8|16|32] */
  }

if (context->sourcereg == -1)
  {
  /* First call of a sequence: load the first piece of data into TMP1, using
  the widest load which fits the remaining length. */
#if defined COMPILE_PCRE8
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else if (context->length >= 2)
    OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE16
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  if (context->length >= 4)
    OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
  else
#endif
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#elif defined COMPILE_PCRE32
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE[8|16|32] */
  /* The next load goes to TMP2. */
  context->sourcereg = TMP2;
  }

#ifdef SUPPORT_UTF
/* Number of code units of the character at cc. */
utflength = 1;
if (common->utf && HAS_EXTRALEN(*cc))
  utflength += GET_EXTRALEN(*cc);

do
  {
#endif

  context->length -= IN_UCHARS(1);
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)

  /* Unaligned read is supported. */
  /* Collect the code unit: c holds the expected value (with the case bit
  forced), oc holds the bit which has to be forced in the subject data. */
  if (othercasebit != 0 && othercasechar == cc)
    {
    context->c.asuchars[context->ucharptr] = *cc | othercasebit;
    context->oc.asuchars[context->ucharptr] = othercasebit;
    }
  else
    {
    context->c.asuchars[context->ucharptr] = *cc;
    context->oc.asuchars[context->ucharptr] = 0;
    }
  context->ucharptr++;

  /* Compare when a full 4 byte group is collected, when the sequence ends,
  or (8 bit only) when two units are collected and a single one remains. */
#if defined COMPILE_PCRE8
  if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
#else
  if (context->ucharptr >= 2 || context->length == 0)
#endif
    {
    /* Load the data for the next comparison before doing this one. */
    if (context->length >= 4)
      OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
    else if (context->length >= 2)
      OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#if defined COMPILE_PCRE8
    else if (context->length >= 1)
      OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
#endif /* COMPILE_PCRE8 */
    /* Switch to the register which was loaded by the previous step. */
    context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

    /* The case labels are expressed in code units: 4 and 2 bytes of data. */
    switch(context->ucharptr)
      {
      case 4 / sizeof(pcre_uchar):
      if (context->oc.asint != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
      break;

      case 2 / sizeof(pcre_uchar):
      if (context->oc.asushort != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
      break;

#ifdef COMPILE_PCRE8
      case 1:
      if (context->oc.asbyte != 0)
        OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
      break;
#endif

      default:
      SLJIT_ASSERT_STOP();
      break;
      }
    context->ucharptr = 0;
    }

#else

  /* Unaligned read is unsupported or in 32 bit mode. */
  /* One code unit per step: load the next unit, then compare the previous
  one. */
  if (context->length >= 1)
    OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);

  context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;

  if (othercasebit != 0 && othercasechar == cc)
    {
    OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
    }
  else
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));

#endif

  cc++;
#ifdef SUPPORT_UTF
  utflength--;
  }
while (utflength > 0);
#endif

return cc;
}
4001
4002 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4003
/* Helper macros which keep a register biased by a known offset, so that a
following range or equality test can use a small immediate value. Each macro
emits a SUB or an ADD with the difference between the requested and the
current offset (nothing when they are equal) and then records the requested
offset.

SET_TYPE_OFFSET works on the register named by the typereg variable and
tracks the offset in typeoffset; SET_CHAR_OFFSET works on TMP1 and tracks the
offset in charoffset. These variables (and "compiler") must be visible where
the macros are used.

Notes: the argument is evaluated several times, so it must not have side
effects. The expansion is an if statement followed by an assignment and it
already ends with a semicolon, hence it is not a single statement: it must not
be used as the unbraced body of an if, else or loop. */

#define SET_TYPE_OFFSET(value) \
  if ((value) != typeoffset) \
    { \
    if ((value) > typeoffset) \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    else \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    } \
  typeoffset = (value);

#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
    { \
    if ((value) > charoffset) \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
    else \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
    } \
  charoffset = (value);
4023
4024 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4025 {
4026 DEFINE_COMPILER;
4027 jump_list *found = NULL;
4028 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
4029 pcre_int32 c, charoffset;
4030 const pcre_uint32 *other_cases;
4031 struct sljit_jump *jump = NULL;
4032 pcre_uchar *ccbegin;
4033 int compares, invertcmp, numberofcmps;
4034 #ifdef SUPPORT_UCP
4035 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4036 BOOL charsaved = FALSE;
4037 int typereg = TMP1, scriptreg = TMP1;
4038 pcre_int32 typeoffset;
4039 #endif
4040
4041 /* Although SUPPORT_UTF must be defined, we are
4042 not necessary in utf mode even in 8 bit mode. */
4043 detect_partial_match(common, backtracks);
4044 read_char(common);
4045
4046 if ((*cc++ & XCL_MAP) != 0)
4047 {
4048 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4049 #ifndef COMPILE_PCRE8
4050 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4051 #elif defined SUPPORT_UTF
4052 if (common->utf)
4053 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4054 #endif
4055
4056 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
4057 {
4058 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4059 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4060 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4061 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4062 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4063 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4064 }
4065
4066 #ifndef COMPILE_PCRE8
4067 JUMPHERE(jump);
4068 #elif defined SUPPORT_UTF
4069 if (common->utf)
4070 JUMPHERE(jump);
4071 #endif
4072 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4073 #ifdef SUPPORT_UCP
4074 charsaved = TRUE;
4075 #endif
4076 cc += 32 / sizeof(pcre_uchar);
4077 }
4078
4079 /* Scanning the necessary info. */
4080 ccbegin = cc;
4081 compares = 0;
4082 while (*cc != XCL_END)
4083 {
4084 compares++;
4085 if (*cc == XCL_SINGLE)
4086 {
4087 cc += 2;
4088 #ifdef SUPPORT_UTF
4089 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4090 #endif
4091 #ifdef SUPPORT_UCP
4092 needschar = TRUE;
4093 #endif
4094 }
4095 else if (*cc == XCL_RANGE)
4096 {
4097 cc += 2;
4098 #ifdef SUPPORT_UTF
4099 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4100 #endif
4101 cc++;
4102 #ifdef SUPPORT_UTF
4103 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4104 #endif
4105 #ifdef SUPPORT_UCP
4106 needschar = TRUE;
4107 #endif
4108 }
4109 #ifdef SUPPORT_UCP
4110 else
4111 {
4112 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4113 cc++;
4114 switch(*cc)
4115 {
4116 case PT_ANY:
4117 break;
4118
4119 case PT_LAMP:
4120 case PT_GC:
4121 case PT_PC:
4122 case PT_ALNUM:
4123 needstype = TRUE;
4124 break;
4125
4126 case PT_SC:
4127 needsscript = TRUE;
4128 break;
4129
4130 case PT_SPACE:
4131 case PT_PXSPACE:
4132 case PT_WORD:
4133 needstype = TRUE;
4134 needschar = TRUE;
4135 break;
4136
4137 case PT_CLIST:
4138 case PT_UCNC:
4139 needschar = TRUE;
4140 break;
4141
4142 default:
4143 SLJIT_ASSERT_STOP();
4144 break;
4145 }
4146 cc += 2;
4147 }
4148 #endif
4149 }
4150
4151 #ifdef SUPPORT_UCP
4152 /* Simple register allocation. TMP1 is preferred if possible. */
4153 if (needstype || needsscript)
4154 {
4155 if (needschar && !charsaved)
4156 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4157 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4158 if (needschar)
4159 {
4160 if (needstype)
4161 {
4162 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4163 typereg = RETURN_ADDR;
4164 }
4165
4166 if (needsscript)
4167 scriptreg = TMP3;
4168 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4169 }
4170 else if (needstype && needsscript)
4171 scriptreg = TMP3;
4172 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4173
4174 if (needsscript)
4175 {
4176 if (scriptreg == TMP1)
4177 {
4178 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4179 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4180 }
4181 else
4182 {
4183 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4184 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4185 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4186 }
4187 }
4188 }
4189 #endif
4190
4191 /* Generating code. */
4192 cc = ccbegin;
4193 charoffset = 0;
4194 numberofcmps = 0;
4195 #ifdef SUPPORT_UCP
4196 typeoffset = 0;
4197 #endif
4198
4199 while (*cc != XCL_END)
4200 {
4201 compares--;
4202 invertcmp = (compares == 0 && list != backtracks);
4203 jump = NULL;
4204
4205 if (*cc == XCL_SINGLE)
4206 {
4207 cc ++;
4208 #ifdef SUPPORT_UTF
4209 if (common->utf)
4210 {
4211 GETCHARINC(c, cc);
4212 }
4213 else
4214 #endif
4215 c = *cc++;
4216
4217 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4218 {
4219 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4220 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4221 numberofcmps++;
4222 }
4223 else if (numberofcmps > 0)
4224 {
4225 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4226 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4227 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4228 numberofcmps = 0;
4229 }
4230 else
4231 {
4232 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4233 numberofcmps = 0;
4234 }
4235 }
4236 else if (*cc == XCL_RANGE)
4237 {
4238 cc ++;
4239 #ifdef SUPPORT_UTF
4240 if (common->utf)
4241 {
4242 GETCHARINC(c, cc);
4243 }
4244 else
4245 #endif
4246 c = *cc++;
4247 SET_CHAR_OFFSET(c);
4248 #ifdef SUPPORT_UTF
4249 if (common->utf)
4250 {
4251 GETCHARINC(c, cc);
4252 }
4253 else
4254 #endif
4255 c = *cc++;
4256 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4257 {
4258 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4259 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4260 numberofcmps++;
4261 }
4262 else if (numberofcmps > 0)
4263 {
4264 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4265 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4266 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4267 numberofcmps = 0;
4268 }
4269 else
4270 {
4271 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4272 numberofcmps = 0;
4273 }
4274 }
4275 #ifdef SUPPORT_UCP
4276 else
4277 {
4278 if (*cc == XCL_NOTPROP)
4279 invertcmp ^= 0x1;
4280 cc++;
4281 switch(*cc)
4282 {
4283 case PT_ANY:
4284 if (list != backtracks)
4285 {
4286 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4287 continue;
4288 }
4289 else if (cc[-1] == XCL_NOTPROP)
4290 continue;
4291 jump = JUMP(SLJIT_JUMP);
4292 break;
4293
4294 case PT_LAMP:
4295 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4296 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4297 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4298 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4299 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4300 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4301 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4302 break;
4303
4304 case PT_GC:
4305 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4306 SET_TYPE_OFFSET(c);
4307 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4308 break;
4309
4310 case PT_PC:
4311 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4312 break;
4313
4314 case PT_SC:
4315 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4316 break;
4317
4318 case PT_SPACE:
4319 case PT_PXSPACE:
4320 if (*cc == PT_SPACE)
4321 {
4322 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4323 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4324 }
4325 SET_CHAR_OFFSET(9);
4326 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4327 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4328 if (*cc == PT_SPACE)
4329 JUMPHERE(jump);
4330
4331 SET_TYPE_OFFSET(ucp_Zl);
4332 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4333 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4334 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4335 break;
4336
4337 case PT_WORD:
4338 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4339 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4340 /* Fall through. */
4341
4342 case PT_ALNUM:
4343 SET_TYPE_OFFSET(ucp_Ll);
4344 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4345 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4346 SET_TYPE_OFFSET(ucp_Nd);
4347 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4348 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4349 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4350 break;
4351
4352 case PT_CLIST:
4353 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4354
4355 /* At least three characters are required.
4356 Otherwise this case would be handled by the normal code path. */
4357 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4358 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4359
4360 /* Optimizing character pairs, if their difference is power of 2. */
4361 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4362 {
4363 if (charoffset == 0)
4364 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4365 else
4366 {
4367 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4368 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4369 }
4370 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4371 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4372 other_cases += 2;
4373 }
4374 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4375 {
4376 if (charoffset == 0)
4377 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4378 else
4379 {
4380 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4381 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4382 }
4383 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4384 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4385
4386 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4387 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4388
4389 other_cases += 3;
4390 }
4391 else
4392 {
4393 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4394 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4395 }
4396
4397 while (*other_cases != NOTACHAR)
4398 {
4399 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4400 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4401 }
4402 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4403 break;
4404
4405 case PT_UCNC:
4406 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4407 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4408 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4409 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4410 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4411 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4412
4413 SET_CHAR_OFFSET(0xa0);
4414 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4415 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4416 SET_CHAR_OFFSET(0);
4417 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4418 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4419 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4420 break;
4421 }
4422 cc += 2;
4423 }
4424 #endif
4425
4426 if (jump != NULL)
4427 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4428 }
4429
4430 if (found != NULL)
4431 set_jumps(found, LABEL());
4432 }
4433
4434 #undef SET_TYPE_OFFSET
4435 #undef SET_CHAR_OFFSET
4436
4437 #endif
4438
4439 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4440 {
4441 DEFINE_COMPILER;
4442 int length;
4443 unsigned int c, oc, bit;
4444 compare_context context;
4445 struct sljit_jump *jump[4];
4446 jump_list *end_list;
4447 #ifdef SUPPORT_UTF
4448 struct sljit_label *label;
4449 #ifdef SUPPORT_UCP
4450 pcre_uchar propdata[5];
4451 #endif
4452 #endif
4453
4454 switch(type)
4455 {
4456 case OP_SOD:
4457 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4458 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4459 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4460 return cc;
4461
4462 case OP_SOM:
4463 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4464 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4465 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4466 return cc;
4467
4468 case OP_NOT_WORD_BOUNDARY:
4469 case OP_WORD_BOUNDARY:
4470 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4471 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4472 return cc;
4473
4474 case OP_NOT_DIGIT:
4475 case OP_DIGIT:
4476 /* Digits are usually 0-9, so it is worth to optimize them. */
4477 if (common->digits[0] == -2)
4478 get_ctype_ranges(common, ctype_digit, common->digits);
4479 detect_partial_match(common, backtracks);
4480 /* Flip the starting bit in the negative case. */
4481 if (type == OP_NOT_DIGIT)
4482 common->digits[1] ^= 1;
4483 if (!check_ranges(common, common->digits, backtracks, TRUE))
4484 {
4485 read_char8_type(common);
4486 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4487 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4488 }
4489 if (type == OP_NOT_DIGIT)
4490 common->digits[1] ^= 1;
4491 return cc;
4492
4493 case OP_NOT_WHITESPACE:
4494 case OP_WHITESPACE:
4495 detect_partial_match(common, backtracks);
4496 read_char8_type(common);
4497 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4498 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4499 return cc;
4500
4501 case OP_NOT_WORDCHAR:
4502 case OP_WORDCHAR:
4503 detect_partial_match(common, backtracks);
4504 read_char8_type(common);
4505 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4506 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4507 return cc;
4508
4509 case OP_ANY:
4510 detect_partial_match(common, backtracks);
4511 read_char(common);
4512 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4513 {
4514 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4515 end_list = NULL;
4516 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4517 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4518 else
4519 check_str_end(common, &end_list);
4520
4521 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4522 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4523 set_jumps(end_list, LABEL());
4524 JUMPHERE(jump[0]);
4525 }
4526 else
4527 check_newlinechar(common, common->nltype, backtracks, TRUE);
4528 return cc;
4529
4530 case OP_ALLANY:
4531 detect_partial_match(common, backtracks);
4532 #ifdef SUPPORT_UTF
4533 if (common->utf)
4534 {
4535 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4536 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4537 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4538 #if defined COMPILE_PCRE8
4539 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4540 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4541 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4542 #elif defined COMPILE_PCRE16
4543 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4544 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4545 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4546 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4547 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4548 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4549 #endif
4550 JUMPHERE(jump[0]);
4551 #endif /* COMPILE_PCRE[8|16] */
4552 return cc;
4553 }
4554 #endif
4555 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4556 return cc;
4557
4558 case OP_ANYBYTE:
4559 detect_partial_match(common, backtracks);
4560 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4561 return cc;
4562
4563 #ifdef SUPPORT_UTF
4564 #ifdef SUPPORT_UCP
4565 case OP_NOTPROP:
4566 case OP_PROP:
4567 propdata[0] = 0;
4568 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4569 propdata[2] = cc[0];
4570 propdata[3] = cc[1];
4571 propdata[4] = XCL_END;
4572 compile_xclass_matchingpath(common, propdata, backtracks);
4573 return cc + 2;
4574 #endif
4575 #endif
4576
4577 case OP_ANYNL:
4578 detect_partial_match(common, backtracks);
4579 read_char(common);
4580 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4581 /* We don't need to handle soft partial matching case. */
4582 end_list = NULL;
4583 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4584 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4585 else
4586 check_str_end(common, &end_list);
4587 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4588 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4589 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4590 jump[2] = JUMP(SLJIT_JUMP);
4591 JUMPHERE(jump[0]);
4592 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4593 set_jumps(end_list, LABEL());
4594 JUMPHERE(jump[1]);
4595 JUMPHERE(jump[2]);
4596 return cc;
4597
4598 case OP_NOT_HSPACE:
4599 case OP_HSPACE:
4600 detect_partial_match(common, backtracks);
4601 read_char(common);
4602 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4603 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4604 return cc;
4605
4606 case OP_NOT_VSPACE:
4607 case OP_VSPACE:
4608 detect_partial_match(common, backtracks);
4609 read_char(common);
4610 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4611 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4612 return cc;
4613
4614 #ifdef SUPPORT_UCP
4615 case OP_EXTUNI:
4616 detect_partial_match(common, backtracks);
4617 read_char(common);
4618 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4619 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4620 /* Optimize register allocation: use a real register. */
4621 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4622 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4623
4624 label = LABEL();
4625 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4626 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4627 read_char(common);
4628 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4629 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4630 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4631
4632 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4633 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4634 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4635 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4636 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4637 JUMPTO(SLJIT_C_NOT_ZERO, label);
4638
4639 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4640 JUMPHERE(jump[0]);
4641 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4642
4643 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4644 {
4645 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4646 /* Since we successfully read a char above, partial matching must occure. */
4647 check_partial(common, TRUE);
4648 JUMPHERE(jump[0]);
4649 }
4650 return cc;
4651 #endif
4652
4653 case OP_EODN:
4654 /* Requires rather complex checks. */
4655 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4656 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4657 {
4658 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4659 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4660 if (common->mode == JIT_COMPILE)
4661 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4662 else
4663 {
4664 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4665 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4666 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4667 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4668 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4669 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4670 check_partial(common, TRUE);
4671 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4672 JUMPHERE(jump[1]);
4673 }
4674 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4675 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4676 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4677 }
4678 else if (common->nltype == NLTYPE_FIXED)
4679 {
4680 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4681 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4682 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4683 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4684 }
4685 else
4686 {
4687 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4688 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4689 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4690 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4691 jump[2] = JUMP(SLJIT_C_GREATER);
4692 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4693 /* Equal. */
4694 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4695 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4696 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4697
4698 JUMPHERE(jump[1]);
4699 if (common->nltype == NLTYPE_ANYCRLF)
4700 {
4701 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4702 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4703 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4704 }
4705 else
4706 {
4707 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4708 read_char(common);
4709 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4710 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4711 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4712 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4713 }
4714 JUMPHERE(jump[2]);
4715 JUMPHERE(jump[3]);
4716 }
4717 JUMPHERE(jump[0]);
4718 check_partial(common, FALSE);
4719 return cc;
4720
4721 case OP_EOD:
4722 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4723 check_partial(common, FALSE);
4724 return cc;
4725
4726 case OP_CIRC:
4727 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4728 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4729 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4730 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4731 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4732 return cc;
4733
4734 case OP_CIRCM:
4735 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4736 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4737 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4738 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4739 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4740 jump[0] = JUMP(SLJIT_JUMP);
4741 JUMPHERE(jump[1]);
4742
4743 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4744 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4745 {
4746 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4747 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4748 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4749 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4750 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4751 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4752 }
4753 else
4754 {
4755 skip_char_back(common);
4756 read_char(common);
4757 check_newlinechar(common, common->nltype, backtracks, FALSE);
4758 }
4759 JUMPHERE(jump[0]);
4760 return cc;
4761
4762 case OP_DOLL:
4763 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4764 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4765 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4766
4767 if (!common->endonly)
4768 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4769 else
4770 {
4771 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4772 check_partial(common, FALSE);
4773 }
4774 return cc;
4775
4776 case OP_DOLLM:
4777 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4778 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4779 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4780 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4781 check_partial(common, FALSE);
4782 jump[0] = JUMP(SLJIT_JUMP);
4783 JUMPHERE(jump[1]);
4784
4785 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4786 {
4787 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4788 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4789 if (common->mode == JIT_COMPILE)
4790 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4791 else
4792 {
4793 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4794 /* STR_PTR = STR_END - IN_UCHARS(1) */
4795 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4796 check_partial(common, TRUE);
4797 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4798 JUMPHERE(jump[1]);
4799 }
4800
4801 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4802 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4803 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4804 }
4805 else
4806 {
4807 peek_char(common);
4808 check_newlinechar(common, common->nltype, backtracks, FALSE);
4809 }
4810 JUMPHERE(jump[0]);
4811 return cc;
4812
4813 case OP_CHAR:
4814 case OP_CHARI:
4815 length = 1;
4816 #ifdef SUPPORT_UTF
4817 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4818 #endif
4819 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4820 {
4821 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4822 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4823
4824 context.length = IN_UCHARS(length);
4825 context.sourcereg = -1;
4826 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4827 context.ucharptr = 0;
4828 #endif
4829 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4830 }
4831 detect_partial_match(common, backtracks);
4832 read_char(common);
4833 #ifdef SUPPORT_UTF
4834 if (common->utf)
4835 {
4836 GETCHAR(c, cc);
4837 }
4838 else
4839 #endif
4840 c = *cc;
4841 if (type == OP_CHAR || !char_has_othercase(common, cc))
4842 {
4843 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4844 return cc + length;
4845 }
4846 oc = char_othercase(common, c);
4847 bit = c ^ oc;
4848 if (is_powerof2(bit))
4849 {
4850 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4851 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4852 return cc + length;
4853 }
4854 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4855 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4856 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4857 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4858 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4859 return cc + length;
4860
4861 case OP_NOT:
4862 case OP_NOTI:
4863 detect_partial_match(common, backtracks);
4864 length = 1;
4865 #ifdef SUPPORT_UTF
4866 if (common->utf)
4867 {
4868 #ifdef COMPILE_PCRE8
4869 c = *cc;
4870 if (c < 128)
4871 {
4872 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4873 if (type == OP_NOT || !char_has_othercase(common, cc))
4874 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4875 else
4876 {
4877 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4878 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4879 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4880 }
4881 /* Skip the variable-length character. */
4882 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4883 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4884 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4885 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4886 JUMPHERE(jump[0]);
4887 return cc + 1;
4888 }
4889 else
4890 #endif /* COMPILE_PCRE8 */
4891 {
4892 GETCHARLEN(c, cc, length);
4893 read_char(common);
4894 }
4895 }
4896 else
4897 #endif /* SUPPORT_UTF */
4898 {
4899 read_char(common);
4900 c = *cc;
4901 }
4902
4903 if (type == OP_NOT || !char_has_othercase(common, cc))
4904 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4905 else
4906 {
4907 oc = char_othercase(common, c);
4908 bit = c ^ oc;
4909 if (is_powerof2(bit))
4910 {
4911 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4912 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4913 }
4914 else
4915 {
4916 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4917 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4918 }
4919 }
4920 return cc + length;
4921
4922 case OP_CLASS:
4923 case OP_NCLASS:
4924 detect_partial_match(common, backtracks);
4925 read_char(common);
4926 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4927 return cc + 32 / sizeof(pcre_uchar);
4928
4929 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4930 jump[0] = NULL;
4931 #ifdef COMPILE_PCRE8
4932 /* This check only affects 8 bit mode. In other modes, we
4933 always need to compare the value with 255. */
4934 if (common->utf)
4935 #endif /* COMPILE_PCRE8 */
4936 {
4937 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4938 if (type == OP_CLASS)
4939 {
4940 add_jump(compiler, backtracks, jump[0]);
4941 jump[0] = NULL;
4942 }
4943 }
4944 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4945 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4946 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4947 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4948 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4949 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4950 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4951 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4952 if (jump[0] != NULL)
4953 JUMPHERE(jump[0]);
4954 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4955 return cc + 32 / sizeof(pcre_uchar);
4956
4957 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4958 case OP_XCLASS:
4959 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4960 return cc + GET(cc, 0) - 1;
4961 #endif
4962
4963 case OP_REVERSE:
4964 length = GET(cc, 0);
4965 if (length == 0)
4966 return cc + LINK_SIZE;
4967 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4968 #ifdef SUPPORT_UTF
4969 if (common->utf)
4970 {
4971 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4972 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4973 label = LABEL();
4974 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4975 skip_char_back(common);
4976 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4977 JUMPTO(SLJIT_C_NOT_ZERO, label);
4978 }
4979 else
4980 #endif
4981 {
4982 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4983 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4984 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4985 }
4986 check_start_used_ptr(common);
4987 return cc + LINK_SIZE;
4988 }
4989 SLJIT_ASSERT_STOP();
4990 return cc;
4991 }
4992
4993 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4994 {
4995 /* This function consumes at least one input character. */
4996 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4997 DEFINE_COMPILER;
4998 pcre_uchar *ccbegin = cc;
4999 compare_context context;
5000 int size;
5001
5002 context.length = 0;
5003 do
5004 {
5005 if (cc >= ccend)
5006 break;
5007
5008 if (*cc == OP_CHAR)
5009 {
5010 size = 1;
5011 #ifdef SUPPORT_UTF
5012 if (common->utf && HAS_EXTRALEN(cc[1]))
5013 size += GET_EXTRALEN(cc[1]);
5014 #endif
5015 }
5016 else if (*cc == OP_CHARI)
5017 {
5018 size = 1;
5019 #ifdef SUPPORT_UTF
5020 if (common->utf)
5021 {
5022 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5023 size = 0;
5024 else if (HAS_EXTRALEN(cc[1]))
5025 size += GET_EXTRALEN(cc[1]);
5026 }
5027 else
5028 #endif
5029 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5030 size = 0;
5031 }
5032 else
5033 size = 0;
5034
5035 cc += 1 + size;
5036 context.length += IN_UCHARS(size);
5037 }
5038 while (size > 0 && context.length <= 128);
5039
5040 cc = ccbegin;
5041 if (context.length > 0)
5042 {
5043 /* We have a fixed-length byte sequence. */
5044 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5045 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5046
5047 context.sourcereg = -1;
5048 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5049 context.ucharptr = 0;
5050 #endif
5051 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5052 return cc;
5053 }
5054
5055 /* A non-fixed length character will be checked if length == 0. */
5056 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5057 }
5058
5059 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5060 {
5061 DEFINE_COMPILER;
5062 int offset = GET2(cc, 1) << 1;
5063
5064 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5065 if (!common->jscript_compat)
5066 {
5067 if (backtracks == NULL)
5068 {
5069 /* OVECTOR(1) contains the "string begin - 1" constant. */
5070 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5071 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5072 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5073 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5074 return JUMP(SLJIT_C_NOT_ZERO);
5075 }
5076 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5077 }
5078 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5079 }
5080
5081 /* Forward declarations: both generators are called by functions in this
part of the file before their definitions appear. */
5082 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5083 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5084
/* PUSH_BACKTRACK(size, ccstart, error)
Allocates `size` bytes with sljit_alloc_memory(), zero-fills them and links
the new record in front of parent->top, storing `ccstart` in its cc field.
If the compiler reports an error after the allocation, the ENCLOSING function
returns `error` immediately.
The macro relies on three names being in scope at the point of use:
`compiler`, `backtrack` (receives the new record) and `parent`. */
5085 #define PUSH_BACKTRACK(size, ccstart, error) \
5086 do \
5087 { \
5088 backtrack = sljit_alloc_memory(compiler, (size)); \
5089 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5090 return error; \
5091 memset(backtrack, 0, size); \
5092 backtrack->prev = parent->top; \
5093 backtrack->cc = (ccstart); \
5094 parent->top = backtrack; \
5095 } \
5096 while (0)
5097
/* Same as PUSH_BACKTRACK, for use in functions returning void: on a compiler
error the enclosing function returns without a value. */
5098 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5099 do \
5100 { \
5101 backtrack = sljit_alloc_memory(compiler, (size)); \
5102 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5103 return; \
5104 memset(backtrack, 0, size); \
5105 backtrack->prev = parent->top; \
5106 backtrack->cc = (ccstart); \
5107 parent->top = backtrack; \
5108 } \
5109 while (0)
5110
/* Views the in-scope `backtrack` pointer as a pointer to the given record
type (the type that was passed to sizeof in PUSH_BACKTRACK). */
5111 #define BACKTRACK_AS(type) ((type *)backtrack)
5112
/* Emits the matching path of a single back reference (OP_REF or OP_REFI).

cc          points to the opcode; the group number is read with GET2(cc, 1)
backtracks  list that receives every jump taken when the reference fails
withchecks  when TRUE, also emit the test against OVECTOR(1) (skipped in
            jscript_compat mode) and the test for an empty capture
emptyfail   only meaningful when the empty-capture jump exists: TRUE adds it
            to backtracks, FALSE lets it land right after the comparison

Returns the address following the opcode and its IMM2_SIZE operand. */
5113 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5114 {
5115 DEFINE_COMPILER;
5116 int offset = GET2(cc, 1) << 1;
5117 struct sljit_jump *jump = NULL;
5118 struct sljit_jump *partial;
5119 struct sljit_jump *nopartial;
5120
/* TMP1 = first ovector slot of the referenced group. */
5121 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5122 /* OVECTOR(1) contains the "string begin - 1" constant. */
5123 if (withchecks && !common->jscript_compat)
5124 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5125
5126 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5127 if (common->utf && *cc == OP_REFI)
5128 {
/* Caseless reference in UTF mode: the comparison is done by the C helper
do_utf_caselesscmp(). The assertion documents the register layout the call
sequence below depends on. */
5129 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
/* TMP2 = second ovector slot; equal slots mean an empty capture. */
5130 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5131 if (withchecks)
5132 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5133
5134 /* Needed to save important temporary registers. */
5135 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* SCRATCH_REG2 (== STACK_TOP, saved above) now carries ARGUMENTS; the current
STR_PTR is handed over through jit_arguments.uchar_ptr. */
5136 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5137 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5138 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5139 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return value handling: 0 and 1 are treated as failures, anything else
becomes the new STR_PTR (see the last instruction of this branch). In the
partial matching modes the value 1 additionally runs check_partial() before
backtracking. */
5140 if (common->mode == JIT_COMPILE)
5141 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5142 else
5143 {
5144 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5145 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5146 check_partial(common, FALSE);
5147 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5148 JUMPHERE(nopartial);
5149 }
5150 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5151 }
5152 else
5153 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5154 {
/* TMP2 = second slot - first slot; the zero flag set here feeds the
empty-capture jump below. */
5155 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5156 if (withchecks)
5157 jump = JUMP(SLJIT_C_ZERO);
5158
/* Advance STR_PTR by that amount; `partial` is taken when this runs past
STR_END. In JIT_COMPILE mode that is a plain backtrack. */
5159 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5160 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5161 if (common->mode == JIT_COMPILE)
5162 add_jump(compiler, backtracks, partial);
5163
/* Fast call into the shared caseful/caseless comparison routine (the jump is
queued on a list in `common`). A non-zero TMP2 afterwards is treated as a
mismatch. */
5164 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5165 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5166
5167 if (common->mode != JIT_COMPILE)
5168 {
/* Partial matching modes: when the reference would run past the end of the
subject, compare only the part that fits, then report a partial match
attempt via check_partial() and backtrack. */
5169 nopartial = JUMP(SLJIT_JUMP);
5170 JUMPHERE(partial);
5171 /* TMP2 -= STR_PTR - STR_END (remove the part lying beyond STR_END). */
5172 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5173 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* Nothing left to compare: skip the comparison call. */
5174 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5175 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5176 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5177 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5178 JUMPHERE(partial);
5179 check_partial(common, FALSE);
5180 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5181 JUMPHERE(nopartial);
5182 }
5183 }
5184
/* Resolve the empty-capture jump (only created when withchecks is TRUE). */
5185 if (jump != NULL)
5186 {
5187 if (emptyfail)
5188 add_jump(compiler, backtracks, jump);
5189 else
5190 JUMPHERE(jump);
5191 }
5192 return cc + 1 + IMM2_SIZE;
5193 }
5194
/* Emits the matching path of a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE, found at cc[1 + IMM2_SIZE]).

An iterator_backtrack record is pushed on `parent`; its matchingpath label is
set here. Returns the address following the repeat opcode and its operands,
or NULL when PUSH_BACKTRACK detects a compiler error. */
5195 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5196 {
5197 DEFINE_COMPILER;
5198 backtrack_common *backtrack;
5199 pcre_uchar type;
5200 struct sljit_label *label;
5201 struct sljit_jump *zerolength;
5202 struct sljit_jump *jump = NULL;
5203 pcre_uchar *ccbegin = cc;
5204 int min = 0, max = 0;
5205 BOOL minimize;
5206
5207 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5208
/* Decode the repeat. The lowest bit of the opcode selects the minimizing
variant. max == 0 is used by the star and plus forms, for which the code
below loops without an upper limit test. */
5209 type = cc[1 + IMM2_SIZE];
5210 minimize = (type & 0x1) != 0;
5211 switch(type)
5212 {
5213 case OP_CRSTAR:
5214 case OP_CRMINSTAR:
5215 min = 0;
5216 max = 0;
5217 cc += 1 + IMM2_SIZE + 1;
5218 break;
5219 case OP_CRPLUS:
5220 case OP_CRMINPLUS:
5221 min = 1;
5222 max = 0;
5223 cc += 1 + IMM2_SIZE + 1;
5224 break;
5225 case OP_CRQUERY:
5226 case OP_CRMINQUERY:
5227 min = 0;
5228 max = 1;
5229 cc += 1 + IMM2_SIZE + 1;
5230 break;
5231 case OP_CRRANGE:
5232 case OP_CRMINRANGE:
5233 min = GET2(cc, 1 + IMM2_SIZE + 1);
5234 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5235 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5236 break;
5237 default:
5238 SLJIT_ASSERT_STOP();
5239 break;
5240 }
5241
/* ---- Maximizing (greedy) repeat ---- */
5242 if (!minimize)
5243 {
5244 if (min == 0)
5245 {
/* Zero repetitions are acceptable: push the current STR_PTR and a 0 word.
compile_ref_checks() is called with a NULL list, so its result jump
(`zerolength`) is the only exit it creates. */
5246 allocate_stack(common, 2);
5247 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5248 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5249 /* Temporary release of STR_PTR. */
5250 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5251 zerolength = compile_ref_checks(common, ccbegin, NULL);
5252 /* Restore if not zero length. */
5253 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5254 }
5255 else
5256 {
/* At least one repetition is required: push a single 0 word and let the
checks add their failure jump to this item's backtrack list. */
5257 allocate_stack(common, 1);
5258 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5259 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5260 }
5261
/* The POSSESSIVE0 local is used as the repetition counter whenever a count
has to be compared against min or max. */
5262 if (min > 1 || max > 1)
5263 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5264
/* Loop head: match the reference once (no checks, they were done above). */
5265 label = LABEL();
5266 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5267
5268 if (min > 1 || max > 1)
5269 {
/* counter++ */
5270 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5271 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5272 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below min: repeat without pushing a backtrack position. */
5273 if (min > 1)
5274 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5275 if (max > 1)
5276 {
/* Below max: push the current STR_PTR and try one more repetition;
at max: leave the loop. */
5277 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5278 allocate_stack(common, 1);
5279 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5280 JUMPTO(SLJIT_JUMP, label);
5281 JUMPHERE(jump);
5282 }
5283 }
5284
5285 if (max == 0)
5286 {
5287 /* Includes min > 1 case as well. */
/* No upper limit: push the current STR_PTR and loop unconditionally. */
5288 allocate_stack(common, 1);
5289 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5290 JUMPTO(SLJIT_JUMP, label);
5291 }
5292
5293 JUMPHERE(zerolength);
5294 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5295
5296 count_match(common);
5297 return cc;
5298 }
5299
/* ---- Minimizing (lazy) repeat ----
Two stack words are reserved: STACK(0) is set to 0 here and later receives
STR_PTR, STACK(1) is the repetition counter (not initialised for
OP_CRMINSTAR, which never tests it below). */
5300 allocate_stack(common, 2);
5301 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5302 if (type != OP_CRMINSTAR)
5303 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5304
5305 if (min == 0)
5306 {
/* Zero repetitions allowed: record STR_PTR and jump over the first match. */
5307 zerolength = compile_ref_checks(common, ccbegin, NULL);
5308 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5309 jump = JUMP(SLJIT_JUMP);
5310 }
5311 else
5312 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5313
/* The label stored here is also the target of the min-count loop below. */
5314 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Counter already at max: fail instead of matching once more. */
5315 if (max > 0)
5316 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5317
5318 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5319 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5320
5321 if (min > 1)
5322 {
/* counter++ and repeat immediately until min repetitions are done. */
5323 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5324 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5325 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5326 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5327 }
5328 else if (max > 0)
5329 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5330
5331 if (jump != NULL)
5332 JUMPHERE(jump);
5333 JUMPHERE(zerolength);
5334
5335 count_match(common);
5336 return cc;
5337 }
5338
5339 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5340 {
5341 DEFINE_COMPILER;
5342 backtrack_common *backtrack;
5343 recurse_entry *entry = common->entries;
5344 recurse_entry *prev = NULL;
5345 sljit_sw start = GET(cc, 1);
5346 pcre_uchar *start_cc;
5347 BOOL needs_control_head;
5348
5349 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5350
5351 /* Inlining simple patterns. */
5352 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5353 {
5354 start_cc = common->start + start;
5355 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5356 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5357 return cc + 1 + LINK_SIZE;
5358 }
5359
5360 while (entry != NULL)
5361 {
5362 if (entry->start == start)
5363 break;
5364 prev = entry;
5365 entry = entry->next;
5366 }
5367
5368 if (entry == NULL)
5369 {
5370 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5371 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5372 return NULL;
5373 entry->next = NULL;
5374 entry->entry = NULL;
5375 entry->calls = NULL;
5376 entry->start = start;
5377
5378 if (prev != NULL)
5379 prev->next = entry;
5380 else
5381 common->entries = entry;
5382 }
5383
5384 if (common->has_set_som && common->mark_ptr != 0)
5385 {
5386 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5387 allocate_stack(common, 2);
5388 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5389 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5390 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5391 }
5392 else if (common->has_set_som || common->mark_ptr != 0)
5393 {
5394 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5395 allocate_stack(common, 1);
5396 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5397 }
5398
5399 if (entry->entry == NULL)
5400 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5401 else
5402 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5403 /* Leave if the match is failed. */
5404 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5405 return cc + 1 + LINK_SIZE;
5406 }
5407
5408 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5409 {
5410 const pcre_uchar *begin = arguments->begin;
5411 int *offset_vector = arguments->offsets;
5412 int offset_count = arguments->offset_count;
5413 int i;
5414
5415 if (PUBL(callout) == NULL)
5416 return 0;
5417
5418 callout_block->version = 2;
5419 callout_block->callout_data = arguments->callout_data;
5420
5421 /* Offsets in subject. */
5422 callout_block->subject_length = arguments->end - arguments->begin;
5423 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5424 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5425 #if defined COMPILE_PCRE8
5426 callout_block->subject = (PCRE_SPTR)begin;
5427 #elif defined COMPILE_PCRE16
5428 callout_block->subject = (PCRE_SPTR16)begin;
5429 #elif defined COMPILE_PCRE32
5430 callout_block->subject = (PCRE_SPTR32)begin;
5431 #endif
5432
5433 /* Convert and copy the JIT offset vector to the offset_vector array. */
5434 callout_block->capture_top = 0;
5435 callout_block->offset_vector = offset_vector;
5436 for (i = 2; i < offset_count; i += 2)
5437 {
5438 offset_vector[i] = jit_ovector[i] - begin;
5439 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5440 if (jit_ovector[i] >= begin)
5441 callout_block->capture_top = i;
5442 }
5443
5444 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5445 if (offset_count > 0)
5446 offset_vector[0] = -1;
5447 if (offset_count > 1)
5448 offset_vector[1] = -1;
5449 return (*PUBL(callout))(callout_block);
5450 }
5451
5452 /* Size of the callout block, rounded up to a multiple of 8 bytes. */
5453 #define CALLOUT_ARG_SIZE \
5454 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5455
/* Offset of callout block field `arg` measured from STACK_TOP: the block
starts CALLOUT_ARG_SIZE bytes below it. */
5456 #define CALLOUT_ARG_OFFSET(arg) \
5457 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5458
/* Emits the matching path of a callout item: a callout block is built in
freshly allocated stack space, do_callout() is called, and its result is
dispatched. A positive result joins this item's backtrack list, a negative
one goes to the forced quit path, and zero continues with the next item.

The callout number is cc[1]; pattern_position and next_item_length are read
with GET() from the two LINK_SIZE operands that follow it. Returns the
address after the item, or NULL when PUSH_BACKTRACK detects a compiler
error. */
5459 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5460 {
5461 DEFINE_COMPILER;
5462 backtrack_common *backtrack;
5463
5464 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5465
5466 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5467
/* TMP2 = last captured group value, TMP1 = ARGUMENTS (kept in TMP1 until the
call below). */
5468 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5469 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5470 SLJIT_ASSERT(common->capture_last_ptr != 0);
5471 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5472 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5473
5474 /* These pointer-sized fields temporarily store internal variables:
offset_vector receives STR_PTR and subject receives OVECTOR(0). do_callout()
turns them into current_position / start_match and then overwrites them. */
5475 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5478
/* The mark field is copied from jit_arguments.mark_ptr when the pattern uses
marks, otherwise it is set to 0. */
5479 if (common->mark_ptr != 0)
5480 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5481 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5482 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5484
5485 /* Needed to save important temporary registers. */
5486 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* Second argument: address of the callout block; third argument: address of
the JIT ovector in the locals area. NOTE(review): the first argument is
presumably still ARGUMENTS in TMP1 (TMP1 being SLJIT_SCRATCH_REG1) - confirm. */
5487 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5488 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5489 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* do_callout() returns an int: move it as a 32 bit signed value so that the
signed test below sees the intended sign. */
5490 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5491 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5492 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5493
5494 /* Check return value. */
5495 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
/* > 0: this item fails (backtrack). */
5496 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
/* < 0: forced quit; jump straight to the label when it already exists,
otherwise queue the jump. */
5497 if (common->forced_quit_label == NULL)
5498 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5499 else
5500 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5501 return cc + 2 + 2 * LINK_SIZE;
5502 }
5503
5504 #undef CALLOUT_ARG_SIZE
5505 #undef CALLOUT_ARG_OFFSET
5506
5507 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5508 {
5509 DEFINE_COMPILER;
5510 int framesize;
5511 int extrasize;
5512 BOOL needs_control_head;
5513 int private_data_ptr;
5514 backtrack_common altbacktrack;
5515 pcre_uchar *ccbegin;
5516 pcre_uchar opcode;
5517 pcre_uchar bra = OP_BRA;
5518 jump_list *tmp = NULL;
5519 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5520 jump_list **found;
5521 /* Saving previous accept variables. */
5522 BOOL save_local_exit = common->local_exit;
5523 BOOL save_positive_assert = common->positive_assert;
5524 then_trap_backtrack *save_then_trap = common->then_trap;
5525 struct sljit_label *save_quit_label = common->quit_label;
5526 struct sljit_label *save_accept_label = common->accept_label;
5527 jump_list *save_quit = common->quit;
5528 jump_list *save_positive_assert_quit = common->positive_assert_quit;
5529 jump_list *save_accept = common->accept;
5530 struct sljit_jump *jump;
5531 struct sljit_jump *brajump = NULL;
5532
5533 /* Assert captures then. */
5534 common->then_trap = NULL;
5535
5536 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5537 {
5538 SLJIT_ASSERT(!conditional);
5539 bra = *cc;
5540 cc++;
5541 }
5542 private_data_ptr = PRIVATE_DATA(cc);
5543 SLJIT_ASSERT(private_data_ptr != 0);
5544 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5545 backtrack->framesize = framesize;
5546 backtrack->private_data_ptr = private_data_ptr;
5547 opcode = *cc;
5548 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
5549 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5550 ccbegin = cc;
5551 cc += GET(cc, 1);
5552
5553 if (bra == OP_BRAMINZERO)
5554 {
5555 /* This is a braminzero backtrack path. */
5556 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5557 free_stack(common, 1);
5558 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5559 }
5560
5561 if (framesize < 0)
5562 {
5563 extrasize = needs_control_head ? 2 : 1;
5564 if (framesize == no_frame)
5565 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5566 allocate_stack(common, extrasize);
5567 if (needs_control_head)
5568 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5569 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5570 if (needs_control_head)
5571 {
5572 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5573 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5574 }
5575 }
5576 else
5577 {
5578 extrasize = needs_control_head ? 3 : 2;
5579 allocate_stack(common, framesize + extrasize);
5580 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5581 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5582 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5583 if (needs_control_head)
5584 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5585 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5586 if (needs_control_head)
5587 {
5588 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5589 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5590 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5591 }
5592 else
5593 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5594 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
5595 }
5596
5597 memset(&altbacktrack, 0, sizeof(backtrack_common));
5598 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5599 {
5600 /* Negative assert is stronger than positive assert. */
5601 common->local_exit = TRUE;
5602 common->quit_label = NULL;
5603 common->quit = NULL;
5604 common->positive_assert = FALSE;
5605 }
5606 else
5607 common->positive_assert = TRUE;
5608 common->positive_assert_quit = NULL;
5609
5610 while (1)
5611 {
5612 common->accept_label = NULL;
5613 common->accept = NULL;
5614 altbacktrack.top = NULL;
5615 altbacktrack.topbacktracks = NULL;
5616
5617 if (*ccbegin == OP_ALT)
5618 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5619
5620 altbacktrack.cc = ccbegin;
5621 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5622 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5623 {
5624 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5625 {
5626 common->local_exit = save_local_exit;
5627 common->quit_label = save_quit_label;
5628 common->quit = save_quit;
5629 }
5630 common->positive_assert = save_positive_assert;
5631 common->then_trap = save_then_trap;
5632 common->accept_label = save_accept_label;
5633 common->positive_assert_quit = save_positive_assert_quit;
5634 common->accept = save_accept;
5635 return NULL;
5636 }
5637 common->accept_label = LABEL();
5638 if (common->accept != NULL)
5639 set_jumps(common->accept, common->accept_label);
5640
5641 /* Reset stack. */
5642 if (framesize < 0)
5643 {
5644 if (framesize == no_frame)
5645 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5646 else
5647 free_stack(common, extrasize);
5648 if (needs_control_head)
5649 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5650 }
5651 else
5652 {
5653 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5654 {
5655 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5656 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5657 if (needs_control_head)
5658 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5659 }
5660 else
5661 {
5662 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5663 if (needs_control_head)
5664 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5665 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5666 }
5667 }
5668
5669 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5670 {
5671 /* We know that STR_PTR was stored on the top of the stack. */
5672 if (conditional)
5673 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5674 else if (bra == OP_BRAZERO)
5675 {
5676 if (framesize < 0)
5677 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5678 else
5679 {
5680 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5681 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5682 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5683 }
5684 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5685 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5686 }
5687 else if (framesize >= 0)
5688 {
5689 /* For OP_BRA and OP_BRAMINZERO. */
5690 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5691 }
5692 }
5693 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5694
5695 compile_backtrackingpath(common, altbacktrack.top);
5696 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5697 {
5698 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5699 {
5700 common->local_exit = save_local_exit;
5701 common->quit_label = save_quit_label;
5702 common->quit = save_quit;
5703 }
5704 common->positive_assert = save_positive_assert;
5705 common->then_trap = save_then_trap;
5706 common->accept_label = save_accept_label;
5707 common->positive_assert_quit = save_positive_assert_quit;
5708 common->accept = save_accept;
5709 return NULL;
5710 }
5711 set_jumps(altbacktrack.topbacktracks, LABEL());
5712
5713 if (*cc != OP_ALT)
5714 break;
5715
5716 ccbegin = cc;
5717 cc += GET(cc, 1);
5718 }
5719
5720 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5721 {
5722 SLJIT_ASSERT(common->positive_assert_quit == NULL);
5723 /* Makes the check less complicated below. */
5724 common->positive_assert_quit = common->quit;
5725 }
5726
5727 /* None of them matched. */
5728 if (common->positive_assert_quit != NULL)
5729 {
5730 jump = JUMP(SLJIT_JUMP);
5731 set_jumps(common->positive_assert_quit, LABEL());
5732 SLJIT_ASSERT(framesize != no_stack);
5733 if (framesize < 0)
5734 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5735 else
5736 {
5737 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5738 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5739 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5740 }
5741 JUMPHERE(jump);
5742 }
5743
5744 if (needs_control_head)
5745 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5746
5747 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5748 {
5749 /* Assert is failed. */
5750 if (conditional || bra == OP_BRAZERO)
5751 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5752
5753 if (framesize < 0)
5754 {
5755 /* The topmost item should be 0. */
5756 if (bra == OP_BRAZERO)
5757 {
5758 if (extrasize == 2)
5759 free_stack(common, 1);
5760 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5761 }
5762 else
5763 free_stack(common, extrasize);
5764 }
5765 else
5766 {
5767 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5768 /* The topmost item should be 0. */
5769 if (bra == OP_BRAZERO)
5770 {
5771 free_stack(common, framesize + extrasize - 1);
5772 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5773 }
5774 else
5775 free_stack(common, framesize + extrasize);
5776 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5777 }
5778 jump = JUMP(SLJIT_JUMP);
5779 if (bra != OP_BRAZERO)
5780 add_jump(compiler, target, jump);
5781
5782 /* Assert is successful. */
5783 set_jumps(tmp, LABEL());
5784 if (framesize < 0)
5785 {
5786 /* We know that STR_PTR was stored on the top of the stack. */
5787 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5788 /* Keep the STR_PTR on the top of the stack. */
5789 if (bra == OP_BRAZERO)
5790 {
5791 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5792 if (extrasize == 2)
5793 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5794 }
5795 else if (bra == OP_BRAMINZERO)
5796 {
5797 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5798 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5799 }
5800 }
5801 else
5802 {
5803 if (bra == OP_BRA)
5804 {
5805 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5806 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5807 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5808 }
5809 else
5810 {
5811 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5812 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5813 if (extrasize == 2)
5814 {
5815 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5816 if (bra == OP_BRAMINZERO)
5817 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5818 }
5819 else
5820 {
5821 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5822 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5823 }
5824 }
5825 }
5826
5827 if (bra == OP_BRAZERO)
5828 {
5829 backtrack->matchingpath = LABEL();
5830 SET_LABEL(jump, backtrack->matchingpath);
5831 }
5832 else if (bra == OP_BRAMINZERO)
5833 {
5834 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5835 JUMPHERE(brajump);
5836 if (framesize >= 0)
5837 {
5838 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5839 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5840 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5841 }
5842 set_jumps(backtrack->common.topbacktracks, LABEL());
5843 }
5844 }
5845 else
5846 {
5847 /* AssertNot is successful. */
5848 if (framesize < 0)
5849 {
5850 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5851 if (bra != OP_BRA)
5852 {
5853 if (extrasize == 2)
5854 free_stack(common, 1);
5855 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5856 }
5857 else
5858 free_stack(common, extrasize);
5859 }
5860 else
5861 {
5862 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5863 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5864 /* The topmost item should be 0. */
5865 if (bra != OP_BRA)
5866 {
5867 free_stack(common, framesize + extrasize - 1);
5868 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5869 }
5870 else
5871 free_stack(common, framesize + extrasize);
5872 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5873 }
5874
5875 if (bra == OP_BRAZERO)
5876 backtrack->matchingpath = LABEL();
5877 else if (bra == OP_BRAMINZERO)
5878 {
5879 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5880 JUMPHERE(brajump);
5881 }
5882
5883 if (bra != OP_BRA)
5884 {
5885 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5886 set_jumps(backtrack->common.topbacktracks, LABEL());
5887 backtrack->common.topbacktracks = NULL;
5888 }
5889 }
5890
5891 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5892 {
5893 common->local_exit = save_local_exit;
5894 common->quit_label = save_quit_label;
5895 common->quit = save_quit;
5896 }
5897 common->positive_assert = save_positive_assert;
5898 common->then_trap = save_then_trap;
5899 common->accept_label = save_accept_label;
5900 common->positive_assert_quit = save_positive_assert_quit;
5901 common->accept = save_accept;
5902 return cc + 1 + LINK_SIZE;
5903 }
5904
5905 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5906 {
5907 int condition = FALSE;
5908 pcre_uchar *slotA = name_table;
5909 pcre_uchar *slotB;
5910 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5911 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5912 sljit_sw no_capture;
5913 int i;
5914
5915 locals += refno & 0xff;
5916 refno >>= 8;
5917 no_capture = locals[1];
5918
5919 for (i = 0; i < name_count; i++)
5920 {
5921 if (GET2(slotA, 0) == refno) break;
5922 slotA += name_entry_size;
5923 }
5924
5925 if (i < name_count)
5926 {
5927 /* Found a name for the number - there can be only one; duplicate names
5928 for different numbers are allowed, but not vice versa. First scan down
5929 for duplicates. */
5930
5931 slotB = slotA;
5932 while (slotB > name_table)
5933 {
5934 slotB -= name_entry_size;
5935 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5936 {
5937 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5938 if (condition) break;
5939 }
5940 else break;
5941 }
5942
5943 /* Scan up for duplicates */
5944 if (!condition)
5945 {
5946 slotB = slotA;
5947 for (i++; i < name_count; i++)
5948 {
5949 slotB += name_entry_size;
5950 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5951 {
5952 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5953 if (condition) break;
5954 }
5955 else break;
5956 }
5957 }
5958 }
5959 return condition;
5960 }
5961
5962 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5963 {
5964 int condition = FALSE;
5965 pcre_uchar *slotA = name_table;
5966 pcre_uchar *slotB;
5967 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5968 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5969 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5970 sljit_uw i;
5971
5972 for (i = 0; i < name_count; i++)
5973 {
5974 if (GET2(slotA, 0) == recno) break;
5975 slotA += name_entry_size;
5976 }
5977
5978 if (i < name_count)
5979 {
5980 /* Found a name for the number - there can be only one; duplicate
5981 names for different numbers are allowed, but not vice versa. First
5982 scan down for duplicates. */
5983
5984 slotB = slotA;
5985 while (slotB > name_table)
5986 {
5987 slotB -= name_entry_size;
5988 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5989 {
5990 condition = GET2(slotB, 0) == group_num;
5991 if (condition) break;
5992 }
5993 else break;
5994 }
5995
5996 /* Scan up for duplicates */
5997 if (!condition)
5998 {
5999 slotB = slotA;
6000 for (i++; i < name_count; i++)
6001 {
6002 slotB += name_entry_size;
6003 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
6004 {
6005 condition = GET2(slotB, 0) == group_num;
6006 if (condition) break;
6007 }
6008 else break;
6009 }
6010 }
6011 }
6012 return condition;
6013 }
6014
6015 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6016 {
6017 DEFINE_COMPILER;
6018 int stacksize;
6019
6020 if (framesize < 0)
6021 {
6022 if (framesize == no_frame)
6023 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6024 else
6025 {
6026 stacksize = needs_control_head ? 1 : 0;
6027 if (ket != OP_KET || has_alternatives)
6028 stacksize++;
6029 free_stack(common, stacksize);
6030 }
6031
6032 if (needs_control_head)
6033 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6034
6035 /* TMP2 which is set here used by OP_KETRMAX below. */
6036 if (ket == OP_KETRMAX)
6037 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6038 else if (ket == OP_KETRMIN)
6039 {
6040 /* Move the STR_PTR to the private_data_ptr. */
6041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6042 }
6043 }
6044 else
6045 {
6046 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6047 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6048 if (needs_control_head)
6049 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6050
6051 if (ket == OP_KETRMAX)
6052 {
6053 /* TMP2 which is set here used by OP_KETRMAX below. */
6054 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6055 }
6056 }
6057 if (needs_control_head)
6058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6059 }
6060
6061 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6062 {
6063 DEFINE_COMPILER;
6064
6065 if (common->capture_last_ptr != 0)
6066 {
6067 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6068 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6069 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6070 stacksize++;
6071 }
6072 if (common->optimized_cbracket[offset >> 1] == 0)
6073 {
6074 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6075 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6076 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6077 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6078 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6079 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6080 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6081 stacksize += 2;
6082 }
6083 return stacksize;
6084 }
6085
/*
  Handling bracketed expressions is probably the most complex part.

  Stack layout naming characters:
    S - Push the current STR_PTR
    0 - Push a 0 (NULL)
    A - Push the current STR_PTR. Needed for restoring the STR_PTR
        before the next alternative. Not pushed if there are no alternatives.
    M - Any values pushed by the current alternative. Can be empty, or anything.
    C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
    L - Push the previous local (pointed by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times

  The following list shows the regular expression templates, their PCRE byte codes
  and stack layout supported by pcre-sljit.

  (?:)                     OP_BRA     | OP_KET                A M
  ()                       OP_CBRA    | OP_KET                C M
  (?:)+                    OP_BRA     | OP_KETRMAX        0   A M S   ( A M S )*
                           OP_SBRA    | OP_KETRMAX        0   L M S   ( L M S )*
  (?:)+?                   OP_BRA     | OP_KETRMIN        0   A M S   ( A M S )*
                           OP_SBRA    | OP_KETRMIN        0   L M S   ( L M S )*
  ()+                      OP_CBRA    | OP_KETRMAX        0   C M S   ( C M S )*
                           OP_SCBRA   | OP_KETRMAX        0   C M S   ( C M S )*
  ()+?                     OP_CBRA    | OP_KETRMIN        0   C M S   ( C M S )*
                           OP_SCBRA   | OP_KETRMIN        0   C M S   ( C M S )*
  (?:)?    OP_BRAZERO    | OP_BRA     | OP_KET            S ( A M 0 )
  (?:)??   OP_BRAMINZERO | OP_BRA     | OP_KET            S ( A M 0 )
  ()?      OP_BRAZERO    | OP_CBRA    | OP_KET            S ( C M 0 )
  ()??     OP_BRAMINZERO | OP_CBRA    | OP_KET            S ( C M 0 )
  (?:)*    OP_BRAZERO    | OP_BRA     | OP_KETRMAX      S 0 ( A M S )*
           OP_BRAZERO    | OP_SBRA    | OP_KETRMAX      S 0 ( L M S )*
  (?:)*?   OP_BRAMINZERO | OP_BRA     | OP_KETRMIN      S 0 ( A M S )*
           OP_BRAMINZERO | OP_SBRA    | OP_KETRMIN      S 0 ( L M S )*
  ()*      OP_BRAZERO    | OP_CBRA    | OP_KETRMAX      S 0 ( C M S )*
           OP_BRAZERO    | OP_SCBRA   | OP_KETRMAX      S 0 ( C M S )*
  ()*?     OP_BRAMINZERO | OP_CBRA    | OP_KETRMIN      S 0 ( C M S )*
           OP_BRAMINZERO | OP_SCBRA   | OP_KETRMIN      S 0 ( C M S )*


  Stack layout naming characters:
    A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
    M - Any values pushed by the current alternative. Can be empty, or anything.

  The next list shows the possible content of a bracket:
  (|)     OP_*BRA    | OP_ALT ...         M A
  (?()|)  OP_*COND   | OP_ALT             M A
  (?>|)   OP_ONCE    | OP_ALT ...         [stack trace] M A
  (?>|)   OP_ONCE_NC | OP_ALT ...         [stack trace] M A
                                          Or nothing, if trace is unnecessary
*/
6139
6140 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6141 {
6142 DEFINE_COMPILER;
6143 backtrack_common *backtrack;
6144 pcre_uchar opcode;
6145 int private_data_ptr = 0;
6146 int offset = 0;
6147 int stacksize;
6148 int repeat_ptr = 0, repeat_length = 0;
6149 int repeat_type = 0, repeat_count = 0;
6150 pcre_uchar *ccbegin;
6151 pcre_uchar *matchingpath;
6152 pcre_uchar bra = OP_BRA;
6153 pcre_uchar ket;
6154 assert_backtrack *assert;
6155 BOOL has_alternatives;
6156 BOOL needs_control_head = FALSE;
6157 struct sljit_jump *jump;
6158 struct sljit_jump *skip;
6159 struct sljit_label *rmax_label = NULL;
6160 struct sljit_jump *braminzero = NULL;
6161
6162 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6163
6164 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6165 {
6166 bra = *cc;
6167 cc++;
6168 opcode = *cc;
6169 }
6170
6171 opcode = *cc;
6172 ccbegin = cc;
6173 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6174 ket = *matchingpath;
6175 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6176 {
6177 repeat_ptr = PRIVATE_DATA(matchingpath);
6178 repeat_length = PRIVATE_DATA(matchingpath + 1);
6179 repeat_type = PRIVATE_DATA(matchingpath + 2);
6180 repeat_count = PRIVATE_DATA(matchingpath + 3);
6181 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6182 if (repeat_type == OP_UPTO)
6183 ket = OP_KETRMAX;
6184 if (repeat_type == OP_MINUPTO)
6185 ket = OP_KETRMIN;
6186 }
6187
6188 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6189 {
6190 /* Drop this bracket_backtrack. */
6191 parent->top = backtrack->prev;
6192 return matchingpath + 1 + LINK_SIZE + repeat_length;
6193 }
6194
6195 matchingpath = ccbegin + 1 + LINK_SIZE;
6196 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6197 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6198 cc += GET(cc, 1);
6199
6200 has_alternatives = *cc == OP_ALT;
6201 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6202 {
6203 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
6204 if (*matchingpath == OP_NRREF)
6205 {
6206 stacksize = GET2(matchingpath, 1);
6207 if (common->currententry == NULL || stacksize == RREF_ANY)
6208 has_alternatives = FALSE;
6209 else if (common->currententry->start == 0)
6210 has_alternatives = stacksize != 0;
6211 else
6212 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6213 }
6214 }
6215
6216 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6217 opcode = OP_SCOND;
6218 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6219 opcode = OP_ONCE;
6220
6221 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6222 {
6223 /* Capturing brackets has a pre-allocated space. */
6224 offset = GET2(ccbegin, 1 + LINK_SIZE);
6225 if (common->optimized_cbracket[offset] == 0)
6226 {
6227 private_data_ptr = OVECTOR_PRIV(offset);
6228 offset <<= 1;
6229 }
6230 else
6231 {
6232 offset <<= 1;
6233 private_data_ptr = OVECTOR(offset);
6234 }
6235 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6236 matchingpath += IMM2_SIZE;
6237 }
6238 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6239 {
6240 /* Other brackets simply allocate the next entry. */
6241 private_data_ptr = PRIVATE_DATA(ccbegin);
6242 SLJIT_ASSERT(private_data_ptr != 0);
6243 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6244 if (opcode == OP_ONCE)
6245 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6246 }
6247
6248 /* Instructions before the first alternative. */
6249 stacksize = 0;
6250 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6251 stacksize++;
6252 if (bra == OP_BRAZERO)
6253 stacksize++;
6254
6255 if (stacksize > 0)
6256 allocate_stack(common, stacksize);
6257
6258 stacksize = 0;
6259 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6260 {
6261 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6262 stacksize++;
6263 }
6264
6265 if (bra == OP_BRAZERO)
6266 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6267
6268 if (bra == OP_BRAMINZERO)
6269 {
6270 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6271 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6272 if (ket != OP_KETRMIN)
6273 {
6274 free_stack(common, 1);
6275 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6276 }
6277 else
6278 {
6279 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6280 {
6281 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6282 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6283 /* Nothing stored during the first run. */
6284 skip = JUMP(SLJIT_JUMP);
6285 JUMPHERE(jump);
6286 /* Checking zero-length iteration. */
6287 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6288 {
6289 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6290 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6291 }
6292 else
6293 {
6294 /* Except when the whole stack frame must be saved. */
6295 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6296 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6297 }
6298 JUMPHERE(skip);
6299 }
6300 else
6301 {
6302 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6303 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6304 JUMPHERE(jump);
6305 }
6306 }
6307 }
6308
6309 if (repeat_type != 0)
6310 {
6311 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6312 if (repeat_type == OP_EXACT)
6313 rmax_label = LABEL();
6314 }
6315
6316 if (ket == OP_KETRMIN)
6317 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6318
6319 if (ket == OP_KETRMAX)
6320 {
6321 rmax_label = LABEL();
6322 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6323 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
6324 }
6325
6326 /* Handling capturing brackets and alternatives. */
6327 if (opcode == OP_ONCE)
6328 {
6329 stacksize = 0;
6330 if (needs_control_head)
6331 {
6332 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6333 stacksize++;
6334 }
6335
6336 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6337 {
6338 /* Neither capturing brackets nor recursions are found in the block. */
6339 if (ket == OP_KETRMIN)
6340 {
6341 stacksize += 2;
6342 if (!needs_control_head)
6343 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6344 }
6345 else
6346 {
6347 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6348 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6349 if (ket == OP_KETRMAX || has_alternatives)
6350 stacksize++;
6351 }
6352
6353 if (stacksize > 0)
6354 allocate_stack(common, stacksize);
6355
6356 stacksize = 0;
6357 if (needs_control_head)
6358 {
6359 stacksize++;
6360 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6361 }
6362
6363 if (ket == OP_KETRMIN)
6364 {
6365 if (needs_control_head)
6366 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6367 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6368 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6369 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6370 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6371 }
6372 else if (ket == OP_KETRMAX || has_alternatives)
6373 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6374 }
6375 else
6376 {
6377 if (ket != OP_KET || has_alternatives)
6378 stacksize++;
6379
6380 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6381 allocate_stack(common, stacksize);
6382
6383 if (needs_control_head)
6384 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6385
6386 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6387 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6388
6389 stacksize = needs_control_head ? 1 : 0;
6390 if (ket != OP_KET || has_alternatives)
6391 {
6392 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6393 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6394 stacksize++;
6395 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6396 }
6397 else
6398 {
6399 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6400 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6401 }
6402 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6403 }
6404 }
6405 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6406 {
6407 /* Saving the previous values. */
6408 if (common->optimized_cbracket[offset >> 1] != 0)
6409 {
6410 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6411 allocate_stack(common, 2);
6412 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6413 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6414 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6416 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6417 }
6418 else
6419 {
6420 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6421 allocate_stack(common, 1);
6422 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6423 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6424 }
6425 }
6426 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6427 {
6428 /* Saving the previous value. */
6429 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6430 allocate_stack(common, 1);
6431 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6432 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6433 }
6434 else if (has_alternatives)
6435 {
6436 /* Pushing the starting string pointer. */
6437 allocate_stack(common, 1);
6438 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6439 }
6440
6441 /* Generating code for the first alternative. */
6442 if (opcode == OP_COND || opcode == OP_SCOND)
6443 {
6444 if (*matchingpath == OP_CREF)
6445 {
6446 SLJIT_ASSERT(has_alternatives);
6447 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6448 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6449 matchingpath += 1 + IMM2_SIZE;
6450 }
6451 else if (*matchingpath == OP_NCREF)
6452 {
6453 SLJIT_ASSERT(has_alternatives);
6454 stacksize = GET2(matchingpath, 1);
6455 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6456
6457 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6458 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6459 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6460 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6461 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6462 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6463 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6464 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6465 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6466
6467 JUMPHERE(jump);
6468 matchingpath += 1 + IMM2_SIZE;
6469 }
6470 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6471 {
6472 /* Never has other case. */
6473 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6474
6475 stacksize = GET2(matchingpath, 1);
6476 if (common->currententry == NULL)
6477 stacksize = 0;
6478 else if (stacksize == RREF_ANY)
6479 stacksize = 1;
6480 else if (common->currententry->start == 0)
6481 stacksize = stacksize == 0;
6482 else
6483 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6484
6485 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6486 {
6487 SLJIT_ASSERT(!has_alternatives);
6488 if (stacksize != 0)
6489 matchingpath += 1 + IMM2_SIZE;
6490 else
6491 {
6492 if (*cc == OP_ALT)
6493 {
6494 matchingpath = cc + 1 + LINK_SIZE;
6495 cc += GET(cc, 1);
6496 }
6497 else
6498 matchingpath = cc;
6499 }
6500 }
6501 else
6502 {
6503 SLJIT_ASSERT(has_alternatives);
6504
6505 stacksize = GET2(matchingpath, 1);
6506 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6508 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6509 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6510 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6511 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6512 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6513 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6514 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6515 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6516 matchingpath += 1 + IMM2_SIZE;
6517 }
6518 }
6519 else
6520 {
6521 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6522 /* Similar code as PUSH_BACKTRACK macro. */
6523 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6524 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6525 return NULL;
6526 memset(assert, 0, sizeof(assert_backtrack));
6527 assert->common.cc = matchingpath;
6528 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6529 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6530 }
6531 }
6532
6533 compile_matchingpath(common, matchingpath, cc, backtrack);
6534 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6535 return NULL;
6536
6537 if (opcode == OP_ONCE)
6538 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6539
6540 stacksize = 0;
6541 if (repeat_type == OP_MINUPTO)
6542 {
6543 /* We need to preserve the counter. TMP2 will be used below. */
6544 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6545 stacksize++;
6546 }
6547 if (ket != OP_KET || bra != OP_BRA)
6548 stacksize++;
6549 if (offset != 0)
6550 {
6551 if (common->capture_last_ptr != 0)
6552 stacksize++;
6553 if (common->optimized_cbracket[offset >> 1] == 0)
6554 stacksize += 2;
6555 }
6556 if (has_alternatives && opcode != OP_ONCE)
6557 stacksize++;
6558
6559 if (stacksize > 0)
6560 allocate_stack(common, stacksize);
6561
6562 stacksize = 0;
6563 if (repeat_type == OP_MINUPTO)
6564 {
6565 /* TMP2 was set above. */
6566 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
6567 stacksize++;
6568 }
6569
6570 if (ket != OP_KET || bra != OP_BRA)
6571 {
6572 if (ket != OP_KET)
6573 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6574 else
6575 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6576 stacksize++;
6577 }
6578
6579 if (offset != 0)
6580 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6581
6582 if (has_alternatives)
6583 {
6584 if (opcode != OP_ONCE)
6585 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6586 if (ket != OP_KETRMAX)
6587 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6588 }
6589
6590 /* Must be after the matchingpath label. */
6591 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6592 {
6593 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6594 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6595 }
6596
6597 if (ket == OP_KETRMAX)
6598 {
6599 if (repeat_type != 0)
6600 {
6601 if (has_alternatives)
6602 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6603 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6604 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6605 /* Drop STR_PTR for greedy plus quantifier. */
6606 if (opcode != OP_ONCE)
6607 free_stack(common, 1);
6608 }
6609 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
6610 {
6611 if (has_alternatives)
6612 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6613 /* Checking zero-length iteration. */
6614 if (opcode != OP_ONCE)
6615 {
6616 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmax_label);
6617 /* Drop STR_PTR for greedy plus quantifier. */
6618 if (bra != OP_BRAZERO)
6619 free_stack(common, 1);
6620 }
6621 else
6622 /* TMP2 must contain the starting STR_PTR. */
6623 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
6624 }
6625 else
6626 JUMPTO(SLJIT_JUMP, rmax_label);
6627 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6628 }
6629
6630 if (repeat_type == OP_EXACT)
6631 {
6632 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, 1);
6633 JUMPTO(SLJIT_C_NOT_ZERO, rmax_label);
6634 }
6635 else if (repeat_type == OP_UPTO)
6636 {
6637 /* We need to preserve the counter. */
6638 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr);
6639 allocate_stack(common, 1);
6640 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6641 }
6642
6643 if (bra == OP_BRAZERO)
6644 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6645
6646 if (bra == OP_BRAMINZERO)
6647 {
6648 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6649 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6650 if (braminzero != NULL)
6651 {
6652 JUMPHERE(braminzero);
6653 /* We need to release the end pointer to perform the
6654 backtrack for the zero-length iteration. When
6655 framesize is < 0, OP_ONCE will do the release itself. */
6656 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6657 {
6658 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6659 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6660 }
6661 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6662 free_stack(common, 1);
6663 }
6664 /* Continue to the normal backtrack. */
6665 }
6666
6667 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6668 count_match(common);
6669
6670 /* Skip the other alternatives. */
6671 while (*cc == OP_ALT)
6672 cc += GET(cc, 1);
6673 cc += 1 + LINK_SIZE;
6674
6675 /* Temporarily encoding the needs_control_head in framesize. */
6676 if (opcode == OP_ONCE)
6677 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6678 return cc + repeat_length;
6679 }
6680
6681 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6682 {
6683 DEFINE_COMPILER;
6684 backtrack_common *backtrack;
6685 pcre_uchar opcode;
6686 int private_data_ptr;
6687 int cbraprivptr = 0;
6688 BOOL needs_control_head;
6689 int framesize;
6690 int stacksize;
6691 int offset = 0;
6692 BOOL zero = FALSE;
6693 pcre_uchar *ccbegin = NULL;
6694 int stack; /* Also contains the offset of control head. */
6695 struct sljit_label *loop = NULL;
6696 struct jump_list *emptymatch = NULL;
6697
6698 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6699 if (*cc == OP_BRAPOSZERO)
6700 {
6701 zero = TRUE;
6702 cc++;
6703 }
6704
6705 opcode = *cc;
6706 private_data_ptr = PRIVATE_DATA(cc);
6707 SLJIT_ASSERT(private_data_ptr != 0);
6708 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6709 switch(opcode)
6710 {
6711 case OP_BRAPOS:
6712 case OP_SBRAPOS:
6713 ccbegin = cc + 1 + LINK_SIZE;
6714 break;
6715
6716 case OP_CBRAPOS:
6717 case OP_SCBRAPOS:
6718 offset = GET2(cc, 1 + LINK_SIZE);
6719 /* This case cannot be optimized in the same was as
6720 normal capturing brackets. */
6721 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6722 cbraprivptr = OVECTOR_PRIV(offset);
6723 offset <<= 1;
6724 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6725 break;
6726
6727 default:
6728 SLJIT_ASSERT_STOP();
6729 break;
6730 }
6731
6732 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6733 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6734 if (framesize < 0)
6735 {
6736 if (offset != 0)
6737 {
6738 stacksize = 2; </