/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1424 - (show annotations)
Tue Dec 31 11:22:31 2013 UTC (5 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 322489 byte(s)
JIT: Optimize xclass character read.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block for the generated code: the jit_function type declared
   in this file takes a pointer to this structure as its only parameter.
   The pointer members are grouped first, the scalar members follow.
   NOTE(review): the meaning of the individual members is not visible in
   this part of the file; the names suggest subject string bounds, the
   output offset vector and the PCRE_NOT* match options - confirm at the
   call site. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Per compile mode data: executable_funcs and executable_sizes both have
   JIT_NUMBER_OF_COMPILE_MODES elements, so each compile mode has its own
   function pointer and size slot. A callback / userdata pair and two
   32 bit values (top_bracket, limit_match) are stored next to them.
   NOTE(review): presumably this is the object attached to a studied
   pattern - confirm where it is allocated. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 pcre_uint32 limit_match;
186 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187 } executable_functions;
188
/* Node of a singly linked list of sljit jumps. */
189 typedef struct jump_list {
190 struct sljit_jump *jump;
191 struct jump_list *next;
192 } jump_list;
193
/* Node of a singly linked list of stubs. Each stub is described by the
   jump that enters it (start) and a label (quit).
   NOTE(review): quit is presumably the place where execution continues
   after the stub - confirm in the stub emitting code. */
194 typedef struct stub_list {
195 struct sljit_jump *start;
196 struct sljit_label *quit;
197 struct stub_list *next;
198 } stub_list;
199
/* Negative sentinel values.
   NOTE(review): presumably these are special frame size results; the
   framesize members of the backtrack structures in this file are
   documented as "less than 0 if a frame is not needed" - confirm in
   get_framesize. */
200 enum frame_types {
201 no_frame = -1,
202 no_stack = -2
203 };
204
/* Type tags with the values 0 and 1.
   NOTE(review): presumably they identify the kind of an entry on the
   control verb chain (see control_head_ptr in compiler_common) -
   confirm where they are pushed. */
205 enum control_types {
206 type_mark = 0,
207 type_then_trap = 1
208 };
209
/* Function pointer type using the SLJIT_CALL calling convention: takes a
   pointer to a jit_arguments block and returns an int. */
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type of the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the arguments for compile_backtrackingpath. It must be the first member
215 of its descendants (every *_backtrack structure in this file starts
   with a backtrack_common member named common). */
216 typedef struct backtrack_common {
217 /* Concatenation stack. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
/* Backtrack data with the extra members needed for asserts (as the name
   suggests). Starts with the mandatory backtrack_common member. */
227 typedef struct assert_backtrack {
228 backtrack_common common;
/* NOTE(review): presumably the jumps taken when the assert condition
   fails - confirm in the assert code generator. */
229 jump_list *condfailed;
230 /* Less than 0 if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
/* Backtrack data for brackets. Starts with the mandatory
   backtrack_common member. */
238 typedef struct bracket_backtrack {
239 backtrack_common common;
240 /* Where to continue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. Only one member of the
   union is meaningful at a time. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. Less than 0 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
/* Backtrack data for the *POS bracket forms (as the name suggests).
   Starts with the mandatory backtrack_common member. */
258 typedef struct bracketpos_backtrack {
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
/* Backtrack data that adds a single matching path label to
   backtrack_common.
   NOTE(review): presumably used for OP_BRAMINZERO - confirm. */
268 typedef struct braminzero_backtrack {
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
/* Backtrack data for iterators: backtrack_common plus the label of the
   next iteration. */
273 typedef struct iterator_backtrack {
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
/* Node of a singly linked list describing one recursion target. */
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls until the function is created. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 sljit_sw start;
287 } recurse_entry;
288
/* Backtrack data for recursion: backtrack_common plus a flag.
   NOTE(review): inlined_pattern presumably tells that the recursed
   pattern was compiled in place instead of being called - confirm. */
289 typedef struct recurse_backtrack {
290 backtrack_common common;
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
/* OP_THEN_TRAP has the value OP_TABLE_LENGTH.
   NOTE(review): presumably a pseudo opcode numbered right after the real
   opcodes - confirm against the opcode table. */
294 #define OP_THEN_TRAP OP_TABLE_LENGTH
295
/* Backtrack data for a then trap. Starts with the mandatory
   backtrack_common member. */
296 typedef struct then_trap_backtrack {
297 backtrack_common common;
298 /* If then_trap is not NULL, this structure contains the real
299 then_trap for the backtracking path. */
300 struct then_trap_backtrack *then_trap;
301 /* Points to the starting opcode. */
302 sljit_sw start;
303 /* Exit point for the then opcodes of this alternative. */
304 jump_list *quit;
305 /* Frame size of the current alternative. */
306 int framesize;
307 } then_trap_backtrack;
308
309 #define MAX_RANGE_SIZE 4
310
/* State shared by the code generator functions: they receive it as
   their "common" argument. It groups the sljit compiler, the byte code
   being compiled, offsets of the private data words, pattern options,
   and the labels / jump lists collected during code generation. */
311 typedef struct compiler_common {
312 /* The sljit generic compiler. */
313 struct sljit_compiler *compiler;
314 /* First byte code. */
315 pcre_uchar *start;
316 /* Maps private data offset to each opcode. Indexed by the distance of
   the opcode from start (see the PRIVATE_DATA macro). */
317 sljit_si *private_data_ptrs;
318 /* Tells whether the capturing bracket is optimized. */
319 pcre_uint8 *optimized_cbracket;
320 /* Tells whether the starting offset is a target of then. */
321 pcre_uint8 *then_offsets;
322 /* Current position where a THEN must jump. */
323 then_trap_backtrack *then_trap;
324 /* Starting offset of private data for capturing brackets. */
325 int cbra_ptr;
326 /* Output vector starting point. Must be divisible by 2. */
327 int ovector_start;
328 /* Last known position of the requested byte. */
329 int req_char_ptr;
330 /* Head of the last recursion. */
331 int recursive_head_ptr;
332 /* First inspected character for partial matching. */
333 int start_used_ptr;
334 /* Starting pointer for partial soft matches. */
335 int hit_start;
336 /* End pointer of the first line. */
337 int first_line_end;
338 /* Points to the marked string. */
339 int mark_ptr;
340 /* Recursive control verb management chain. */
341 int control_head_ptr;
342 /* Points to the last matched capture block index. */
343 int capture_last_ptr;
344 /* Points to the starting position of the current match. */
345 int start_ptr;
346
347 /* Flipped and lower case tables. */
348 const pcre_uint8 *fcc;
349 sljit_sw lcc;
350 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351 int mode;
352 /* \K is found in the pattern. */
353 BOOL has_set_som;
354 /* (*SKIP:arg) is found in the pattern. */
355 BOOL has_skip_arg;
356 /* (*THEN) is found in the pattern. */
357 BOOL has_then;
358 /* Needs to know the start position anytime. */
359 BOOL needs_start_ptr;
360 /* Currently in recurse or negative assert. */
361 BOOL local_exit;
362 /* Currently in a positive assert. */
363 BOOL positive_assert;
364 /* Newline control. */
365 int nltype;
366 pcre_uint32 nlmax;
367 int newline;
368 int bsr_nltype;
369 pcre_uint32 bsr_nlmax;
370 /* Dollar endonly. */
371 int endonly;
372 /* Tables. */
373 sljit_sw ctypes;
374 /* Named capturing brackets. */
375 pcre_uchar *name_table;
376 sljit_sw name_count;
377 sljit_sw name_entry_size;
378
379 /* Labels and jump lists. */
380 struct sljit_label *partialmatchlabel;
381 struct sljit_label *quit_label;
382 struct sljit_label *forced_quit_label;
383 struct sljit_label *accept_label;
384 stub_list *stubs;
385 recurse_entry *entries;
386 recurse_entry *currententry;
387 jump_list *partialmatch;
388 jump_list *quit;
389 jump_list *positive_assert_quit;
390 jump_list *forced_quit;
391 jump_list *accept;
392 jump_list *calllimit;
393 jump_list *stackalloc;
394 jump_list *revertframes;
395 jump_list *wordboundary;
396 jump_list *anynewline;
397 jump_list *hspace;
398 jump_list *vspace;
399 jump_list *casefulcmp;
400 jump_list *caselesscmp;
401 jump_list *reset_match;
402 BOOL jscript_compat;
/* The members below exist only in builds with UTF / UCP support. */
403 #ifdef SUPPORT_UTF
404 BOOL utf;
405 #ifdef SUPPORT_UCP
406 BOOL use_ucp;
407 #endif
408 #ifdef COMPILE_PCRE8
409 jump_list *utfreadchar;
410 jump_list *utfreadchar16;
411 jump_list *utfreadtype8;
412 #endif
413 #endif /* SUPPORT_UTF */
414 #ifdef SUPPORT_UCP
415 jump_list *getucd;
416 #endif
417 } compiler_common;
418
419 /* For byte_sequence_compare. When SLJIT_UNALIGNED is enabled the
   structure also carries two identically laid out unions (c and oc).
   Each can be viewed as one int, one unsigned short, or as an array of
   code units whose element count depends on the library width (4, 2 or
   1 elements, plus a single byte view in the 8 bit build).
   NOTE(review): c and oc presumably hold the characters and their other
   case variants - confirm in byte_sequence_compare. */
420
421 typedef struct compare_context {
422 int length;
423 int sourcereg;
424 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
425 int ucharptr;
426 union {
427 sljit_si asint;
428 sljit_uh asushort;
429 #if defined COMPILE_PCRE8
430 sljit_ub asbyte;
431 sljit_ub asuchars[4];
432 #elif defined COMPILE_PCRE16
433 sljit_uh asuchars[2];
434 #elif defined COMPILE_PCRE32
435 sljit_ui asuchars[1];
436 #endif
437 } c;
438 union {
439 sljit_si asint;
440 sljit_uh asushort;
441 #if defined COMPILE_PCRE8
442 sljit_ub asbyte;
443 sljit_ub asuchars[4];
444 #elif defined COMPILE_PCRE16
445 sljit_uh asuchars[2];
446 #elif defined COMPILE_PCRE32
447 sljit_ui asuchars[1];
448 #endif
449 } oc;
450 #endif
451 } compare_context;
452
453 /* Undefine sljit macros. CMP is defined again among the shortcut
   macros of this file as a wrapper around sljit_emit_cmp. */
454 #undef CMP
455
456 /* Used for accessing the elements of the stack. Yields a negative byte
   offset: STACK(0) is -sizeof(sljit_sw), and every further element is
   one sljit_sw lower. */
457 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
458
/* Symbolic names for the sljit registers used by the code generator. */
459 #define TMP1 SLJIT_SCRATCH_REG1
460 #define TMP2 SLJIT_SCRATCH_REG3
461 #define TMP3 SLJIT_TEMPORARY_EREG2
462 #define STR_PTR SLJIT_SAVED_REG1
463 #define STR_END SLJIT_SAVED_REG2
464 #define STACK_TOP SLJIT_SCRATCH_REG2
465 #define STACK_LIMIT SLJIT_SAVED_REG3
466 #define ARGUMENTS SLJIT_SAVED_EREG1
467 #define COUNT_MATCH SLJIT_SAVED_EREG2
468 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
469
470 /* Local space layout. All values are byte offsets; the first five
   words have fixed positions. */
471 /* These two locals can be used by the current opcode. */
472 #define LOCALS0 (0 * sizeof(sljit_sw))
473 #define LOCALS1 (1 * sizeof(sljit_sw))
474 /* Two local variables for possessive quantifiers (char1 cannot use them). */
475 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
476 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
477 /* Max limit of recursions. */
478 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
479 /* The output vector is stored on the stack, and contains pointers
480 to characters. The vector data is divided into two groups: the first
481 group contains the start / end character pointers, and the second is
482 the start pointers when the end of the capturing group has not yet been
   reached. These macros expect a variable named common in scope. */
483 #define OVECTOR_START (common->ovector_start)
484 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
485 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset of the opcode at cc (indexed by cc - common->start). */
486 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
487
/* Select the sljit move operations that match the code unit type of the
   library being built (UB for the 8 bit, UH for the 16 bit and UI for
   the 32 bit library). Building without any of the three COMPILE_PCRE*
   macros is a compile time error. */
488 #if defined COMPILE_PCRE8
489 #define MOV_UCHAR SLJIT_MOV_UB
490 #define MOVU_UCHAR SLJIT_MOVU_UB
491 #elif defined COMPILE_PCRE16
492 #define MOV_UCHAR SLJIT_MOV_UH
493 #define MOVU_UCHAR SLJIT_MOVU_UH
494 #elif defined COMPILE_PCRE32
495 #define MOV_UCHAR SLJIT_MOV_UI
496 #define MOVU_UCHAR SLJIT_MOVU_UI
497 #else
498 #error Unsupported compiling mode
499 #endif
500
501 /* Shortcuts. DEFINE_COMPILER declares a local variable named compiler
   (taken from common->compiler); all the other macros below pass that
   variable to the corresponding sljit_* function, so they can only be
   used where such a variable is in scope. */
502 #define DEFINE_COMPILER \
503 struct sljit_compiler *compiler = common->compiler
504 #define OP1(op, dst, dstw, src, srcw) \
505 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
506 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
507 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
508 #define LABEL() \
509 sljit_emit_label(compiler)
510 #define JUMP(type) \
511 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
512 #define JUMPTO(type, label) \
513 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label and binds a previously emitted jump to it. */
514 #define JUMPHERE(jump) \
515 sljit_set_label((jump), sljit_emit_label(compiler))
516 #define SET_LABEL(jump, label) \
517 sljit_set_label((jump), (label))
518 #define CMP(type, src1, src1w, src2, src2w) \
519 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare and binds it to an already existing label. */
520 #define CMPTO(type, src1, src1w, src2, src2w, label) \
521 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
522 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
523 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
524 #define GET_LOCAL_BASE(dst, dstw, offset) \
525 sljit_get_local_base(compiler, (dst), (dstw), (offset))
526
/* NOTE(review): presumably the "no upper limit" argument of the character
   reader functions - confirm at the call sites. */
527 #define READ_CHAR_ANY 0x7fffffff
528
529 static pcre_uchar* bracketend(pcre_uchar* cc)
530 {
531 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
532 do cc += GET(cc, 1); while (*cc == OP_ALT);
533 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
534 cc += 1 + LINK_SIZE;
535 return cc;
536 }
537
/* Lookup table: ones_in_half_byte[v] is the number of set bits in the
   4 bit value v (0..15). The table is constant data, so it is declared
   const to keep it from being modified by accident. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
542
543 /* Functions which might need modification for every newly supported opcode:
544 next_opcode
545 check_opcode_types
546 set_private_data_ptrs
547 get_framesize
548 init_frame
549 get_private_data_copy_length
550 copy_private_data
551 compile_matchingpath
552 compile_backtrackingpath
553 */
554
555 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
556 {
557 SLJIT_UNUSED_ARG(common);
558 switch(*cc)
559 {
560 case OP_SOD:
561 case OP_SOM:
562 case OP_SET_SOM:
563 case OP_NOT_WORD_BOUNDARY:
564 case OP_WORD_BOUNDARY:
565 case OP_NOT_DIGIT:
566 case OP_DIGIT:
567 case OP_NOT_WHITESPACE:
568 case OP_WHITESPACE:
569 case OP_NOT_WORDCHAR:
570 case OP_WORDCHAR:
571 case OP_ANY:
572 case OP_ALLANY:
573 case OP_NOTPROP:
574 case OP_PROP:
575 case OP_ANYNL:
576 case OP_NOT_HSPACE:
577 case OP_HSPACE:
578 case OP_NOT_VSPACE:
579 case OP_VSPACE:
580 case OP_EXTUNI:
581 case OP_EODN:
582 case OP_EOD:
583 case OP_CIRC:
584 case OP_CIRCM:
585 case OP_DOLL:
586 case OP_DOLLM:
587 case OP_CRSTAR:
588 case OP_CRMINSTAR:
589 case OP_CRPLUS:
590 case OP_CRMINPLUS:
591 case OP_CRQUERY:
592 case OP_CRMINQUERY:
593 case OP_CRRANGE:
594 case OP_CRMINRANGE:
595 case OP_CRPOSSTAR:
596 case OP_CRPOSPLUS:
597 case OP_CRPOSQUERY:
598 case OP_CRPOSRANGE:
599 case OP_CLASS:
600 case OP_NCLASS:
601 case OP_REF:
602 case OP_REFI:
603 case OP_DNREF:
604 case OP_DNREFI:
605 case OP_RECURSE:
606 case OP_CALLOUT:
607 case OP_ALT:
608 case OP_KET:
609 case OP_KETRMAX:
610 case OP_KETRMIN:
611 case OP_KETRPOS:
612 case OP_REVERSE:
613 case OP_ASSERT:
614 case OP_ASSERT_NOT:
615 case OP_ASSERTBACK:
616 case OP_ASSERTBACK_NOT:
617 case OP_ONCE:
618 case OP_ONCE_NC:
619 case OP_BRA:
620 case OP_BRAPOS:
621 case OP_CBRA:
622 case OP_CBRAPOS:
623 case OP_COND:
624 case OP_SBRA:
625 case OP_SBRAPOS:
626 case OP_SCBRA:
627 case OP_SCBRAPOS:
628 case OP_SCOND:
629 case OP_CREF:
630 case OP_DNCREF:
631 case OP_RREF:
632 case OP_DNRREF:
633 case OP_DEF:
634 case OP_BRAZERO:
635 case OP_BRAMINZERO:
636 case OP_BRAPOSZERO:
637 case OP_PRUNE:
638 case OP_SKIP:
639 case OP_THEN:
640 case OP_COMMIT:
641 case OP_FAIL:
642 case OP_ACCEPT:
643 case OP_ASSERT_ACCEPT:
644 case OP_CLOSE:
645 case OP_SKIPZERO:
646 return cc + PRIV(OP_lengths)[*cc];
647
648 case OP_CHAR:
649 case OP_CHARI:
650 case OP_NOT:
651 case OP_NOTI:
652 case OP_STAR:
653 case OP_MINSTAR:
654 case OP_PLUS:
655 case OP_MINPLUS:
656 case OP_QUERY:
657 case OP_MINQUERY:
658 case OP_UPTO:
659 case OP_MINUPTO:
660 case OP_EXACT:
661 case OP_POSSTAR:
662 case OP_POSPLUS:
663 case OP_POSQUERY:
664 case OP_POSUPTO:
665 case OP_STARI:
666 case OP_MINSTARI:
667 case OP_PLUSI:
668 case OP_MINPLUSI:
669 case OP_QUERYI:
670 case OP_MINQUERYI:
671 case OP_UPTOI:
672 case OP_MINUPTOI:
673 case OP_EXACTI:
674 case OP_POSSTARI:
675 case OP_POSPLUSI:
676 case OP_POSQUERYI:
677 case OP_POSUPTOI:
678 case OP_NOTSTAR:
679 case OP_NOTMINSTAR:
680 case OP_NOTPLUS:
681 case OP_NOTMINPLUS:
682 case OP_NOTQUERY:
683 case OP_NOTMINQUERY:
684 case OP_NOTUPTO:
685 case OP_NOTMINUPTO:
686 case OP_NOTEXACT:
687 case OP_NOTPOSSTAR:
688 case OP_NOTPOSPLUS:
689 case OP_NOTPOSQUERY:
690 case OP_NOTPOSUPTO:
691 case OP_NOTSTARI:
692 case OP_NOTMINSTARI:
693 case OP_NOTPLUSI:
694 case OP_NOTMINPLUSI:
695 case OP_NOTQUERYI:
696 case OP_NOTMINQUERYI:
697 case OP_NOTUPTOI:
698 case OP_NOTMINUPTOI:
699 case OP_NOTEXACTI:
700 case OP_NOTPOSSTARI:
701 case OP_NOTPOSPLUSI:
702 case OP_NOTPOSQUERYI:
703 case OP_NOTPOSUPTOI:
704 cc += PRIV(OP_lengths)[*cc];
705 #ifdef SUPPORT_UTF
706 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
707 #endif
708 return cc;
709
710 /* Special cases. */
711 case OP_TYPESTAR:
712 case OP_TYPEMINSTAR:
713 case OP_TYPEPLUS:
714 case OP_TYPEMINPLUS:
715 case OP_TYPEQUERY:
716 case OP_TYPEMINQUERY:
717 case OP_TYPEUPTO:
718 case OP_TYPEMINUPTO:
719 case OP_TYPEEXACT:
720 case OP_TYPEPOSSTAR:
721 case OP_TYPEPOSPLUS:
722 case OP_TYPEPOSQUERY:
723 case OP_TYPEPOSUPTO:
724 return cc + PRIV(OP_lengths)[*cc] - 1;
725
726 case OP_ANYBYTE:
727 #ifdef SUPPORT_UTF
728 if (common->utf) return NULL;
729 #endif
730 return cc + 1;
731
732 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
733 case OP_XCLASS:
734 return cc + GET(cc, 1);
735 #endif
736
737 case OP_MARK:
738 case OP_PRUNE_ARG:
739 case OP_SKIP_ARG:
740 case OP_THEN_ARG:
741 return cc + 1 + 2 + cc[1];
742
743 default:
744 /* All opcodes are supported now! */
745 SLJIT_ASSERT_STOP();
746 return NULL;
747 }
748 }
749
750 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
751 {
752 int count;
753 pcre_uchar *slot;
754
755 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
756 while (cc < ccend)
757 {
758 switch(*cc)
759 {
760 case OP_SET_SOM:
761 common->has_set_som = TRUE;
762 cc += 1;
763 break;
764
765 case OP_REF:
766 case OP_REFI:
767 common->optimized_cbracket[GET2(cc, 1)] = 0;
768 cc += 1 + IMM2_SIZE;
769 break;
770
771 case OP_CBRAPOS:
772 case OP_SCBRAPOS:
773 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
774 cc += 1 + LINK_SIZE + IMM2_SIZE;
775 break;
776
777 case OP_COND:
778 case OP_SCOND:
779 /* Only AUTO_CALLOUT can insert this opcode. We do
780 not intend to support this case. */
781 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
782 return FALSE;
783 cc += 1 + LINK_SIZE;
784 break;
785
786 case OP_CREF:
787 common->optimized_cbracket[GET2(cc, 1)] = 0;
788 cc += 1 + IMM2_SIZE;
789 break;
790
791 case OP_DNREF:
792 case OP_DNREFI:
793 case OP_DNCREF:
794 count = GET2(cc, 1 + IMM2_SIZE);
795 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
796 while (count-- > 0)
797 {
798 common->optimized_cbracket[GET2(slot, 0)] = 0;
799 slot += common->name_entry_size;
800 }
801 cc += 1 + 2 * IMM2_SIZE;
802 break;
803
804 case OP_RECURSE:
805 /* Set its value only once. */
806 if (common->recursive_head_ptr == 0)
807 {
808 common->recursive_head_ptr = common->ovector_start;
809 common->ovector_start += sizeof(sljit_sw);
810 }
811 cc += 1 + LINK_SIZE;
812 break;
813
814 case OP_CALLOUT:
815 if (common->capture_last_ptr == 0)
816 {
817 common->capture_last_ptr = common->ovector_start;
818 common->ovector_start += sizeof(sljit_sw);
819 }
820 cc += 2 + 2 * LINK_SIZE;
821 break;
822
823 case OP_THEN_ARG:
824 common->has_then = TRUE;
825 common->control_head_ptr = 1;
826 /* Fall through. */
827
828 case OP_PRUNE_ARG:
829 common->needs_start_ptr = TRUE;
830 /* Fall through. */
831
832 case OP_MARK:
833 if (common->mark_ptr == 0)
834 {
835 common->mark_ptr = common->ovector_start;
836 common->ovector_start += sizeof(sljit_sw);
837 }
838 cc += 1 + 2 + cc[1];
839 break;
840
841 case OP_THEN:
842 common->has_then = TRUE;
843 common->control_head_ptr = 1;
844 /* Fall through. */
845
846 case OP_PRUNE:
847 case OP_SKIP:
848 common->needs_start_ptr = TRUE;
849 cc += 1;
850 break;
851
852 case OP_SKIP_ARG:
853 common->control_head_ptr = 1;
854 common->has_skip_arg = TRUE;
855 cc += 1 + 2 + cc[1];
856 break;
857
858 default:
859 cc = next_opcode(common, cc);
860 if (cc == NULL)
861 return FALSE;
862 break;
863 }
864 }
865 return TRUE;
866 }
867
868 static int get_class_iterator_size(pcre_uchar *cc)
869 {
870 switch(*cc)
871 {
872 case OP_CRSTAR:
873 case OP_CRPLUS:
874 return 2;
875
876 case OP_CRMINSTAR:
877 case OP_CRMINPLUS:
878 case OP_CRQUERY:
879 case OP_CRMINQUERY:
880 return 1;
881
882 case OP_CRRANGE:
883 case OP_CRMINRANGE:
884 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
885 return 0;
886 return 2;
887
888 default:
889 return 0;
890 }
891 }
892
/* Checks whether the bracket starting at begin is followed by identical
   copies of itself, and records such a repetition in
   common->private_data_ptrs.

   A record consists of three consecutive entries. The first one belongs
   to the code unit right after the opcode of a closing OP_KET (this is
   the entry set_private_data_ptrs tests when it reaches an OP_KET):
     [0] length of the byte code covered by the following copies
     [1] kind of the repeat: OP_EXACT, OP_UPTO or OP_MINUPTO
     [2] repeat count

   Returns TRUE if a repeat was recorded by an earlier call, if a bounded
   repeat was recorded and the bracket has no further exact copies, or if
   an exact repeat of at least three copies was recorded. Returns FALSE
   otherwise. */
893 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
894 {
895 pcre_uchar *end = bracketend(begin);
896 pcre_uchar *next;
897 pcre_uchar *next_end;
898 pcre_uchar *max_end;
899 pcre_uchar type;
900 sljit_sw length = end - begin;
901 int min, max, i;
902
903 /* Detect fixed iterations first. Only brackets closed by a plain OP_KET
   are examined. */
904 if (end[-(1 + LINK_SIZE)] != OP_KET)
905 return FALSE;
906
907 /* Already detected repeat. */
908 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
909 return TRUE;
910
/* Count the bracket itself plus the copies which directly follow it. A
   copy must start with the same opcode, have the same length and be
   identical code unit by code unit. */
911 next = end;
912 min = 1;
913 while (1)
914 {
915 if (*next != *begin)
916 break;
917 next_end = bracketend(next);
918 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
919 break;
920 next = next_end;
921 min++;
922 }
923
/* Exactly two copies are not handled as a repeat. */
924 if (min == 2)
925 return FALSE;
926
/* Look for optional copies after the exact ones. Each optional copy
   except the innermost one has the form
     BRAZERO / BRAMINZERO, OP_BRA, copy, <next optional copy>, OP_KET
   and max counts these OP_BRA wrapped levels. */
927 max = 0;
928 max_end = next;
929 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
930 {
931 type = *next;
932 while (1)
933 {
934 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
935 break;
936 next_end = bracketend(next + 2 + LINK_SIZE);
937 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
938 break;
939 next = next_end;
940 max++;
941 }
942
/* The innermost optional copy follows the BRAZERO / BRAMINZERO opcode
   directly, without an OP_BRA wrapper. */
943 if (next[0] == type && next[1] == *begin && max >= 1)
944 {
945 next_end = bracketend(next + 1);
946 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
947 {
/* Every OP_BRA wrapper must be closed by its own OP_KET. */
948 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
949 if (*next_end != OP_KET)
950 break;
951
952 if (i == max)
953 {
954 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
955 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
956 /* +2 the original and the last. */
957 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
958 if (min == 1)
959 return TRUE;
/* The last exact copy is excluded from the exact repeat: max_end is
   moved back to the start of that copy using the link value stored in
   its closing OP_KET. */
960 min--;
961 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
962 }
963 }
964 }
965 }
966
/* Record the exact repeat if at least three copies remain. */
967 if (min >= 3)
968 {
969 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
970 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
971 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
972 return TRUE;
973 }
974
975 return FALSE;
976 }
977
/* Groups of case labels for the single character and type repeat
   opcodes, to be used inside a switch statement. In
   set_private_data_ptrs the groups with the _1 suffix get one private
   data word, the _2A and _2B groups get two (the type repeats only when
   the repeated type is neither OP_ANYNL nor OP_EXTUNI). The _2B groups
   contain the opcodes which carry an IMM2_SIZE sized repeat count. */

/* Character repeats, one word. */
978 #define CASE_ITERATOR_PRIVATE_DATA_1 \
979 case OP_MINSTAR: \
980 case OP_MINPLUS: \
981 case OP_QUERY: \
982 case OP_MINQUERY: \
983 case OP_MINSTARI: \
984 case OP_MINPLUSI: \
985 case OP_QUERYI: \
986 case OP_MINQUERYI: \
987 case OP_NOTMINSTAR: \
988 case OP_NOTMINPLUS: \
989 case OP_NOTQUERY: \
990 case OP_NOTMINQUERY: \
991 case OP_NOTMINSTARI: \
992 case OP_NOTMINPLUSI: \
993 case OP_NOTQUERYI: \
994 case OP_NOTMINQUERYI:
995
/* Character repeats, two words. */
996 #define CASE_ITERATOR_PRIVATE_DATA_2A \
997 case OP_STAR: \
998 case OP_PLUS: \
999 case OP_STARI: \
1000 case OP_PLUSI: \
1001 case OP_NOTSTAR: \
1002 case OP_NOTPLUS: \
1003 case OP_NOTSTARI: \
1004 case OP_NOTPLUSI:
1005
/* Character repeats with a repeat count, two words. */
1006 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1007 case OP_UPTO: \
1008 case OP_MINUPTO: \
1009 case OP_UPTOI: \
1010 case OP_MINUPTOI: \
1011 case OP_NOTUPTO: \
1012 case OP_NOTMINUPTO: \
1013 case OP_NOTUPTOI: \
1014 case OP_NOTMINUPTOI:
1015
/* Type repeats, one word. */
1016 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1017 case OP_TYPEMINSTAR: \
1018 case OP_TYPEMINPLUS: \
1019 case OP_TYPEQUERY: \
1020 case OP_TYPEMINQUERY:
1021
/* Type repeats, two words. */
1022 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1023 case OP_TYPESTAR: \
1024 case OP_TYPEPLUS:
1025
/* Type repeats with a repeat count, two words. */
1026 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1027 case OP_TYPEUPTO: \
1028 case OP_TYPEMINUPTO:
1029
/* Walks the compiled pattern from common->start up to ccend and assigns
   private data offsets to the opcodes which need them. The offsets are
   stored in common->private_data_ptrs, indexed by the position of the
   opcode relative to common->start.

   *private_data_start is the first free offset on entry and it is set to
   the next free offset on a normal return. If the running offset grows
   above SLJIT_MAX_LOCAL_SIZE the function returns early and leaves
   *private_data_start unchanged. */
1030 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1031 {
1032 pcre_uchar *cc = common->start;
1033 pcre_uchar *alternative;
/* Character iterators located before 'end' do not get a private slot
   (see the 'cc >= end' checks below). */
1034 pcre_uchar *end = NULL;
1035 int private_data_ptr = *private_data_start;
/* space: number of words reserved for a character iterator.
   size: length of the current opcode; a negative value means that a
   character follows which may have extra UTF code units.
   bracketlen: length of a bracket opening opcode. */
1036 int space, size, bracketlen;
1037
1038 while (cc < ccend)
1039 {
1040 space = 0;
1041 size = 0;
1042 bracketlen = 0;
1043 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1044 return;
1045
1046 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1047 if (detect_repeat(common, cc))
1048 {
1049 /* These brackets are converted to repeats, so no global
1050 based single character repeat is allowed. */
1051 if (cc >= end)
1052 end = bracketend(cc);
1053 }
1054
1055 switch(*cc)
1056 {
1057 case OP_KET:
/* A non-zero value stored for the position right after the OP_KET makes
   this OP_KET get its own word, and the stored value is also used as an
   extra skip length (presumably recorded by detect_repeat - confirm). */
1058 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1059 {
1060 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1061 private_data_ptr += sizeof(sljit_sw);
1062 cc += common->private_data_ptrs[cc + 1 - common->start];
1063 }
1064 cc += 1 + LINK_SIZE;
1065 break;
1066
/* These brackets always get one private word. */
1067 case OP_ASSERT:
1068 case OP_ASSERT_NOT:
1069 case OP_ASSERTBACK:
1070 case OP_ASSERTBACK_NOT:
1071 case OP_ONCE:
1072 case OP_ONCE_NC:
1073 case OP_BRAPOS:
1074 case OP_SBRA:
1075 case OP_SBRAPOS:
1076 case OP_SCOND:
1077 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1078 private_data_ptr += sizeof(sljit_sw);
1079 bracketlen = 1 + LINK_SIZE;
1080 break;
1081
1082 case OP_CBRAPOS:
1083 case OP_SCBRAPOS:
1084 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1085 private_data_ptr += sizeof(sljit_sw);
1086 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1087 break;
1088
1089 case OP_COND:
1090 /* Might be a hidden SCOND. */
1091 alternative = cc + GET(cc, 1);
1092 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1093 {
1094 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1095 private_data_ptr += sizeof(sljit_sw);
1096 }
1097 bracketlen = 1 + LINK_SIZE;
1098 break;
1099
/* Brackets without private data: only their length is recorded. */
1100 case OP_BRA:
1101 bracketlen = 1 + LINK_SIZE;
1102 break;
1103
1104 case OP_CBRA:
1105 case OP_SCBRA:
1106 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1107 break;
1108
/* Character iterators: the negative size tells the code below that the
   opcode ends with a character. */
1109 CASE_ITERATOR_PRIVATE_DATA_1
1110 space = 1;
1111 size = -2;
1112 break;
1113
1114 CASE_ITERATOR_PRIVATE_DATA_2A
1115 space = 2;
1116 size = -2;
1117 break;
1118
1119 CASE_ITERATOR_PRIVATE_DATA_2B
1120 space = 2;
1121 size = -(2 + IMM2_SIZE);
1122 break;
1123
/* Type iterators: the repeated item is a single type opcode. */
1124 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1125 space = 1;
1126 size = 1;
1127 break;
1128
1129 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1130 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1131 space = 2;
1132 size = 1;
1133 break;
1134
1135 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1136 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1137 space = 2;
1138 size = 1 + IMM2_SIZE;
1139 break;
1140
/* Classes: the space depends on the iterator which follows the class. */
1141 case OP_CLASS:
1142 case OP_NCLASS:
1143 size += 1 + 32 / sizeof(pcre_uchar);
1144 space = get_class_iterator_size(cc + size);
1145 break;
1146
1147 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1148 case OP_XCLASS:
1149 size = GET(cc, 1);
1150 space = get_class_iterator_size(cc + size);
1151 break;
1152 #endif
1153
1154 default:
1155 cc = next_opcode(common, cc);
1156 SLJIT_ASSERT(cc != NULL);
1157 break;
1158 }
1159
1160 /* Character iterators, which are not inside a repeated bracket,
1161 get a private slot instead of allocating it on the stack. */
1162 if (space > 0 && cc >= end)
1163 {
1164 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1165 private_data_ptr += sizeof(sljit_sw) * space;
1166 }
1167
/* Step over the opcode. For a negative size the last skipped code unit
   is a character, so its extra UTF code units are skipped as well. */
1168 if (size != 0)
1169 {
1170 if (size < 0)
1171 {
1172 cc += -size;
1173 #ifdef SUPPORT_UTF
1174 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1175 #endif
1176 }
1177 else
1178 cc += size;
1179 }
1180
/* Entering a bracket: unless we are already inside a protected range,
   'end' is set to the end of this bracket, or reset to NULL when the
   bracket is closed by a plain OP_KET. */
1181 if (bracketlen > 0)
1182 {
1183 if (cc >= end)
1184 {
1185 end = bracketend(cc);
1186 if (end[-1 - LINK_SIZE] == OP_KET)
1187 end = NULL;
1188 }
1189 cc += bracketlen;
1190 }
1191 }
1192 *private_data_start = private_data_ptr;
1193 }
1194
1195 /* Returns with a frame_types (always < 0) if no need for frame. */
1196 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1197 {
1198 int length = 0;
1199 int possessive = 0;
1200 BOOL stack_restore = FALSE;
1201 BOOL setsom_found = recursive;
1202 BOOL setmark_found = recursive;
1203 /* The last capture is a local variable even for recursions. */
1204 BOOL capture_last_found = FALSE;
1205
1206 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1207 SLJIT_ASSERT(common->control_head_ptr != 0);
1208 *needs_control_head = TRUE;
1209 #else
1210 *needs_control_head = FALSE;
1211 #endif
1212
1213 if (ccend == NULL)
1214 {
1215 ccend = bracketend(cc) - (1 + LINK_SIZE);
1216 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1217 {
1218 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1219 /* This is correct regardless of common->capture_last_ptr. */
1220 capture_last_found = TRUE;
1221 }
1222 cc = next_opcode(common, cc);
1223 }
1224
1225 SLJIT_ASSERT(cc != NULL);
1226 while (cc < ccend)
1227 switch(*cc)
1228 {
1229 case OP_SET_SOM:
1230 SLJIT_ASSERT(common->has_set_som);
1231 stack_restore = TRUE;
1232 if (!setsom_found)
1233 {
1234 length += 2;
1235 setsom_found = TRUE;
1236 }
1237 cc += 1;
1238 break;
1239
1240 case OP_MARK:
1241 case OP_PRUNE_ARG:
1242 case OP_THEN_ARG:
1243 SLJIT_ASSERT(common->mark_ptr != 0);
1244 stack_restore = TRUE;
1245 if (!setmark_found)
1246 {
1247 length += 2;
1248 setmark_found = TRUE;
1249 }
1250 if (common->control_head_ptr != 0)
1251 *needs_control_head = TRUE;
1252 cc += 1 + 2 + cc[1];
1253 break;
1254
1255 case OP_RECURSE:
1256 stack_restore = TRUE;
1257 if (common->has_set_som && !setsom_found)
1258 {
1259 length += 2;
1260 setsom_found = TRUE;
1261 }
1262 if (common->mark_ptr != 0 && !setmark_found)
1263 {
1264 length += 2;
1265 setmark_found = TRUE;
1266 }
1267 if (common->capture_last_ptr != 0 && !capture_last_found)
1268 {
1269 length += 2;
1270 capture_last_found = TRUE;
1271 }
1272 cc += 1 + LINK_SIZE;
1273 break;
1274
1275 case OP_CBRA:
1276 case OP_CBRAPOS:
1277 case OP_SCBRA:
1278 case OP_SCBRAPOS:
1279 stack_restore = TRUE;
1280 if (common->capture_last_ptr != 0 && !capture_last_found)
1281 {
1282 length += 2;
1283 capture_last_found = TRUE;
1284 }
1285 length += 3;
1286 cc += 1 + LINK_SIZE + IMM2_SIZE;
1287 break;
1288
1289 default:
1290 stack_restore = TRUE;
1291 /* Fall through. */
1292
1293 case OP_NOT_WORD_BOUNDARY:
1294 case OP_WORD_BOUNDARY:
1295 case OP_NOT_DIGIT:
1296 case OP_DIGIT:
1297 case OP_NOT_WHITESPACE:
1298 case OP_WHITESPACE:
1299 case OP_NOT_WORDCHAR:
1300 case OP_WORDCHAR:
1301 case OP_ANY:
1302 case OP_ALLANY:
1303 case OP_ANYBYTE:
1304 case OP_NOTPROP:
1305 case OP_PROP:
1306 case OP_ANYNL:
1307 case OP_NOT_HSPACE:
1308 case OP_HSPACE:
1309 case OP_NOT_VSPACE:
1310 case OP_VSPACE:
1311 case OP_EXTUNI:
1312 case OP_EODN:
1313 case OP_EOD:
1314 case OP_CIRC:
1315 case OP_CIRCM:
1316 case OP_DOLL:
1317 case OP_DOLLM:
1318 case OP_CHAR:
1319 case OP_CHARI:
1320 case OP_NOT:
1321 case OP_NOTI:
1322
1323 case OP_EXACT:
1324 case OP_POSSTAR:
1325 case OP_POSPLUS:
1326 case OP_POSQUERY:
1327 case OP_POSUPTO:
1328
1329 case OP_EXACTI:
1330 case OP_POSSTARI:
1331 case OP_POSPLUSI:
1332 case OP_POSQUERYI:
1333 case OP_POSUPTOI:
1334
1335 case OP_NOTEXACT:
1336 case OP_NOTPOSSTAR:
1337 case OP_NOTPOSPLUS:
1338 case OP_NOTPOSQUERY:
1339 case OP_NOTPOSUPTO:
1340
1341 case OP_NOTEXACTI:
1342 case OP_NOTPOSSTARI:
1343 case OP_NOTPOSPLUSI:
1344 case OP_NOTPOSQUERYI:
1345 case OP_NOTPOSUPTOI:
1346
1347 case OP_TYPEEXACT:
1348 case OP_TYPEPOSSTAR:
1349 case OP_TYPEPOSPLUS:
1350 case OP_TYPEPOSQUERY:
1351 case OP_TYPEPOSUPTO:
1352
1353 case OP_CLASS:
1354 case OP_NCLASS:
1355 case OP_XCLASS:
1356
1357 cc = next_opcode(common, cc);
1358 SLJIT_ASSERT(cc != NULL);
1359 break;
1360 }
1361
1362 /* Possessive quantifiers can use a special case. */
1363 if (SLJIT_UNLIKELY(possessive == length))
1364 return stack_restore ? no_frame : no_stack;
1365
1366 if (length > 0)
1367 return length + 1;
1368 return stack_restore ? no_frame : no_stack;
1369 }
1370
/* Emits the code which fills a frame on the stack. The opcodes are scanned
   the same way as in get_framesize (the body of the bracket at cc when ccend
   is NULL, the [cc, ccend) range otherwise). For every saved item an
   identifier word is stored first, then the current value(s) loaded from the
   local area. The frame is closed by a zero word.

   stackpos is the index of the first stack slot to fill; the asserts show
   that the fill must finish exactly at STACK(stacktop). When recursive is
   set, OP_SET_SOM and mark opcodes are treated as already saved. */
1371 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1372 {
1373 DEFINE_COMPILER;
1374 BOOL setsom_found = recursive;
1375 BOOL setmark_found = recursive;
1376 /* The last capture is a local variable even for recursions. */
1377 BOOL capture_last_found = FALSE;
1378 int offset;
1379
1380 /* >= 1 + shortest item size (2) */
1381 SLJIT_UNUSED_ARG(stacktop);
1382 SLJIT_ASSERT(stackpos >= stacktop + 2);
1383
/* From here stackpos is an offset computed by STACK(), not a slot index. */
1384 stackpos = STACK(stackpos);
1385 if (ccend == NULL)
1386 {
1387 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive OP_CBRAPOS / OP_SCBRAPOS is not skipped, so the loop
   below saves its capture as well. */
1388 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1389 cc = next_opcode(common, cc);
1390 }
1391
1392 SLJIT_ASSERT(cc != NULL);
1393 while (cc < ccend)
1394 switch(*cc)
1395 {
1396 case OP_SET_SOM:
1397 SLJIT_ASSERT(common->has_set_som);
1398 if (!setsom_found)
1399 {
/* Two words: the negated OVECTOR(0) offset as identifier, then the value. */
1400 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1402 stackpos += (int)sizeof(sljit_sw);
1403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1404 stackpos += (int)sizeof(sljit_sw);
1405 setsom_found = TRUE;
1406 }
1407 cc += 1;
1408 break;
1409
1410 case OP_MARK:
1411 case OP_PRUNE_ARG:
1412 case OP_THEN_ARG:
1413 SLJIT_ASSERT(common->mark_ptr != 0);
1414 if (!setmark_found)
1415 {
/* Two words: the negated mark_ptr offset as identifier, then the value. */
1416 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1417 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1418 stackpos += (int)sizeof(sljit_sw);
1419 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1420 stackpos += (int)sizeof(sljit_sw);
1421 setmark_found = TRUE;
1422 }
1423 cc += 1 + 2 + cc[1];
1424 break;
1425
1426 case OP_RECURSE:
/* A recursion saves every tracked value which was not saved before. */
1427 if (common->has_set_som && !setsom_found)
1428 {
1429 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1430 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1431 stackpos += (int)sizeof(sljit_sw);
1432 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1433 stackpos += (int)sizeof(sljit_sw);
1434 setsom_found = TRUE;
1435 }
1436 if (common->mark_ptr != 0 && !setmark_found)
1437 {
1438 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1439 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1440 stackpos += (int)sizeof(sljit_sw);
1441 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1442 stackpos += (int)sizeof(sljit_sw);
1443 setmark_found = TRUE;
1444 }
1445 if (common->capture_last_ptr != 0 && !capture_last_found)
1446 {
1447 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1448 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1449 stackpos += (int)sizeof(sljit_sw);
1450 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1451 stackpos += (int)sizeof(sljit_sw);
1452 capture_last_found = TRUE;
1453 }
1454 cc += 1 + LINK_SIZE;
1455 break;
1456
1457 case OP_CBRA:
1458 case OP_CBRAPOS:
1459 case OP_SCBRA:
1460 case OP_SCBRAPOS:
1461 if (common->capture_last_ptr != 0 && !capture_last_found)
1462 {
1463 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1464 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1465 stackpos += (int)sizeof(sljit_sw);
1466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1467 stackpos += (int)sizeof(sljit_sw);
1468 capture_last_found = TRUE;
1469 }
/* Three words: the (not negated) OVECTOR offset of the capture as
   identifier, then both ovector values of the capture. */
1470 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1472 stackpos += (int)sizeof(sljit_sw);
1473 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1474 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1476 stackpos += (int)sizeof(sljit_sw);
1477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1478 stackpos += (int)sizeof(sljit_sw);
1479
1480 cc += 1 + LINK_SIZE + IMM2_SIZE;
1481 break;
1482
1483 default:
1484 cc = next_opcode(common, cc);
1485 SLJIT_ASSERT(cc != NULL);
1486 break;
1487 }
1488
/* Zero word: closes the frame. */
1489 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1490 SLJIT_ASSERT(stackpos == STACK(stacktop));
1491 }
1492
1493 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1494 {
1495 int private_data_length = needs_control_head ? 3 : 2;
1496 int size;
1497 pcre_uchar *alternative;
1498 /* Calculate the sum of the private machine words. */
1499 while (cc < ccend)
1500 {
1501 size = 0;
1502 switch(*cc)
1503 {
1504 case OP_KET:
1505 if (PRIVATE_DATA(cc) != 0)
1506 private_data_length++;
1507 cc += 1 + LINK_SIZE;
1508 break;
1509
1510 case OP_ASSERT:
1511 case OP_ASSERT_NOT:
1512 case OP_ASSERTBACK:
1513 case OP_ASSERTBACK_NOT:
1514 case OP_ONCE:
1515 case OP_ONCE_NC:
1516 case OP_BRAPOS:
1517 case OP_SBRA:
1518 case OP_SBRAPOS:
1519 case OP_SCOND:
1520 private_data_length++;
1521 cc += 1 + LINK_SIZE;
1522 break;
1523
1524 case OP_CBRA:
1525 case OP_SCBRA:
1526 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1527 private_data_length++;
1528 cc += 1 + LINK_SIZE + IMM2_SIZE;
1529 break;
1530
1531 case OP_CBRAPOS:
1532 case OP_SCBRAPOS:
1533 private_data_length += 2;
1534 cc += 1 + LINK_SIZE + IMM2_SIZE;
1535 break;
1536
1537 case OP_COND:
1538 /* Might be a hidden SCOND. */
1539 alternative = cc + GET(cc, 1);
1540 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1541 private_data_length++;
1542 cc += 1 + LINK_SIZE;
1543 break;
1544
1545 CASE_ITERATOR_PRIVATE_DATA_1
1546 if (PRIVATE_DATA(cc))
1547 private_data_length++;
1548 cc += 2;
1549 #ifdef SUPPORT_UTF
1550 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1551 #endif
1552 break;
1553
1554 CASE_ITERATOR_PRIVATE_DATA_2A
1555 if (PRIVATE_DATA(cc))
1556 private_data_length += 2;
1557 cc += 2;
1558 #ifdef SUPPORT_UTF
1559 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1560 #endif
1561 break;
1562
1563 CASE_ITERATOR_PRIVATE_DATA_2B
1564 if (PRIVATE_DATA(cc))
1565 private_data_length += 2;
1566 cc += 2 + IMM2_SIZE;
1567 #ifdef SUPPORT_UTF
1568 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1569 #endif
1570 break;
1571
1572 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1573 if (PRIVATE_DATA(cc))
1574 private_data_length++;
1575 cc += 1;
1576 break;
1577
1578 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1579 if (PRIVATE_DATA(cc))
1580 private_data_length += 2;
1581 cc += 1;
1582 break;
1583
1584 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1585 if (PRIVATE_DATA(cc))
1586 private_data_length += 2;
1587 cc += 1 + IMM2_SIZE;
1588 break;
1589
1590 case OP_CLASS:
1591 case OP_NCLASS:
1592 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1593 case OP_XCLASS:
1594 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1595 #else
1596 size = 1 + 32 / (int)sizeof(pcre_uchar);
1597 #endif
1598 if (PRIVATE_DATA(cc))
1599 private_data_length += get_class_iterator_size(cc + size);
1600 cc += size;
1601 break;
1602
1603 default:
1604 cc = next_opcode(common, cc);
1605 SLJIT_ASSERT(cc != NULL);
1606 break;
1607 }
1608 }
1609 SLJIT_ASSERT(cc == ccend);
1610 return private_data_length;
1611 }
1612
/* Emits the code which copies the private data words of the [cc, ccend)
   range between the local area and the stack. When save is TRUE the words
   are loaded from the local area and stored on the stack, preceded by the
   recursive head (and by the control head if needs_control_head is set).
   When save is FALSE the words are loaded from the stack and stored back to
   the local area.

   The words travel through TMP1 and TMP2 alternately, so the store of a
   word is always emitted after the load of the following one. The opcode
   cases mirror those of get_private_data_copy_length. The final assert
   requires that the scan stops at ccend and that exactly the
   [stackptr, stacktop) stack range was used. */
1613 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1614 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1615 {
1616 DEFINE_COMPILER;
/* Local area offsets of the (at most two) words found in one step. */
1617 int srcw[2];
1618 int count, size;
/* tmp1next selects the register used by the next word; the *empty flags
   tell whether a register holds a word which is not written out yet. */
1619 BOOL tmp1next = TRUE;
1620 BOOL tmp1empty = TRUE;
1621 BOOL tmp2empty = TRUE;
1622 pcre_uchar *alternative;
1623 enum {
1624 start,
1625 loop,
1626 end
1627 } status;
1628
/* The 'start' step (heads) is only performed when saving. */
1629 status = save ? start : loop;
1630 stackptr = STACK(stackptr - 2);
1631 stacktop = STACK(stacktop - 1);
1632
1633 if (!save)
1634 {
/* Skip the head word(s), which are not restored here, then preload
   both registers from the stack if there is anything to restore. */
1635 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1636 if (stackptr < stacktop)
1637 {
1638 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1639 stackptr += sizeof(sljit_sw);
1640 tmp1empty = FALSE;
1641 }
1642 if (stackptr < stacktop)
1643 {
1644 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1645 stackptr += sizeof(sljit_sw);
1646 tmp2empty = FALSE;
1647 }
1648 /* The tmp1next must be TRUE in either way. */
1649 }
1650
/* Each iteration collects the words of one item into srcw[] (count of
   them), then the inner while loop emits the copy of these words. */
1651 do
1652 {
1653 count = 0;
1654 switch(status)
1655 {
1656 case start:
1657 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1658 count = 1;
1659 srcw[0] = common->recursive_head_ptr;
1660 if (needs_control_head)
1661 {
1662 SLJIT_ASSERT(common->control_head_ptr != 0);
1663 count = 2;
1664 srcw[1] = common->control_head_ptr;
1665 }
1666 status = loop;
1667 break;
1668
1669 case loop:
1670 if (cc >= ccend)
1671 {
1672 status = end;
1673 break;
1674 }
1675
1676 switch(*cc)
1677 {
1678 case OP_KET:
1679 if (PRIVATE_DATA(cc) != 0)
1680 {
1681 count = 1;
1682 srcw[0] = PRIVATE_DATA(cc);
1683 }
1684 cc += 1 + LINK_SIZE;
1685 break;
1686
1687 case OP_ASSERT:
1688 case OP_ASSERT_NOT:
1689 case OP_ASSERTBACK:
1690 case OP_ASSERTBACK_NOT:
1691 case OP_ONCE:
1692 case OP_ONCE_NC:
1693 case OP_BRAPOS:
1694 case OP_SBRA:
1695 case OP_SBRAPOS:
1696 case OP_SCOND:
1697 count = 1;
1698 srcw[0] = PRIVATE_DATA(cc);
1699 SLJIT_ASSERT(srcw[0] != 0);
1700 cc += 1 + LINK_SIZE;
1701 break;
1702
1703 case OP_CBRA:
1704 case OP_SCBRA:
1705 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1706 {
1707 count = 1;
1708 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1709 }
1710 cc += 1 + LINK_SIZE + IMM2_SIZE;
1711 break;
1712
1713 case OP_CBRAPOS:
1714 case OP_SCBRAPOS:
1715 count = 2;
1716 srcw[0] = PRIVATE_DATA(cc);
1717 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1718 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1719 cc += 1 + LINK_SIZE + IMM2_SIZE;
1720 break;
1721
1722 case OP_COND:
1723 /* Might be a hidden SCOND. */
1724 alternative = cc + GET(cc, 1);
1725 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1726 {
1727 count = 1;
1728 srcw[0] = PRIVATE_DATA(cc);
1729 SLJIT_ASSERT(srcw[0] != 0);
1730 }
1731 cc += 1 + LINK_SIZE;
1732 break;
1733
1734 CASE_ITERATOR_PRIVATE_DATA_1
1735 if (PRIVATE_DATA(cc))
1736 {
1737 count = 1;
1738 srcw[0] = PRIVATE_DATA(cc);
1739 }
1740 cc += 2;
1741 #ifdef SUPPORT_UTF
1742 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1743 #endif
1744 break;
1745
1746 CASE_ITERATOR_PRIVATE_DATA_2A
1747 if (PRIVATE_DATA(cc))
1748 {
1749 count = 2;
1750 srcw[0] = PRIVATE_DATA(cc);
1751 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1752 }
1753 cc += 2;
1754 #ifdef SUPPORT_UTF
1755 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1756 #endif
1757 break;
1758
1759 CASE_ITERATOR_PRIVATE_DATA_2B
1760 if (PRIVATE_DATA(cc))
1761 {
1762 count = 2;
1763 srcw[0] = PRIVATE_DATA(cc);
1764 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1765 }
1766 cc += 2 + IMM2_SIZE;
1767 #ifdef SUPPORT_UTF
1768 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1769 #endif
1770 break;
1771
1772 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1773 if (PRIVATE_DATA(cc))
1774 {
1775 count = 1;
1776 srcw[0] = PRIVATE_DATA(cc);
1777 }
1778 cc += 1;
1779 break;
1780
1781 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1782 if (PRIVATE_DATA(cc))
1783 {
1784 count = 2;
1785 srcw[0] = PRIVATE_DATA(cc);
1786 srcw[1] = srcw[0] + sizeof(sljit_sw);
1787 }
1788 cc += 1;
1789 break;
1790
1791 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1792 if (PRIVATE_DATA(cc))
1793 {
1794 count = 2;
1795 srcw[0] = PRIVATE_DATA(cc);
1796 srcw[1] = srcw[0] + sizeof(sljit_sw);
1797 }
1798 cc += 1 + IMM2_SIZE;
1799 break;
1800
1801 case OP_CLASS:
1802 case OP_NCLASS:
1803 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1804 case OP_XCLASS:
1805 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1806 #else
1807 size = 1 + 32 / (int)sizeof(pcre_uchar);
1808 #endif
/* The number of words depends on the iterator following the class. */
1809 if (PRIVATE_DATA(cc))
1810 switch(get_class_iterator_size(cc + size))
1811 {
1812 case 1:
1813 count = 1;
1814 srcw[0] = PRIVATE_DATA(cc);
1815 break;
1816
1817 case 2:
1818 count = 2;
1819 srcw[0] = PRIVATE_DATA(cc);
1820 srcw[1] = srcw[0] + sizeof(sljit_sw);
1821 break;
1822
1823 default:
1824 SLJIT_ASSERT_STOP();
1825 break;
1826 }
1827 cc += size;
1828 break;
1829
1830 default:
1831 cc = next_opcode(common, cc);
1832 SLJIT_ASSERT(cc != NULL);
1833 break;
1834 }
1835 break;
1836
1837 case end:
1838 SLJIT_ASSERT_STOP();
1839 break;
1840 }
1841
/* The collected words are processed from the last one to the first. */
1842 while (count > 0)
1843 {
1844 count--;
1845 if (save)
1846 {
/* Saving: write out the previous content of the selected register
   (if any), then load the next word into it. */
1847 if (tmp1next)
1848 {
1849 if (!tmp1empty)
1850 {
1851 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1852 stackptr += sizeof(sljit_sw);
1853 }
1854 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1855 tmp1empty = FALSE;
1856 tmp1next = FALSE;
1857 }
1858 else
1859 {
1860 if (!tmp2empty)
1861 {
1862 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1863 stackptr += sizeof(sljit_sw);
1864 }
1865 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1866 tmp2empty = FALSE;
1867 tmp1next = TRUE;
1868 }
1869 }
1870 else
1871 {
/* Restoring: store the preloaded register to the local area, then
   refill it from the stack while there are words left. */
1872 if (tmp1next)
1873 {
1874 SLJIT_ASSERT(!tmp1empty);
1875 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1876 tmp1empty = stackptr >= stacktop;
1877 if (!tmp1empty)
1878 {
1879 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1880 stackptr += sizeof(sljit_sw);
1881 }
1882 tmp1next = FALSE;
1883 }
1884 else
1885 {
1886 SLJIT_ASSERT(!tmp2empty);
1887 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1888 tmp2empty = stackptr >= stacktop;
1889 if (!tmp2empty)
1890 {
1891 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1892 stackptr += sizeof(sljit_sw);
1893 }
1894 tmp1next = TRUE;
1895 }
1896 }
1897 }
1898 }
1899 while (status != end);
1900
1901 if (save)
1902 {
/* Write out the words still kept in the registers. The register which
   would be used next holds the older word, so it is stored first. */
1903 if (tmp1next)
1904 {
1905 if (!tmp1empty)
1906 {
1907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1908 stackptr += sizeof(sljit_sw);
1909 }
1910 if (!tmp2empty)
1911 {
1912 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1913 stackptr += sizeof(sljit_sw);
1914 }
1915 }
1916 else
1917 {
1918 if (!tmp2empty)
1919 {
1920 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1921 stackptr += sizeof(sljit_sw);
1922 }
1923 if (!tmp1empty)
1924 {
1925 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1926 stackptr += sizeof(sljit_sw);
1927 }
1928 }
1929 }
1930 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1931 }
1932
1933 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1934 {
1935 pcre_uchar *end = bracketend(cc);
1936 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1937
1938 /* Assert captures then. */
1939 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1940 current_offset = NULL;
1941 /* Conditional block does not. */
1942 if (*cc == OP_COND || *cc == OP_SCOND)
1943 has_alternatives = FALSE;
1944
1945 cc = next_opcode(common, cc);
1946 if (has_alternatives)
1947 current_offset = common->then_offsets + (cc - common->start);
1948
1949 while (cc < end)
1950 {
1951 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1952 cc = set_then_offsets(common, cc, current_offset);
1953 else
1954 {
1955 if (*cc == OP_ALT && has_alternatives)
1956 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1957 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1958 *current_offset = 1;
1959 cc = next_opcode(common, cc);
1960 }
1961 }
1962
1963 return end;
1964 }
1965
/* The case label helper macros are not available after this point. */
1966 #undef CASE_ITERATOR_PRIVATE_DATA_1
1967 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1968 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1969 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1970 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1971 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1972
1973 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1974 {
1975 return (value & (value - 1)) == 0;
1976 }
1977
1978 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1979 {
1980 while (list)
1981 {
1982 /* sljit_set_label is clever enough to do nothing
1983 if either the jump or the label is NULL. */
1984 SET_LABEL(list->jump, label);
1985 list = list->next;
1986 }
1987 }
1988
1989 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1990 {
1991 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1992 if (list_item)
1993 {
1994 list_item->next = *list;
1995 list_item->jump = jump;
1996 *list = list_item;
1997 }
1998 }
1999
2000 static void add_stub(compiler_common *common, struct sljit_jump *start)
2001 {
2002 DEFINE_COMPILER;
2003 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2004
2005 if (list_item)
2006 {
2007 list_item->start = start;
2008 list_item->quit = LABEL();
2009 list_item->next = common->stubs;
2010 common->stubs = list_item;
2011 }
2012 }
2013
2014 static void flush_stubs(compiler_common *common)
2015 {
2016 DEFINE_COMPILER;
2017 stub_list* list_item = common->stubs;
2018
2019 while (list_item)
2020 {
2021 JUMPHERE(list_item->start);
2022 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2023 JUMPTO(SLJIT_JUMP, list_item->quit);
2024 list_item = list_item->next;
2025 }
2026 common->stubs = NULL;
2027 }
2028
2029 static SLJIT_INLINE void count_match(compiler_common *common)
2030 {
2031 DEFINE_COMPILER;
2032
2033 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2034 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2035 }
2036
2037 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2038 {
2039 /* May destroy all locals and registers except TMP2. */
2040 DEFINE_COMPILER;
2041
2042 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2043 #ifdef DESTROY_REGISTERS
2044 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2045 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2046 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2047 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2048 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2049 #endif
2050 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2051 }
2052
2053 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2054 {
2055 DEFINE_COMPILER;
2056 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2057 }
2058
2059 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2060 {
2061 DEFINE_COMPILER;
2062 struct sljit_label *loop;
2063 int i;
2064
2065 /* At this point we can freely use all temporary registers. */
2066 SLJIT_ASSERT(length > 1);
2067 /* TMP1 returns with begin - 1. */
2068 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2069 if (length < 8)
2070 {
2071 for (i = 1; i < length; i++)
2072 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2073 }
2074 else
2075 {
2076 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2077 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2078 loop = LABEL();
2079 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2080 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2081 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2082 }
2083 }
2084
2085 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2086 {
2087 DEFINE_COMPILER;
2088 struct sljit_label *loop;
2089 int i;
2090
2091 SLJIT_ASSERT(length > 1);
2092 /* OVECTOR(1) contains the "string begin - 1" constant. */
2093 if (length > 2)
2094 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2095 if (length < 8)
2096 {
2097 for (i = 2; i < length; i++)
2098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2099 }
2100 else
2101 {
2102 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2103 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2104 loop = LABEL();
2105 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2106 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2107 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2108 }
2109
2110 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2111 if (common->mark_ptr != 0)
2112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2113 if (common->control_head_ptr != 0)
2114 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2115 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2116 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2117 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2118 }
2119
2120 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2121 {
2122 while (current != NULL)
2123 {
2124 switch (current[-2])
2125 {
2126 case type_then_trap:
2127 break;
2128
2129 case type_mark:
2130 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2131 return current[-4];
2132 break;
2133
2134 default:
2135 SLJIT_ASSERT_STOP();
2136 break;
2137 }
2138 current = (sljit_sw*)current[-1];
2139 }
2140 return -1;
2141 }
2142
/* Emits the code which copies the match result to the caller. OVECTOR(1) is
   set to STR_PTR, the mark pointer (if it is used) is copied to the
   mark_ptr field of the arguments, then offset_count values are read from
   the local ovector, converted to offsets relative to the begin field of
   the arguments, and stored as ints into the offsets array of the
   arguments. Finally the return value is computed. */
2143 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2144 {
2145 DEFINE_COMPILER;
2146 struct sljit_label *loop;
2147 struct sljit_jump *early_quit;
2148
2149 /* At this point we can freely use all registers. */
/* The original OVECTOR(1) value is kept in SLJIT_SAVED_REG3: the return
   value loop below compares the ovector entries to it. */
2150 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2151 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2152
2153 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2154 if (common->mark_ptr != 0)
2155 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2156 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2157 if (common->mark_ptr != 0)
2158 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* The destination pointer starts one int before the offsets array,
   because the store in the loop uses a sizeof(int) displacement
   (NOTE(review): SLJIT_MOVU_SI presumably advances the base register by
   the displacement as well - confirm against the sljit documentation). */
2159 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2160 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2161 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2162 /* Unlikely, but possible */
2163 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2164 loop = LABEL();
/* One iteration per output value: local value minus begin. */
2165 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2166 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2167 /* Copy the integer value to the output buffer */
2168 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Convert the byte difference to a character count. */
2169 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2170 #endif
2171 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2172 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2173 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2174 JUMPHERE(early_quit);
2175
2176 /* Calculate the return value, which is the maximum ovector value. */
2177 if (topbracket > 1)
2178 {
/* The ovector is scanned backwards, two words at a time, starting from
   the pair which precedes index topbracket * 2. The counter is decreased
   for every inspected pair, and the scan continues while the loaded
   value is equal to the saved OVECTOR(1) value. */
2179 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2180 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2181
2182 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2183 loop = LABEL();
2184 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2185 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2186 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2187 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2188 }
2189 else
2190 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2191 }
2192
/* Emits the code which returns with PCRE_ERROR_PARTIAL. If the
   real_offset_count field of the arguments is less than 2, the code jumps to
   quit right after the return value is set. Otherwise offsets[0] and
   offsets[1] of the arguments are filled (and offsets[2] as well when
   real_offset_count is at least 3) before the jump to quit. All stored
   values are relative to the begin field of the arguments. */
2193 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2194 {
2195 DEFINE_COMPILER;
2196 struct sljit_jump *jump;
2197
/* STR_END is read after SLJIT_SAVED_REG1 is overwritten below, and it is
   overwritten itself (as SLJIT_SAVED_REG2) by the computed end offset. */
2198 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2199 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2200 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2201
2202 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2203 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2204 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2205 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2206
2207 /* Store match begin and end. */
2208 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2209 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2210
/* offsets[2] is only written when there are at least three output slots.
   Its source is start_ptr in hard partial mode, and the word following
   hit_start otherwise. */
2211 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2212 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2213 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2214 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2215 #endif
2216 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2217 JUMPHERE(jump);
2218
/* offsets[1] is the end of the subject (STR_END - begin). */
2219 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2220 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2221 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2222 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2223 #endif
2224 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2225
/* offsets[0] is computed from start_used_ptr in hard partial mode, and
   from hit_start otherwise (the value was loaded above). */
2226 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2227 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2228 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2229 #endif
2230 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2231
2232 JUMPTO(SLJIT_JUMP, quit);
2233 }
2234
2235 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2236 {
2237 /* May destroy TMP1. */
2238 DEFINE_COMPILER;
2239 struct sljit_jump *jump;
2240
2241 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2242 {
2243 /* The value of -1 must be kept for start_used_ptr! */
2244 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2245 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2246 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2247 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2248 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2249 JUMPHERE(jump);
2250 }
2251 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2252 {
2253 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2254 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2255 JUMPHERE(jump);
2256 }
2257 }
2258
2259 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2260 {
2261 /* Detects if the character has an othercase. */
2262 unsigned int c;
2263
2264 #ifdef SUPPORT_UTF
2265 if (common->utf)
2266 {
2267 GETCHAR(c, cc);
2268 if (c > 127)
2269 {
2270 #ifdef SUPPORT_UCP
2271 return c != UCD_OTHERCASE(c);
2272 #else
2273 return FALSE;
2274 #endif
2275 }
2276 #ifndef COMPILE_PCRE8
2277 return common->fcc[c] != c;
2278 #endif
2279 }
2280 else
2281 #endif
2282 c = *cc;
2283 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2284 }
2285
2286 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2287 {
2288 /* Returns with the othercase. */
2289 #ifdef SUPPORT_UTF
2290 if (common->utf && c > 127)
2291 {
2292 #ifdef SUPPORT_UCP
2293 return UCD_OTHERCASE(c);
2294 #else
2295 return c;
2296 #endif
2297 }
2298 #endif
2299 return TABLE_GET(c, common->fcc, c);
2300 }
2301
2302 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2303 {
2304 /* Detects if the character and its othercase has only 1 bit difference. */
2305 unsigned int c, oc, bit;
2306 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2307 int n;
2308 #endif
2309
2310 #ifdef SUPPORT_UTF
2311 if (common->utf)
2312 {
2313 GETCHAR(c, cc);
2314 if (c <= 127)
2315 oc = common->fcc[c];
2316 else
2317 {
2318 #ifdef SUPPORT_UCP
2319 oc = UCD_OTHERCASE(c);
2320 #else
2321 oc = c;
2322 #endif
2323 }
2324 }
2325 else
2326 {
2327 c = *cc;
2328 oc = TABLE_GET(c, common->fcc, c);
2329 }
2330 #else
2331 c = *cc;
2332 oc = TABLE_GET(c, common->fcc, c);
2333 #endif
2334
2335 SLJIT_ASSERT(c != oc);
2336
2337 bit = c ^ oc;
2338 /* Optimized for English alphabet. */
2339 if (c <= 127 && bit == 0x20)
2340 return (0 << 8) | 0x20;
2341
2342 /* Since c != oc, they must have at least 1 bit difference. */
2343 if (!is_powerof2(bit))
2344 return 0;
2345
2346 #if defined COMPILE_PCRE8
2347
2348 #ifdef SUPPORT_UTF
2349 if (common->utf && c > 127)
2350 {
2351 n = GET_EXTRALEN(*cc);
2352 while ((bit & 0x3f) == 0)
2353 {
2354 n--;
2355 bit >>= 6;
2356 }
2357 return (n << 8) | bit;
2358 }
2359 #endif /* SUPPORT_UTF */
2360 return (0 << 8) | bit;
2361
2362 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2363
2364 #ifdef SUPPORT_UTF
2365 if (common->utf && c > 65535)
2366 {
2367 if (bit >= (1 << 10))
2368 bit >>= 10;
2369 else
2370 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2371 }
2372 #endif /* SUPPORT_UTF */
2373 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2374
2375 #endif /* COMPILE_PCRE[8|16|32] */
2376 }
2377
2378 static void check_partial(compiler_common *common, BOOL force)
2379 {
2380 /* Checks whether a partial matching is occurred. Does not modify registers. */
2381 DEFINE_COMPILER;
2382 struct sljit_jump *jump = NULL;
2383
2384 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2385
2386 if (common->mode == JIT_COMPILE)
2387 return;
2388
2389 if (!force)
2390 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2391 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2392 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2393
2394 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2395 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2396 else
2397 {
2398 if (common->partialmatchlabel != NULL)
2399 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2400 else
2401 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2402 }
2403
2404 if (jump != NULL)
2405 JUMPHERE(jump);
2406 }
2407
2408 static void check_str_end(compiler_common *common, jump_list **end_reached)
2409 {
2410 /* Does not affect registers. Usually used in a tight spot. */
2411 DEFINE_COMPILER;
2412 struct sljit_jump *jump;
2413
2414 if (common->mode == JIT_COMPILE)
2415 {
2416 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2417 return;
2418 }
2419
2420 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2421 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2422 {
2423 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2424 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2425 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2426 }
2427 else
2428 {
2429 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2430 if (common->partialmatchlabel != NULL)
2431 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2432 else
2433 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2434 }
2435 JUMPHERE(jump);
2436 }
2437
2438 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2439 {
2440 DEFINE_COMPILER;
2441 struct sljit_jump *jump;
2442
2443 if (common->mode == JIT_COMPILE)
2444 {
2445 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2446 return;
2447 }
2448
2449 /* Partial matching mode. */
2450 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2451 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2452 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2453 {
2454 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2455 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2456 }
2457 else
2458 {
2459 if (common->partialmatchlabel != NULL)
2460 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2461 else
2462 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2463 }
2464 JUMPHERE(jump);
2465 }
2466
2467 static void peek_char(compiler_common *common)
2468 {
2469 /* Reads the character into TMP1, keeps STR_PTR.
2470 Does not check STR_END. TMP2 Destroyed. */
2471 DEFINE_COMPILER;
2472 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2473 struct sljit_jump *jump;
2474 #endif
2475
2476 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2477 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2478 if (common->utf)
2479 {
2480 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2481 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2482 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2483 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2484 JUMPHERE(jump);
2485 }
2486 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2487
2488 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2489 if (common->utf)
2490 {
2491 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2492 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2493 /* TMP2 contains the high surrogate. */
2494 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2495 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2496 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2497 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2498 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2499 JUMPHERE(jump);
2500 }
2501 #endif
2502 }
2503
2504 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2505
2506 static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
2507 {
2508 /* Tells whether the character codes below 128 are enough
2509 to determine a match. */
2510 const pcre_uint8 value = nclass ? 0xff : 0;
2511 const pcre_uint8* end = bitset + 32;
2512
2513 bitset += 16;
2514 do
2515 {
2516 if (*bitset++ != value)
2517 return FALSE;
2518 }
2519 while (bitset < end);
2520 return TRUE;
2521 }
2522
2523 static void read_char7_type(compiler_common *common, BOOL full_read)
2524 {
2525 /* Reads the precise character type of a character into TMP1, if the character
2526 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2527 full_read argument tells whether characters above max are accepted or not. */
2528 DEFINE_COMPILER;
2529 struct sljit_jump *jump;
2530
2531 SLJIT_ASSERT(common->utf);
2532
2533 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2534 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2535
2536 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2537
2538 if (full_read)
2539 {
2540 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2541 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2542 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2543 JUMPHERE(jump);
2544 }
2545 }
2546
2547 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2548
2549 static void read_char_max(compiler_common *common, pcre_uint32 max, BOOL full_read)
2550 {
2551 /* Reads the precise value of a character into TMP1, if the character is
2552 less than or equal to max. Otherwise it returns with a value greater than max.
2553 Does not check STR_END. The full_read argument tells whether characters above
2554 max are accepted or not. */
2555 DEFINE_COMPILER;
2556 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2557 struct sljit_jump *jump;
2558 #endif
2559
2560 SLJIT_UNUSED_ARG(full_read);
2561 SLJIT_UNUSED_ARG(max);
2562
2563 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2564 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2565
2566 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2567 if (common->utf)
2568 {
2569 if (max < 128 && !full_read)
2570 return;
2571
2572 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2573 if (max >= 0x800)
2574 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2575 else if (max < 128)
2576 {
2577 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2578 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2579 }
2580 else
2581 {
2582 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2583 if (!full_read)
2584 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2585 else
2586 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2587 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2588 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2589 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2590 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2591 if (full_read)
2592 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2593 }
2594 JUMPHERE(jump);
2595 }
2596 #endif
2597
2598 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2599 if (common->utf)
2600 {
2601 if (max >= 0x10000)
2602 {
2603 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2604 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2605 /* TMP2 contains the high surrogate. */
2606 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2607 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2608 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2609 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2610 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2611 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2612 JUMPHERE(jump);
2613 return;
2614 }
2615
2616 if (max < 0xd800 && !full_read)
2617 return;
2618
2619 /* Skip low surrogate if necessary. */
2620 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2621 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2622 if (full_read)
2623 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2624 if (max >= 0xd800)
2625 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2626 JUMPHERE(jump);
2627 }
2628 #endif
2629 }
2630
2631 static SLJIT_INLINE void read_char(compiler_common *common)
2632 {
2633 read_char_max(common, READ_CHAR_ANY, TRUE);
2634 }
2635
2636 static void read_char8_type(compiler_common *common, BOOL full_read)
2637 {
2638 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
2639 The full_read argument tells whether characters above max are accepted or not. */
2640 DEFINE_COMPILER;
2641 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2642 struct sljit_jump *jump;
2643 #endif
2644 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2645 struct sljit_jump *jump2;
2646 #endif
2647
2648 SLJIT_UNUSED_ARG(full_read);
2649
2650 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2651 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2652
2653 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2654 if (common->utf)
2655 {
2656 /* This can be an extra read in some situations, but hopefully
2657 it is needed in most cases. */
2658 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2659 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2660 if (!full_read)
2661 {
2662 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2663 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2664 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2665 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2666 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2667 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2668 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2669 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2670 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2671 JUMPHERE(jump2);
2672 }
2673 else
2674 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2675 JUMPHERE(jump);
2676 return;
2677 }
2678 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2679
2680 #if !defined COMPILE_PCRE8
2681 /* The ctypes array contains only 256 values. */
2682 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2683 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2684 #endif
2685 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2686 #if !defined COMPILE_PCRE8
2687 JUMPHERE(jump);
2688 #endif
2689
2690 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2691 if (common->utf && full_read)
2692 {
2693 /* Skip low surrogate if necessary. */
2694 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2695 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2696 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2697 JUMPHERE(jump);
2698 }
2699 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2700 }
2701
2702 static void skip_char_back(compiler_common *common)
2703 {
2704 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2705 DEFINE_COMPILER;
2706 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2707 #if defined COMPILE_PCRE8
2708 struct sljit_label *label;
2709
2710 if (common->utf)
2711 {
2712 label = LABEL();
2713 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2714 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2715 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2716 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2717 return;
2718 }
2719 #elif defined COMPILE_PCRE16
2720 if (common->utf)
2721 {
2722 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2723 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2724 /* Skip low surrogate if necessary. */
2725 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2726 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2727 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2728 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2729 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2730 return;
2731 }
2732 #endif /* COMPILE_PCRE[8|16] */
2733 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2734 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2735 }
2736
2737 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2738 {
2739 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2740 DEFINE_COMPILER;
2741 struct sljit_jump *jump;
2742
2743 if (nltype == NLTYPE_ANY)
2744 {
2745 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2746 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2747 }
2748 else if (nltype == NLTYPE_ANYCRLF)
2749 {
2750 if (jumpifmatch)
2751 {
2752 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2753 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2754 }
2755 else
2756 {
2757 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2758 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2759 JUMPHERE(jump);
2760 }
2761 }
2762 else
2763 {
2764 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2765 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2766 }
2767 }
2768
2769 #ifdef SUPPORT_UTF
2770
2771 #if defined COMPILE_PCRE8
2772 static void do_utfreadchar(compiler_common *common)
2773 {
2774 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2775 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2776 DEFINE_COMPILER;
2777 struct sljit_jump *jump;
2778
2779 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2780 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2781 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2782 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2783 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2784 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2785
2786 /* Searching for the first zero. */
2787 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2788 jump = JUMP(SLJIT_C_NOT_ZERO);
2789 /* Two byte sequence. */
2790 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2791 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2792 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2793
2794 JUMPHERE(jump);
2795 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2796 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2797 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2798 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2799 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2800
2801 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2802 jump = JUMP(SLJIT_C_NOT_ZERO);
2803 /* Three byte sequence. */
2804 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2805 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2806 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2807
2808 /* Four byte sequence. */
2809 JUMPHERE(jump);
2810 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2811 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2812 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2813 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2814 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2815 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2816 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2817 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2818 }
2819
2820 static void do_utfreadchar16(compiler_common *common)
2821 {
2822 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2823 of the character (>= 0xc0). Return value in TMP1. */
2824 DEFINE_COMPILER;
2825 struct sljit_jump *jump;
2826
2827 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2828 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2829 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2830 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2831 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2832 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2833
2834 /* Searching for the first zero. */
2835 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2836 jump = JUMP(SLJIT_C_NOT_ZERO);
2837 /* Two byte sequence. */
2838 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2839 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2840
2841 JUMPHERE(jump);
2842 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2843 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2844 /* This code runs only in 8 bit mode. No need to shift the value. */
2845 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2846 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2847 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2848 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2849 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2850 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2851 /* Three byte sequence. */
2852 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2853 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2854 }
2855
2856 static void do_utfreadtype8(compiler_common *common)
2857 {
2858 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2859 of the character (>= 0xc0). Return value in TMP1. */
2860 DEFINE_COMPILER;
2861 struct sljit_jump *jump;
2862 struct sljit_jump *compare;
2863
2864 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2865
2866 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2867 jump = JUMP(SLJIT_C_NOT_ZERO);
2868 /* Two byte sequence. */
2869 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2870 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2871 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2872 /* The upper 5 bits are known at this point. */
2873 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2874 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2875 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2876 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2877 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2878 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2879
2880 JUMPHERE(compare);
2881 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2882 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2883
2884 /* We only have types for characters less than 256. */
2885 JUMPHERE(jump);
2886 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2887 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2888 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2889 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2890 }
2891
2892 #endif /* COMPILE_PCRE8 */
2893
2894 #endif /* SUPPORT_UTF */
2895
2896 #ifdef SUPPORT_UCP
2897
2898 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2899 #define UCD_BLOCK_MASK 127
2900 #define UCD_BLOCK_SHIFT 7
2901
2902 static void do_getucd(compiler_common *common)
2903 {
2904 /* Search the UCD record for the character comes in TMP1.
2905 Returns chartype in TMP1 and UCD offset in TMP2. */
2906 DEFINE_COMPILER;
2907
2908 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2909
2910 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2911 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2912 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2913 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2914 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2915 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2916 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2917 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2918 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2919 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2920 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2921 }
2922 #endif
2923
2924 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2925 {
2926 DEFINE_COMPILER;
2927 struct sljit_label *mainloop;
2928 struct sljit_label *newlinelabel = NULL;
2929 struct sljit_jump *start;
2930 struct sljit_jump *end = NULL;
2931 struct sljit_jump *nl = NULL;
2932 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2933 struct sljit_jump *singlechar;
2934 #endif
2935 jump_list *newline = NULL;
2936 BOOL newlinecheck = FALSE;
2937 BOOL readuchar = FALSE;
2938
2939 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2940 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2941 newlinecheck = TRUE;
2942
2943 if (firstline)
2944 {
2945 /* Search for the end of the first line. */
2946 SLJIT_ASSERT(common->first_line_end != 0);
2947 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2948
2949 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2950 {
2951 mainloop = LABEL();
2952 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2953 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2954 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2955 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2956 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2957 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2958 JUMPHERE(end);
2959 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2960 }
2961 else
2962 {
2963 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2964 mainloop = LABEL();
2965 /* Continual stores does not cause data dependency. */
2966 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2967 read_char_max(common, common->nlmax, TRUE);
2968 check_newlinechar(common, common->nltype, &newline, TRUE);
2969 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2970 JUMPHERE(end);
2971 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2972 set_jumps(newline, LABEL());
2973 }
2974
2975 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2976 }
2977
2978 start = JUMP(SLJIT_JUMP);
2979
2980 if (newlinecheck)
2981 {
2982 newlinelabel = LABEL();
2983 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2984 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2985 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2986 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2987 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2988 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2989 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2990 #endif
2991 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2992 nl = JUMP(SLJIT_JUMP);
2993 }
2994
2995 mainloop = LABEL();
2996
2997 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2998 #ifdef SUPPORT_UTF
2999 if (common->utf) readuchar = TRUE;
3000 #endif
3001 if (newlinecheck) readuchar = TRUE;
3002
3003 if (readuchar)
3004 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3005
3006 if (newlinecheck)
3007 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3008
3009 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3010 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3011 #if defined COMPILE_PCRE8
3012 if (common->utf)
3013 {
3014 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3015 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3016 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3017 JUMPHERE(singlechar);
3018 }
3019 #elif defined COMPILE_PCRE16
3020 if (common->utf)
3021 {
3022 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3023 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3024 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3025 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3026 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3027 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3028 JUMPHERE(singlechar);
3029 }
3030 #endif /* COMPILE_PCRE[8|16] */
3031 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3032 JUMPHERE(start);
3033
3034 if (newlinecheck)
3035 {
3036 JUMPHERE(end);
3037 JUMPHERE(nl);
3038 }
3039
3040 return mainloop;
3041 }
3042
3043 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
3044 {
3045 /* Recursive function, which scans prefix literals. */
3046 int len, repeat, len_save, consumed = 0;
3047 pcre_uint32 caseless, chr, mask;
3048 pcre_uchar *alternative, *cc_save;
3049 BOOL last, any;
3050
3051 repeat = 1;
3052 while (TRUE)
3053 {
3054 last = TRUE;
3055 any = FALSE;
3056 caseless = 0;
3057 switch (*cc)
3058 {
3059 case OP_CHARI:
3060 caseless = 1;
3061 case OP_CHAR:
3062 last = FALSE;
3063 cc++;
3064 break;
3065
3066 case OP_SOD:
3067 case OP_SOM:
3068 case OP_SET_SOM:
3069 case OP_NOT_WORD_BOUNDARY:
3070 case OP_WORD_BOUNDARY:
3071 case OP_EODN:
3072 case OP_EOD:
3073 case OP_CIRC:
3074 case OP_CIRCM:
3075 case OP_DOLL:
3076 case OP_DOLLM:
3077 /* Zero width assertions. */
3078 cc++;
3079 continue;
3080
3081 case OP_PLUS:
3082 case OP_MINPLUS:
3083 case OP_POSPLUS:
3084 cc++;
3085 break;
3086
3087 case OP_EXACTI:
3088 caseless = 1;
3089 case OP_EXACT:
3090 repeat = GET2(cc, 1);
3091 last = FALSE;
3092 cc += 1 + IMM2_SIZE;
3093 break;
3094
3095 case OP_PLUSI:
3096 case OP_MINPLUSI:
3097 case OP_POSPLUSI:
3098 caseless = 1;
3099 cc++;
3100 break;
3101
3102 case OP_KET:
3103 cc += 1 + LINK_SIZE;
3104 continue;
3105
3106 case OP_ALT:
3107 cc += GET(cc, 1);
3108 continue;
3109
3110 case OP_ONCE:
3111 case OP_ONCE_NC:
3112 case OP_BRA:
3113 case OP_BRAPOS:
3114 case OP_CBRA:
3115 case OP_CBRAPOS:
3116 alternative = cc + GET(cc, 1);
3117 while (*alternative == OP_ALT)
3118 {
3119 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3120 if (max_chars == 0)
3121 return consumed;
3122 alternative += GET(alternative, 1);
3123 }
3124
3125 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3126 cc += IMM2_SIZE;
3127 cc += 1 + LINK_SIZE;
3128 continue;
3129
3130 case OP_CLASS:
3131 case OP_NCLASS:
3132 any = TRUE;
3133 cc += 1 + 32 / sizeof(pcre_uchar);
3134 break;
3135
3136 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3137 case OP_XCLASS:
3138 any = TRUE;
3139 cc += GET(cc, 1);
3140 break;
3141 #endif
3142
3143 case OP_NOT_DIGIT:
3144 case OP_DIGIT:
3145 case OP_NOT_WHITESPACE:
3146 case OP_WHITESPACE:
3147 case OP_NOT_WORDCHAR:
3148 case OP_WORDCHAR:
3149 case OP_ANY:
3150 case OP_ALLANY:
3151 any = TRUE;
3152 cc++;
3153 break;
3154
3155 #ifdef SUPPORT_UCP
3156 case OP_NOTPROP:
3157 case OP_PROP:
3158 any = TRUE;
3159 cc += 1 + 2;
3160 break;
3161 #endif
3162
3163 case OP_TYPEEXACT:
3164 repeat = GET2(cc, 1);
3165 cc += 1 + IMM2_SIZE;
3166 continue;
3167
3168 default:
3169 return consumed;
3170 }
3171
3172 if (any)
3173 {
3174 #ifdef SUPPORT_UTF
3175 if (common->utf) return consumed;
3176 #endif
3177 #if defined COMPILE_PCRE8
3178 mask = 0xff;
3179 #elif defined COMPILE_PCRE16
3180 mask = 0xffff;
3181 #elif defined COMPILE_PCRE32
3182 mask = 0xffffffff;
3183 #else
3184 SLJIT_ASSERT_STOP();
3185 #endif
3186
3187 do
3188 {
3189 chars[0] = mask;
3190 chars[1] = mask;
3191
3192 if (--max_chars == 0)
3193 return consumed;
3194 consumed++;
3195 chars += 2;
3196 }
3197 while (--repeat > 0);
3198
3199 repeat = 1;
3200 continue;
3201 }
3202
3203 len = 1;
3204 #ifdef SUPPORT_UTF
3205 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3206 #endif
3207
3208 if (caseless != 0 && char_has_othercase(common, cc))
3209 {
3210 caseless = char_get_othercase_bit(common, cc);
3211 if (caseless == 0)
3212 return consumed;
3213 #ifdef COMPILE_PCRE8
3214 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3215 #else
3216 if ((caseless & 0x100) != 0)
3217 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
3218 else
3219 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
3220 #endif
3221 }
3222 else
3223 caseless = 0;
3224
3225 len_save = len;
3226 cc_save = cc;
3227 while (TRUE)
3228 {
3229 do
3230 {
3231 chr = *cc;
3232 #ifdef COMPILE_PCRE32
3233 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3234 return consumed;
3235 #endif
3236 mask = 0;
3237 if ((pcre_uint32)len == (caseless & 0xff))
3238 {
3239 mask = caseless >> 8;
3240 chr |= mask;
3241 }
3242
3243 if (chars[0] == NOTACHAR)
3244 {
3245 chars[0] = chr;
3246 chars[1] = mask;
3247 }
3248 else
3249 {
3250 mask |= chars[0] ^ chr;
3251 chr |= mask;
3252 chars[0] = chr;
3253 chars[1] |= mask;
3254 }
3255
3256 len--;
3257 if (--max_chars == 0)
3258 return consumed;
3259 consumed++;
3260 chars += 2;
3261 cc++;
3262 }
3263 while (len > 0);
3264
3265 if (--repeat == 0)
3266 break;
3267
3268 len = len_save;
3269 cc = cc_save;
3270 }
3271
3272 repeat = 1;
3273 if (last)
3274 return consumed;
3275 }
3276 }
3277
3278 #define MAX_N_CHARS 16
3279
3280 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3281 {
3282 DEFINE_COMPILER;
3283 struct sljit_label *start;
3284 struct sljit_jump *quit;
3285 pcre_uint32 chars[MAX_N_CHARS * 2];
3286 pcre_uint8 ones[MAX_N_CHARS];
3287 pcre_uint32 mask;
3288 int i, max;
3289 int offsets[3];
3290
3291 for (i = 0; i < MAX_N_CHARS; i++)
3292 {
3293 chars[i << 1] = NOTACHAR;
3294 chars[(i << 1) + 1] = 0;
3295 }
3296
3297 max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3298
3299 if (max <= 1)
3300 return FALSE;
3301
3302 for (i = 0; i < max; i++)
3303 {
3304 mask = chars[(i << 1) + 1];
3305 ones[i] = ones_in_half_byte[mask & 0xf];
3306 mask >>= 4;
3307 while (mask != 0)
3308 {
3309 ones[i] += ones_in_half_byte[mask & 0xf];
3310 mask >>= 4;
3311 }
3312 }
3313
3314 offsets[0] = -1;
3315 /* Scan forward. */
3316 for (i = 0; i < max; i++)
3317 if (ones[i] <= 2) {
3318 offsets[0] = i;
3319 break;
3320 }
3321
3322 if (offsets[0] == -1)
3323 return FALSE;
3324
3325 /* Scan backward. */
3326 offsets[1] = -1;
3327 for (i = max - 1; i > offsets[0]; i--)
3328 if (ones[i] <= 2) {
3329 offsets[1] = i;
3330 break;
3331 }
3332
3333 offsets[2] = -1;
3334 if (offsets[1] >= 0)
3335 {
3336 /* Scan from middle. */
3337 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3338 if (ones[i] <= 2)
3339 {
3340 offsets[2] = i;
3341 break;
3342 }
3343
3344 if (offsets[2] == -1)
3345 {
3346 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3347 if (ones[i] <= 2)
3348 {
3349 offsets[2] = i;
3350 break;
3351 }
3352 }
3353 }
3354
3355 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3356 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3357
3358 chars[0] = chars[offsets[0] << 1];
3359 chars[1] = chars[(offsets[0] << 1) + 1];
3360 if (offsets[2] >= 0)
3361 {
3362 chars[2] = chars[offsets[2] << 1];
3363 chars[3] = chars[(offsets[2] << 1) + 1];
3364 }
3365 if (offsets[1] >= 0)
3366 {
3367 chars[4] = chars[offsets[1] << 1];
3368 chars[5] = chars[(offsets[1] << 1) + 1];
3369 }
3370
3371 max -= 1;
3372 if (firstline)
3373 {
3374 SLJIT_ASSERT(common->first_line_end != 0);
3375 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3376 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3377 }
3378 else
3379 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3380
3381 start = LABEL();
3382 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3383
3384 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3385 if (offsets[1] >= 0)
3386 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3387 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3388
3389 if (chars[1] != 0)
3390 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3391 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3392 if (offsets[2] >= 0)
3393 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3394
3395 if (offsets[1] >= 0)
3396 {
3397 if (chars[5] != 0)
3398 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3399 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3400 }
3401
3402 if (offsets[2] >= 0)
3403 {
3404 if (chars[3] != 0)
3405 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3406 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3407 }
3408 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3409
3410 JUMPHERE(quit);
3411
3412 if (firstline)
3413 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3414 else
3415 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3416 return TRUE;
3417 }
3418
3419 #undef MAX_N_CHARS
3420
3421 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3422 {
3423 DEFINE_COMPILER;
3424 struct sljit_label *start;
3425 struct sljit_jump *quit;
3426 struct sljit_jump *found;
3427 pcre_uchar oc, bit;
3428
3429 if (firstline)
3430 {
3431 SLJIT_ASSERT(common->first_line_end != 0);
3432 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3433 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3434 }
3435
3436 start = LABEL();
3437 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3438 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3439
3440 oc = first_char;
3441 if (caseless)
3442 {
3443 oc = TABLE_GET(first_char, common->fcc, first_char);
3444 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3445 if (first_char > 127 && common->utf)
3446 oc = UCD_OTHERCASE(first_char);
3447 #endif
3448 }
3449 if (first_char == oc)
3450 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3451 else
3452 {
3453 bit = first_char ^ oc;
3454 if (is_powerof2(bit))
3455 {
3456 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3457 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3458 }
3459 else
3460 {
3461 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3462 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3463 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3464 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3465 found = JUMP(SLJIT_C_NOT_ZERO);
3466 }
3467 }
3468
3469 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3470 JUMPTO(SLJIT_JUMP, start);
3471 JUMPHERE(found);
3472 JUMPHERE(quit);
3473
3474 if (firstline)
3475 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3476 }
3477
3478 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3479 {
3480 DEFINE_COMPILER;
3481 struct sljit_label *loop;
3482 struct sljit_jump *lastchar;
3483 struct sljit_jump *firstchar;
3484 struct sljit_jump *quit;
3485 struct sljit_jump *foundcr = NULL;
3486 struct sljit_jump *notfoundnl;
3487 jump_list *newline = NULL;
3488
3489 if (firstline)
3490 {
3491 SLJIT_ASSERT(common->first_line_end != 0);
3492 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3493 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3494 }
3495
3496 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3497 {
3498 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3499 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3500 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3501 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3502 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3503
3504 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3505 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3506 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3507 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3508 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3509 #endif
3510 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3511
3512 loop = LABEL();
3513 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3514 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3515 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3516 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3517 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3518 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3519
3520 JUMPHERE(quit);
3521 JUMPHERE(firstchar);
3522 JUMPHERE(lastchar);
3523
3524 if (firstline)
3525 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3526 return;
3527 }
3528
3529 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3530 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3531 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3532 skip_char_back(common);
3533
3534 loop = LABEL();
3535 read_char_max(common, common->nlmax, TRUE);
3536 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3537 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3538 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3539 check_newlinechar(common, common->nltype, &newline, FALSE);
3540 set_jumps(newline, loop);
3541
3542 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3543 {
3544 quit = JUMP(SLJIT_JUMP);
3545 JUMPHERE(foundcr);
3546 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3547 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3548 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3549 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3550 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3551 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3552 #endif
3553 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3554 JUMPHERE(notfoundnl);
3555 JUMPHERE(quit);
3556 }
3557 JUMPHERE(lastchar);
3558 JUMPHERE(firstchar);
3559
3560 if (firstline)
3561 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3562 }
3563
3564 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3565
3566 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3567 {
3568 DEFINE_COMPILER;
3569 struct sljit_label *start;
3570 struct sljit_jump *quit;
3571 struct sljit_jump *found = NULL;
3572 jump_list *matches = NULL;
3573 #ifndef COMPILE_PCRE8
3574 struct sljit_jump *jump;
3575 #endif
3576
3577 if (firstline)
3578 {
3579 SLJIT_ASSERT(common->first_line_end != 0);
3580 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3581 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3582 }
3583
3584 start = LABEL();
3585 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3586 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3587 #ifdef SUPPORT_UTF
3588 if (common->utf)
3589 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3590 #endif
3591
3592 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3593 {
3594 #ifndef COMPILE_PCRE8
3595 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3596 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3597 JUMPHERE(jump);
3598 #endif
3599 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3600 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3601 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3602 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3603 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3604 found = JUMP(SLJIT_C_NOT_ZERO);
3605 }
3606
3607 #ifdef SUPPORT_UTF
3608 if (common->utf)
3609 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3610 #endif
3611 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3612 #ifdef SUPPORT_UTF
3613 #if defined COMPILE_PCRE8
3614 if (common->utf)
3615 {
3616 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3617 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3618 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3619 }
3620 #elif defined COMPILE_PCRE16
3621 if (common->utf)
3622 {
3623 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3624 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3625 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3626 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3627 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3628 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3629 }
3630 #endif /* COMPILE_PCRE[8|16] */
3631 #endif /* SUPPORT_UTF */
3632 JUMPTO(SLJIT_JUMP, start);
3633 if (found != NULL)
3634 JUMPHERE(found);
3635 if (matches != NULL)
3636 set_jumps(matches, LABEL());
3637 JUMPHERE(quit);
3638
3639 if (firstline)
3640 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3641 }
3642
3643 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3644 {
3645 DEFINE_COMPILER;
3646 struct sljit_label *loop;
3647 struct sljit_jump *toolong;
3648 struct sljit_jump *alreadyfound;
3649 struct sljit_jump *found;
3650 struct sljit_jump *foundoc = NULL;
3651 struct sljit_jump *notfound;
3652 pcre_uint32 oc, bit;
3653
3654 SLJIT_ASSERT(common->req_char_ptr != 0);
3655 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3656 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3657 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3658 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3659
3660 if (has_firstchar)
3661 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3662 else
3663 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3664
3665 loop = LABEL();
3666 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3667
3668 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3669 oc = req_char;
3670 if (caseless)
3671 {
3672 oc = TABLE_GET(req_char, common->fcc, req_char);
3673 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3674 if (req_char > 127 && common->utf)
3675 oc = UCD_OTHERCASE(req_char);
3676 #endif
3677 }
3678 if (req_char == oc)
3679 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3680 else
3681 {
3682 bit = req_char ^ oc;
3683 if (is_powerof2(bit))
3684 {
3685 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3686 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3687 }
3688 else
3689 {
3690 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3691 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3692 }
3693 }
3694 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3695 JUMPTO(SLJIT_JUMP, loop);
3696
3697 JUMPHERE(found);
3698 if (foundoc)
3699 JUMPHERE(foundoc);
3700 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3701 JUMPHERE(alreadyfound);
3702 JUMPHERE(toolong);
3703 return notfound;
3704 }
3705
/* Emits the shared "revert frames" routine. It is entered through
sljit_emit_fast_enter() and left through sljit_emit_fast_return(), with the
return address held in RETURN_ADDR.

The generated code walks a sequence of records starting at STACK_TOP and
copies saved words back into the area addressed from the local base
(GET_LOCAL_BASE). The first word of each record selects its layout:
  > 0  offset from the local base; the next two words are stored at that
       offset and at the following word; the record is three words long.
  < 0  the negated value is the offset; one word is stored; the record is
       two words long.
  == 0 end of the sequence, the routine returns.
TMP1, TMP2 and TMP3 are overwritten by the generated code. */
3706 static void do_revertframes(compiler_common *common)
3707 {
3708 DEFINE_COMPILER;
3709 struct sljit_jump *jump;
3710 struct sljit_label *mainloop;
3711
3712 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 walks the records, TMP3 is the base the stored offsets refer to. */
3713 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3714 GET_LOCAL_BASE(TMP3, 0, 0);
3715
3716 /* Drop frames until we reach STACK_TOP. */
3717 mainloop = LABEL();
3718 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Signed comparison of the record header with zero. */
3719 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3720 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3721
/* Positive header: restore two consecutive words, skip three words. */
3722 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3723 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3724 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3725 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3726 JUMPTO(SLJIT_JUMP, mainloop);
3727
3728 JUMPHERE(jump);
/* NOTE(review): no new comparison is emitted here, so this jump appears to
rely on the flags of the subtraction above still being valid - confirm. */
3729 jump = JUMP(SLJIT_C_SIG_LESS);
3730 /* End of dropping frames. */
3731 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3732
3733 JUMPHERE(jump);
/* Negative header: restore a single word, skip two words. */
3734 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3735 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3736 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3737 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3738 JUMPTO(SLJIT_JUMP, mainloop);
3739 }
3740
/* Emits the shared word boundary routine. The return address is kept in
the local slot LOCALS0 for the duration of the routine.

The generated code classifies the character before STR_PTR (result stored
in LOCALS1; 0 when STR_PTR is at or before jit_arguments.begin) and the
character at STR_PTR (result in TMP2; 0 when check_str_end() reports the
end of the subject). Each result is 1 for a word character and 0 otherwise.
The final XOR with SLJIT_SET_E combines them, so the result is non-zero
exactly when one side is a word character and the other is not.

Classification:
 - with common->use_ucp: underscore, or a type (as produced by the getucd
   helper) in the range ucp_Ll..ucp_Lu or ucp_Nd..ucp_No;
 - otherwise: the ctype_word bit (0x10) of the common->ctypes table;
   characters above 255 are never looked up and count as non-word. */
3741 static void check_wordboundary(compiler_common *common)
3742 {
3743 DEFINE_COMPILER;
3744 struct sljit_jump *skipread;
3745 jump_list *skipread_list = NULL;
3746 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3747 struct sljit_jump *jump;
3748 #endif
3749
/* The shift by 4 used below depends on this value. */
3750 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3751
3752 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3753 /* Get type of the previous char, and put it to LOCALS1. */
3754 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3755 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
/* Default: no previous character means "not a word character". */
3756 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3757 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back and read the previous character; read_char() moves STR_PTR
forward again (presumably back to its original position - confirm). */
3758 skip_char_back(common);
3759 check_start_used_ptr(common);
3760 read_char(common);
3761
3762 /* Testing char type. */
3763 #ifdef SUPPORT_UCP
3764 if (common->use_ucp)
3765 {
/* TMP2 = 1 is the answer for underscore, which skips the type lookup. */
3766 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3767 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3768 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two range tests: ucp_Ll..ucp_Lu, then ucp_Nd..ucp_No. */
3769 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3770 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3771 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3772 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3773 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3774 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3775 JUMPHERE(jump);
3776 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3777 }
3778 else
3779 #endif
3780 {
/* Table lookup; characters above 255 keep the zero stored in LOCALS1. */
3781 #ifndef COMPILE_PCRE8
3782 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3783 #elif defined SUPPORT_UTF
3784 /* Here LOCALS1 has already been zeroed. */
3785 jump = NULL;
3786 if (common->utf)
3787 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3788 #endif /* COMPILE_PCRE8 */
3789 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3790 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3791 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3792 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3793 #ifndef COMPILE_PCRE8
3794 JUMPHERE(jump);
3795 #elif defined SUPPORT_UTF
3796 if (jump != NULL)
3797 JUMPHERE(jump);
3798 #endif /* COMPILE_PCRE8 */
3799 }
3800 JUMPHERE(skipread);
3801
/* Now the character at STR_PTR; TMP2 = 0 is the answer at end of subject. */
3802 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3803 check_str_end(common, &skipread_list);
3804 peek_char(common);
3805
3806 /* Testing char type. This is a code duplication. */
3807 #ifdef SUPPORT_UCP
3808 if (common->use_ucp)
3809 {
3810 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3811 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3812 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3813 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3814 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3815 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3816 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3817 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3818 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3819 JUMPHERE(jump);
3820 }
3821 else
3822 #endif
3823 {
3824 #ifndef COMPILE_PCRE8
3825 /* TMP2 may be destroyed by peek_char. */
3826 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3827 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3828 #elif defined SUPPORT_UTF
3829 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3830 jump = NULL;
3831 if (common->utf)
3832 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3833 #endif
3834 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3835 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3836 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3837 #ifndef COMPILE_PCRE8
3838 JUMPHERE(jump);
3839 #elif defined SUPPORT_UTF
3840 if (jump != NULL)
3841 JUMPHERE(jump);
3842 #endif /* COMPILE_PCRE8 */
3843 }
3844 set_jumps(skipread_list, LABEL());
3845
/* Non-zero result: the two classifications differ, i.e. a word boundary. */
3846 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3847 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3848 }
3849
/* Tries to replace a 256 bit class bitmap test with at most a few integer
comparisons on TMP1.

bits       the 32 byte bitmap; bit (c & 7) of byte (c >> 3) describes c.
nclass     together with the value of the last run, decides whether an
           extra boundary at 256 is recorded (see below).
invert     when set, the meaning of the bitmap is inverted for the
           emitted comparisons.
backtracks list that receives the emitted jumps.

The bitmap is first converted into a list of boundaries: ranges[k] is a
character value at which the bit value differs from the previous one. Only
bitmaps with at most four boundaries are handled.

Returns TRUE when comparison code has been emitted (TMP1 may have been
modified by it), and FALSE, without emitting anything, when the bitmap is
too irregular and the caller has to test the bitmap itself. */
3850 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
3851 {
3852 DEFINE_COMPILER;
3853 int ranges[MAX_RANGE_SIZE];
3854 pcre_uint8 bit, cbit, all;
3855 int i, byte, length = 0;
3856
3857 bit = bits[0] & 0x1;
3858 /* All bits will be zero or one (since bit is zero or one). */
3859 all = -bit;
3860
/* Collect the boundaries. A whole byte equal to "all" (0x00 or 0xff)
contains no change and is skipped in one step. */
3861 for (i = 0; i < 256; )
3862 {
3863 byte = i >> 3;
3864 if ((i & 0x7) == 0 && bits[byte] == all)
3865 i += 8;
3866 else
3867 {
3868 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3869 if (cbit != bit)
3870 {
3871 if (length >= MAX_RANGE_SIZE)
3872 return FALSE;
3873 ranges[length] = i;
3874 length++;
3875 bit = cbit;
3876 all = -cbit;
3877 }
3878 i++;
3879 }
3880 }
3881
/* "bit" is now the value of the last run. Depending on nclass, the end of
the 8 bit range is recorded as one more boundary at 256. */
3882 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3883 {
3884 if (length >= MAX_RANGE_SIZE)
3885 return FALSE;
3886 ranges[length] = 256;
3887 length++;
3888 }
3889
/* Only up to four boundaries are translated (length is never negative
here, since it is only ever incremented from zero). */
3890 if (length < 0 || length > 4)
3891 return FALSE;
3892
/* From here on "bit" is the value of the first run, after inversion. */
3893 bit = bits[0] & 0x1;
3894 if (invert) bit ^= 0x1;
3895
3896 /* No character is accepted. */
3897 if (length == 0 && bit == 0)
3898 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3899
3900 switch(length)
3901 {
3902 case 0:
3903 /* When bit != 0, all characters are accepted. */
3904 return TRUE;
3905
3906 case 1:
/* One boundary: a single threshold comparison. */
3907 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3908 return TRUE;
3909
3910 case 2:
/* Two boundaries: one interval [ranges[0], ranges[1]), tested as a biased
comparison, or as a plain equality when it holds a single character. */
3911 if (ranges[0] + 1 != ranges[1])
3912 {
3913 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3914 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3915 }
3916 else
3917 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3918 return TRUE;
3919
3920 case 3:
/* Three boundaries: a threshold plus one interval. */
3921 if (bit != 0)
3922 {
3923 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3924 if (ranges[0] + 1 != ranges[1])
3925 {
3926 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3927 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3928 }
3929 else
3930 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3931 return TRUE;
3932 }
3933
3934 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
3935 if (ranges[1] + 1 != ranges[2])
3936 {
3937 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
3938 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3939 }
3940 else
3941 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
3942 return TRUE;
3943
3944 case 4:
/* Two intervals of equal length whose start values differ in exactly one
bit (that bit being clear in the lower start): OR that bit into TMP1 and
test against the upper interval only. */
3945 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
3946 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
3947 && is_powerof2(ranges[2] - ranges[0]))
3948 {
3949 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
3950 if (ranges[2] + 1 != ranges[3])
3951 {
3952 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3953 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3954 }
3955 else
3956 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3957 return TRUE;
3958 }
3959
3960 if (bit != 0)
3961 {
/* "i" remembers how much has already been subtracted from TMP1, so that
the second test can be biased relative to it. */
3962 i = 0;
3963 if (ranges[0] + 1 != ranges[1])
3964 {
3965 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3966 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3967 i = ranges[0];
3968 }
3969 else
3970 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3971
3972 if (ranges[2] + 1 != ranges[3])
3973 {
3974 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
3975 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3976 }
3977 else
3978 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
3979 return TRUE;
3980 }
3981
/* bit == 0: first test the outer span [ranges[0], ranges[3]), then the
inner interval [ranges[1], ranges[2]), both relative to ranges[0]. */
3982 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3983 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
3984 if (ranges[1] + 1 != ranges[2])
3985 {
3986 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
3987 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3988 }
3989 else
3990 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3991 return TRUE;
3992
3993 default:
/* Not reachable: length was limited to 0..4 above. */
3994 SLJIT_ASSERT_STOP();
3995 return FALSE;
3996 }
3997 }
3998
/* Emits the shared "any newline" test routine (fast call, return address in
RETURN_ADDR). The accepted values are 0x0a..0x0d and 0x85, plus 0x2028 and
0x2029 in the 16/32 bit libraries or when common->utf is set in the 8 bit
library. TMP2 collects the partial results and the last OP_FLAGS is emitted
with SLJIT_SET_E so the caller can branch on the outcome. TMP1 is modified
by the generated code. */
3999 static void check_anynewline(compiler_common *common)
4000 {
4001 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4002 DEFINE_COMPILER;
4003
4004 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4005
/* Bias TMP1 by 0x0a so that 0x0a..0x0d becomes the range 0..3. All later
constants are therefore written relative to 0x0a. */
4006 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4007 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4008 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4009 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4010 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4011 #ifdef COMPILE_PCRE8
4012 if (common->utf)
4013 {
4014 #endif
4015 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029 (in biased form), so one
comparison covers both values. */
4016 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4017 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4018 #ifdef COMPILE_PCRE8
4019 }
4020 #endif
4021 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison that was emitted above. */
4022 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4023 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4024 }
4025
/* Emits the shared horizontal space test routine (fast call, return address
in RETURN_ADDR). The accepted values are 0x09, 0x20 and 0xa0, plus 0x1680,
0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000 in the 16/32 bit
libraries or when common->utf is set in the 8 bit library. TMP2 collects
the partial results and the last OP_FLAGS is emitted with SLJIT_SET_E so
the caller can branch on the outcome. */
4026 static void check_hspace(compiler_common *common)
4027 {
4028 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
4029 DEFINE_COMPILER;
4030
4031 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4032
4033 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4034 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4035 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4036 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4037 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4038 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4039 #ifdef COMPILE_PCRE8
4040 if (common->utf)
4041 {
4042 #endif
4043 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4044 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4045 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4046 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4047 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here TMP1 is biased by 0x2000 (and thereby modified), which turns
0x2000..0x200a into the range 0..10; later constants are relative to it. */
4048 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4049 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4050 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4051 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4052 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4053 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4054 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4055 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4056 #ifdef COMPILE_PCRE8
4057 }
4058 #endif
4059 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison that was emitted above. */
4060 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4061
4062 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4063 }
4064
/* Emits the shared vertical space test routine (fast call, return address
in RETURN_ADDR). The accepted values are 0x0a..0x0d and 0x85, plus 0x2028
and 0x2029 in the 16/32 bit libraries or when common->utf is set in the
8 bit library. TMP2 collects the partial results and the last OP_FLAGS is
emitted with SLJIT_SET_E so the caller can branch on the outcome. TMP1 is
modified by the generated code. */
4065 static void check_vspace(compiler_common *common)
4066 {
4067 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
4068 DEFINE_COMPILER;
4069
4070 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4071
/* Bias TMP1 by 0x0a so that 0x0a..0x0d becomes the range 0..3. All later
constants are therefore written relative to 0x0a. */
4072 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4073 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4074 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4075 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4076 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4077 #ifdef COMPILE_PCRE8
4078 if (common->utf)
4079 {
4080 #endif
/* NOTE(review): SLJIT_SET_E is requested here, yet the flags are replaced
by the subtraction two lines below before anything reads them. */
4081 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029 (in biased form), so one
comparison covers both values. */
4082 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4083 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4084 #ifdef COMPILE_PCRE8
4085 }
4086 #endif
4087 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last comparison that was emitted above. */
4088 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4089
4090 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4091 }
4092
/* The comparison routines below borrow the STR_END and STACK_TOP registers
as character temporaries; their values are saved on entry and restored
before returning. */
4093 #define CHAR1 STR_END
4094 #define CHAR2 STACK_TOP
4095
/* Emits the shared case sensitive comparison routine (fast call, return
address in RETURN_ADDR).

Register usage of the generated code, as far as it is visible here:
  TMP1     pointer into the first sequence,
  STR_PTR  first moved back by TMP2; it then walks the second sequence,
  TMP2     remaining length, reduced by IN_UCHARS(1) per character.
The loop ends when TMP2 reaches zero or when two characters differ.
NOTE(review): callers presumably inspect TMP2 (zero on a full match)
afterwards - confirm at the call sites. */
4096 static void do_casefulcmp(compiler_common *common)
4097 {
4098 DEFINE_COMPILER;
4099 struct sljit_jump *jump;
4100 struct sljit_label *label;
4101
4102 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4103 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the two borrowed registers. */
4104 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4105 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers start one character early because the loads in the loop
address offset IN_UCHARS(1); presumably MOVU_UCHAR also advances the base
register by that offset - confirm against the sljit documentation. */
4106 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4107 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4108
4109 label = LABEL();
4110 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4111 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4112 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4113 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4114 JUMPTO(SLJIT_C_NOT_ZERO, label);
4115
4116 JUMPHERE(jump);
/* Undo the initial one character bias of STR_PTR and restore the borrowed
registers. */
4117 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4118 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4119 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4120 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4121 }
4122
/* do_caselesscmp additionally borrows the STACK_LIMIT register to hold the
address of the common->lcc table. */
4123 #define LCC_TABLE STACK_LIMIT
4124
/* Emits the shared caseless comparison routine (fast call, return address
in RETURN_ADDR). It has the same structure as do_casefulcmp, except that
each character is replaced by its entry in the common->lcc table before the
comparison. In the 16 and 32 bit libraries characters above 255 are
compared without translation.

Register usage of the generated code, as far as it is visible here:
  TMP1     pointer into the first sequence,
  STR_PTR  first moved back by TMP2; it then walks the second sequence,
  TMP2     remaining length, reduced by IN_UCHARS(1) per character.
NOTE(review): callers presumably inspect TMP2 (zero on a full match)
afterwards - confirm at the call sites. */
4125 static void do_caselesscmp(compiler_common *common)
4126 {
4127 DEFINE_COMPILER;
4128 struct sljit_jump *jump;
4129 struct sljit_label *label;
4130
4131 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4132 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4133
/* Save the three borrowed registers: one in TMP3, two in local slots. */
4134 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4135 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
4136 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
4137 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers start one character early because the loads in the loop
address offset IN_UCHARS(1); presumably MOVU_UCHAR also advances the base
register by that offset - confirm against the sljit documentation. */
4138 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4139 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4140
4141 label = LABEL();
4142 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4143 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Translate each character through the table; values above 255 skip the
lookup in the 16 and 32 bit libraries. */
4144 #ifndef COMPILE_PCRE8
4145 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4146 #endif
4147 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4148 #ifndef COMPILE_PCRE8
4149 JUMPHERE(jump);
4150 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4151 #endif
4152 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4153 #ifndef COMPILE_PCRE8
4154 JUMPHERE(jump);
4155 #endif
4156 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4157 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4158 JUMPTO(SLJIT_C_NOT_ZERO, label);
4159
4160 JUMPHERE(jump);
/* Undo the initial one character bias of STR_PTR and restore the borrowed
registers. */
4161 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4162 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4163 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4164 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4165 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4166 }
4167
4168 #undef LCC_TABLE
4169 #undef CHAR1
4170 #undef CHAR2
4171
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two character sequences using the Unicode tables.
This is an ordinary C function rather than emitted machine code, because
generating the table walk at JIT level would not be worthwhile.

  src1, end1   first sequence and its end
  args         supplies the second sequence: it starts at args->uchar_ptr and
               must not be read at or beyond args->end

Returns:
  NULL             the sequences differ
  (pcre_uchar*)1   the second sequence ran out before the first was consumed
  otherwise        the position in the second sequence just after the last
                   character compared */
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *record;
const pcre_uint32 *set;
pcre_uint32 ch1, ch2;

while (src1 < end1)
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(ch1, src1);
  GETCHARINC(ch2, src2);
  record = GET_UCD(ch2);

  /* Identical characters, or a simple one-to-one case pair. */
  if (ch1 == ch2 || ch1 == ch2 + record->other_case)
    continue;

  /* Otherwise ch1 has to be a member of the caseless set of ch2. The scan
  gives up as soon as an entry greater than ch1 is reached. */
  for (set = PRIV(ucd_caseless_sets) + record->caseset; ch1 != *set; set++)
    if (ch1 < *set)
      return NULL;
  }

return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
4204
/* Emits the comparison code for one character of a literal sequence.

  common      compiler state
  caseless    TRUE when the character has to match in either case
  cc          the pattern character (in UTF mode it may occupy several code
              units; all of them are processed by this call)
  context     state shared between consecutive calls: the number of bytes
              still to be compared (length), the register that receives the
              next load (sourcereg, -1 before the first call) and, on the
              unaligned-read path, the code units collected so far (c / oc
              with the fill index ucharptr)
  backtracks  list that receives the jumps taken when the subject differs

The subject is addressed as STR_PTR - context->length, i.e. backwards from
STR_PTR. NOTE(review): presumably the caller has already advanced STR_PTR
past the whole sequence - confirm at the call sites.

Returns cc advanced past the code units that were processed. */
4205 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4206 compare_context* context, jump_list **backtracks)
4207 {
4208 DEFINE_COMPILER;
4209 unsigned int othercasebit = 0;
4210 pcre_uchar *othercasechar = NULL;
4211 #ifdef SUPPORT_UTF
4212 int utflength;
4213 #endif
4214
/* When the character has another case, char_get_othercase_bit() packs two
things into one value: the bit that differs between the cases and the index
of the code unit that contains it. They are unpacked here into othercasebit
and othercasechar. */
4215 if (caseless && char_has_othercase(common, cc))
4216 {
4217 othercasebit = char_get_othercase_bit(common, cc);
4218 SLJIT_ASSERT(othercasebit);
4219 /* Extracting bit difference info. */
4220 #if defined COMPILE_PCRE8
4221 othercasechar = cc + (othercasebit >> 8);
4222 othercasebit &= 0xff;
4223 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4224 /* Note that this code only handles characters in the BMP. If there
4225 ever are characters outside the BMP whose othercase differs in only one
4226 bit from itself (there currently are none), this code will need to be
4227 revised for COMPILE_PCRE32. */
4228 othercasechar = cc + (othercasebit >> 9);
/* Bit 0x100 set means the differing bit lies in the upper byte of the code
unit, so the 8 bit mask is moved up by one byte. */
4229 if ((othercasebit & 0x100) != 0)
4230 othercasebit = (othercasebit & 0xff) << 8;
4231 else
4232 othercasebit &= 0xff;
4233 #endif /* COMPILE_PCRE[8|16|32] */
4234 }
4235
/* First call for this sequence: preload the first chunk into TMP1 using the
widest load that the remaining length allows, and make TMP2 the target of the
next load. */
4236 if (context->sourcereg == -1)
4237 {
4238 #if defined COMPILE_PCRE8
4239 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4240 if (context->length >= 4)
4241 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4242 else if (context->length >= 2)
4243 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4244 else
4245 #endif
4246 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4247 #elif defined COMPILE_PCRE16
4248 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4249 if (context->length >= 4)
4250 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4251 else
4252 #endif
4253 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4254 #elif defined COMPILE_PCRE32
4255 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4256 #endif /* COMPILE_PCRE[8|16|32] */
4257 context->sourcereg = TMP2;
4258 }
4259
/* In UTF mode one character may consist of several code units; the loop
below runs once per code unit. Without UTF support the body runs once. */
4260 #ifdef SUPPORT_UTF
4261 utflength = 1;
4262 if (common->utf && HAS_EXTRALEN(*cc))
4263 utflength += GET_EXTRALEN(*cc);
4264
4265 do
4266 {
4267 #endif
4268
4269 context->length -= IN_UCHARS(1);
4270 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4271
4272 /* Unaligned read is supported. */
/* Collect the expected code unit (with the case bit forced on when this is
the unit that carries it) and its OR mask; the compare is emitted only once a
whole chunk has been gathered. */
4273 if (othercasebit != 0 && othercasechar == cc)
4274 {
4275 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4276 context->oc.asuchars[context->ucharptr] = othercasebit;
4277 }
4278 else
4279 {
4280 context->c.asuchars[context->ucharptr] = *cc;
4281 context->oc.asuchars[context->ucharptr] = 0;
4282 }
4283 context->ucharptr++;
4284
/* Flush when the chunk is full, when the sequence ends, or (8 bit only) when
two units are collected and just one remains. */
4285 #if defined COMPILE_PCRE8
4286 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4287 #else
4288 if (context->ucharptr >= 2 || context->length == 0)
4289 #endif
4290 {
/* Issue the load of the following chunk into the spare register before the
current chunk is compared. */
4291 if (context->length >= 4)
4292 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4293 else if (context->length >= 2)
4294 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4295 #if defined COMPILE_PCRE8
4296 else if (context->length >= 1)
4297 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4298 #endif /* COMPILE_PCRE8 */
/* Swap TMP1 / TMP2: sourcereg now names the register that holds the chunk
loaded earlier, which is the one compared below. */
4299 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4300
/* Compare the collected chunk as a 4, 2 or 1 byte quantity. When a case bit
is present the subject value is ORed with the mask first. */
4301 switch(context->ucharptr)
4302 {
4303 case 4 / sizeof(pcre_uchar):
4304 if (context->oc.asint != 0)
4305 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4306 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4307 break;
4308
4309 case 2 / sizeof(pcre_uchar):
4310 if (context->oc.asushort != 0)
4311 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4312 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4313 break;
4314
4315 #ifdef COMPILE_PCRE8
4316 case 1:
4317 if (context->oc.asbyte != 0)
4318 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4319 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4320 break;
4321 #endif
4322
4323 default:
4324 SLJIT_ASSERT_STOP();
4325 break;
4326 }
4327 context->ucharptr = 0;
4328 }
4329
4330 #else
4331
4332 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit per compare: load the next unit into the spare register,
swap registers, then compare the unit loaded earlier. */
4333 if (context->length >= 1)
4334 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4335
4336 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4337
4338 if (othercasebit != 0 && othercasechar == cc)
4339 {
4340 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4341 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4342 }
4343 else
4344 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4345
4346 #endif
4347
4348 cc++;
4349 #ifdef SUPPORT_UTF
4350 utflength--;
4351 }
4352 while (utflength > 0);
4353 #endif
4354
4355 return cc;
4356 }
4357
4358 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4359
4360 #define SET_TYPE_OFFSET(value) \
4361 if ((value) != typeoffset) \
4362 { \
4363 if ((value) > typeoffset) \
4364 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
4365 else \
4366 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
4367 } \
4368 typeoffset = (value);
4369
4370 #define SET_CHAR_OFFSET(value) \
4371 if ((value) != charoffset) \
4372 { \
4373 if ((value) > charoffset) \
4374 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
4375 else \
4376 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
4377 } \
4378 charoffset = (value);
4379
4380 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4381 {
4382 DEFINE_COMPILER;
4383 jump_list *found = NULL;
4384 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4385 sljit_uw c, charoffset, max = 0;
4386 struct sljit_jump *jump = NULL;
4387 pcre_uchar *ccbegin;
4388 int compares, invertcmp, numberofcmps;
4389 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4390 BOOL utf = common->utf;
4391 #endif
4392
4393 #ifdef SUPPORT_UCP
4394 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4395 BOOL charsaved = FALSE;
4396 int typereg = TMP1, scriptreg = TMP1;
4397 const pcre_uint32 *other_cases;
4398 pcre_int32 typeoffset;
4399 #endif
4400
4401 /* Scanning the necessary info. */
4402 cc++;
4403 ccbegin = cc;
4404 compares = 0;
4405 if (cc[-1] & XCL_MAP) cc += 32 / sizeof(pcre_uchar);
4406
4407 while (*cc != XCL_END)
4408 {
4409 compares++;
4410 if (*cc == XCL_SINGLE)
4411 {
4412 cc ++;
4413 GETCHARINCTEST(c, cc);
4414 if (c > max) max = c;
4415 #ifdef SUPPORT_UCP
4416 needschar = TRUE;
4417 #endif
4418 }
4419 else if (*cc == XCL_RANGE)
4420 {
4421 cc += 2;
4422 #ifdef SUPPORT_UTF
4423 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4424 #endif
4425 GETCHARINCTEST(c, cc);
4426 if (c > max) max = c;
4427 #ifdef SUPPORT_UCP
4428 needschar = TRUE;
4429 #endif
4430 }
4431 #ifdef SUPPORT_UCP
4432 else
4433 {
4434 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4435 cc++;
4436 if (*cc == PT_CLIST)
4437 {
4438 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4439 while (*other_cases != NOTACHAR)
4440 {
4441 if (*other_cases > max) max = *other_cases;
4442 other_cases++;
4443 }
4444 }
4445 else
4446 max = READ_CHAR_ANY;
4447
4448 switch(*cc)
4449 {
4450 case PT_ANY:
4451 break;
4452
4453 case PT_LAMP:
4454 case PT_GC:
4455 case PT_PC:
4456 case PT_ALNUM:
4457 needstype = TRUE;
4458 break;
4459
4460 case PT_SC:
4461 needsscript = TRUE;
4462 break;
4463
4464 case PT_SPACE:
4465 case PT_PXSPACE:
4466 case PT_WORD:
4467 case PT_PXGRAPH:
4468 case PT_PXPRINT:
4469 case PT_PXPUNCT:
4470 needstype = TRUE;
4471 needschar = TRUE;
4472 break;
4473
4474 case PT_CLIST:
4475 case PT_UCNC:
4476 needschar = TRUE;
4477 break;
4478
4479 default:
4480 SLJIT_ASSERT_STOP();
4481 break;
4482 }
4483 cc += 2;
4484 }
4485 #endif
4486 }
4487
4488 /* We are not necessary in utf mode even in 8 bit mode. */
4489 cc = ccbegin;
4490 detect_partial_match(common, backtracks);
4491 read_char_max(common, max, (cc[0] & XCL_NOT) != 0);
4492
4493 if ((cc[-1] & XCL_HASPROP) == 0)
4494 {
4495 if ((cc[-1] & XCL_MAP) != 0)
4496 {
4497 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4498 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4499 {
4500 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4501 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4502 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4503 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4504 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4505 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4506 }
4507
4508 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4509 JUMPHERE(jump);
4510
4511 cc += 32 / sizeof(pcre_uchar);
4512 }
4513 else
4514 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));
4515 }
4516 else if ((cc[-1] & XCL_MAP) != 0)
4517 {
4518 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4519 #ifdef SUPPORT_UCP
4520 charsaved = TRUE;
4521 #endif
4522 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4523 {
4524 #ifdef COMPILE_PCRE8
4525 SLJIT_ASSERT(common->utf);
4526 #endif
4527 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4528
4529 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4530 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4531 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4532 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4533 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4534 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4535
4536 JUMPHERE(jump);
4537 }
4538
4539 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4540 cc += 32 / sizeof(pcre_uchar);
4541 }
4542
4543 #ifdef SUPPORT_UCP
4544 /* Simple register allocation. TMP1 is preferred if possible. */
4545 if (needstype || needsscript)
4546 {
4547 if (needschar && !charsaved)
4548 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4549 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4550 if (needschar)
4551 {
4552 if (needstype)
4553 {
4554 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4555 typereg = RETURN_ADDR;
4556 }
4557
4558 if (needsscript)
4559 scriptreg = TMP3;
4560 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4561 }
4562 else if (needstype && needsscript)
4563 scriptreg = TMP3;
4564 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4565
4566 if (needsscript)
4567 {
4568 if (scriptreg == TMP1)
4569 {
4570 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4571 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4572 }
4573 else
4574 {
4575 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4576 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4577 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4578 }
4579 }
4580 }
4581 #endif
4582
4583 /* Generating code. */
4584 charoffset = 0;
4585 numberofcmps = 0;
4586 #ifdef SUPPORT_UCP
4587 typeoffset = 0;
4588 #endif
4589
4590 while (*cc != XCL_END)
4591 {
4592 compares--;
4593 invertcmp = (compares == 0 && list != backtracks);
4594 jump = NULL;
4595
4596 if (*cc == XCL_SINGLE)
4597 {
4598 cc ++;
4599 GETCHARINCTEST(c, cc);
4600
4601 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4602 {
4603 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4604 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4605 numberofcmps++;
4606 }
4607 else if (numberofcmps > 0)
4608 {
4609 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4610 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4611 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4612 numberofcmps = 0;
4613 }
4614 else
4615 {
4616 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4617 numberofcmps = 0;
4618 }
4619 }
4620 else if (*cc == XCL_RANGE)
4621 {
4622 cc ++;
4623 GETCHARINCTEST(c, cc);
4624 SET_CHAR_OFFSET(c);
4625 GETCHARINCTEST(c, cc);
4626
4627 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4628 {
4629 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4630 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4631 numberofcmps++;
4632 }
4633 else if (numberofcmps > 0)
4634 {
4635 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4636 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4637 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4638 numberofcmps = 0;
4639 }
4640 else
4641 {
4642 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4643 numberofcmps = 0;
4644 }
4645 }
4646 #ifdef SUPPORT_UCP
4647 else
4648 {
4649 if (*cc == XCL_NOTPROP)
4650 invertcmp ^= 0x1;
4651 cc++;
4652 switch(*cc)
4653 {
4654 case PT_ANY:
4655 if (list != backtracks)
4656 {
4657 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4658 continue;
4659 }
4660 else if (cc[-1] == XCL_NOTPROP)
4661 continue;
4662 jump = JUMP(SLJIT_JUMP);
4663 break;
4664
4665 case PT_LAMP:
4666 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4667 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4668 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4669 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4670 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4671 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4672 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4673 break;
4674
4675 case PT_GC:
4676 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4677 SET_TYPE_OFFSET(c);
4678 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4679 break;
4680
4681 case PT_PC:
4682 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4683 break;
4684
4685 case PT_SC:
4686 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4687 break;
4688
4689 case PT_SPACE:
4690 case PT_PXSPACE:
4691 SET_CHAR_OFFSET(9);
4692 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4693 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4694
4695 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4696 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4697
4698 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4699 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4700
4701 SET_TYPE_OFFSET(ucp_Zl);
4702 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4703 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4704 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4705 break;
4706
4707 case PT_WORD:
4708 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
4709 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4710 /* Fall through. */
4711
4712 case PT_ALNUM:
4713 SET_TYPE_OFFSET(ucp_Ll);
4714 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4715 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4716 SET_TYPE_OFFSET(ucp_Nd);
4717 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4718 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4719 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4720 break;
4721
4722 case PT_CLIST:
4723 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4724
4725 /* At least three characters are required.
4726 Otherwise this case would be handled by the normal code path. */
4727 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4728 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4729
4730 /* Optimizing character pairs, if their difference is power of 2. */
4731 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4732 {
4733 if (charoffset == 0)
4734 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4735 else
4736 {
4737 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4738 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4739 }
4740 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4741 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4742 other_cases += 2;
4743 }
4744 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4745 {
4746 if (charoffset == 0)
4747 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4748 else
4749 {
4750 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4751 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4752 }
4753 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4754 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4755
4756 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
4757 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4758
4759 other_cases += 3;
4760 }
4761 else
4762 {
4763 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4764 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4765 }
4766
4767 while (*other_cases != NOTACHAR)
4768 {
4769 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4770 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4771 }
4772 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4773 break;
4774
4775 case PT_UCNC:
4776 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
4777 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4778 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
4779 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4780 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
4781 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4782
4783 SET_CHAR_OFFSET(0xa0);
4784 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
4785 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4786 SET_CHAR_OFFSET(0);
4787 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4788 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4789 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4790 break;
4791
4792 case PT_PXGRAPH:
4793 /* C and Z groups are the farthest two groups. */
4794 SET_TYPE_OFFSET(ucp_Ll);
4795 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4796 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4797
4798 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4799
4800 /* In case of ucp_Cf, we overwrite the result. */
4801 SET_CHAR_OFFSET(0x2066);
4802 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4803 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4804
4805 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4806 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4807
4808 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4809 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4810
4811 JUMPHERE(jump);
4812 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4813 break;
4814
4815 case PT_PXPRINT:
4816 /* C and Z groups are the farthest two groups. */
4817 SET_TYPE_OFFSET(ucp_Ll);
4818 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4819 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4820
4821 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4822 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4823
4824 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4825
4826 /* In case of ucp_Cf, we overwrite the result. */
4827 SET_CHAR_OFFSET(0x2066);
4828 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4829 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4830
4831 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4832 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4833
4834 JUMPHERE(jump);
4835 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4836 break;
4837
4838 case PT_PXPUNCT:
4839 SET_TYPE_OFFSET(ucp_Sc);
4840 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4841 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4842
4843 SET_CHAR_OFFSET(0);
4844 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4845 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4846
4847 SET_TYPE_OFFSET(ucp_Pc);
4848 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4849 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4850 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4851 break;
4852 }
4853 cc += 2;
4854 }
4855 #endif
4856
4857 if (jump != NULL)
4858 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4859 }
4860
4861 if (found != NULL)
4862 set_jumps(found, LABEL());
4863 }
4864
4865 #undef SET_TYPE_OFFSET
4866 #undef SET_CHAR_OFFSET
4867
4868 #endif
4869
4870 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4871 {
4872 DEFINE_COMPILER;
4873 int length;
4874 unsigned int c, oc, bit;
4875 compare_context context;
4876 struct sljit_jump *jump[4];
4877 jump_list *end_list;
4878 #ifdef SUPPORT_UTF
4879 struct sljit_label *label;
4880 #ifdef SUPPORT_UCP
4881 pcre_uchar propdata[5];
4882 #endif
4883 #endif /* SUPPORT_UTF */
4884
4885 switch(type)
4886 {
4887 case OP_SOD:
4888 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4889 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4890 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4891 return cc;
4892
4893 case OP_SOM:
4894 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4895 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4896 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4897 return cc;
4898
4899 case OP_NOT_WORD_BOUNDARY:
4900 case OP_WORD_BOUNDARY:
4901 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4902 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4903 return cc;
4904
4905 case OP_NOT_DIGIT:
4906 case OP_DIGIT:
4907 /* Digits are usually 0-9, so it is worth to optimize them. */
4908 detect_partial_match(common, backtracks);
4909 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4910 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
4911 read_char7_type(common, type == OP_NOT_DIGIT);
4912 else
4913 #endif
4914 read_char8_type(common, type == OP_NOT_DIGIT);
4915 /* Flip the starting bit in the negative case. */
4916 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4917 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4918 return cc;
4919
4920 case OP_NOT_WHITESPACE:
4921 case OP_WHITESPACE:
4922 detect_partial_match(common, backtracks);
4923 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4924 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
4925 read_char7_type(common, type == OP_NOT_WHITESPACE);
4926 else
4927 #endif
4928 read_char8_type(common, type == OP_NOT_WHITESPACE);
4929 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4930 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4931 return cc;
4932
4933 case OP_NOT_WORDCHAR:
4934 case OP_WORDCHAR:
4935 detect_partial_match(common, backtracks);
4936 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4937 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
4938 read_char7_type(common, type == OP_NOT_WORDCHAR);
4939 else
4940 #endif
4941 read_char8_type(common, type == OP_NOT_WORDCHAR);
4942 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4943 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4944 return cc;
4945
4946 case OP_ANY:
4947 detect_partial_match(common, backtracks);
4948 read_char_max(common, common->nlmax, TRUE);
4949 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4950 {
4951 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4952 end_list = NULL;
4953 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4954 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4955 else
4956 check_str_end(common, &end_list);
4957
4958 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4959 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4960 set_jumps(end_list, LABEL());
4961 JUMPHERE(jump[0]);
4962 }
4963 else
4964 check_newlinechar(common, common->nltype, backtracks, TRUE);
4965 return cc;
4966
4967 case OP_ALLANY:
4968 detect_partial_match(common, backtracks);
4969 #ifdef SUPPORT_UTF
4970 if (common->utf)
4971 {
4972 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4973 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4974 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4975 #if defined COMPILE_PCRE8
4976 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4977 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4978 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4979 #elif defined COMPILE_PCRE16
4980 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4981 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4982 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4983 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4984 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4985 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4986 #endif
4987 JUMPHERE(jump[0]);
4988 #endif /* COMPILE_PCRE[8|16] */
4989 return cc;
4990 }
4991 #endif
4992 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4993 return cc;
4994
4995 case OP_ANYBYTE:
4996 detect_partial_match(common, backtracks);
4997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4998 return cc;
4999
5000 #ifdef SUPPORT_UTF
5001 #ifdef SUPPORT_UCP
5002 case OP_NOTPROP:
5003 case OP_PROP:
5004 propdata[0] = XCL_HASPROP;
5005 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5006 propdata[2] = cc[0];
5007 propdata[3] = cc[1];
5008 propdata[4] = XCL_END;
5009 compile_xclass_matchingpath(common, propdata, backtracks);
5010 return cc + 2;
5011 #endif
5012 #endif
5013
5014 case OP_ANYNL:
5015 detect_partial_match(common, backtracks);
5016 read_char_max(common, common->bsr_nlmax, FALSE);
5017 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5018 /* We don't need to handle soft partial matching case. */
5019 end_list = NULL;
5020 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5021 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5022 else
5023 check_str_end(common, &end_list);
5024 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5025 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5026 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5027 jump[2] = JUMP(SLJIT_JUMP);
5028 JUMPHERE(jump[0]);
5029 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5030 set_jumps(end_list, LABEL());
5031 JUMPHERE(jump[1]);
5032 JUMPHERE(jump[2]);
5033 return cc;
5034
5035 case OP_NOT_HSPACE:
5036 case OP_HSPACE:
5037 detect_partial_match(common, backtracks);
5038 read_char_max(common, 0x3000, type == OP_NOT_HSPACE);
5039 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5040 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5041 return cc;
5042
5043 case OP_NOT_VSPACE:
5044 case OP_VSPACE:
5045 detect_partial_match(common, backtracks);
5046 read_char_max(common, 0x2029, type == OP_NOT_VSPACE);
5047 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5048 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5049 return cc;
5050
5051 #ifdef SUPPORT_UCP
5052 case OP_EXTUNI:
5053 detect_partial_match(common, backtracks);
5054 read_char(common);
5055 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5056 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5057 /* Optimize register allocation: use a real register. */
5058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5059 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5060
5061 label = LABEL();
5062 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5063 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5064 read_char(common);
5065 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5066 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5067 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5068
5069 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5070 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5071 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5072 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5073 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5074 JUMPTO(SLJIT_C_NOT_ZERO, label);
5075
5076 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5077 JUMPHERE(jump[0]);
5078 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5079
5080 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5081 {
5082 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5083 /* Since we successfully read a char above, partial matching must occur. */
5084 check_partial(common, TRUE);
5085 JUMPHERE(jump[0]);
5086 }
5087 return cc;
5088 #endif
5089
5090 case OP_EODN:
5091 /* Requires rather complex checks. */
5092 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5093 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5094 {
5095 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5096 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5097 if (common->mode == JIT_COMPILE)
5098 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5099 else
5100 {
5101 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5102 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5103 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5104 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5105 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5106 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5107 check_partial(common, TRUE);
5108 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5109 JUMPHERE(jump[1]);
5110 }
5111 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5112 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5113 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5114 }
5115 else if (common->nltype == NLTYPE_FIXED)
5116 {
5117 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5118 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5119 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5120 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5121 }
5122 else
5123 {
5124 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5125 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5126 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5127 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5128 jump[2] = JUMP(SLJIT_C_GREATER);
5129 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5130 /* Equal. */
5131 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5132 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5133 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5134
5135 JUMPHERE(jump[1]);
5136 if (common->nltype == NLTYPE_ANYCRLF)
5137 {
5138 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5139 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5140 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5141 }
5142 else
5143 {
5144 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5145 read_char_max(common, common->nlmax, TRUE);
5146 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5147 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5148 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5149 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
5150 }
5151 JUMPHERE(jump[2]);
5152 JUMPHERE(jump[3]);
5153 }
5154 JUMPHERE(jump[0]);
5155 check_partial(common, FALSE);
5156 return cc;
5157
5158 case OP_EOD:
5159 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5160 check_partial(common, FALSE);
5161 return cc;
5162
5163 case OP_CIRC:
5164 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5165 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5166 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5167 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5168 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5169 return cc;
5170
5171 case OP_CIRCM:
5172 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5173 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5174 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5175 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5176 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5177 jump[0] = JUMP(SLJIT_JUMP);
5178 JUMPHERE(jump[1]);
5179
5180 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5181 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5182 {
5183 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5184 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5185 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5186 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5187 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5188 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5189 }
5190 else
5191 {
5192 skip_char_back(common);
5193 read_char_max(common, common->nlmax, TRUE);
5194 check_newlinechar(common, common->nltype, backtracks, FALSE);
5195 }
5196 JUMPHERE(jump[0]);
5197 return cc;
5198
5199 case OP_DOLL:
5200 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5201 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5202 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5203
5204 if (!common->endonly)
5205 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5206 else
5207 {
5208 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5209 check_partial(common, FALSE);
5210 }
5211 return cc;
5212
5213 case OP_DOLLM:
5214 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5215 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5216 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5217 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5218 check_partial(common, FALSE);
5219 jump[0] = JUMP(SLJIT_JUMP);
5220 JUMPHERE(jump[1]);
5221
5222 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5223 {
5224 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5225 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5226 if (common->mode == JIT_COMPILE)
5227 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5228 else
5229 {
5230 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5231 /* STR_PTR = STR_END - IN_UCHARS(1) */
5232 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5233 check_partial(common, TRUE);
5234 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5235 JUMPHERE(jump[1]);
5236 }
5237
5238 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5239 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5240 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5241 }
5242 else
5243 {
5244 peek_char(common);
5245 check_newlinechar(common, common->nltype, backtracks, FALSE);
5246 }
5247 JUMPHERE(jump[0]);
5248 return cc;
5249
5250 case OP_CHAR:
5251 case OP_CHARI:
5252 length = 1;
5253 #ifdef SUPPORT_UTF
5254 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5255 #endif
5256 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5257 {
5258 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5259 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5260
5261 context.length = IN_UCHARS(length);
5262 context.sourcereg = -1;
5263 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5264 context.ucharptr = 0;
5265 #endif
5266 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5267 }
5268
5269 detect_partial_match(common, backtracks);
5270 #ifdef SUPPORT_UTF
5271 if (common->utf)
5272 {
5273 GETCHAR(c, cc);
5274 }
5275 else
5276 #endif
5277 c = *cc;
5278
5279 if (type == OP_CHAR || !char_has_othercase(common, cc))
5280 {
5281 read_char_max(common, c, FALSE);
5282 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5283 return cc + length;
5284 }
5285 oc = char_othercase(common, c);
5286 read_char_max(common, c > oc ? c : oc, FALSE);
5287 bit = c ^ oc;
5288 if (is_powerof2(bit))
5289 {
5290 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5291 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5292 return cc + length;
5293 }
5294 jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5295 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5296 JUMPHERE(jump[0]);
5297 return cc + length;
5298
5299 case OP_NOT:
5300 case OP_NOTI:
5301 detect_partial_match(common, backtracks);
5302 length = 1;
5303 #ifdef SUPPORT_UTF
5304 if (common->utf)
5305 {
5306 #ifdef COMPILE_PCRE8
5307 c = *cc;
5308 if (c < 128)
5309 {
5310 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5311 if (type == OP_NOT || !char_has_othercase(common, cc))
5312 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5313 else
5314 {
5315 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5316 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5317 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5318 }
5319 /* Skip the variable-length character. */
5320 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5321 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5322 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5323 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5324 JUMPHERE(jump[0]);
5325 return cc + 1;
5326 }
5327 else
5328 #endif /* COMPILE_PCRE8 */
5329 {
5330 GETCHARLEN(c, cc, length);
5331 }
5332 }
5333 else
5334 #endif /* SUPPORT_UTF */
5335 c = *cc;
5336
5337 if (type == OP_NOT || !char_has_othercase(common, cc))
5338 {
5339 read_char_max(common, c, TRUE);
5340 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5341 }
5342 else
5343 {
5344 oc = char_othercase(common, c);
5345 read_char_max(common, c > oc ? c : oc, TRUE);
5346 bit = c ^ oc;
5347 if (is_powerof2(bit))
5348 {
5349 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5350 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5351 }
5352 else
5353 {
5354 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5355 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5356 }
5357 }
5358 return cc + length;
5359
5360 case OP_CLASS:
5361 case OP_NCLASS:
5362 detect_partial_match(common, backtracks);
5363
5364 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5365 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5366 read_char_max(common, bit, type == OP_NCLASS);
5367 #else
5368 read_char_max(common, 255, type == OP_NCLASS);
5369 #endif
5370
5371 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5372 return cc + 32 / sizeof(pcre_uchar);
5373
5374 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5375 jump[0] = NULL;
5376 if (common->utf)
5377 {
5378 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5379 if (type == OP_CLASS)
5380 {
5381 add_jump(compiler, backtracks, jump[0]);
5382 jump[0] = NULL;
5383 }
5384 }
5385 #elif !defined COMPILE_PCRE8
5386 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5387 if (type == OP_CLASS)
5388 {
5389 add_jump(compiler, backtracks, jump[0]);
5390 jump[0] = NULL;
5391 }
5392 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5393
5394 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5395 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5396 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5397 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5398 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5399 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5400
5401 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5402 if (jump[0] != NULL)
5403 JUMPHERE(jump[0]);
5404 #endif
5405
5406 return cc + 32 / sizeof(pcre_uchar);
5407
5408 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5409 case OP_XCLASS:
5410 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5411 return cc + GET(cc, 0) - 1;
5412 #endif
5413
5414 case OP_REVERSE:
5415 length = GET(cc, 0);
5416 if (length == 0)
5417 return cc + LINK_SIZE;
5418 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5419 #ifdef SUPPORT_UTF
5420 if (common->utf)
5421 {
5422 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5423 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5424 label = LABEL();
5425 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5426 skip_char_back(common);
5427 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5428 JUMPTO(SLJIT_C_NOT_ZERO, label);
5429 }
5430 else
5431 #endif
5432 {
5433 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5434 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5435 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5436 }
5437 check_start_used_ptr(common);
5438 return cc + LINK_SIZE;
5439 }
5440 SLJIT_ASSERT_STOP();
5441 return cc;
5442 }
5443
5444 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5445 {
5446 /* This function consumes at least one input character. */
5447 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5448 DEFINE_COMPILER;
5449 pcre_uchar *ccbegin = cc;
5450 compare_context context;
5451 int size;
5452
5453 context.length = 0;
5454 do
5455 {
5456 if (cc >= ccend)
5457 break;
5458
5459 if (*cc == OP_CHAR)
5460 {
5461 size = 1;
5462 #ifdef SUPPORT_UTF
5463 if (common->utf && HAS_EXTRALEN(cc[1]))
5464 size += GET_EXTRALEN(cc[1]);
5465 #endif
5466 }
5467 else if (*cc == OP_CHARI)
5468 {
5469 size = 1;
5470 #ifdef SUPPORT_UTF
5471 if (common->utf)
5472 {
5473 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5474 size = 0;
5475 else if (HAS_EXTRALEN(cc[1]))
5476 size += GET_EXTRALEN(cc[1]);
5477 }
5478 else
5479 #endif
5480 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5481 size = 0;
5482 }
5483 else
5484 size = 0;
5485
5486 cc += 1 + size;
5487 context.length += IN_UCHARS(size);
5488 }
5489 while (size > 0 && context.length <= 128);
5490
5491 cc = ccbegin;
5492 if (context.length > 0)
5493 {
5494 /* We have a fixed-length byte sequence. */
5495 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5496 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5497
5498 context.sourcereg = -1;
5499 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5500 context.ucharptr = 0;
5501 #endif
5502 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5503 return cc;
5504 }
5505
5506 /* A non-fixed length character will be checked if length == 0. */
5507 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5508 }
5509
5510 /* Forward definitions. */
5511 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5512 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5513
/* Allocates a zero-filled backtrack record of (size) bytes from the compiler's
memory, stores it in the local variable "backtrack", and pushes it on top of
the list headed by parent->top, remembering (ccstart) as its pattern position.
If the compiler is in an error state after the allocation, the enclosing
function returns (error). Expects "compiler", "backtrack" and "parent" to be
in scope at the point of use. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
5526
/* Same as PUSH_BACKTRACK, for use inside functions returning void: if the
compiler is in an error state after the allocation, the enclosing function
simply returns. Expects "compiler", "backtrack" and "parent" to be in scope
at the point of use. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
5539
/* Views the local "backtrack" pointer as a pointer to the given record type. */
#define BACKTRACK_AS(type) ((type *)(backtrack))
5541
5542 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5543 {
5544 /* The OVECTOR offset goes to TMP2. */
5545 DEFINE_COMPILER;
5546 int count = GET2(cc, 1 + IMM2_SIZE);
5547 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5548 unsigned int offset;
5549 jump_list *found = NULL;
5550
5551 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5552
5553 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5554
5555 count--;
5556 while (count-- > 0)
5557 {
5558 offset = GET2(slot, 0) << 1;
5559 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5560 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5561 slot += common->name_entry_size;
5562 }
5563
5564 offset = GET2(slot, 0) << 1;
5565 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5566 if (backtracks != NULL && !common->jscript_compat)
5567 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5568
5569 set_jumps(found, LABEL());
5570 }
5571
5572 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5573 {
5574 DEFINE_COMPILER;
5575 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5576 int offset = 0;
5577 struct sljit_jump *jump = NULL;
5578 struct sljit_jump *partial;
5579 struct sljit_jump *nopartial;
5580
5581 if (ref)
5582 {
5583 offset = GET2(cc, 1) << 1;
5584 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5585 /* OVECTOR(1) contains the "string begin - 1" constant. */
5586 if (withchecks && !common->jscript_compat)
5587 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5588 }
5589 else
5590 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5591
5592 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5593 if (common->utf && *cc == OP_REFI)
5594 {
5595 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5596 if (ref)
5597 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5598 else
5599 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5600
5601 if (withchecks)
5602 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5603
5604 /* Needed to save important temporary registers. */
5605 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5606 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5607 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5608 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5609 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5610 if (common->mode == JIT_COMPILE)
5611 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5612 else
5613 {
5614 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5615 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5616 check_partial(common, FALSE);
5617 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5618 JUMPHERE(nopartial);
5619 }
5620 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5621 }
5622 else
5623 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5624 {
5625 if (ref)
5626 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5627 else
5628 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5629
5630 if (withchecks)
5631 jump = JUMP(SLJIT_C_ZERO);
5632
5633 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5634 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5635 if (common->mode == JIT_COMPILE)
5636 add_jump(compiler, backtracks, partial);
5637
5638 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5639 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5640
5641 if (common->mode != JIT_COMPILE)
5642 {
5643 nopartial = JUMP(SLJIT_JUMP);
5644 JUMPHERE(partial);
5645 /* TMP2 -= STR_END - STR_PTR */
5646 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5647 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5648 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5649 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5650 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5651 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5652 JUMPHERE(partial);
5653 check_partial(common, FALSE);
5654 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5655 JUMPHERE(nopartial);
5656 }
5657 }
5658
5659 if (jump != NULL)
5660 {
5661 if (emptyfail)
5662 add_jump(compiler, backtracks, jump);
5663 else
5664 JUMPHERE(jump);
5665 }
5666 }
5667
5668 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5669 {
5670 DEFINE_COMPILER;
5671 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5672 backtrack_common *backtrack;
5673 pcre_uchar type;
5674 int offset = 0;
5675 struct sljit_label *label;
5676 struct sljit_jump *zerolength;
5677 struct sljit_jump *jump = NULL;
5678 pcre_uchar *ccbegin = cc;
5679 int min = 0, max = 0;
5680 BOOL minimize;
5681
5682 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5683
5684 if (ref)
5685 offset = GET2(cc, 1) << 1;
5686 else
5687 cc += IMM2_SIZE;
5688 type = cc[1 + IMM2_SIZE];
5689
5690 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5691 minimize = (type & 0x1) != 0;
5692 switch(type)
5693 {
5694 case OP_CRSTAR:
5695 case OP_CRMINSTAR:
5696 min = 0;
5697 max = 0;
5698 cc += 1 + IMM2_SIZE + 1;
5699 break;
5700 case OP_CRPLUS:
5701 case OP_CRMINPLUS:
5702 min = 1;
5703 max = 0;
5704 cc += 1 + IMM2_SIZE + 1;
5705 break;
5706 case OP_CRQUERY:
5707 case OP_CRMINQUERY:
5708 min = 0;
5709 max = 1;
5710 cc += 1 + IMM2_SIZE + 1;
5711 break;
5712 case OP_CRRANGE:
5713 case OP_CRMINRANGE:
5714 min = GET2(cc, 1 + IMM2_SIZE + 1);
5715 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5716 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5717 break;
5718 default:
5719 SLJIT_ASSERT_STOP();
5720 break;
5721 }
5722
5723 if (!minimize)
5724 {
5725 if (min == 0)
5726 {
5727 allocate_stack(common, 2);
5728 if (ref)
5729 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5730 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5731 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5732 /* Temporary release of STR_PTR. */
5733 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5734 /* Handles both invalid and empty cases. Since the minimum repeat,
5735 is zero the invalid case is basically the same as an empty case. */
5736 if (ref)
5737 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5738 else
5739 {
5740 compile_dnref_search(common, ccbegin, NULL);
5741 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5742 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5743 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5744 }
5745 /* Restore if not zero length. */
5746 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5747 }
5748 else
5749 {
5750 allocate_stack(common, 1);
5751 if (ref)
5752 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5753 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5754 if (ref)
5755 {
5756 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5757 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5758 }
5759 else
5760 {
5761 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5762 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5763 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5764 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5765 }
5766 }
5767
5768 if (min > 1 || max > 1)
5769 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5770
5771 label = LABEL();
5772 if (!ref)
5773 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5774 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5775
5776 if (min > 1 || max > 1)
5777 {
5778 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5779 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5780 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5781 if (min > 1)
5782 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5783 if (max > 1)
5784 {
5785 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5786 allocate_stack(common, 1);
5787 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5788 JUMPTO(SLJIT_JUMP, label);
5789 JUMPHERE(jump);
5790 }
5791 }
5792
5793 if (max == 0)
5794 {
5795 /* Includes min > 1 case as well. */
5796 allocate_stack(common, 1);
5797 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5798 JUMPTO(SLJIT_JUMP, label);
5799 }
5800
5801 JUMPHERE(zerolength);
5802 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5803
5804 count_match(common);
5805 return cc;
5806 }
5807
5808 allocate_stack(common, ref ? 2 : 3);
5809 if (ref)
5810 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5811 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5812 if (type != OP_CRMINSTAR)
5813 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5814
5815 if (min == 0)
5816 {
5817 /* Handles both invalid and empty cases. Since the minimum repeat,
5818 is zero the invalid case is basically the same as an empty case. */
5819 if (ref)
5820 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5821 else
5822 {
5823 compile_dnref_search(common, ccbegin, NULL);
5824 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5825 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5826 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5827 }
5828 /* Length is non-zero, we can match real repeats. */
5829 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5830 jump = JUMP(SLJIT_JUMP);
5831 }
5832 else
5833 {
5834 if (ref)
5835 {
5836 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5837 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5838 }
5839 else
5840 {
5841 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5842 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5843 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5844 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5845 }
5846 }
5847
5848 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5849 if (max > 0)
5850 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5851
5852 if (!ref)
5853 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5854 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5855 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5856
5857 if (min > 1)
5858 {
5859 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5860 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5861 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5862 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5863 }
5864 else if (max > 0)
5865 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5866
5867 if (jump != NULL)
5868 JUMPHERE(jump);
5869 JUMPHERE(zerolength);
5870
5871 count_match(common);
5872 return cc;
5873 }
5874
5875 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5876 {
5877 DEFINE_COMPILER;
5878 backtrack_common *backtrack;
5879 recurse_entry *entry = common->entries;
5880 recurse_entry *prev = NULL;
5881 sljit_sw start = GET(cc, 1);
5882 pcre_uchar *start_cc;
5883 BOOL needs_control_head;
5884
5885 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5886
5887 /* Inlining simple patterns. */
5888 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5889 {
5890 start_cc = common->start + start;
5891 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5892 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5893 return cc + 1 + LINK_SIZE;
5894 }
5895
5896 while (entry != NULL)
5897 {
5898 if (entry->start == start)
5899 break;
5900 prev = entry;
5901 entry = entry->next;
5902 }
5903
5904 if (entry == NULL)
5905 {
5906 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5907 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5908 return NULL;
5909 entry->next = NULL;
5910 entry->entry = NULL;
5911 entry->calls = NULL;
5912 entry->start = start;
5913
5914 if (prev != NULL)
5915 prev->next = entry;
5916 else
5917 common->entries = entry;
5918 }
5919
5920 if (common->has_set_som && common->mark_ptr != 0)
5921 {
5922 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5923 allocate_stack(common, 2);
5924 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5925 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5926 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5927 }
5928 else if (common->has_set_som || common->mark_ptr != 0)
5929 {
5930 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5931 allocate_stack(common, 1);
5932 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5933 }
5934
5935 if (entry->entry == NULL)
5936 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5937 else
5938 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5939 /* Leave if the match is failed. */
5940 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5941 return cc + 1 + LINK_SIZE;
5942 }
5943
5944 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5945 {
5946 const pcre_uchar *begin = arguments->begin;
5947 int *offset_vector = arguments->offsets;
5948 int offset_count = arguments->offset_count;
5949 int i;
5950
5951 if (PUBL(callout) == NULL)
5952 return 0;
5953
5954 callout_block->version = 2;
5955 callout_block->callout_data = arguments->callout_data;
5956
5957 /* Offsets in subject. */
5958 callout_block->subject_length = arguments->end - arguments->begin;
5959 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5960 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5961 #if defined COMPILE_PCRE8
5962 callout_block->subject = (PCRE_SPTR)begin;
5963 #elif defined COMPILE_PCRE16
5964 callout_block->subject = (PCRE_SPTR16)begin;
5965 #elif defined COMPILE_PCRE32
5966 callout_block->subject = (PCRE_SPTR32)begin;
5967 #endif
5968
5969 /* Convert and copy the JIT offset vector to the offset_vector array. */
5970 callout_block->capture_top = 0;
5971 callout_block->offset_vector = offset_vector;
5972 for (i = 2; i < offset_count; i += 2)
5973 {
5974 offset_vector[i] = jit_ovector[i] - begin;
5975 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5976 if (jit_ovector[i] >= begin)
5977 callout_block->capture_top = i;
5978 }
5979
5980 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5981 if (offset_count > 0)
5982 offset_vector[0] = -1;
5983 if (offset_count > 1)
5984 offset_vector[1] = -1;
5985 return (*PUBL(callout))(callout_block);
5986 }
5987
5988 /* Aligning to 8 byte. */
5989 #define CALLOUT_ARG_SIZE \
5990 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5991
5992 #define CALLOUT_ARG_OFFSET(arg) \
5993 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5994
5995 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5996 {
5997 DEFINE_COMPILER;
5998 backtrack_common *backtrack;
5999
6000 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6001
6002 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6003
6004 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6005 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6006 SLJIT_ASSERT(common->capture_last_ptr != 0);
6007 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6008 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6009
6010 /* These pointer sized fields temporarly stores internal variables. */
6011 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6012 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6013 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6014
6015 if (common->mark_ptr != 0)
6016 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6017 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6018 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6019 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6020
6021 /* Needed to save important temporary registers. */
6022 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
6023 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6024 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
6025 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6026 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6027 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6028 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6029
6030 /* Check return value. */
6031 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6032 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
6033 if (common->forced_quit_label == NULL)
6034 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
6035 else
6036 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
6037 return cc + 2 + 2 * LINK_SIZE;
6038 }
6039
6040 #undef CALLOUT_ARG_SIZE
6041 #undef CALLOUT_ARG_OFFSET
6042
/* Generates the matching path of an assertion group (opcodes OP_ASSERT ..
OP_ASSERTBACK_NOT), optionally prefixed by OP_BRAZERO or OP_BRAMINZERO.

common       compiler state; the accept/quit/then related fields copied into
             the save_* locals below are restored before every return
cc           points to the assertion opcode, or to the OP_BRAZERO /
             OP_BRAMINZERO in front of it
backtrack    receives framesize and private_data_ptr; its matchingpath label
             is set here for OP_BRAZERO
conditional  TRUE selects backtrack->condfailed as the failure jump list
             instead of backtrack->common.topbacktracks

Returns the address following the closing item of the group, or NULL if
sljit reports a compiler error while an alternative is being compiled. */
6043 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6044 {
6045 DEFINE_COMPILER;
6046 int framesize;
6047 int extrasize;
6048 BOOL needs_control_head;
6049 int private_data_ptr;
6050 backtrack_common altbacktrack;
6051 pcre_uchar *ccbegin;
6052 pcre_uchar opcode;
6053 pcre_uchar bra = OP_BRA;
6054 jump_list *tmp = NULL;
6055 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6056 jump_list **found;
6057 /* Saving previous accept variables. */
6058 BOOL save_local_exit = common->local_exit;
6059 BOOL save_positive_assert = common->positive_assert;
6060 then_trap_backtrack *save_then_trap = common->then_trap;
6061 struct sljit_label *save_quit_label = common->quit_label;
6062 struct sljit_label *save_accept_label = common->accept_label;
6063 jump_list *save_quit = common->quit;
6064 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6065 jump_list *save_accept = common->accept;
6066 struct sljit_jump *jump;
6067 struct sljit_jump *brajump = NULL;
6068
6069 /* Assert captures then. */
6070 common->then_trap = NULL;
6071
6072 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6073 {
6074 SLJIT_ASSERT(!conditional);
6075 bra = *cc;
6076 cc++;
6077 }
6078 private_data_ptr = PRIVATE_DATA(cc);
6079 SLJIT_ASSERT(private_data_ptr != 0);
6080 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6081 backtrack->framesize = framesize;
6082 backtrack->private_data_ptr = private_data_ptr;
6083 opcode = *cc;
6084 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A matching alternative of a positive assert jumps to the local list tmp;
for a negative assert it jumps directly to the failure target. */
6085 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin .. cc delimit the first alternative. */
6086 ccbegin = cc;
6087 cc += GET(cc, 1);
6088
6089 if (bra == OP_BRAMINZERO)
6090 {
6091 /* This is a braminzero backtrack path. */
6092 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6093 free_stack(common, 1);
6094 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6095 }
6096
/* Save STR_PTR (and the control head when needed) on the stack. */
6097 if (framesize < 0)
6098 {
/* No frame: only extrasize slots are pushed. */
6099 extrasize = needs_control_head ? 2 : 1;
6100 if (framesize == no_frame)
6101 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6102 allocate_stack(common, extrasize);
6103 if (needs_control_head)
6104 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6105 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6106 if (needs_control_head)
6107 {
6108 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6109 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6110 }
6111 }
6112 else
6113 {
/* A frame is required: reserve framesize + extrasize slots, store the
adjusted STACK_TOP in private_data_ptr and push its previous value
(plus the control head when needed) before init_frame() is called. */
6114 extrasize = needs_control_head ? 3 : 2;
6115 allocate_stack(common, framesize + extrasize);
6116 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6117 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6118 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6119 if (needs_control_head)
6120 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6121 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6122 if (needs_control_head)
6123 {
6124 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6125 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6126 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6127 }
6128 else
6129 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6130 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6131 }
6132
6133 memset(&altbacktrack, 0, sizeof(backtrack_common));
6134 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6135 {
6136 /* Negative assert is stronger than positive assert. */
6137 common->local_exit = TRUE;
6138 common->quit_label = NULL;
6139 common->quit = NULL;
6140 common->positive_assert = FALSE;
6141 }
6142 else
6143 common->positive_assert = TRUE;
6144 common->positive_assert_quit = NULL;
6145
/* One iteration per alternative; the loop ends when the item following the
current alternative is not OP_ALT. */
6146 while (1)
6147 {
6148 common->accept_label = NULL;
6149 common->accept = NULL;
6150 altbacktrack.top = NULL;
6151 altbacktrack.topbacktracks = NULL;
6152
/* Every alternative after the first reloads STR_PTR from the stack. */
6153 if (*ccbegin == OP_ALT)
6154 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6155
6156 altbacktrack.cc = ccbegin;
6157 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6158 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6159 {
/* Restore the saved compiler state before reporting the failure. */
6160 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6161 {
6162 common->local_exit = save_local_exit;
6163 common->quit_label = save_quit_label;
6164 common->quit = save_quit;
6165 }
6166 common->positive_assert = save_positive_assert;
6167 common->then_trap = save_then_trap;
6168 common->accept_label = save_accept_label;
6169 common->positive_assert_quit = save_positive_assert_quit;
6170 common->accept = save_accept;
6171 return NULL;
6172 }
/* Jumps collected in common->accept while compiling this alternative
continue here. */
6173 common->accept_label = LABEL();
6174 if (common->accept != NULL)
6175 set_jumps(common->accept, common->accept_label);
6176
6177 /* Reset stack. */
6178 if (framesize < 0)
6179 {
6180 if (framesize == no_frame)
6181 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6182 else
6183 free_stack(common, extrasize);
6184 if (needs_control_head)
6185 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6186 }
6187 else
6188 {
6189 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6190 {
6191 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6192 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6193 if (needs_control_head)
6194 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6195 }
6196 else
6197 {
6198 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6199 if (needs_control_head)
6200 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6201 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6202 }
6203 }
6204
6205 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6206 {
6207 /* We know that STR_PTR was stored on the top of the stack. */
6208 if (conditional)
6209 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6210 else if (bra == OP_BRAZERO)
6211 {
6212 if (framesize < 0)
6213 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6214 else
6215 {
6216 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6217 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6218 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6219 }
6220 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6221 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6222 }
6223 else if (framesize >= 0)
6224 {
6225 /* For OP_BRA and OP_BRAMINZERO. */
6226 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6227 }
6228 }
/* The alternative matched: leave through the list selected by found. */
6229 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6230
6231 compile_backtrackingpath(common, altbacktrack.top);
6232 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6233 {
/* Restore the saved compiler state before reporting the failure. */
6234 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6235 {
6236 common->local_exit = save_local_exit;
6237 common->quit_label = save_quit_label;
6238 common->quit = save_quit;
6239 }
6240 common->positive_assert = save_positive_assert;
6241 common->then_trap = save_then_trap;
6242 common->accept_label = save_accept_label;
6243 common->positive_assert_quit = save_positive_assert_quit;
6244 common->accept = save_accept;
6245 return NULL;
6246 }
6247 set_jumps(altbacktrack.topbacktracks, LABEL());
6248
6249 if (*cc != OP_ALT)
6250 break;
6251
/* Advance to the next alternative. */
6252 ccbegin = cc;
6253 cc += GET(cc, 1);
6254 }
6255
6256 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6257 {
6258 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6259 /* Makes the check less complicated below. */
6260 common->positive_assert_quit = common->quit;
6261 }
6262
6263 /* None of them matched. */
6264 if (common->positive_assert_quit != NULL)
6265 {
/* The fall-through path skips this stack fix-up; only the jumps collected
in positive_assert_quit land on it. */
6266 jump = JUMP(SLJIT_JUMP);
6267 set_jumps(common->positive_assert_quit, LABEL());
6268 SLJIT_ASSERT(framesize != no_stack);
6269 if (framesize < 0)
6270 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6271 else
6272 {
6273 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6274 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6275 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6276 }
6277 JUMPHERE(jump);
6278 }
6279
6280 if (needs_control_head)
6281 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6282
6283 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6284 {
6285 /* Assert is failed. */
6286 if (conditional || bra == OP_BRAZERO)
6287 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6288
6289 if (framesize < 0)
6290 {
6291 /* The topmost item should be 0. */
6292 if (bra == OP_BRAZERO)
6293 {
6294 if (extrasize == 2)
6295 free_stack(common, 1);
6296 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6297 }
6298 else
6299 free_stack(common, extrasize);
6300 }
6301 else
6302 {
6303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6304 /* The topmost item should be 0. */
6305 if (bra == OP_BRAZERO)
6306 {
6307 free_stack(common, framesize + extrasize - 1);
6308 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6309 }
6310 else
6311 free_stack(common, framesize + extrasize);
6312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6313 }
/* For OP_BRAZERO this jump is bound to the matching path label below;
otherwise it is added to the failure target list. */
6314 jump = JUMP(SLJIT_JUMP);
6315 if (bra != OP_BRAZERO)
6316 add_jump(compiler, target, jump);
6317
6318 /* Assert is successful. */
6319 set_jumps(tmp, LABEL());
6320 if (framesize < 0)
6321 {
6322 /* We know that STR_PTR was stored on the top of the stack. */
6323 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6324 /* Keep the STR_PTR on the top of the stack. */
6325 if (bra == OP_BRAZERO)
6326 {
6327 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6328 if (extrasize == 2)
6329 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6330 }
6331 else if (bra == OP_BRAMINZERO)
6332 {
6333 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6334 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6335 }
6336 }
6337 else
6338 {
6339 if (bra == OP_BRA)
6340 {
6341 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6342 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6343 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6344 }
6345 else
6346 {
6347 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6348 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6349 if (extrasize == 2)
6350 {
6351 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6352 if (bra == OP_BRAMINZERO)
6353 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6354 }
6355 else
6356 {
6357 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6358 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6359 }
6360 }
6361 }
6362
6363 if (bra == OP_BRAZERO)
6364 {
6365 backtrack->matchingpath = LABEL();
6366 SET_LABEL(jump, backtrack->matchingpath);
6367 }
6368 else if (bra == OP_BRAMINZERO)
6369 {
6370 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6371 JUMPHERE(brajump);
6372 if (framesize >= 0)
6373 {
6374 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6375 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6376 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6377 }
6378 set_jumps(backtrack->common.topbacktracks, LABEL());
6379 }
6380 }
6381 else
6382 {
6383 /* AssertNot is successful. */
6384 if (framesize < 0)
6385 {
6386 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6387 if (bra != OP_BRA)
6388 {
6389 if (extrasize == 2)
6390 free_stack(common, 1);
6391 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6392 }
6393 else
6394 free_stack(common, extrasize);
6395 }
6396 else
6397 {
6398 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6399 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6400 /* The topmost item should be 0. */
6401 if (bra != OP_BRA)
6402 {
6403 free_stack(common, framesize + extrasize - 1);
6404 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6405 }
6406 else
6407 free_stack(common, framesize + extrasize);
6408 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6409 }
6410
6411 if (bra == OP_BRAZERO)
6412 backtrack->matchingpath = LABEL();
6413 else if (bra == OP_BRAMINZERO)
6414 {
6415 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6416 JUMPHERE(brajump);
6417 }
6418
6419 if (bra != OP_BRA)
6420 {
/* With a BRAZERO / BRAMINZERO prefix the collected jumps are bound here
and the list is cleared. */
6421 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6422 set_jumps(backtrack->common.topbacktracks, LABEL());
6423 backtrack->common.topbacktracks = NULL;
6424 }
6425 }
6426
/* Restore the compiler state saved on entry. */
6427 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6428 {
6429 common->local_exit = save_local_exit;
6430 common->quit_label = save_quit_label;
6431 common->quit = save_quit;
6432 }
6433 common->positive_assert = save_positive_assert;
6434 common->then_trap = save_then_trap;
6435 common->accept_label = save_accept_label;
6436 common->positive_assert_quit = save_positive_assert_quit;
6437 common->accept = save_accept;
6438 return cc + 1 + LINK_SIZE;
6439 }
6440
6441 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6442 {
6443 DEFINE_COMPILER;
6444 int stacksize;
6445
6446 if (framesize < 0)
6447 {
6448 if (framesize == no_frame)
6449 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6450 else
6451 {
6452 stacksize = needs_control_head ? 1 : 0;
6453 if (ket != OP_KET || has_alternatives)
6454 stacksize++;
6455 free_stack(common, stacksize);
6456 }
6457
6458 if (needs_control_head)
6459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6460
6461 /* TMP2 which is set here used by OP_KETRMAX below. */
6462 if (ket == OP_KETRMAX)
6463 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6464 else if (ket == OP_KETRMIN)
6465 {
6466 /* Move the STR_PTR to the private_data_ptr. */
6467 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6468 }
6469 }
6470 else
6471 {
6472 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6473 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6474 if (needs_control_head)
6475 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6476
6477 if (ket == OP_KETRMAX)
6478 {
6479 /* TMP2 which is set here used by OP_KETRMAX below. */
6480 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6481 }
6482 }
6483 if (needs_control_head)
6484 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6485 }
6486
6487 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6488 {
6489 DEFINE_COMPILER;
6490
6491 if (common->capture_last_ptr != 0)
6492 {
6493 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6494 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6495 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6496 stacksize++;
6497 }
6498 if (common->optimized_cbracket[offset >> 1] == 0)
6499 {
6500 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6501 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6502 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6503 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6504 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6505 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6506 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6507 stacksize += 2;
6508 }
6509 return stacksize;
6510 }
6511
6512 /*
6513 Handling bracketed expressions is probably the most complex part.
6514
6515 Stack layout naming characters:
6516 S - Push the current STR_PTR
6517 0 - Push a 0 (NULL)
6518 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6519 before the next alternative. Not pushed if there are no alternatives.
6520 M - Any values pushed by the current alternative. Can be empty, or anything.
6521 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6522 L - Push the previous local (pointed by localptr) to the stack
6523 () - optional values stored on the stack
6524 ()* - optional, can be stored multiple times
6525
6526 The following list shows the regular expression templates, their PCRE byte codes
6527 and stack layout supported by pcre-sljit.
6528
6529 (?:) OP_BRA | OP_KET A M
6530 () OP_CBRA | OP_KET C M
6531 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6532 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6533 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6534 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6535 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6536 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6537 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6538 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6539 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6540 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6541 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6542 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6543 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6544 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6545 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6546 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6547 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6548 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6549 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6550 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6551
6552
6553 Stack layout naming characters:
6554 A - Push the alternative index (starting from 0) on the stack.
6555 Not pushed if there are no alternatives.
6556 M - Any values pushed by the current alternative. Can be empty, or anything.
6557
6558 The next list shows the possible content of a bracket:
6559 (|) OP_*BRA | OP_ALT ... M A
6560 (?()|) OP_*COND | OP_ALT M A
6561 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6562 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6563 Or nothing, if trace is unnecessary
6564 */
6565
6566 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6567 {
6568 DEFINE_COMPILER;
6569 backtrack_common *backtrack;
6570 pcre_uchar opcode;
6571 int private_data_ptr = 0;
6572 int offset = 0;
6573 int i, stacksize;
6574 int repeat_ptr = 0, repeat_length = 0;
6575 int repeat_type = 0, repeat_count = 0;
6576 pcre_uchar *ccbegin;
6577 pcre_uchar *matchingpath;
6578 pcre_uchar *slot;
6579 pcre_uchar bra = OP_BRA;
6580 pcre_uchar ket;
6581 assert_backtrack *assert;
6582 BOOL has_alternatives;
6583 BOOL needs_control_head = FALSE;
6584 struct sljit_jump *jump;
6585 struct sljit_jump *skip;
6586 struct sljit_label *rmax_label = NULL;
6587 struct sljit_jump *braminzero = NULL;
6588
6589 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6590
6591 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6592 {
6593 bra = *cc;
6594 cc++;
6595 opcode = *cc;
6596 }
6597
6598 opcode = *cc;
6599 ccbegin = cc;
6600 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6601 ket = *matchingpath;
6602 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6603 {
6604 repeat_ptr = PRIVATE_DATA(matchingpath);
6605 repeat_length = PRIVATE_DATA(matchingpath + 1);
6606 repeat_type = PRIVATE_DATA(matchingpath + 2);
6607 repeat_count = PRIVATE_DATA(matchingpath + 3);
6608 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6609 if (repeat_type == OP_UPTO)
6610 ket = OP_KETRMAX;
6611 if (repeat_type == OP_MINUPTO)
6612 ket = OP_KETRMIN;
6613 }
6614
6615 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6616 {
6617 /* Drop this bracket_backtrack. */
6618 parent->top = backtrack->prev;
6619 return matchingpath + 1 + LINK_SIZE + repeat_length;
6620 }
6621
6622 matchingpath = ccbegin + 1 + LINK_SIZE;
6623 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6624 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6625 cc += GET(cc, 1);
6626
6627 has_alternatives = *cc == OP_ALT;
6628 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6629 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
6630
6631 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6632 opcode = OP_SCOND;
6633 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6634 opcode = OP_ONCE;
6635
6636 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6637 {
6638 /* Capturing brackets has a pre-allocated space. */
6639 offset = GET2(ccbegin, 1 + LINK_SIZE);
6640 if (common->optimized_cbracket[offset] == 0)
6641 {
6642 private_data_ptr = OVECTOR_PRIV(offset);
6643 offset <<= 1;
6644 }
6645 else
6646 {
6647 offset <<= 1;
6648 private_data_ptr = OVECTOR(offset);
6649 }
6650 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6651 matchingpath += IMM2_SIZE;
6652 }
6653 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6654 {
6655 /* Other brackets simply allocate the next entry. */
6656 private_data_ptr = PRIVATE_DATA(ccbegin);
6657 SLJIT_ASSERT(private_data_ptr != 0);
6658 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6659 if (opcode == OP_ONCE)
6660 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6661 }
6662
6663 /* Instructions before the first alternative. */
6664 stacksize = 0;
6665 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6666 stacksize++;
6667 if (bra == OP_BRAZERO)
6668 stacksize++;
6669
6670 if (stacksize > 0)
6671 allocate_stack(common, stacksize);
6672
6673 stacksize = 0;
6674 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6675 {
6676 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6677 stacksize++;
6678 }
6679
6680 if (bra == OP_BRAZERO)
6681 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6682
6683 if (bra == OP_BRAMINZERO)
6684 {
6685 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6686 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6687 if (ket != OP_KETRMIN)
6688 {
6689 free_stack(common, 1);
6690 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6691 }
6692 else
6693 {
6694 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6695 {
6696 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6697 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6698 /* Nothing stored during the first run. */
6699 skip = JUMP(SLJIT_JUMP);
6700 JUMPHERE(jump);
6701 /* Checking zero-length iteration. */
6702 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6703 {
6704 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6705 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6706 }
6707 else
6708 {
6709 /* Except when the whole stack frame must be saved. */
6710 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6711 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6712 }
6713 JUMPHERE(skip);
6714 }
6715 else
6716 {
6717 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6718 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6719 JUMPHERE(jump);
6720 }
6721 }
6722 }
6723
6724 if (repeat_type != 0)
6725 {
6726 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6727 if (repeat_type == OP_EXACT)
6728 rmax_label = LABEL();
6729 }
6730
6731 if (ket == OP_KETRMIN)
6732 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6733
6734 if (ket == OP_KETRMAX)
6735 {
6736 rmax_label = LABEL();
6737 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6738 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
6739 }
6740
6741 /* Handling capturing brackets and alternatives. */
6742 if (opcode == OP_ONCE)
6743 {
6744 stacksize = 0;
6745 if (needs_control_head)