/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1646 - (show annotations)
Tue Apr 12 11:17:23 2016 UTC (3 years, 6 months ago) by zherczeg
File MIME type: text/plain
File size: 359262 byte(s)
Style updates in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary of the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 suppose we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block handed by pointer to a compiled matcher (see the
jit_function typedef). Pointer members are grouped first and the
narrower integer members follow. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
/* NOTE(review): presumably the current position, the start and the end
of the subject string - confirm against the code that fills this block. */
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
/* NOTE(review): presumably the caller's output vector - confirm. */
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 sljit_u32 limit_match;
172 int real_offset_count;
173 int offset_count;
/* Single byte flags. NOTE(review): the names suggest they mirror the
PCRE_NOTBOL / PCRE_NOTEOL / PCRE_NOTEMPTY / PCRE_NOTEMPTY_ATSTART
execution options - confirm. */
174 sljit_u8 notbol;
175 sljit_u8 noteol;
176 sljit_u8 notempty;
177 sljit_u8 notempty_atstart;
178 } jit_arguments;
179
/* Bookkeeping for generated code. The three arrays hold one entry per
JIT compile mode (JIT_NUMBER_OF_COMPILE_MODES entries each). */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
/* User supplied callback and its opaque argument. */
184 PUBL(jit_callback) callback;
185 void *userdata;
186 sljit_u32 top_bracket;
187 sljit_u32 limit_match;
188 } executable_functions;
189
/* Singly linked list node that stores one sljit jump. */
190 typedef struct jump_list {
191 struct sljit_jump *jump;
192 struct jump_list *next;
193 } jump_list;
194
/* Singly linked list node that pairs a jump (start) with a label (quit).
NOTE(review): presumably the jump enters an out-of-line stub and the
label is where the stub resumes - confirm where stubs are emitted. */
195 typedef struct stub_list {
196 struct sljit_jump *start;
197 struct sljit_label *quit;
198 struct stub_list *next;
199 } stub_list;
200
/* Singly linked list node that pairs a label with a pointer to a word.
NOTE(review): presumably the word receives the final address of the
label once code generation has finished - confirm. */
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values. NOTE(review): presumably used in place of a
real (non-negative) frame size; several structures in this file document
their framesize member as "less than 0 if not needed" - confirm. */
207 enum frame_types {
208 no_frame = -1,
209 no_stack = -2
210 };
211
/* Kinds of control entries. NOTE(review): presumably the tags stored in
the chain referenced by compiler_common.control_head_ptr - confirm. */
212 enum control_types {
213 type_mark = 0,
214 type_then_trap = 1
215 };
216
/* Pointer type for a function that uses the SLJIT_CALL calling
convention, receives a jit_arguments block and returns an int. */
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
/* Common header of every backtrack descriptor. Each specialised
*_backtrack structure in this file starts with a member of this type. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
/* Jumps collected for this node, kept as a jump_list. */
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
/* Backtrack descriptor for assertions. */
234 typedef struct assert_backtrack {
/* Must stay the first member. */
235 backtrack_common common;
/* Jumps collected for a failed condition. */
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
/* Backtrack descriptor for brackets. */
245 typedef struct bracket_backtrack {
/* Must stay the first member. */
246 backtrack_common common;
247 /* Where to continue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. Only one member of
the union is meaningful at a time; which one depends on the opcode. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
/* Backtrack descriptor whose name suggests the possessive bracket
opcodes (OP_BRAPOS and friends) - NOTE(review): confirm at the use site. */
265 typedef struct bracketpos_backtrack {
/* Must stay the first member. */
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
/* Backtrack descriptor that only adds a matching path label to the
common header. NOTE(review): the name suggests OP_BRAMINZERO - confirm. */
275 typedef struct braminzero_backtrack {
/* Must stay the first member. */
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
/* Backtrack descriptor for character iterators. */
280 typedef struct char_iterator_backtrack {
/* Must stay the first member. */
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
/* Either a jump list or the charpos record is stored here, never both. */
284 union {
285 jump_list *backtracks;
286 struct {
287 unsigned int othercasebit;
288 pcre_uchar chr;
289 BOOL enabled;
290 } charpos;
291 } u;
292 } char_iterator_backtrack;
293
/* Backtrack descriptor that only adds the label of the next iteration
to the common header. NOTE(review): the name suggests repeated back
references - confirm. */
294 typedef struct ref_iterator_backtrack {
/* Must stay the first member. */
295 backtrack_common common;
296 /* Next iteration. */
297 struct sljit_label *matchingpath;
298 } ref_iterator_backtrack;
299
/* One node of a singly linked list; each node describes a function
entry identified by its starting opcode. */
300 typedef struct recurse_entry {
301 struct recurse_entry *next;
302 /* Contains the function entry. */
303 struct sljit_label *entry;
304 /* Collects the calls until the function is created. */
305 jump_list *calls;
306 /* Points to the starting opcode. */
307 sljit_sw start;
308 } recurse_entry;
309
/* Backtrack descriptor that adds a single flag to the common header.
NOTE(review): presumably the flag tells whether the recursed pattern
was emitted inline instead of being called - confirm. */
310 typedef struct recurse_backtrack {
/* Must stay the first member. */
311 backtrack_common common;
312 BOOL inlined_pattern;
313 } recurse_backtrack;
314
315 #define OP_THEN_TRAP OP_TABLE_LENGTH
316
/* Backtrack descriptor for a then trap. */
317 typedef struct then_trap_backtrack {
/* Must stay the first member. */
318 backtrack_common common;
319 /* If then_trap is not NULL, this structure contains the real
320 then_trap for the backtracking path. */
321 struct then_trap_backtrack *then_trap;
322 /* Points to the starting opcode. */
323 sljit_sw start;
324 /* Exit point for the then opcodes of this alternative. */
325 jump_list *quit;
326 /* Frame size of the current alternative. */
327 int framesize;
328 } then_trap_backtrack;
329
330 #define MAX_RANGE_SIZE 4
331
/* State shared by all code generator functions while one pattern is
compiled. Members whose name ends in _ptr and whose type is sljit_s32
hold offsets rather than C pointers. */
332 typedef struct compiler_common {
333 /* The sljit generic compiler. */
334 struct sljit_compiler *compiler;
335 /* First byte code. */
336 pcre_uchar *start;
337 /* Maps private data offset to each opcode. Indexed by the distance of
the opcode from start (see the PRIVATE_DATA macro). */
338 sljit_s32 *private_data_ptrs;
339 /* Chain list of read-only data ptrs. */
340 void *read_only_data_head;
341 /* Tells whether the capturing bracket is optimized. Indexed by the
capturing bracket number; 0 means not optimized. */
342 sljit_u8 *optimized_cbracket;
343 /* Tells whether the starting offset is a target of then. */
344 sljit_u8 *then_offsets;
345 /* Current position where a THEN must jump. */
346 then_trap_backtrack *then_trap;
347 /* Starting offset of private data for capturing brackets. */
348 sljit_s32 cbra_ptr;
349 /* Output vector starting point. Must be divisible by 2. */
350 sljit_s32 ovector_start;
351 /* Points to the starting character of the current match. */
352 sljit_s32 start_ptr;
353 /* Last known position of the requested byte. */
354 sljit_s32 req_char_ptr;
355 /* Head of the last recursion. */
356 sljit_s32 recursive_head_ptr;
357 /* First inspected character for partial matching.
358 (Needed for avoiding zero length partial matches.) */
359 sljit_s32 start_used_ptr;
360 /* Starting pointer for partial soft matches. */
361 sljit_s32 hit_start;
362 /* Pointer of the match end position. */
363 sljit_s32 match_end_ptr;
364 /* Points to the marked string. */
365 sljit_s32 mark_ptr;
366 /* Recursive control verb management chain. */
367 sljit_s32 control_head_ptr;
368 /* Points to the last matched capture block index. */
369 sljit_s32 capture_last_ptr;
370 /* Fast forward skipping byte code pointer. */
371 pcre_uchar *fast_forward_bc_ptr;
372 /* Locals used by fast fail optimization. */
373 sljit_s32 fast_fail_start_ptr;
374 sljit_s32 fast_fail_end_ptr;
375
376 /* Flipped and lower case tables. */
377 const sljit_u8 *fcc;
378 sljit_sw lcc;
379 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
380 int mode;
381 /* NOTE(review): the original comment read "TRUE, when minlength is
greater than 0", which contradicts the member name; presumably TRUE when
the pattern may match an empty string - confirm where it is assigned.
check_opcode_types also sets it to TRUE when OP_SET_SOM is found. */
382 BOOL might_be_empty;
383 /* \K is found in the pattern. */
384 BOOL has_set_som;
385 /* (*SKIP:arg) is found in the pattern. */
386 BOOL has_skip_arg;
387 /* (*THEN) is found in the pattern. */
388 BOOL has_then;
389 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
390 BOOL has_skip_in_assert_back;
391 /* Currently in recurse or negative assert. */
392 BOOL local_exit;
393 /* Currently in a positive assert. */
394 BOOL positive_assert;
395 /* Newline control. */
396 int nltype;
397 sljit_u32 nlmax;
398 sljit_u32 nlmin;
399 int newline;
400 int bsr_nltype;
401 sljit_u32 bsr_nlmax;
402 sljit_u32 bsr_nlmin;
403 /* Dollar endonly. */
404 int endonly;
405 /* Tables. */
406 sljit_sw ctypes;
407 /* Named capturing brackets. */
408 pcre_uchar *name_table;
409 sljit_sw name_count;
410 sljit_sw name_entry_size;
411
412 /* Labels and jump lists. */
413 struct sljit_label *partialmatchlabel;
414 struct sljit_label *quit_label;
415 struct sljit_label *forced_quit_label;
416 struct sljit_label *accept_label;
417 struct sljit_label *ff_newline_shortcut;
418 stub_list *stubs;
419 label_addr_list *label_addrs;
420 recurse_entry *entries;
421 recurse_entry *currententry;
422 jump_list *partialmatch;
423 jump_list *quit;
424 jump_list *positive_assert_quit;
425 jump_list *forced_quit;
426 jump_list *accept;
427 jump_list *calllimit;
428 jump_list *stackalloc;
429 jump_list *revertframes;
430 jump_list *wordboundary;
431 jump_list *anynewline;
432 jump_list *hspace;
433 jump_list *vspace;
434 jump_list *casefulcmp;
435 jump_list *caselesscmp;
436 jump_list *reset_match;
437 BOOL jscript_compat;
/* The remaining members exist only in UTF (and UCP) enabled builds. */
438 #ifdef SUPPORT_UTF
439 BOOL utf;
440 #ifdef SUPPORT_UCP
441 BOOL use_ucp;
442 jump_list *getucd;
443 #endif
444 #ifdef COMPILE_PCRE8
445 jump_list *utfreadchar;
446 jump_list *utfreadchar16;
447 jump_list *utfreadtype8;
448 #endif
449 #endif /* SUPPORT_UTF */
450 } compiler_common;
451
452 /* For byte_sequence_compare. */
453
/* Working state for byte_sequence_compare. The two unions exist only
when SLJIT_UNALIGNED is enabled; each one overlays the same storage as
a 32 bit value, a 16 bit value and an array of character units whose
element size follows the compiled PCRE width (8, 16 or 32 bit). */
454 typedef struct compare_context {
455 int length;
456 int sourcereg;
457 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
458 int ucharptr;
/* NOTE(review): presumably the characters to compare - confirm. */
459 union {
460 sljit_s32 asint;
461 sljit_u16 asushort;
462 #if defined COMPILE_PCRE8
463 sljit_u8 asbyte;
464 sljit_u8 asuchars[4];
465 #elif defined COMPILE_PCRE16
466 sljit_u16 asuchars[2];
467 #elif defined COMPILE_PCRE32
468 sljit_u32 asuchars[1];
469 #endif
470 } c;
/* NOTE(review): presumably the matching other-case data - confirm. */
471 union {
472 sljit_s32 asint;
473 sljit_u16 asushort;
474 #if defined COMPILE_PCRE8
475 sljit_u8 asbyte;
476 sljit_u8 asuchars[4];
477 #elif defined COMPILE_PCRE16
478 sljit_u16 asuchars[2];
479 #elif defined COMPILE_PCRE32
480 sljit_u32 asuchars[1];
481 #endif
482 } oc;
483 #endif
484 } compare_context;
485
486 /* Undefine sljit macros. */
487 #undef CMP
488
489 /* Used for accessing the elements of the stack. */
490 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
491
492 #define TMP1 SLJIT_R0
493 #define TMP2 SLJIT_R2
494 #define TMP3 SLJIT_R3
495 #define STR_PTR SLJIT_S0
496 #define STR_END SLJIT_S1
497 #define STACK_TOP SLJIT_R1
498 #define STACK_LIMIT SLJIT_S2
499 #define COUNT_MATCH SLJIT_S3
500 #define ARGUMENTS SLJIT_S4
501 #define RETURN_ADDR SLJIT_R4
502
503 /* Local space layout. */
504 /* These two locals can be used by the current opcode. */
505 #define LOCALS0 (0 * sizeof(sljit_sw))
506 #define LOCALS1 (1 * sizeof(sljit_sw))
507 /* Two local variables for possessive quantifiers (char1 cannot use them). */
508 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
509 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
510 /* Max limit of recursions. */
511 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
512 /* The output vector is stored on the stack, and contains pointers
513 to characters. The vector data is divided into two groups: the first
514 group contains the start / end character pointers, and the second holds
515 the start pointers while the end of the capturing group has not yet been reached. */
516 #define OVECTOR_START (common->ovector_start)
517 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
518 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
519 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
520
521 #if defined COMPILE_PCRE8
522 #define MOV_UCHAR SLJIT_MOV_U8
523 #define MOVU_UCHAR SLJIT_MOVU_U8
524 #elif defined COMPILE_PCRE16
525 #define MOV_UCHAR SLJIT_MOV_U16
526 #define MOVU_UCHAR SLJIT_MOVU_U16
527 #elif defined COMPILE_PCRE32
528 #define MOV_UCHAR SLJIT_MOV_U32
529 #define MOVU_UCHAR SLJIT_MOVU_U32
530 #else
531 #error Unsupported compiling mode
532 #endif
533
534 /* Shortcuts. */
535 #define DEFINE_COMPILER \
536 struct sljit_compiler *compiler = common->compiler
537 #define OP1(op, dst, dstw, src, srcw) \
538 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
539 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
540 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
541 #define LABEL() \
542 sljit_emit_label(compiler)
543 #define JUMP(type) \
544 sljit_emit_jump(compiler, (type))
545 #define JUMPTO(type, label) \
546 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
547 #define JUMPHERE(jump) \
548 sljit_set_label((jump), sljit_emit_label(compiler))
549 #define SET_LABEL(jump, label) \
550 sljit_set_label((jump), (label))
551 #define CMP(type, src1, src1w, src2, src2w) \
552 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
553 #define CMPTO(type, src1, src1w, src2, src2w, label) \
554 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
555 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
556 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
557 #define GET_LOCAL_BASE(dst, dstw, offset) \
558 sljit_get_local_base(compiler, (dst), (dstw), (offset))
559
560 #define READ_CHAR_MAX 0x7fffffff
561
562 static pcre_uchar *bracketend(pcre_uchar *cc)
563 {
564 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
565 do cc += GET(cc, 1); while (*cc == OP_ALT);
566 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
567 cc += 1 + LINK_SIZE;
568 return cc;
569 }
570
571 static int no_alternatives(pcre_uchar *cc)
572 {
573 int count = 0;
574 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
575 do
576 {
577 cc += GET(cc, 1);
578 count++;
579 }
580 while (*cc == OP_ALT);
581 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
582 return count;
583 }
584
585 /* Functions which might need modification for every newly supported opcode:
586 next_opcode
587 check_opcode_types
588 set_private_data_ptrs
589 get_framesize
590 init_frame
591 get_private_data_copy_length
592 copy_private_data
593 compile_matchingpath
594 compile_backtrackingpath
595 */
596
/* Returns the address of the opcode that follows the one at cc.
Returns NULL when the opcode is not handled: OP_ANYBYTE while
common->utf is set, or an opcode that is not listed below. */
597 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
598 {
599 SLJIT_UNUSED_ARG(common);
600 switch(*cc)
601 {
/* Opcodes whose full length is given by the PRIV(OP_lengths) table. */
602 case OP_SOD:
603 case OP_SOM:
604 case OP_SET_SOM:
605 case OP_NOT_WORD_BOUNDARY:
606 case OP_WORD_BOUNDARY:
607 case OP_NOT_DIGIT:
608 case OP_DIGIT:
609 case OP_NOT_WHITESPACE:
610 case OP_WHITESPACE:
611 case OP_NOT_WORDCHAR:
612 case OP_WORDCHAR:
613 case OP_ANY:
614 case OP_ALLANY:
615 case OP_NOTPROP:
616 case OP_PROP:
617 case OP_ANYNL:
618 case OP_NOT_HSPACE:
619 case OP_HSPACE:
620 case OP_NOT_VSPACE:
621 case OP_VSPACE:
622 case OP_EXTUNI:
623 case OP_EODN:
624 case OP_EOD:
625 case OP_CIRC:
626 case OP_CIRCM:
627 case OP_DOLL:
628 case OP_DOLLM:
629 case OP_CRSTAR:
630 case OP_CRMINSTAR:
631 case OP_CRPLUS:
632 case OP_CRMINPLUS:
633 case OP_CRQUERY:
634 case OP_CRMINQUERY:
635 case OP_CRRANGE:
636 case OP_CRMINRANGE:
637 case OP_CRPOSSTAR:
638 case OP_CRPOSPLUS:
639 case OP_CRPOSQUERY:
640 case OP_CRPOSRANGE:
641 case OP_CLASS:
642 case OP_NCLASS:
643 case OP_REF:
644 case OP_REFI:
645 case OP_DNREF:
646 case OP_DNREFI:
647 case OP_RECURSE:
648 case OP_CALLOUT:
649 case OP_ALT:
650 case OP_KET:
651 case OP_KETRMAX:
652 case OP_KETRMIN:
653 case OP_KETRPOS:
654 case OP_REVERSE:
655 case OP_ASSERT:
656 case OP_ASSERT_NOT:
657 case OP_ASSERTBACK:
658 case OP_ASSERTBACK_NOT:
659 case OP_ONCE:
660 case OP_ONCE_NC:
661 case OP_BRA:
662 case OP_BRAPOS:
663 case OP_CBRA:
664 case OP_CBRAPOS:
665 case OP_COND:
666 case OP_SBRA:
667 case OP_SBRAPOS:
668 case OP_SCBRA:
669 case OP_SCBRAPOS:
670 case OP_SCOND:
671 case OP_CREF:
672 case OP_DNCREF:
673 case OP_RREF:
674 case OP_DNRREF:
675 case OP_DEF:
676 case OP_BRAZERO:
677 case OP_BRAMINZERO:
678 case OP_BRAPOSZERO:
679 case OP_PRUNE:
680 case OP_SKIP:
681 case OP_THEN:
682 case OP_COMMIT:
683 case OP_FAIL:
684 case OP_ACCEPT:
685 case OP_ASSERT_ACCEPT:
686 case OP_CLOSE:
687 case OP_SKIPZERO:
688 return cc + PRIV(OP_lengths)[*cc];
689
/* Opcodes that end with a character: the table length is used, and in
UTF mode the extra code units of the character are added as well. */
690 case OP_CHAR:
691 case OP_CHARI:
692 case OP_NOT:
693 case OP_NOTI:
694 case OP_STAR:
695 case OP_MINSTAR:
696 case OP_PLUS:
697 case OP_MINPLUS:
698 case OP_QUERY:
699 case OP_MINQUERY:
700 case OP_UPTO:
701 case OP_MINUPTO:
702 case OP_EXACT:
703 case OP_POSSTAR:
704 case OP_POSPLUS:
705 case OP_POSQUERY:
706 case OP_POSUPTO:
707 case OP_STARI:
708 case OP_MINSTARI:
709 case OP_PLUSI:
710 case OP_MINPLUSI:
711 case OP_QUERYI:
712 case OP_MINQUERYI:
713 case OP_UPTOI:
714 case OP_MINUPTOI:
715 case OP_EXACTI:
716 case OP_POSSTARI:
717 case OP_POSPLUSI:
718 case OP_POSQUERYI:
719 case OP_POSUPTOI:
720 case OP_NOTSTAR:
721 case OP_NOTMINSTAR:
722 case OP_NOTPLUS:
723 case OP_NOTMINPLUS:
724 case OP_NOTQUERY:
725 case OP_NOTMINQUERY:
726 case OP_NOTUPTO:
727 case OP_NOTMINUPTO:
728 case OP_NOTEXACT:
729 case OP_NOTPOSSTAR:
730 case OP_NOTPOSPLUS:
731 case OP_NOTPOSQUERY:
732 case OP_NOTPOSUPTO:
733 case OP_NOTSTARI:
734 case OP_NOTMINSTARI:
735 case OP_NOTPLUSI:
736 case OP_NOTMINPLUSI:
737 case OP_NOTQUERYI:
738 case OP_NOTMINQUERYI:
739 case OP_NOTUPTOI:
740 case OP_NOTMINUPTOI:
741 case OP_NOTEXACTI:
742 case OP_NOTPOSSTARI:
743 case OP_NOTPOSPLUSI:
744 case OP_NOTPOSQUERYI:
745 case OP_NOTPOSUPTOI:
746 cc += PRIV(OP_lengths)[*cc];
747 #ifdef SUPPORT_UTF
/* cc[-1] is the last unit covered by the table length. */
748 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
749 #endif
750 return cc;
751
752 /* Special cases. */
/* The result is one unit short of the table length. NOTE(review):
presumably so that the trailing type opcode is visited as the next
opcode - confirm. */
753 case OP_TYPESTAR:
754 case OP_TYPEMINSTAR:
755 case OP_TYPEPLUS:
756 case OP_TYPEMINPLUS:
757 case OP_TYPEQUERY:
758 case OP_TYPEMINQUERY:
759 case OP_TYPEUPTO:
760 case OP_TYPEMINUPTO:
761 case OP_TYPEEXACT:
762 case OP_TYPEPOSSTAR:
763 case OP_TYPEPOSPLUS:
764 case OP_TYPEPOSQUERY:
765 case OP_TYPEPOSUPTO:
766 return cc + PRIV(OP_lengths)[*cc] - 1;
767
/* Rejected (NULL) in UTF mode, otherwise a single unit. */
768 case OP_ANYBYTE:
769 #ifdef SUPPORT_UTF
770 if (common->utf) return NULL;
771 #endif
772 return cc + 1;
773
774 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* The length of an extended class is stored after the opcode. */
775 case OP_XCLASS:
776 return cc + GET(cc, 1);
777 #endif
778
/* Verbs with an argument: cc[1] units are skipped in addition to three
fixed units. NOTE(review): presumably opcode, length and terminator
around a name of cc[1] units - confirm. */
779 case OP_MARK:
780 case OP_PRUNE_ARG:
781 case OP_SKIP_ARG:
782 case OP_THEN_ARG:
783 return cc + 1 + 2 + cc[1];
784
785 default:
786 /* All opcodes are supported now! */
787 SLJIT_ASSERT_STOP();
788 return NULL;
789 }
790 }
791
/* Walks the byte code in [cc, ccend) once, records the pattern features
it finds in common, and reserves one sljit_sw sized slot at
common->ovector_start for each of recursive_head_ptr, capture_last_ptr
and mark_ptr the first time a matching opcode is seen.
Returns FALSE when the pattern cannot be handled (a callout directly
inside a conditional group, or an opcode rejected by next_opcode),
TRUE otherwise. */
792 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
793 {
794 int count;
795 pcre_uchar *slot;
/* Furthest end of any lookbehind seen so far; starts before the code so
that nothing counts as being inside a lookbehind. */
796 pcre_uchar *assert_back_end = cc - 1;
797
798 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
799 while (cc < ccend)
800 {
801 switch(*cc)
802 {
803 case OP_SET_SOM:
804 common->has_set_som = TRUE;
805 common->might_be_empty = TRUE;
806 cc += 1;
807 break;
808
/* A referenced capturing bracket is marked as not optimized. */
809 case OP_REF:
810 case OP_REFI:
811 common->optimized_cbracket[GET2(cc, 1)] = 0;
812 cc += 1 + IMM2_SIZE;
813 break;
814
815 case OP_CBRAPOS:
816 case OP_SCBRAPOS:
817 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
818 cc += 1 + LINK_SIZE + IMM2_SIZE;
819 break;
820
821 case OP_COND:
822 case OP_SCOND:
823 /* Only AUTO_CALLOUT can insert this opcode. We do
824 not intend to support this case. */
825 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
826 return FALSE;
827 cc += 1 + LINK_SIZE;
828 break;
829
830 case OP_CREF:
831 common->optimized_cbracket[GET2(cc, 1)] = 0;
832 cc += 1 + IMM2_SIZE;
833 break;
834
/* Duplicate names: count consecutive name table entries are visited,
starting at the given entry index, and each bracket number found at the
start of an entry is marked as not optimized. */
835 case OP_DNREF:
836 case OP_DNREFI:
837 case OP_DNCREF:
838 count = GET2(cc, 1 + IMM2_SIZE);
839 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
840 while (count-- > 0)
841 {
842 common->optimized_cbracket[GET2(slot, 0)] = 0;
843 slot += common->name_entry_size;
844 }
845 cc += 1 + 2 * IMM2_SIZE;
846 break;
847
848 case OP_RECURSE:
849 /* Set its value only once. */
850 if (common->recursive_head_ptr == 0)
851 {
852 common->recursive_head_ptr = common->ovector_start;
853 common->ovector_start += sizeof(sljit_sw);
854 }
855 cc += 1 + LINK_SIZE;
856 break;
857
/* The slot is reserved only once, as for OP_RECURSE above. */
858 case OP_CALLOUT:
859 if (common->capture_last_ptr == 0)
860 {
861 common->capture_last_ptr = common->ovector_start;
862 common->ovector_start += sizeof(sljit_sw);
863 }
864 cc += 2 + 2 * LINK_SIZE;
865 break;
866
/* Remember how far the lookbehind extends; slot is reused here as a
plain temporary. Only the opening opcode is skipped, so the body of the
assertion is still scanned by this loop. */
867 case OP_ASSERTBACK:
868 slot = bracketend(cc);
869 if (slot > assert_back_end)
870 assert_back_end = slot;
871 cc += 1 + LINK_SIZE;
872 break;
873
/* control_head_ptr is set to 1 here only as a flag. NOTE(review):
presumably it is replaced by a real offset later - confirm. */
874 case OP_THEN_ARG:
875 common->has_then = TRUE;
876 common->control_head_ptr = 1;
877 /* Fall through. */
878
879 case OP_PRUNE_ARG:
880 case OP_MARK:
881 if (common->mark_ptr == 0)
882 {
883 common->mark_ptr = common->ovector_start;
884 common->ovector_start += sizeof(sljit_sw);
885 }
886 cc += 1 + 2 + cc[1];
887 break;
888
889 case OP_THEN:
890 common->has_then = TRUE;
891 common->control_head_ptr = 1;
892 cc += 1;
893 break;
894
/* A SKIP located before the end of a lookbehind seen earlier. */
895 case OP_SKIP:
896 if (cc < assert_back_end)
897 common->has_skip_in_assert_back = TRUE;
898 cc += 1;
899 break;
900
901 case OP_SKIP_ARG:
902 common->control_head_ptr = 1;
903 common->has_skip_arg = TRUE;
904 if (cc < assert_back_end)
905 common->has_skip_in_assert_back = TRUE;
906 cc += 1 + 2 + cc[1];
907 break;
908
/* Everything else only needs to be stepped over; a NULL result means
the opcode is not handled. */
909 default:
910 cc = next_opcode(common, cc);
911 if (cc == NULL)
912 return FALSE;
913 break;
914 }
915 }
916 return TRUE;
917 }
918
919 static BOOL is_accelerated_repeat(pcre_uchar *cc)
920 {
921 switch(*cc)
922 {
923 case OP_TYPESTAR:
924 case OP_TYPEMINSTAR:
925 case OP_TYPEPLUS:
926 case OP_TYPEMINPLUS:
927 case OP_TYPEPOSSTAR:
928 case OP_TYPEPOSPLUS:
929 return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
930
931 case OP_STAR:
932 case OP_MINSTAR:
933 case OP_PLUS:
934 case OP_MINPLUS:
935 case OP_POSSTAR:
936 case OP_POSPLUS:
937
938 case OP_STARI:
939 case OP_MINSTARI:
940 case OP_PLUSI:
941 case OP_MINPLUSI:
942 case OP_POSSTARI:
943 case OP_POSPLUSI:
944
945 case OP_NOTSTAR:
946 case OP_NOTMINSTAR:
947 case OP_NOTPLUS:
948 case OP_NOTMINPLUS:
949 case OP_NOTPOSSTAR:
950 case OP_NOTPOSPLUS:
951
952 case OP_NOTSTARI:
953 case OP_NOTMINSTARI:
954 case OP_NOTPLUSI:
955 case OP_NOTMINPLUSI:
956 case OP_NOTPOSSTARI:
957 case OP_NOTPOSPLUSI:
958 return TRUE;
959
960 case OP_CLASS:
961 case OP_NCLASS:
962 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
963 case OP_XCLASS:
964 cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
965 #else
966 cc += (1 + (32 / sizeof(pcre_uchar)));
967 #endif
968
969 switch(*cc)
970 {
971 case OP_CRSTAR:
972 case OP_CRMINSTAR:
973 case OP_CRPLUS:
974 case OP_CRMINPLUS:
975 case OP_CRPOSSTAR:
976 case OP_CRPOSPLUS:
977 return TRUE;
978 }
979 break;
980 }
981 return FALSE;
982 }
983
984 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
985 {
986 pcre_uchar *cc = common->start;
987 pcre_uchar *end;
988
989 /* Skip not repeated brackets. */
990 while (TRUE)
991 {
992 switch(*cc)
993 {
994 case OP_SOD:
995 case OP_SOM:
996 case OP_SET_SOM:
997 case OP_NOT_WORD_BOUNDARY:
998 case OP_WORD_BOUNDARY:
999 case OP_EODN:
1000 case OP_EOD:
1001 case OP_CIRC:
1002 case OP_CIRCM:
1003 case OP_DOLL:
1004 case OP_DOLLM:
1005 /* Zero width assertions. */
1006 cc++;
1007 continue;
1008 }
1009
1010 if (*cc != OP_BRA && *cc != OP_CBRA)
1011 break;
1012
1013 end = cc + GET(cc, 1);
1014 if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1015 return FALSE;
1016 if (*cc == OP_CBRA)
1017 {
1018 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1019 return FALSE;
1020 cc += IMM2_SIZE;
1021 }
1022 cc += 1 + LINK_SIZE;
1023 }
1024
1025 if (is_accelerated_repeat(cc))
1026 {
1027 common->fast_forward_bc_ptr = cc;
1028 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1029 *private_data_start += sizeof(sljit_sw);
1030 return TRUE;
1031 }
1032 return FALSE;
1033 }
1034
1035 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
1036 {
1037 pcre_uchar *next_alt;
1038
1039 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1040
1041 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1042 return;
1043
1044 next_alt = bracketend(cc) - (1 + LINK_SIZE);
1045 if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1046 return;
1047
1048 do
1049 {
1050 next_alt = cc + GET(cc, 1);
1051
1052 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1053
1054 while (TRUE)
1055 {
1056 switch(*cc)
1057 {
1058 case OP_SOD:
1059 case OP_SOM:
1060 case OP_SET_SOM:
1061 case OP_NOT_WORD_BOUNDARY:
1062 case OP_WORD_BOUNDARY:
1063 case OP_EODN:
1064 case OP_EOD:
1065 case OP_CIRC:
1066 case OP_CIRCM:
1067 case OP_DOLL:
1068 case OP_DOLLM:
1069 /* Zero width assertions. */
1070 cc++;
1071 continue;
1072 }
1073 break;
1074 }
1075
1076 if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1077 detect_fast_fail(common, cc, private_data_start, depth - 1);
1078
1079 if (is_accelerated_repeat(cc))
1080 {
1081 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1082
1083 if (common->fast_fail_start_ptr == 0)
1084 common->fast_fail_start_ptr = *private_data_start;
1085
1086 *private_data_start += sizeof(sljit_sw);
1087 common->fast_fail_end_ptr = *private_data_start;
1088
1089 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1090 return;
1091 }
1092
1093 cc = next_alt;
1094 }
1095 while (*cc == OP_ALT);
1096 }
1097
1098 static int get_class_iterator_size(pcre_uchar *cc)
1099 {
1100 sljit_u32 min;
1101 sljit_u32 max;
1102 switch(*cc)
1103 {
1104 case OP_CRSTAR:
1105 case OP_CRPLUS:
1106 return 2;
1107
1108 case OP_CRMINSTAR:
1109 case OP_CRMINPLUS:
1110 case OP_CRQUERY:
1111 case OP_CRMINQUERY:
1112 return 1;
1113
1114 case OP_CRRANGE:
1115 case OP_CRMINRANGE:
1116 min = GET2(cc, 1);
1117 max = GET2(cc, 1 + IMM2_SIZE);
1118 if (max == 0)
1119 return (*cc == OP_CRRANGE) ? 2 : 1;
1120 max -= min;
1121 if (max > 2)
1122 max = 2;
1123 return max;
1124
1125 default:
1126 return 0;
1127 }
1128 }
1129
/* Checks whether the bracket at begin is followed by identical copies of
itself (compared with memcmp over the whole bracket) and, if so, records the
repetition in common->private_data_ptrs. Three consecutive entries are
written, starting at the link field of an OP_KET: the distance to skip, the
repeat kind (OP_EXACT, OP_UPTO or OP_MINUPTO) and the repeat count.

Returns TRUE when such an entry exists for the closing OP_KET of the bracket
at begin itself (stored now or by an earlier call), FALSE otherwise. */
1130 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
1131 {
1132 pcre_uchar *end = bracketend(begin);
1133 pcre_uchar *next;
1134 pcre_uchar *next_end;
1135 pcre_uchar *max_end;
1136 pcre_uchar type;
1137 sljit_sw length = end - begin;
1138 int min, max, i;
1139
1140 /* Detect fixed iterations first. */
1141 if (end[-(1 + LINK_SIZE)] != OP_KET)
1142 return FALSE;
1143
1144 /* Already detected repeat. */
1145 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1146 return TRUE;
1147
/* Count the consecutive identical copies. min starts at 1 for the bracket
at begin; next ends up just after the last copy. */
1148 next = end;
1149 min = 1;
1150 while (1)
1151 {
1152 if (*next != *begin)
1153 break;
1154 next_end = bracketend(next);
1155 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1156 break;
1157 next = next_end;
1158 min++;
1159 }
1160
/* A single extra copy is never recorded as a repeat. */
1161 if (min == 2)
1162 return FALSE;
1163
1164 max = 0;
1165 max_end = next;
1166 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1167 {
1168 type = *next;
/* Count the optional copies. Each link has the form "type, OP_BRA, copy",
and the following link starts right after that copy, i.e. before the
OP_BRA is closed. */
1169 while (1)
1170 {
1171 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1172 break;
1173 next_end = bracketend(next + 2 + LINK_SIZE);
1174 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1175 break;
1176 next = next_end;
1177 max++;
1178 }
1179
/* The last link is "type, copy" without a wrapping OP_BRA; it must be
followed by one OP_KET for every OP_BRA counted above. */
1180 if (next[0] == type && next[1] == *begin && max >= 1)
1181 {
1182 next_end = bracketend(next + 1);
1183 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1184 {
1185 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1186 if (*next_end != OP_KET)
1187 break;
1188
1189 if (i == max)
1190 {
1191 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1192 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1193 /* +2 the original and the last. */
1194 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1195 if (min == 1)
1196 return TRUE;
/* One mandatory copy is now counted by the UPTO entry. max_end is moved
back by the link value stored before it plus one OP_KET; presumably this is
the start of the last mandatory copy - confirm against bracket layout. */
1197 min--;
1198 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1199 }
1200 }
1201 }
1202 }
1203
/* Record the remaining mandatory copies as an exact repeat. */
1204 if (min >= 3)
1205 {
1206 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1207 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1208 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1209 return TRUE;
1210 }
1211
1212 return FALSE;
1213 }
1214
/* Case label lists shared by the switch statements which walk the compiled
pattern. The plain groups list single character repeats, the TYPE groups
list character type repeats. In set_private_data_ptrs the _1 groups are
given one private data word and the _2A / _2B groups two words; the _2B
opcodes are the ones stepped over with an additional IMM2_SIZE. */
1215 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1216 case OP_MINSTAR: \
1217 case OP_MINPLUS: \
1218 case OP_QUERY: \
1219 case OP_MINQUERY: \
1220 case OP_MINSTARI: \
1221 case OP_MINPLUSI: \
1222 case OP_QUERYI: \
1223 case OP_MINQUERYI: \
1224 case OP_NOTMINSTAR: \
1225 case OP_NOTMINPLUS: \
1226 case OP_NOTQUERY: \
1227 case OP_NOTMINQUERY: \
1228 case OP_NOTMINSTARI: \
1229 case OP_NOTMINPLUSI: \
1230 case OP_NOTQUERYI: \
1231 case OP_NOTMINQUERYI:
1232
1233 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1234 case OP_STAR: \
1235 case OP_PLUS: \
1236 case OP_STARI: \
1237 case OP_PLUSI: \
1238 case OP_NOTSTAR: \
1239 case OP_NOTPLUS: \
1240 case OP_NOTSTARI: \
1241 case OP_NOTPLUSI:
1242
1243 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1244 case OP_UPTO: \
1245 case OP_MINUPTO: \
1246 case OP_UPTOI: \
1247 case OP_MINUPTOI: \
1248 case OP_NOTUPTO: \
1249 case OP_NOTMINUPTO: \
1250 case OP_NOTUPTOI: \
1251 case OP_NOTMINUPTOI:
1252
1253 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1254 case OP_TYPEMINSTAR: \
1255 case OP_TYPEMINPLUS: \
1256 case OP_TYPEQUERY: \
1257 case OP_TYPEMINQUERY:
1258
1259 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1260 case OP_TYPESTAR: \
1261 case OP_TYPEPLUS:
1262
1263 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1264 case OP_TYPEUPTO: \
1265 case OP_TYPEMINUPTO:
1266
1267 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1268 {
1269 pcre_uchar *cc = common->start;
1270 pcre_uchar *alternative;
1271 pcre_uchar *end = NULL;
1272 int private_data_ptr = *private_data_start;
1273 int space, size, bracketlen;
1274 BOOL repeat_check = TRUE;
1275
1276 while (cc < ccend)
1277 {
1278 space = 0;
1279 size = 0;
1280 bracketlen = 0;
1281 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1282 break;
1283
1284 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1285 {
1286 if (detect_repeat(common, cc))
1287 {
1288 /* These brackets are converted to repeats, so no global
1289 based single character repeat is allowed. */
1290 if (cc >= end)
1291 end = bracketend(cc);
1292 }
1293 }
1294 repeat_check = TRUE;
1295
1296 switch(*cc)
1297 {
1298 case OP_KET:
1299 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1300 {
1301 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1302 private_data_ptr += sizeof(sljit_sw);
1303 cc += common->private_data_ptrs[cc + 1 - common->start];
1304 }
1305 cc += 1 + LINK_SIZE;
1306 break;
1307
1308 case OP_ASSERT:
1309 case OP_ASSERT_NOT:
1310 case OP_ASSERTBACK:
1311 case OP_ASSERTBACK_NOT:
1312 case OP_ONCE:
1313 case OP_ONCE_NC:
1314 case OP_BRAPOS:
1315 case OP_SBRA:
1316 case OP_SBRAPOS:
1317 case OP_SCOND:
1318 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1319 private_data_ptr += sizeof(sljit_sw);
1320 bracketlen = 1 + LINK_SIZE;
1321 break;
1322
1323 case OP_CBRAPOS:
1324 case OP_SCBRAPOS:
1325 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1326 private_data_ptr += sizeof(sljit_sw);
1327 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1328 break;
1329
1330 case OP_COND:
1331 /* Might be a hidden SCOND. */
1332 alternative = cc + GET(cc, 1);
1333 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1334 {
1335 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1336 private_data_ptr += sizeof(sljit_sw);
1337 }
1338 bracketlen = 1 + LINK_SIZE;
1339 break;
1340
1341 case OP_BRA:
1342 bracketlen = 1 + LINK_SIZE;
1343 break;
1344
1345 case OP_CBRA:
1346 case OP_SCBRA:
1347 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1348 break;
1349
1350 case OP_BRAZERO:
1351 case OP_BRAMINZERO:
1352 case OP_BRAPOSZERO:
1353 repeat_check = FALSE;
1354 size = 1;
1355 break;
1356
1357 CASE_ITERATOR_PRIVATE_DATA_1
1358 space = 1;
1359 size = -2;
1360 break;
1361
1362 CASE_ITERATOR_PRIVATE_DATA_2A
1363 space = 2;
1364 size = -2;
1365 break;
1366
1367 CASE_ITERATOR_PRIVATE_DATA_2B
1368 space = 2;
1369 size = -(2 + IMM2_SIZE);
1370 break;
1371
1372 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1373 space = 1;
1374 size = 1;
1375 break;
1376
1377 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1378 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1379 space = 2;
1380 size = 1;
1381 break;
1382
1383 case OP_TYPEUPTO:
1384 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1385 space = 2;
1386 size = 1 + IMM2_SIZE;
1387 break;
1388
1389 case OP_TYPEMINUPTO:
1390 space = 2;
1391 size = 1 + IMM2_SIZE;
1392 break;
1393
1394 case OP_CLASS:
1395 case OP_NCLASS:
1396 space = get_class_iterator_size(cc + size);
1397 size = 1 + 32 / sizeof(pcre_uchar);
1398 break;
1399
1400 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1401 case OP_XCLASS:
1402 space = get_class_iterator_size(cc + size);
1403 size = GET(cc, 1);
1404 break;
1405 #endif
1406
1407 default:
1408 cc = next_opcode(common, cc);
1409 SLJIT_ASSERT(cc != NULL);
1410 break;
1411 }
1412
1413 /* Character iterators, which are not inside a repeated bracket,
1414 gets a private slot instead of allocating it on the stack. */
1415 if (space > 0 && cc >= end)
1416 {
1417 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1418 private_data_ptr += sizeof(sljit_sw) * space;
1419 }
1420
1421 if (size != 0)
1422 {
1423 if (size < 0)
1424 {
1425 cc += -size;
1426 #ifdef SUPPORT_UTF
1427 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1428 #endif
1429 }
1430 else
1431 cc += size;
1432 }
1433
1434 if (bracketlen > 0)
1435 {
1436 if (cc >= end)
1437 {
1438 end = bracketend(cc);
1439 if (end[-1 - LINK_SIZE] == OP_KET)
1440 end = NULL;
1441 }
1442 cc += bracketlen;
1443 }
1444 }
1445 *private_data_start = private_data_ptr;
1446 }
1447
1448 /* Returns with a frame_types (always < 0) if no need for frame. */
/* Computes how many stack words are needed to save the state modified
between cc and ccend: two words for the start-of-match value, for the mark
and for the last capture index (each counted once), and three words for
every capturing bracket. The result is the sum plus one.

When ccend is NULL, cc must point to a bracket: its content is scanned, and
for a non-recursive OP_CBRAPOS / OP_SCBRAPOS the words of the bracket itself
are counted first. When recursive is TRUE the start-of-match and mark
entries are treated as already counted.

*needs_control_head is set to TRUE when a MARK / PRUNE_ARG / THEN_ARG / THEN
is found and common->control_head_ptr is in use.

If nothing has to be saved (or only the words of the possessive bracket
itself were counted), no_frame or no_stack is returned instead; no_stack is
only returned when every scanned opcode belongs to the list which follows
the default label. */
1449 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1450 {
1451 int length = 0;
1452 int possessive = 0;
1453 BOOL stack_restore = FALSE;
1454 BOOL setsom_found = recursive;
1455 BOOL setmark_found = recursive;
1456 /* The last capture is a local variable even for recursions. */
1457 BOOL capture_last_found = FALSE;
1458
1459 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1460 SLJIT_ASSERT(common->control_head_ptr != 0);
1461 *needs_control_head = TRUE;
1462 #else
1463 *needs_control_head = FALSE;
1464 #endif
1465
1466 if (ccend == NULL)
1467 {
1468 ccend = bracketend(cc) - (1 + LINK_SIZE);
1469 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1470 {
1471 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1472 /* This is correct regardless of common->capture_last_ptr. */
1473 capture_last_found = TRUE;
1474 }
1475 cc = next_opcode(common, cc);
1476 }
1477
1478 SLJIT_ASSERT(cc != NULL);
1479 while (cc < ccend)
1480 switch(*cc)
1481 {
1482 case OP_SET_SOM:
1483 SLJIT_ASSERT(common->has_set_som);
1484 stack_restore = TRUE;
1485 if (!setsom_found)
1486 {
1487 length += 2;
1488 setsom_found = TRUE;
1489 }
1490 cc += 1;
1491 break;
1492
1493 case OP_MARK:
1494 case OP_PRUNE_ARG:
1495 case OP_THEN_ARG:
1496 SLJIT_ASSERT(common->mark_ptr != 0);
1497 stack_restore = TRUE;
1498 if (!setmark_found)
1499 {
1500 length += 2;
1501 setmark_found = TRUE;
1502 }
1503 if (common->control_head_ptr != 0)
1504 *needs_control_head = TRUE;
1505 cc += 1 + 2 + cc[1];
1506 break;
1507
/* A recursion may change any of the three tracked values. */
1508 case OP_RECURSE:
1509 stack_restore = TRUE;
1510 if (common->has_set_som && !setsom_found)
1511 {
1512 length += 2;
1513 setsom_found = TRUE;
1514 }
1515 if (common->mark_ptr != 0 && !setmark_found)
1516 {
1517 length += 2;
1518 setmark_found = TRUE;
1519 }
1520 if (common->capture_last_ptr != 0 && !capture_last_found)
1521 {
1522 length += 2;
1523 capture_last_found = TRUE;
1524 }
1525 cc += 1 + LINK_SIZE;
1526 break;
1527
1528 case OP_CBRA:
1529 case OP_CBRAPOS:
1530 case OP_SCBRA:
1531 case OP_SCBRAPOS:
1532 stack_restore = TRUE;
1533 if (common->capture_last_ptr != 0 && !capture_last_found)
1534 {
1535 length += 2;
1536 capture_last_found = TRUE;
1537 }
1538 length += 3;
1539 cc += 1 + LINK_SIZE + IMM2_SIZE;
1540 break;
1541
1542 case OP_THEN:
1543 stack_restore = TRUE;
1544 if (common->control_head_ptr != 0)
1545 *needs_control_head = TRUE;
1546 cc ++;
1547 break;
1548
/* Any opcode not listed anywhere in this switch sets stack_restore; the
opcodes listed after the default label leave it unchanged. Both groups are
stepped over with next_opcode. */
1549 default:
1550 stack_restore = TRUE;
1551 /* Fall through. */
1552
1553 case OP_NOT_WORD_BOUNDARY:
1554 case OP_WORD_BOUNDARY:
1555 case OP_NOT_DIGIT:
1556 case OP_DIGIT:
1557 case OP_NOT_WHITESPACE:
1558 case OP_WHITESPACE:
1559 case OP_NOT_WORDCHAR:
1560 case OP_WORDCHAR:
1561 case OP_ANY:
1562 case OP_ALLANY:
1563 case OP_ANYBYTE:
1564 case OP_NOTPROP:
1565 case OP_PROP:
1566 case OP_ANYNL:
1567 case OP_NOT_HSPACE:
1568 case OP_HSPACE:
1569 case OP_NOT_VSPACE:
1570 case OP_VSPACE:
1571 case OP_EXTUNI:
1572 case OP_EODN:
1573 case OP_EOD:
1574 case OP_CIRC:
1575 case OP_CIRCM:
1576 case OP_DOLL:
1577 case OP_DOLLM:
1578 case OP_CHAR:
1579 case OP_CHARI:
1580 case OP_NOT:
1581 case OP_NOTI:
1582
1583 case OP_EXACT:
1584 case OP_POSSTAR:
1585 case OP_POSPLUS:
1586 case OP_POSQUERY:
1587 case OP_POSUPTO:
1588
1589 case OP_EXACTI:
1590 case OP_POSSTARI:
1591 case OP_POSPLUSI:
1592 case OP_POSQUERYI:
1593 case OP_POSUPTOI:
1594
1595 case OP_NOTEXACT:
1596 case OP_NOTPOSSTAR:
1597 case OP_NOTPOSPLUS:
1598 case OP_NOTPOSQUERY:
1599 case OP_NOTPOSUPTO:
1600
1601 case OP_NOTEXACTI:
1602 case OP_NOTPOSSTARI:
1603 case OP_NOTPOSPLUSI:
1604 case OP_NOTPOSQUERYI:
1605 case OP_NOTPOSUPTOI:
1606
1607 case OP_TYPEEXACT:
1608 case OP_TYPEPOSSTAR:
1609 case OP_TYPEPOSPLUS:
1610 case OP_TYPEPOSQUERY:
1611 case OP_TYPEPOSUPTO:
1612
1613 case OP_CLASS:
1614 case OP_NCLASS:
1615 case OP_XCLASS:
1616 case OP_CALLOUT:
1617
1618 cc = next_opcode(common, cc);
1619 SLJIT_ASSERT(cc != NULL);
1620 break;
1621 }
1622
1623 /* Possessive quantifiers can use a special case. */
1624 if (SLJIT_UNLIKELY(possessive == length))
1625 return stack_restore ? no_frame : no_stack;
1626
/* The extra word is the terminator of the frame. */
1627 if (length > 0)
1628 return length + 1;
1629 return stack_restore ? no_frame : no_stack;
1630 }
1631
/* Emits the code which fills a frame on the stack addressed by STACK_TOP,
starting at stack slot stackpos. The pattern between cc and ccend is scanned
the same way as in get_framesize, and for every value counted there an entry
is stored: a tag word followed by the current value(s).
  - start of match: tag -OVECTOR(0), then the value of OVECTOR(0)
  - mark: tag -common->mark_ptr, then the value
  - last capture: tag -common->capture_last_ptr, then the value
  - capturing bracket: tag OVECTOR(offset), then both ovector words
The frame is closed by a zero word. The closing assert requires the frame to
end exactly at slot stacktop.

When ccend is NULL, cc must point to a bracket. The bracket opcode itself is
skipped, except for a non-recursive OP_CBRAPOS / OP_SCBRAPOS, which is
processed as a capturing bracket. When recursive is TRUE no start-of-match
or mark entry is stored. */
1632 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1633 {
1634 DEFINE_COMPILER;
1635 BOOL setsom_found = recursive;
1636 BOOL setmark_found = recursive;
1637 /* The last capture is a local variable even for recursions. */
1638 BOOL capture_last_found = FALSE;
1639 int offset;
1640
1641 /* >= 1 + shortest item size (2) */
1642 SLJIT_UNUSED_ARG(stacktop);
1643 SLJIT_ASSERT(stackpos >= stacktop + 2);
1644
/* From here on stackpos is the value of STACK(stackpos), not a slot index. */
1645 stackpos = STACK(stackpos);
1646 if (ccend == NULL)
1647 {
1648 ccend = bracketend(cc) - (1 + LINK_SIZE);
1649 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1650 cc = next_opcode(common, cc);
1651 }
1652
1653 SLJIT_ASSERT(cc != NULL);
1654 while (cc < ccend)
1655 switch(*cc)
1656 {
1657 case OP_SET_SOM:
1658 SLJIT_ASSERT(common->has_set_som);
1659 if (!setsom_found)
1660 {
1661 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1662 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1663 stackpos += (int)sizeof(sljit_sw);
1664 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1665 stackpos += (int)sizeof(sljit_sw);
1666 setsom_found = TRUE;
1667 }
1668 cc += 1;
1669 break;
1670
1671 case OP_MARK:
1672 case OP_PRUNE_ARG:
1673 case OP_THEN_ARG:
1674 SLJIT_ASSERT(common->mark_ptr != 0);
1675 if (!setmark_found)
1676 {
1677 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1678 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1679 stackpos += (int)sizeof(sljit_sw);
1680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1681 stackpos += (int)sizeof(sljit_sw);
1682 setmark_found = TRUE;
1683 }
1684 cc += 1 + 2 + cc[1];
1685 break;
1686
/* A recursion stores every tracked value which is in use and has not been
stored yet. */
1687 case OP_RECURSE:
1688 if (common->has_set_som && !setsom_found)
1689 {
1690 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1691 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1692 stackpos += (int)sizeof(sljit_sw);
1693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1694 stackpos += (int)sizeof(sljit_sw);
1695 setsom_found = TRUE;
1696 }
1697 if (common->mark_ptr != 0 && !setmark_found)
1698 {
1699 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1700 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1701 stackpos += (int)sizeof(sljit_sw);
1702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1703 stackpos += (int)sizeof(sljit_sw);
1704 setmark_found = TRUE;
1705 }
1706 if (common->capture_last_ptr != 0 && !capture_last_found)
1707 {
1708 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1709 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1710 stackpos += (int)sizeof(sljit_sw);
1711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1712 stackpos += (int)sizeof(sljit_sw);
1713 capture_last_found = TRUE;
1714 }
1715 cc += 1 + LINK_SIZE;
1716 break;
1717
1718 case OP_CBRA:
1719 case OP_CBRAPOS:
1720 case OP_SCBRA:
1721 case OP_SCBRAPOS:
1722 if (common->capture_last_ptr != 0 && !capture_last_found)
1723 {
1724 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1726 stackpos += (int)sizeof(sljit_sw);
1727 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1728 stackpos += (int)sizeof(sljit_sw);
1729 capture_last_found = TRUE;
1730 }
/* Three words: the (positive) tag, then the two ovector words of the
capture. offset is twice the capture number. */
1731 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1732 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1733 stackpos += (int)sizeof(sljit_sw);
1734 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1735 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1736 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1737 stackpos += (int)sizeof(sljit_sw);
1738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1739 stackpos += (int)sizeof(sljit_sw);
1740
1741 cc += 1 + LINK_SIZE + IMM2_SIZE;
1742 break;
1743
1744 default:
1745 cc = next_opcode(common, cc);
1746 SLJIT_ASSERT(cc != NULL);
1747 break;
1748 }
1749
/* Terminating zero word of the frame. */
1750 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1751 SLJIT_ASSERT(stackpos == STACK(stacktop));
1752 }
1753
1754 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1755 {
1756 int private_data_length = needs_control_head ? 3 : 2;
1757 int size;
1758 pcre_uchar *alternative;
1759 /* Calculate the sum of the private machine words. */
1760 while (cc < ccend)
1761 {
1762 size = 0;
1763 switch(*cc)
1764 {
1765 case OP_KET:
1766 if (PRIVATE_DATA(cc) != 0)
1767 {
1768 private_data_length++;
1769 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1770 cc += PRIVATE_DATA(cc + 1);
1771 }
1772 cc += 1 + LINK_SIZE;
1773 break;
1774
1775 case OP_ASSERT:
1776 case OP_ASSERT_NOT:
1777 case OP_ASSERTBACK:
1778 case OP_ASSERTBACK_NOT:
1779 case OP_ONCE:
1780 case OP_ONCE_NC:
1781 case OP_BRAPOS:
1782 case OP_SBRA:
1783 case OP_SBRAPOS:
1784 case OP_SCOND:
1785 private_data_length++;
1786 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1787 cc += 1 + LINK_SIZE;
1788 break;
1789
1790 case OP_CBRA:
1791 case OP_SCBRA:
1792 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1793 private_data_length++;
1794 cc += 1 + LINK_SIZE + IMM2_SIZE;
1795 break;
1796
1797 case OP_CBRAPOS:
1798 case OP_SCBRAPOS:
1799 private_data_length += 2;
1800 cc += 1 + LINK_SIZE + IMM2_SIZE;
1801 break;
1802
1803 case OP_COND:
1804 /* Might be a hidden SCOND. */
1805 alternative = cc + GET(cc, 1);
1806 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1807 private_data_length++;
1808 cc += 1 + LINK_SIZE;
1809 break;
1810
1811 CASE_ITERATOR_PRIVATE_DATA_1
1812 if (PRIVATE_DATA(cc))
1813 private_data_length++;
1814 cc += 2;
1815 #ifdef SUPPORT_UTF
1816 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1817 #endif
1818 break;
1819
1820 CASE_ITERATOR_PRIVATE_DATA_2A
1821 if (PRIVATE_DATA(cc))
1822 private_data_length += 2;
1823 cc += 2;
1824 #ifdef SUPPORT_UTF
1825 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1826 #endif
1827 break;
1828
1829 CASE_ITERATOR_PRIVATE_DATA_2B
1830 if (PRIVATE_DATA(cc))
1831 private_data_length += 2;
1832 cc += 2 + IMM2_SIZE;
1833 #ifdef SUPPORT_UTF
1834 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1835 #endif
1836 break;
1837
1838 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1839 if (PRIVATE_DATA(cc))
1840 private_data_length++;
1841 cc += 1;
1842 break;
1843
1844 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1845 if (PRIVATE_DATA(cc))
1846 private_data_length += 2;
1847 cc += 1;
1848 break;
1849
1850 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1851 if (PRIVATE_DATA(cc))
1852 private_data_length += 2;
1853 cc += 1 + IMM2_SIZE;
1854 break;
1855
1856 case OP_CLASS:
1857 case OP_NCLASS:
1858 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1859 case OP_XCLASS:
1860 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1861 #else
1862 size = 1 + 32 / (int)sizeof(pcre_uchar);
1863 #endif
1864 if (PRIVATE_DATA(cc))
1865 private_data_length += get_class_iterator_size(cc + size);
1866 cc += size;
1867 break;
1868
1869 default:
1870 cc = next_opcode(common, cc);
1871 SLJIT_ASSERT(cc != NULL);
1872 break;
1873 }
1874 }
1875 SLJIT_ASSERT(cc == ccend);
1876 return private_data_length;
1877 }
1878
/* Emits the code which copies the private data words belonging to the
pattern range cc .. ccend between the local area (addressed through
SLJIT_SP) and the stack (addressed through STACK_TOP). When save is TRUE
the locals are written to the stack, otherwise they are loaded back from
it. The opcodes are scanned the same way as in
get_private_data_copy_length; when saving, common->recursive_head_ptr (and
common->control_head_ptr if needs_control_head is set) are copied first.

The words travel through TMP1 and TMP2, which are used alternately (tracked
by tmp1next): while one register is being filled the previous content of
the other one is written out. The closing assert checks that the scan ends
at ccend and that the stack area between stackptr and stacktop has been
consumed exactly. */
1879 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1880 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1881 {
1882 DEFINE_COMPILER;
1883 int srcw[2];
1884 int count, size;
1885 BOOL tmp1next = TRUE;
1886 BOOL tmp1empty = TRUE;
1887 BOOL tmp2empty = TRUE;
1888 pcre_uchar *alternative;
1889 enum {
1890 start,
1891 loop,
1892 end
1893 } status;
1894
/* Only the save direction has a "start" step (the head pointers). */
1895 status = save ? start : loop;
1896 stackptr = STACK(stackptr - 2);
1897 stacktop = STACK(stacktop - 1);
1898
1899 if (!save)
1900 {
/* Skip one word (two with the control head), then preload up to two
words into TMP1 and TMP2. */
1901 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1902 if (stackptr < stacktop)
1903 {
1904 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1905 stackptr += sizeof(sljit_sw);
1906 tmp1empty = FALSE;
1907 }
1908 if (stackptr < stacktop)
1909 {
1910 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1911 stackptr += sizeof(sljit_sw);
1912 tmp2empty = FALSE;
1913 }
1914 /* The tmp1next must be TRUE in either way. */
1915 }
1916
/* Each round selects up to two local offsets (srcw[0..count-1]) which are
then transferred by the loop at the bottom. */
1917 do
1918 {
1919 count = 0;
1920 switch(status)
1921 {
1922 case start:
1923 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1924 count = 1;
1925 srcw[0] = common->recursive_head_ptr;
1926 if (needs_control_head)
1927 {
1928 SLJIT_ASSERT(common->control_head_ptr != 0);
1929 count = 2;
1930 srcw[1] = common->control_head_ptr;
1931 }
1932 status = loop;
1933 break;
1934
1935 case loop:
1936 if (cc >= ccend)
1937 {
1938 status = end;
1939 break;
1940 }
1941
1942 switch(*cc)
1943 {
1944 case OP_KET:
1945 if (PRIVATE_DATA(cc) != 0)
1946 {
1947 count = 1;
1948 srcw[0] = PRIVATE_DATA(cc);
1949 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1950 cc += PRIVATE_DATA(cc + 1);
1951 }
1952 cc += 1 + LINK_SIZE;
1953 break;
1954
1955 case OP_ASSERT:
1956 case OP_ASSERT_NOT:
1957 case OP_ASSERTBACK:
1958 case OP_ASSERTBACK_NOT:
1959 case OP_ONCE:
1960 case OP_ONCE_NC:
1961 case OP_BRAPOS:
1962 case OP_SBRA:
1963 case OP_SBRAPOS:
1964 case OP_SCOND:
1965 count = 1;
1966 srcw[0] = PRIVATE_DATA(cc);
1967 SLJIT_ASSERT(srcw[0] != 0);
1968 cc += 1 + LINK_SIZE;
1969 break;
1970
1971 case OP_CBRA:
1972 case OP_SCBRA:
1973 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1974 {
1975 count = 1;
1976 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1977 }
1978 cc += 1 + LINK_SIZE + IMM2_SIZE;
1979 break;
1980
1981 case OP_CBRAPOS:
1982 case OP_SCBRAPOS:
1983 count = 2;
1984 srcw[0] = PRIVATE_DATA(cc);
1985 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1986 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1987 cc += 1 + LINK_SIZE + IMM2_SIZE;
1988 break;
1989
1990 case OP_COND:
1991 /* Might be a hidden SCOND. */
1992 alternative = cc + GET(cc, 1);
1993 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1994 {
1995 count = 1;
1996 srcw[0] = PRIVATE_DATA(cc);
1997 SLJIT_ASSERT(srcw[0] != 0);
1998 }
1999 cc += 1 + LINK_SIZE;
2000 break;
2001
2002 CASE_ITERATOR_PRIVATE_DATA_1
2003 if (PRIVATE_DATA(cc))
2004 {
2005 count = 1;
2006 srcw[0] = PRIVATE_DATA(cc);
2007 }
2008 cc += 2;
2009 #ifdef SUPPORT_UTF
2010 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2011 #endif
2012 break;
2013
2014 CASE_ITERATOR_PRIVATE_DATA_2A
2015 if (PRIVATE_DATA(cc))
2016 {
2017 count = 2;
2018 srcw[0] = PRIVATE_DATA(cc);
2019 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2020 }
2021 cc += 2;
2022 #ifdef SUPPORT_UTF
2023 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2024 #endif
2025 break;
2026
2027 CASE_ITERATOR_PRIVATE_DATA_2B
2028 if (PRIVATE_DATA(cc))
2029 {
2030 count = 2;
2031 srcw[0] = PRIVATE_DATA(cc);
2032 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2033 }
2034 cc += 2 + IMM2_SIZE;
2035 #ifdef SUPPORT_UTF
2036 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2037 #endif
2038 break;
2039
2040 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2041 if (PRIVATE_DATA(cc))
2042 {
2043 count = 1;
2044 srcw[0] = PRIVATE_DATA(cc);
2045 }
2046 cc += 1;
2047 break;
2048
2049 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2050 if (PRIVATE_DATA(cc))
2051 {
2052 count = 2;
2053 srcw[0] = PRIVATE_DATA(cc);
2054 srcw[1] = srcw[0] + sizeof(sljit_sw);
2055 }
2056 cc += 1;
2057 break;
2058
2059 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2060 if (PRIVATE_DATA(cc))
2061 {
2062 count = 2;
2063 srcw[0] = PRIVATE_DATA(cc);
2064 srcw[1] = srcw[0] + sizeof(sljit_sw);
2065 }
2066 cc += 1 + IMM2_SIZE;
2067 break;
2068
2069 case OP_CLASS:
2070 case OP_NCLASS:
2071 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2072 case OP_XCLASS:
2073 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
2074 #else
2075 size = 1 + 32 / (int)sizeof(pcre_uchar);
2076 #endif
/* The repeat opcode which follows the class decides how many words the
class uses; zero words with private data set is not expected here. */
2077 if (PRIVATE_DATA(cc))
2078 switch(get_class_iterator_size(cc + size))
2079 {
2080 case 1:
2081 count = 1;
2082 srcw[0] = PRIVATE_DATA(cc);
2083 break;
2084
2085 case 2:
2086 count = 2;
2087 srcw[0] = PRIVATE_DATA(cc);
2088 srcw[1] = srcw[0] + sizeof(sljit_sw);
2089 break;
2090
2091 default:
2092 SLJIT_ASSERT_STOP();
2093 break;
2094 }
2095 cc += size;
2096 break;
2097
2098 default:
2099 cc = next_opcode(common, cc);
2100 SLJIT_ASSERT(cc != NULL);
2101 break;
2102 }
2103 break;
2104
2105 case end:
2106 SLJIT_ASSERT_STOP();
2107 break;
2108 }
2109
/* Transfer the selected words, srcw[count - 1] first. */
2110 while (count > 0)
2111 {
2112 count--;
2113 if (save)
2114 {
/* Saving: write out the previous content of the register (if any), then
load the next local into it. */
2115 if (tmp1next)
2116 {
2117 if (!tmp1empty)
2118 {
2119 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2120 stackptr += sizeof(sljit_sw);
2121 }
2122 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2123 tmp1empty = FALSE;
2124 tmp1next = FALSE;
2125 }
2126 else
2127 {
2128 if (!tmp2empty)
2129 {
2130 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2131 stackptr += sizeof(sljit_sw);
2132 }
2133 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2134 tmp2empty = FALSE;
2135 tmp1next = TRUE;
2136 }
2137 }
2138 else
2139 {
/* Restoring: store the preloaded register into the local, then refill the
register from the stack while words remain. */
2140 if (tmp1next)
2141 {
2142 SLJIT_ASSERT(!tmp1empty);
2143 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
2144 tmp1empty = stackptr >= stacktop;
2145 if (!tmp1empty)
2146 {
2147 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2148 stackptr += sizeof(sljit_sw);
2149 }
2150 tmp1next = FALSE;
2151 }
2152 else
2153 {
2154 SLJIT_ASSERT(!tmp2empty);
2155 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
2156 tmp2empty = stackptr >= stacktop;
2157 if (!tmp2empty)
2158 {
2159 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2160 stackptr += sizeof(sljit_sw);
2161 }
2162 tmp1next = TRUE;
2163 }
2164 }
2165 }
2166 }
2167 while (status != end);
2168
/* Saving: flush what is still held in the registers. The register which
would be reused next holds the older word, so it is written first. */
2169 if (save)
2170 {
2171 if (tmp1next)
2172 {
2173 if (!tmp1empty)
2174 {
2175 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2176 stackptr += sizeof(sljit_sw);
2177 }
2178 if (!tmp2empty)
2179 {
2180 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2181 stackptr += sizeof(sljit_sw);
2182 }
2183 }
2184 else
2185 {
2186 if (!tmp2empty)
2187 {
2188 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2189 stackptr += sizeof(sljit_sw);
2190 }
2191 if (!tmp1empty)
2192 {
2193 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2194 stackptr += sizeof(sljit_sw);
2195 }
2196 }
2197 }
2198 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2199 }
2200
2201 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
2202 {
2203 pcre_uchar *end = bracketend(cc);
2204 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2205
2206 /* Assert captures then. */
2207 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2208 current_offset = NULL;
2209 /* Conditional block does not. */
2210 if (*cc == OP_COND || *cc == OP_SCOND)
2211 has_alternatives = FALSE;
2212
2213 cc = next_opcode(common, cc);
2214 if (has_alternatives)
2215 current_offset = common->then_offsets + (cc - common->start);
2216
2217 while (cc < end)
2218 {
2219 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2220 cc = set_then_offsets(common, cc, current_offset);
2221 else
2222 {
2223 if (*cc == OP_ALT && has_alternatives)
2224 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2225 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2226 *current_offset = 1;
2227 cc = next_opcode(common, cc);
2228 }
2229 }
2230
2231 return end;
2232 }
2233
/* The case label lists are not used below this point. */
2234 #undef CASE_ITERATOR_PRIVATE_DATA_1
2235 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2236 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2237 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2238 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2239 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2240
2241 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2242 {
2243 return (value & (value - 1)) == 0;
2244 }
2245
2246 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2247 {
2248 while (list)
2249 {
2250 /* sljit_set_label is clever enough to do nothing
2251 if either the jump or the label is NULL. */
2252 SET_LABEL(list->jump, label);
2253 list = list->next;
2254 }
2255 }
2256
2257 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2258 {
2259 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2260 if (list_item)
2261 {
2262 list_item->next = *list;
2263 list_item->jump = jump;
2264 *list = list_item;
2265 }
2266 }
2267
2268 static void add_stub(compiler_common *common, struct sljit_jump *start)
2269 {
2270 DEFINE_COMPILER;
2271 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2272
2273 if (list_item)
2274 {
2275 list_item->start = start;
2276 list_item->quit = LABEL();
2277 list_item->next = common->stubs;
2278 common->stubs = list_item;
2279 }
2280 }
2281
2282 static void flush_stubs(compiler_common *common)
2283 {
2284 DEFINE_COMPILER;
2285 stub_list *list_item = common->stubs;
2286
2287 while (list_item)
2288 {
2289 JUMPHERE(list_item->start);
2290 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2291 JUMPTO(SLJIT_JUMP, list_item->quit);
2292 list_item = list_item->next;
2293 }
2294 common->stubs = NULL;
2295 }
2296
2297 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2298 {
2299 DEFINE_COMPILER;
2300 label_addr_list *label_addr;
2301
2302 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2303 if (label_addr == NULL)
2304 return;
2305 label_addr->label = LABEL();
2306 label_addr->update_addr = update_addr;
2307 label_addr->next = common->label_addrs;
2308 common->label_addrs = label_addr;
2309 }
2310
2311 static SLJIT_INLINE void count_match(compiler_common *common)
2312 {
2313 DEFINE_COMPILER;
2314
2315 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2316 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2317 }
2318
2319 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2320 {
2321 /* May destroy all locals and registers except TMP2. */
2322 DEFINE_COMPILER;
2323
2324 SLJIT_ASSERT(size > 0);
2325 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2326 #ifdef DESTROY_REGISTERS
2327 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2328 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2329 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2330 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2331 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2332 #endif
2333 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2334 }
2335
2336 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2337 {
2338 DEFINE_COMPILER;
2339
2340 SLJIT_ASSERT(size > 0);
2341 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2342 }
2343
2344 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2345 {
2346 DEFINE_COMPILER;
2347 sljit_uw *result;
2348
2349 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2350 return NULL;
2351
2352 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2353 if (SLJIT_UNLIKELY(result == NULL))
2354 {
2355 sljit_set_compiler_memory_error(compiler);
2356 return NULL;
2357 }
2358
2359 *(void**)result = common->read_only_data_head;
2360 common->read_only_data_head = (void *)result;
2361 return result + 1;
2362 }
2363
2364 static void free_read_only_data(void *current, void *allocator_data)
2365 {
2366 void *next;
2367
2368 SLJIT_UNUSED_ARG(allocator_data);
2369
2370 while (current != NULL)
2371 {
2372 next = *(void**)current;
2373 SLJIT_FREE(current, allocator_data);
2374 current = next;
2375 }
2376 }
2377
2378 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2379 {
2380 DEFINE_COMPILER;
2381 struct sljit_label *loop;
2382 int i;
2383
2384 /* At this point we can freely use all temporary registers. */
2385 SLJIT_ASSERT(length > 1);
2386 /* TMP1 returns with begin - 1. */
2387 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2388 if (length < 8)
2389 {
2390 for (i = 1; i < length; i++)
2391 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2392 }
2393 else
2394 {
2395 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2396 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2397 loop = LABEL();
2398 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2399 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2400 JUMPTO(SLJIT_NOT_ZERO, loop);
2401 }
2402 }
2403
2404 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2405 {
2406 DEFINE_COMPILER;
2407 sljit_s32 i;
2408
2409 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2410
2411 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2412 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2413 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2414 }
2415
2416 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2417 {
2418 DEFINE_COMPILER;
2419 struct sljit_label *loop;
2420 int i;
2421
2422 SLJIT_ASSERT(length > 1);
2423 /* OVECTOR(1) contains the "string begin - 1" constant. */
2424 if (length > 2)
2425 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2426 if (length < 8)
2427 {
2428 for (i = 2; i < length; i++)
2429 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2430 }
2431 else
2432 {
2433 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2434 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2435 loop = LABEL();
2436 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2437 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2438 JUMPTO(SLJIT_NOT_ZERO, loop);
2439 }
2440
2441 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2442 if (common->mark_ptr != 0)
2443 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2444 if (common->control_head_ptr != 0)
2445 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2446 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2447 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2448 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2449 }
2450
2451 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2452 {
2453 while (current != NULL)
2454 {
2455 switch (current[-2])
2456 {
2457 case type_then_trap:
2458 break;
2459
2460 case type_mark:
2461 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2462 return current[-4];
2463 break;
2464
2465 default:
2466 SLJIT_ASSERT_STOP();
2467 break;
2468 }
2469 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2470 current = (sljit_sw*)current[-1];
2471 }
2472 return -1;
2473 }
2474
2475 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2476 {
2477 DEFINE_COMPILER;
2478 struct sljit_label *loop;
2479 struct sljit_jump *early_quit;
2480
2481 /* At this point we can freely use all registers. */
2482 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2483 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2484
2485 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2486 if (common->mark_ptr != 0)
2487 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2488 OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2489 if (common->mark_ptr != 0)
2490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2491 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2492 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2493 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2494 /* Unlikely, but possible */
2495 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2496 loop = LABEL();
2497 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2498 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2499 /* Copy the integer value to the output buffer */
2500 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2501 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2502 #endif
2503 OP1(SLJIT_MOVU_S32, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2504 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2505 JUMPTO(SLJIT_NOT_ZERO, loop);
2506 JUMPHERE(early_quit);
2507
2508 /* Calculate the return value, which is the maximum ovector value. */
2509 if (topbracket > 1)
2510 {
2511 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2512 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2513
2514 /* OVECTOR(0) is never equal to SLJIT_S2. */
2515 loop = LABEL();
2516 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2517 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2518 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2519 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2520 }
2521 else
2522 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2523 }
2524
2525 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2526 {
2527 DEFINE_COMPILER;
2528 struct sljit_jump *jump;
2529
2530 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2531 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2532 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2533
2534 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2535 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2536 OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2537 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2538
2539 /* Store match begin and end. */
2540 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2541 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2542
2543 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2544 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2545 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2546 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2547 #endif
2548 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2549 JUMPHERE(jump);
2550
2551 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2552 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2553 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2554 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2555 #endif
2556 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2557
2558 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2559 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2560 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2561 #endif
2562 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2563
2564 JUMPTO(SLJIT_JUMP, quit);
2565 }
2566
2567 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2568 {
2569 /* May destroy TMP1. */
2570 DEFINE_COMPILER;
2571 struct sljit_jump *jump;
2572
2573 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2574 {
2575 /* The value of -1 must be kept for start_used_ptr! */
2576 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2577 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2578 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2579 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2580 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2581 JUMPHERE(jump);
2582 }
2583 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2584 {
2585 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2586 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2587 JUMPHERE(jump);
2588 }
2589 }
2590
2591 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2592 {
2593 /* Detects if the character has an othercase. */
2594 unsigned int c;
2595
2596 #ifdef SUPPORT_UTF
2597 if (common->utf)
2598 {
2599 GETCHAR(c, cc);
2600 if (c > 127)
2601 {
2602 #ifdef SUPPORT_UCP
2603 return c != UCD_OTHERCASE(c);
2604 #else
2605 return FALSE;
2606 #endif
2607 }
2608 #ifndef COMPILE_PCRE8
2609 return common->fcc[c] != c;
2610 #endif
2611 }
2612 else
2613 #endif
2614 c = *cc;
2615 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2616 }
2617
2618 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2619 {
2620 /* Returns with the othercase. */
2621 #ifdef SUPPORT_UTF
2622 if (common->utf && c > 127)
2623 {
2624 #ifdef SUPPORT_UCP
2625 return UCD_OTHERCASE(c);
2626 #else
2627 return c;
2628 #endif
2629 }
2630 #endif
2631 return TABLE_GET(c, common->fcc, c);
2632 }
2633
2634 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2635 {
2636 /* Detects if the character and its othercase has only 1 bit difference. */
2637 unsigned int c, oc, bit;
2638 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2639 int n;
2640 #endif
2641
2642 #ifdef SUPPORT_UTF
2643 if (common->utf)
2644 {
2645 GETCHAR(c, cc);
2646 if (c <= 127)
2647 oc = common->fcc[c];
2648 else
2649 {
2650 #ifdef SUPPORT_UCP
2651 oc = UCD_OTHERCASE(c);
2652 #else
2653 oc = c;
2654 #endif
2655 }
2656 }
2657 else
2658 {
2659 c = *cc;
2660 oc = TABLE_GET(c, common->fcc, c);
2661 }
2662 #else
2663 c = *cc;
2664 oc = TABLE_GET(c, common->fcc, c);
2665 #endif
2666
2667 SLJIT_ASSERT(c != oc);
2668
2669 bit = c ^ oc;
2670 /* Optimized for English alphabet. */
2671 if (c <= 127 && bit == 0x20)
2672 return (0 << 8) | 0x20;
2673
2674 /* Since c != oc, they must have at least 1 bit difference. */
2675 if (!is_powerof2(bit))
2676 return 0;
2677
2678 #if defined COMPILE_PCRE8
2679
2680 #ifdef SUPPORT_UTF
2681 if (common->utf && c > 127)
2682 {
2683 n = GET_EXTRALEN(*cc);
2684 while ((bit & 0x3f) == 0)
2685 {
2686 n--;
2687 bit >>= 6;
2688 }
2689 return (n << 8) | bit;
2690 }
2691 #endif /* SUPPORT_UTF */
2692 return (0 << 8) | bit;
2693
2694 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2695
2696 #ifdef SUPPORT_UTF
2697 if (common->utf && c > 65535)
2698 {
2699 if (bit >= (1 << 10))
2700 bit >>= 10;
2701 else
2702 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2703 }
2704 #endif /* SUPPORT_UTF */
2705 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2706
2707 #endif /* COMPILE_PCRE[8|16|32] */
2708 }
2709
2710 static void check_partial(compiler_common *common, BOOL force)
2711 {
2712 /* Checks whether a partial matching is occurred. Does not modify registers. */
2713 DEFINE_COMPILER;
2714 struct sljit_jump *jump = NULL;
2715
2716 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2717
2718 if (common->mode == JIT_COMPILE)
2719 return;
2720
2721 if (!force)
2722 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2723 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2724 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2725
2726 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2727 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2728 else
2729 {
2730 if (common->partialmatchlabel != NULL)
2731 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2732 else
2733 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2734 }
2735
2736 if (jump != NULL)
2737 JUMPHERE(jump);
2738 }
2739
2740 static void check_str_end(compiler_common *common, jump_list **end_reached)
2741 {
2742 /* Does not affect registers. Usually used in a tight spot. */
2743 DEFINE_COMPILER;
2744 struct sljit_jump *jump;
2745
2746 if (common->mode == JIT_COMPILE)
2747 {
2748 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2749 return;
2750 }
2751
2752 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2753 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2754 {
2755 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2756 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2757 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2758 }
2759 else
2760 {
2761 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2762 if (common->partialmatchlabel != NULL)
2763 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2764 else
2765 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2766 }
2767 JUMPHERE(jump);
2768 }
2769
2770 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2771 {
2772 DEFINE_COMPILER;
2773 struct sljit_jump *jump;
2774
2775 if (common->mode == JIT_COMPILE)
2776 {
2777 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2778 return;
2779 }
2780
2781 /* Partial matching mode. */
2782 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2783 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2784 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2785 {
2786 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2787 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2788 }
2789 else
2790 {
2791 if (common->partialmatchlabel != NULL)
2792 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2793 else
2794 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2795 }
2796 JUMPHERE(jump);
2797 }
2798
2799 static void peek_char(compiler_common *common, sljit_u32 max)
2800 {
2801 /* Reads the character into TMP1, keeps STR_PTR.
2802 Does not check STR_END. TMP2 Destroyed. */
2803 DEFINE_COMPILER;
2804 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2805 struct sljit_jump *jump;
2806 #endif
2807
2808 SLJIT_UNUSED_ARG(max);
2809
2810 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2811 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2812 if (common->utf)
2813 {
2814 if (max < 128) return;
2815
2816 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2817 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2818 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2819 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2820 JUMPHERE(jump);
2821 }
2822 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2823
2824 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2825 if (common->utf)
2826 {
2827 if (max < 0xd800) return;
2828
2829 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2830 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2831 /* TMP2 contains the high surrogate. */
2832 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2833 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2834 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2835 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2836 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2837 JUMPHERE(jump);
2838 }
2839 #endif
2840 }
2841
2842 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2843
2844 static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
2845 {
2846 /* Tells whether the character codes below 128 are enough
2847 to determine a match. */
2848 const sljit_u8 value = nclass ? 0xff : 0;
2849 const sljit_u8 *end = bitset + 32;
2850
2851 bitset += 16;
2852 do
2853 {
2854 if (*bitset++ != value)
2855 return FALSE;
2856 }
2857 while (bitset < end);
2858 return TRUE;
2859 }
2860
2861 static void read_char7_type(compiler_common *common, BOOL full_read)
2862 {
2863 /* Reads the precise character type of a character into TMP1, if the character
2864 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2865 full_read argument tells whether characters above max are accepted or not. */
2866 DEFINE_COMPILER;
2867 struct sljit_jump *jump;
2868
2869 SLJIT_ASSERT(common->utf);
2870
2871 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2872 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2873
2874 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2875
2876 if (full_read)
2877 {
2878 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2879 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2880 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2881 JUMPHERE(jump);
2882 }
2883 }
2884
2885 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2886
2887 static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
2888 {
2889 /* Reads the precise value of a character into TMP1, if the character is
2890 between min and max (c >= min && c <= max). Otherwise it returns with a value
2891 outside the range. Does not check STR_END. */
2892 DEFINE_COMPILER;
2893 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2894 struct sljit_jump *jump;
2895 #endif
2896 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2897 struct sljit_jump *jump2;
2898 #endif
2899
2900 SLJIT_UNUSED_ARG(update_str_ptr);
2901 SLJIT_UNUSED_ARG(min);
2902 SLJIT_UNUSED_ARG(max);
2903 SLJIT_ASSERT(min <= max);
2904
2905 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2906 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2907
2908 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2909 if (common->utf)
2910 {
2911 if (max < 128 && !update_str_ptr) return;
2912
2913 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2914 if (min >= 0x10000)
2915 {
2916 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2917 if (update_str_ptr)
2918 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2919 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2920 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2921 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2922 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2923 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2924 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2925 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2926 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2927 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2928 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2929 if (!update_str_ptr)
2930 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2931 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2932 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2933 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2934 JUMPHERE(jump2);
2935 if (update_str_ptr)
2936 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2937 }
2938 else if (min >= 0x800 && max <= 0xffff)
2939 {
2940 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2941 if (update_str_ptr)
2942 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2943 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2944 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2945 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2946 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2947 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2948 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2949 if (!update_str_ptr)
2950 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2951 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2952 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2953 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2954 JUMPHERE(jump2);
2955 if (update_str_ptr)
2956 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2957 }
2958 else if (max >= 0x800)
2959 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2960 else if (max < 128)
2961 {
2962 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2963 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2964 }
2965 else
2966 {
2967 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2968 if (!update_str_ptr)
2969 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2970 else
2971 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2972 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2973 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2974 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2975 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2976 if (update_str_ptr)
2977 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2978 }
2979 JUMPHERE(jump);
2980 }
2981 #endif
2982
2983 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2984 if (common->utf)
2985 {
2986 if (max >= 0x10000)
2987 {
2988 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2989 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2990 /* TMP2 contains the high surrogate. */
2991 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2992 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2993 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2994 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2995 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2996 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2997 JUMPHERE(jump);
2998 return;
2999 }
3000
3001 if (max < 0xd800 && !update_str_ptr) return;
3002
3003 /* Skip low surrogate if necessary. */
3004 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3005 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3006 if (update_str_ptr)
3007 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3008 if (max >= 0xd800)
3009 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
3010 JUMPHERE(jump);
3011 }
3012 #endif
3013 }
3014
3015 static SLJIT_INLINE void read_char(compiler_common *common)
3016 {
3017 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
3018 }
3019
3020 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
3021 {
3022 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
3023 DEFINE_COMPILER;
3024 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3025 struct sljit_jump *jump;
3026 #endif
3027 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3028 struct sljit_jump *jump2;
3029 #endif
3030
3031 SLJIT_UNUSED_ARG(update_str_ptr);
3032
3033 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
3034 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3035
3036 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3037 if (common->utf)
3038 {
3039 /* This can be an extra read in some situations, but hopefully
3040 it is needed in most cases. */
3041 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3042 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
3043 if (!update_str_ptr)
3044 {
3045 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3046 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3047 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3048 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3049 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3050 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3051 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3052 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3053 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3054 JUMPHERE(jump2);
3055 }
3056 else
3057 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
3058 JUMPHERE(jump);
3059 return;
3060 }
3061 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
3062
3063 #if !defined COMPILE_PCRE8
3064 /* The ctypes array contains only 256 values. */
3065 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3066 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3067 #endif
3068 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3069 #if !defined COMPILE_PCRE8
3070 JUMPHERE(jump);
3071 #endif
3072
3073 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3074 if (common->utf && update_str_ptr)
3075 {
3076 /* Skip low surrogate if necessary. */
3077 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
3078 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3079 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3080 JUMPHERE(jump);
3081 }
3082 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
3083 }
3084
3085 static void skip_char_back(compiler_common *common)
3086 {
3087 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
3088 DEFINE_COMPILER;
3089 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3090 #if defined COMPILE_PCRE8
3091 struct sljit_label *label;
3092
3093 if (common->utf)
3094 {
3095 label = LABEL();
3096 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3097 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3098 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3099 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
3100 return;
3101 }
3102 #elif defined COMPILE_PCRE16
3103 if (common->utf)
3104 {
3105 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3106 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3107 /* Skip low surrogate if necessary. */
3108 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3109 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3110 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3111 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3112 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3113 return;
3114 }
3115 #endif /* COMPILE_PCRE[8|16] */
3116 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3117 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3118 }
3119
3120 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
3121 {
3122 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
3123 DEFINE_COMPILER;
3124 struct sljit_jump *jump;
3125
3126 if (nltype == NLTYPE_ANY)
3127 {
3128 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3129 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
3130 }
3131 else if (nltype == NLTYPE_ANYCRLF)
3132 {
3133 if (jumpifmatch)
3134 {
3135 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
3136 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3137 }
3138 else
3139 {
3140 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3141 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3142 JUMPHERE(jump);
3143 }
3144 }
3145 else
3146 {
3147 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
3148 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3149 }
3150 }
3151
3152 #ifdef SUPPORT_UTF
3153
3154 #if defined COMPILE_PCRE8
3155 static void do_utfreadchar(compiler_common *common)
3156 {
3157 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3158 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
3159 DEFINE_COMPILER;
3160 struct sljit_jump *jump;
3161
3162 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3163 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3164 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3165 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3166 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3167 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3168
3169 /* Searching for the first zero. */
3170 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3171 jump = JUMP(SLJIT_NOT_ZERO);
3172 /* Two byte sequence. */
3173 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3174 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
3175 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3176
3177 JUMPHERE(jump);
3178 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3179 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3180 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3181 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3182 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3183
3184 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3185 jump = JUMP(SLJIT_NOT_ZERO);
3186 /* Three byte sequence. */
3187 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3188 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
3189 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3190
3191 /* Four byte sequence. */
3192 JUMPHERE(jump);
3193 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3194 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3195 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3196 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3197 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3198 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3199 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
3200 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3201 }
3202
3203 static void do_utfreadchar16(compiler_common *common)
3204 {
3205 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3206 of the character (>= 0xc0). Return value in TMP1. */
3207 DEFINE_COMPILER;
3208 struct sljit_jump *jump;
3209
3210 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3211 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3212 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3213 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3214 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3215 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3216
3217 /* Searching for the first zero. */
3218 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3219 jump = JUMP(SLJIT_NOT_ZERO);
3220 /* Two byte sequence. */
3221 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3222 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3223
3224 JUMPHERE(jump);
3225 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3226 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3227 /* This code runs only in 8 bit mode. No need to shift the value. */
3228 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3229 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3230 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3231 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3232 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3233 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3234 /* Three byte sequence. */
3235 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3236 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3237 }
3238
3239 static void do_utfreadtype8(compiler_common *common)
3240 {
3241 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3242 of the character (>= 0xc0). Return value in TMP1. */
3243 DEFINE_COMPILER;
3244 struct sljit_jump *jump;
3245 struct sljit_jump *compare;
3246
3247 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3248
3249 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3250 jump = JUMP(SLJIT_NOT_ZERO);
3251 /* Two byte sequence. */
3252 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3253 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3254 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3255 /* The upper 5 bits are known at this point. */
3256 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3257 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3258 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3259 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3260 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3261 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3262
3263 JUMPHERE(compare);
3264 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3265 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3266
3267 /* We only have types for characters less than 256. */
3268 JUMPHERE(jump);
3269 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3270 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3271 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3272 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3273 }
3274
3275 #endif /* COMPILE_PCRE8 */
3276
3277 #endif /* SUPPORT_UTF */
3278
3279 #ifdef SUPPORT_UCP
3280
3281 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3282 #define UCD_BLOCK_MASK 127
3283 #define UCD_BLOCK_SHIFT 7
3284
3285 static void do_getucd(compiler_common *common)
3286 {
3287 /* Search the UCD record for the character comes in TMP1.
3288 Returns chartype in TMP1 and UCD offset in TMP2. */
3289 DEFINE_COMPILER;
3290
3291 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3292
3293 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3294 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3295 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3296 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3297 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3298 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3299 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3300 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3301 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3302 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3303 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3304 }
3305 #endif
3306
3307 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
3308 {
3309 DEFINE_COMPILER;
3310 struct sljit_label *mainloop;
3311 struct sljit_label *newlinelabel = NULL;
3312 struct sljit_jump *start;
3313 struct sljit_jump *end = NULL;
3314 struct sljit_jump *end2 = NULL;
3315 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3316 struct sljit_jump *singlechar;
3317 #endif
3318 jump_list *newline = NULL;
3319 BOOL newlinecheck = FALSE;
3320 BOOL readuchar = FALSE;
3321
3322 if (!(hascrorlf || (common->match_end_ptr != 0)) &&
3323 (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3324 newlinecheck = TRUE;
3325
3326 if (common->match_end_ptr != 0)
3327 {
3328 /* Search for the end of the first line. */
3329 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3330
3331 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3332 {
3333 mainloop = LABEL();
3334 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3335 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3336 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3337 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3338 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3339 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3340 JUMPHERE(end);
3341 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3342 }
3343 else
3344 {
3345 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3346 mainloop = LABEL();
3347 /* Continual stores does not cause data dependency. */
3348 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3349 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3350 check_newlinechar(common, common->nltype, &newline, TRUE);
3351 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3352 JUMPHERE(end);
3353 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3354 set_jumps(newline, LABEL());
3355 }
3356
3357 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3358 }
3359
3360 start = JUMP(SLJIT_JUMP);
3361
3362 if (newlinecheck)
3363 {
3364 newlinelabel = LABEL();
3365 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3366 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3367 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3368 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3369 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3370 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3371 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3372 #endif
3373 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3374 end2 = JUMP(SLJIT_JUMP);
3375 }
3376
3377 mainloop = LABEL();
3378
3379 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3380 #ifdef SUPPORT_UTF
3381 if (common->utf) readuchar = TRUE;
3382 #endif
3383 if (newlinecheck) readuchar = TRUE;
3384
3385 if (readuchar)
3386 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3387
3388 if (newlinecheck)
3389 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3390
3391 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3392 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3393 #if defined COMPILE_PCRE8
3394 if (common->utf)
3395 {
3396 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3397 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3398 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3399 JUMPHERE(singlechar);
3400 }
3401 #elif defined COMPILE_PCRE16
3402 if (common->utf)
3403 {
3404 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3405 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3406 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3407 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3408 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3409 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3410 JUMPHERE(singlechar);
3411 }
3412 #endif /* COMPILE_PCRE[8|16] */
3413 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3414 JUMPHERE(start);
3415
3416 if (newlinecheck)
3417 {
3418 JUMPHERE(end);
3419 JUMPHERE(end2);
3420 }
3421
3422 return mainloop;
3423 }
3424
3425 #define MAX_N_CHARS 16
3426 #define MAX_DIFF_CHARS 6
3427
3428 static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
3429 {
3430 pcre_uchar i, len;
3431
3432 len = chars[0];
3433 if (len == 255)
3434 return;
3435
3436 if (len == 0)
3437 {
3438 chars[0] = 1;
3439 chars[1] = chr;
3440 return;
3441 }
3442
3443 for (i = len; i > 0; i--)
3444 if (chars[i] == chr)
3445 return;
3446
3447 if (len >= MAX_DIFF_CHARS - 1)
3448 {
3449 chars[0] = 255;
3450 return;
3451 }
3452
3453 len++;
3454 chars[len] = chr;
3455 chars[0] = len;
3456 }
3457
3458 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
3459 {
3460 /* Recursive function, which scans prefix literals. */
3461 BOOL last, any, class, caseless;
3462 int len, repeat, len_save, consumed = 0;
3463 sljit_u32 chr; /* Any unicode character. */
3464 sljit_u8 *bytes, *bytes_end, byte;
3465 pcre_uchar *alternative, *cc_save, *oc;
3466 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3467 pcre_uchar othercase[8];
3468 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3469 pcre_uchar othercase[2];
3470 #else
3471 pcre_uchar othercase[1];
3472 #endif
3473
3474 repeat = 1;
3475 while (TRUE)
3476 {
3477 if (*rec_count == 0)
3478 return 0;
3479 (*rec_count)--;
3480
3481 last = TRUE;
3482 any = FALSE;
3483 class = FALSE;
3484 caseless = FALSE;
3485
3486 switch (*cc)
3487 {
3488 case OP_CHARI:
3489 caseless = TRUE;
3490 case OP_CHAR:
3491 last = FALSE;
3492 cc++;
3493 break;
3494
3495 case OP_SOD:
3496 case OP_SOM:
3497 case OP_SET_SOM:
3498 case OP_NOT_WORD_BOUNDARY:
3499 case OP_WORD_BOUNDARY:
3500 case OP_EODN:
3501 case OP_EOD:
3502 case OP_CIRC:
3503 case OP_CIRCM:
3504 case OP_DOLL:
3505 case OP_DOLLM:
3506 /* Zero width assertions. */
3507 cc++;
3508 continue;
3509
3510 case OP_ASSERT:
3511 case OP_ASSERT_NOT:
3512 case OP_ASSERTBACK:
3513 case OP_ASSERTBACK_NOT:
3514 cc = bracketend(cc);
3515 continue;
3516
3517 case OP_PLUSI:
3518 case OP_MINPLUSI:
3519 case OP_POSPLUSI:
3520 caseless = TRUE;
3521 case OP_PLUS:
3522 case OP_MINPLUS:
3523 case OP_POSPLUS:
3524 cc++;
3525 break;
3526
3527 case OP_EXACTI:
3528 caseless = TRUE;
3529 case OP_EXACT:
3530 repeat = GET2(cc, 1);
3531 last = FALSE;
3532 cc += 1 + IMM2_SIZE;
3533 break;
3534
3535 case OP_QUERYI:
3536 case OP_MINQUERYI:
3537 case OP_POSQUERYI:
3538 caseless = TRUE;
3539 case OP_QUERY:
3540 case OP_MINQUERY:
3541 case OP_POSQUERY:
3542 len = 1;
3543 cc++;
3544 #ifdef SUPPORT_UTF
3545 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3546 #endif
3547 max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
3548 if (max_chars == 0)
3549 return consumed;
3550 last = FALSE;
3551 break;
3552
3553 case OP_KET:
3554 cc += 1 + LINK_SIZE;
3555 continue;
3556
3557 case OP_ALT:
3558 cc += GET(cc, 1);
3559 continue;
3560
3561 case OP_ONCE:
3562 case OP_ONCE_NC:
3563 case OP_BRA:
3564 case OP_BRAPOS:
3565 case OP_CBRA:
3566 case OP_CBRAPOS:
3567 alternative = cc + GET(cc, 1);
3568 while (*alternative == OP_ALT)
3569 {
3570 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
3571 if (max_chars == 0)
3572 return consumed;
3573 alternative += GET(alternative, 1);
3574 }
3575
3576 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3577 cc += IMM2_SIZE;
3578 cc += 1 + LINK_SIZE;
3579 continue;
3580
3581 case OP_CLASS:
3582 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3583 if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
3584 return consumed;
3585 #endif
3586 class = TRUE;
3587 break;
3588
3589 case OP_NCLASS:
3590 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3591 if (common->utf) return consumed;
3592 #endif
3593 class = TRUE;
3594 break;
3595
3596 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3597 case OP_XCLASS:
3598 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3599 if (common->utf) return consumed;
3600 #endif
3601 any = TRUE;
3602 cc += GET(cc, 1);
3603 break;
3604 #endif
3605
3606 case OP_DIGIT:
3607 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3608 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3609 return consumed;
3610 #endif
3611 any = TRUE;
3612 cc++;
3613 break;
3614
3615 case OP_WHITESPACE:
3616 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3617 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3618 return consumed;
3619 #endif
3620 any = TRUE;
3621 cc++;
3622 break;
3623
3624 case OP_WORDCHAR:
3625 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3626 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3627 return consumed;
3628 #endif
3629 any = TRUE;
3630 cc++;
3631 break;
3632
3633 case OP_NOT:
3634 case OP_NOTI:
3635 cc++;
3636 /* Fall through. */
3637 case OP_NOT_DIGIT:
3638 case OP_NOT_WHITESPACE:
3639 case OP_NOT_WORDCHAR:
3640 case OP_ANY:
3641 case OP_ALLANY:
3642 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3643 if (common->utf) return consumed;
3644 #endif
3645 any = TRUE;
3646 cc++;
3647 break;
3648
3649 #ifdef SUPPORT_UTF
3650 case OP_NOTPROP:
3651 case OP_PROP:
3652 #ifndef COMPILE_PCRE32
3653 if (common->utf) return consumed;
3654 #endif
3655 any = TRUE;
3656 cc += 1 + 2;
3657 break;
3658 #endif
3659
3660 case OP_TYPEEXACT:
3661 repeat = GET2(cc, 1);
3662 cc += 1 + IMM2_SIZE;
3663 continue;
3664
3665 case OP_NOTEXACT:
3666 case OP_NOTEXACTI:
3667 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3668 if (common->utf) return consumed;
3669 #endif
3670 any = TRUE;
3671 repeat = GET2(cc, 1);
3672 cc += 1 + IMM2_SIZE + 1;
3673 break;
3674
3675 default:
3676 return consumed;
3677 }
3678
3679 if (any)
3680 {
3681 do
3682 {
3683 chars[0] = 255;
3684
3685 consumed++;
3686 if (--max_chars == 0)
3687 return consumed;
3688 chars += MAX_DIFF_CHARS;
3689 }
3690 while (--repeat > 0);
3691
3692 repeat = 1;
3693 continue;
3694 }
3695
3696 if (class)
3697 {
3698 bytes = (sljit_u8*) (cc + 1);
3699 cc += 1 + 32 / sizeof(pcre_uchar);
3700
3701 switch (*cc)
3702 {
3703 case OP_CRSTAR:
3704 case OP_CRMINSTAR:
3705 case OP_CRPOSSTAR:
3706 case OP_CRQUERY:
3707 case OP_CRMINQUERY:
3708 case OP_CRPOSQUERY:
3709 max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
3710 if (max_chars == 0)
3711 return consumed;
3712 break;
3713
3714 default:
3715 case OP_CRPLUS:
3716 case OP_CRMINPLUS:
3717 case OP_CRPOSPLUS:
3718 break;
3719
3720 case OP_CRRANGE:
3721 case OP_CRMINRANGE:
3722 case OP_CRPOSRANGE:
3723 repeat = GET2(cc, 1);
3724 if (repeat <= 0)
3725 return consumed;
3726 break;
3727 }
3728
3729 do
3730 {
3731 if (bytes[31] & 0x80)
3732 chars[0] = 255;
3733 else if (chars[0] != 255)
3734 {
3735 bytes_end = bytes + 32;
3736 chr = 0;
3737 do
3738 {
3739 byte = *bytes++;
3740 SLJIT_ASSERT((chr & 0x7) == 0);
3741 if (byte == 0)
3742 chr += 8;
3743 else
3744 {
3745 do
3746 {
3747 if ((byte & 0x1) != 0)
3748 add_prefix_char(chr, chars);
3749 byte >>= 1;
3750 chr++;
3751 }
3752 while (byte != 0);
3753 chr = (chr + 7) & ~7;
3754 }
3755 }
3756 while (chars[0] != 255 && bytes < bytes_end);
3757 bytes = bytes_end - 32;
3758 }
3759
3760 consumed++;
3761 if (--max_chars == 0)
3762 return consumed;
3763 chars += MAX_DIFF_CHARS;
3764 }
3765 while (--repeat > 0);
3766
3767 switch (*cc)
3768 {
3769 case OP_CRSTAR:
3770 case OP_CRMINSTAR:
3771 case OP_CRPOSSTAR:
3772 return consumed;
3773
3774 case OP_CRQUERY:
3775 case OP_CRMINQUERY:
3776 case OP_CRPOSQUERY:
3777 cc++;
3778 break;
3779
3780 case OP_CRRANGE:
3781 case OP_CRMINRANGE:
3782 case OP_CRPOSRANGE:
3783 if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
3784 return consumed;
3785 cc += 1 + 2 * IMM2_SIZE;
3786 break;
3787 }
3788
3789 repeat = 1;
3790 continue;
3791 }
3792
3793 len = 1;
3794 #ifdef SUPPORT_UTF
3795 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3796 #endif
3797
3798 if (caseless && char_has_othercase(common, cc))
3799 {
3800 #ifdef SUPPORT_UTF
3801 if (common->utf)
3802 {
3803 GETCHAR(chr, cc);
3804 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3805 return consumed;
3806 }
3807 else
3808 #endif
3809 {
3810 chr = *cc;
3811 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3812 }
3813 }
3814 else
3815 {
3816 caseless = FALSE;
3817 othercase[0] = 0; /* Stops compiler warning - PH */
3818 }
3819
3820 len_save = len;
3821 cc_save = cc;
3822 while (TRUE)
3823 {
3824 oc = othercase;
3825 do
3826 {
3827 chr = *cc;
3828 add_prefix_char(*cc, chars);
3829
3830 if (caseless)
3831 add_prefix_char(*oc, chars);
3832
3833 len--;
3834 consumed++;
3835 if (--max_chars == 0)
3836 return consumed;
3837 chars += MAX_DIFF_CHARS;
3838 cc++;
3839 oc++;
3840 }
3841 while (len > 0);
3842
3843 if (--repeat == 0)
3844 break;
3845
3846 len = len_save;
3847 cc = cc_save;
3848 }
3849
3850 repeat = 1;
3851 if (last)
3852 return consumed;
3853 }
3854 }
3855
3856 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
3857
3858 static sljit_s32 character_to_int32(pcre_uchar chr)
3859 {
3860 sljit_s32 value = (sljit_s32)chr;
3861 #if defined COMPILE_PCRE8
3862 #define SSE2_COMPARE_TYPE_INDEX 0
3863 return (value << 24) | (value << 16) | (value << 8) | value;
3864 #elif defined COMPILE_PCRE16
3865 #define SSE2_COMPARE_TYPE_INDEX 1
3866 return (value << 16) | value;
3867 #elif defined COMPILE_PCRE32
3868 #define SSE2_COMPARE_TYPE_INDEX 2
3869 return value;
3870 #else
3871 #error "Unsupported unit width"
3872 #endif
3873 }
3874
3875 static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
3876 {
3877 DEFINE_COMPILER;
3878 struct sljit_label *start;
3879 struct sljit_jump *quit[3];
3880 struct sljit_jump *nomatch;
3881 sljit_u8 instruction[8];
3882 sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
3883 sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
3884 sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
3885 BOOL load_twice = FALSE;
3886 pcre_uchar bit;
3887
3888 bit = char1 ^ char2;
3889 if (!is_powerof2(bit))
3890 bit = 0;
3891
3892 if ((char1 != char2) && bit == 0)
3893 load_twice = TRUE;
3894
3895 quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3896
3897 /* First part (unaligned start) */
3898
3899 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
3900
3901 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
3902
3903 /* MOVD xmm, r/m32 */
3904 instruction[0] = 0x66;
3905 instruction[1] = 0x0f;
3906 instruction[2] = 0x6e;
3907 instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
3908 sljit_emit_op_custom(compiler, instruction, 4);
3909
3910 if (char1 != char2)
3911 {
3912 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
3913
3914 /* MOVD xmm, r/m32 */
3915 instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
3916 sljit_emit_op_custom(compiler, instruction, 4);
3917 }
3918
3919 /* PSHUFD xmm1, xmm2/m128, imm8 */
3920 instruction[2] = 0x70;
3921 instruction[3] = 0xc0 | (2 << 3) | 2;
3922 instruction[4] = 0;
3923 sljit_emit_op_custom(compiler, instruction, 5);
3924
3925 if (char1 != char2)
3926 {
3927 /* PSHUFD xmm1, xmm2/m128, imm8 */
3928 instruction[3] = 0xc0 | (3 << 3) | 3;
3929 instruction[4] = 0;
3930 sljit_emit_op_custom(compiler, instruction, 5);
3931 }
3932
3933 OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
3934 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
3935
3936 /* MOVDQA xmm1, xmm2/m128 */
3937 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3938
3939 if (str_ptr_ind < 8)
3940 {
3941 instruction[2] = 0x6f;
3942 instruction[3] = (0 << 3) | str_ptr_ind;
3943 sljit_emit_op_custom(compiler, instruction, 4);
3944
3945 if (load_twice)
3946 {
3947 instruction[3] = (1 << 3) | str_ptr_ind;
3948 sljit_emit_op_custom(compiler, instruction, 4);
3949 }
3950 }
3951 else
3952 {
3953 instruction[1] = 0x41;
3954 instruction[2] = 0x0f;
3955 instruction[3] = 0x6f;
3956 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
3957 sljit_emit_op_custom(compiler, instruction, 5);
3958
3959 if (load_twice)
3960 {
3961 instruction[4] = (1 << 3) | str_ptr_ind;
3962 sljit_emit_op_custom(compiler, instruction, 5);
3963 }
3964 instruction[1] = 0x0f;
3965 }
3966
3967 #else
3968
3969 instruction[2] = 0x6f;
3970 instruction[3] = (0 << 3) | str_ptr_ind;
3971 sljit_emit_op_custom(compiler, instruction, 4);
3972
3973 if (load_twice)
3974 {
3975 instruction[3] = (1 << 3) | str_ptr_ind;
3976 sljit_emit_op_custom(compiler, instruction, 4);
3977 }
3978
3979 #endif
3980
3981 if (bit != 0)
3982 {
3983 /* POR xmm1, xmm2/m128 */
3984 instruction[2] = 0xeb;
3985 instruction[3] = 0xc0 | (0 << 3) | 3;
3986 sljit_emit_op_custom(compiler, instruction, 4);
3987 }
3988
3989 /* PCMPEQB/W/D xmm1, xmm2/m128 */
3990 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
3991 instruction[3] = 0xc0 | (0 << 3) | 2;
3992 sljit_emit_op_custom(compiler, instruction, 4);
3993
3994 if (load_twice)
3995 {
3996 instruction[3] = 0xc0 | (1 << 3) | 3;
3997 sljit_emit_op_custom(compiler, instruction, 4);
3998 }
3999
4000 /* PMOVMSKB reg, xmm */
4001 instruction[2] = 0xd7;
4002 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4003 sljit_emit_op_custom(compiler, instruction, 4);
4004
4005 if (load_twice)
4006 {
4007 OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
4008 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4009 sljit_emit_op_custom(compiler, instruction, 4);
4010
4011 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4012 OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
4013 }
4014
4015 OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
4016
4017 /* BSF r32, r/m32 */
4018 instruction[0] = 0x0f;
4019 instruction[1] = 0xbc;
4020 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4021 sljit_emit_op_custom(compiler, instruction, 3);
4022
4023 nomatch = JUMP(SLJIT_ZERO);
4024
4025 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4026 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4027 quit[1] = JUMP(SLJIT_JUMP);
4028
4029 JUMPHERE(nomatch);
4030
4031 start = LABEL();
4032 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
4033 quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4034
4035 /* Second part (aligned) */
4036
4037 instruction[0] = 0x66;
4038 instruction[1] = 0x0f;
4039
4040 /* MOVDQA xmm1, xmm2/m128 */
4041 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4042
4043 if (str_ptr_ind < 8)
4044 {
4045 instruction[2] = 0x6f;
4046 instruction[3] = (0 << 3) | str_ptr_ind;
4047 sljit_emit_op_custom(compiler, instruction, 4);
4048
4049 if (load_twice)
4050 {
4051 instruction[3] = (1 << 3) | str_ptr_ind;
4052 sljit_emit_op_custom(compiler, instruction, 4);
4053 }
4054 }
4055 else
4056 {
4057 instruction[1] = 0x41;
4058 instruction[2] = 0x0f;
4059 instruction[3] = 0x6f;
4060 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
4061 sljit_emit_op_custom(compiler, instruction, 5);
4062
4063 if (load_twice)
4064 {
4065 instruction[4] = (1 << 3) | str_ptr_ind;
4066 sljit_emit_op_custom(compiler, instruction, 5);
4067 }
4068 instruction[1] = 0x0f;
4069 }
4070
4071 #else
4072
4073 instruction[2] = 0x6f;
4074 instruction[3] = (0 << 3) | str_ptr_ind;
4075 sljit_emit_op_custom(compiler, instruction, 4);
4076
4077 if (load_twice)
4078 {
4079 instruction[3] = (1 << 3) | str_ptr_ind;
4080 sljit_emit_op_custom(compiler, instruction, 4);
4081 }
4082
4083 #endif
4084
4085 if (bit != 0)
4086 {
4087 /* POR xmm1, xmm2/m128 */
4088 instruction[2] = 0xeb;
4089 instruction[3] = 0xc0 | (0 << 3) | 3;
4090 sljit_emit_op_custom(compiler, instruction, 4);
4091 }
4092
4093 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4094 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4095 instruction[3] = 0xc0 | (0 << 3) | 2;
4096 sljit_emit_op_custom(compiler, instruction, 4);
4097
4098 if (load_twice)
4099 {
4100 instruction[3] = 0xc0 | (1 << 3) | 3;
4101 sljit_emit_op_custom(compiler, instruction, 4);
4102 }
4103
4104 /* PMOVMSKB reg, xmm */
4105 instruction[2] = 0xd7;
4106 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4107 sljit_emit_op_custom(compiler, instruction, 4);
4108
4109 if (load_twice)
4110 {
4111 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4112 sljit_emit_op_custom(compiler, instruction, 4);
4113
4114 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4115 }
4116
4117 /* BSF r32, r/m32 */
4118 instruction[0] = 0x0f;
4119 instruction[1] = 0xbc;
4120 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4121 sljit_emit_op_custom(compiler, instruction, 3);
4122
4123 JUMPTO(SLJIT_ZERO, start);
4124
4125 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4126
4127 start = LABEL();
4128 SET_LABEL(quit[0], start);
4129 SET_LABEL(quit[1], start);
4130 SET_LABEL(quit[2], start);
4131 }
4132
4133 #undef SSE2_COMPARE_TYPE_INDEX
4134
4135 #endif
4136
4137 static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
4138 {
4139 DEFINE_COMPILER;
4140 struct sljit_label *start;
4141 struct sljit_jump *quit;
4142 struct sljit_jump *found;
4143 pcre_uchar mask;
4144 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4145 struct sljit_label *utf_start = NULL;
4146 struct sljit_jump *utf_quit = NULL;
4147 #endif
4148 BOOL has_match_end = (common->match_end_ptr != 0);
4149
4150 if (offset > 0)
4151 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4152
4153 if (has_match_end)
4154 {
4155 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4156
4157 OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
4158 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
4159 if (sljit_x86_is_cmov_available())
4160 {
4161 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
4162 sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
4163 }
4164 #endif
4165 {
4166 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
4167 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4168 JUMPHERE(quit);
4169 }
4170 }
4171
4172 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4173 if (common->utf && offset > 0)
4174 utf_start = LABEL();
4175 #endif
4176
4177 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
4178
4179 /* SSE2 accelerated first character search. */
4180
4181 if (sljit_x86_is_sse2_available())
4182 {
4183 fast_forward_first_char2_sse2(common, char1, char2);
4184
4185 SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
4186 if (common->mode == JIT_COMPILE)
4187 {
4188 /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
4189 SLJIT_ASSERT(common->forced_quit_label == NULL);
4190 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
4191 add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4192
4193 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4194 if (common->utf && offset > 0)
4195 {
4196 SLJIT_ASSERT(common->mode == JIT_COMPILE);
4197
4198 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4199 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4200 #if defined COMPILE_PCRE8
4201 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4202 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4203 #elif defined COMPILE_PCRE16
4204 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4205 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4206 #else
4207 #error "Unknown code width"
4208 #endif
4209 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4210 }
4211 #endif
4212
4213 if (offset > 0)
4214 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4215 }
4216 else if (sljit_x86_is_cmov_available())
4217 {
4218 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
4219 sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
4220 }
4221 else
4222 {
4223 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4224 OP1(SLJIT_MOV, STR_PTR, 0, has_match_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end ? common->match_end_ptr : 0);
4225 JUMPHERE(quit);
4226 }
4227
4228 if (has_match_end)
4229 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4230 return;
4231 }
4232
4233 #endif
4234
4235 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4236
4237 start = LABEL();
4238 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4239
4240 if (char1 == char2)
4241 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
4242 else
4243 {
4244 mask = char1 ^ char2;
4245 if (is_powerof2(mask))
4246 {
4247 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4248 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
4249 }
4250 else
4251 {
4252 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
4253 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4254 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
4255 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4256 found = JUMP(SLJIT_NOT_ZERO);
4257 }
4258 }
4259
4260 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4261 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
4262
4263 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4264 if (common->utf && offset > 0)
4265 utf_quit = JUMP(SLJIT_JUMP);
4266 #endif
4267
4268 JUMPHERE(found);
4269
4270 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4271 if (common->utf && offset > 0)
4272 {
4273 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4274 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4275 #if defined COMPILE_PCRE8
4276 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4277 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4278 #elif defined COMPILE_PCRE16
4279 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4280 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4281 #else
4282 #error "Unknown code width"
4283 #endif
4284 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4285 JUMPHERE(utf_quit);
4286 }
4287 #endif
4288
4289 JUMPHERE(quit);
4290
4291 if (has_match_end)
4292 {
4293 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4294 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4295 if (offset > 0)
4296 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4297 JUMPHERE(quit);
4298 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4299 }
4300
4301 if (offset > 0)
4302 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4303 }
4304
4305 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
4306 {
4307 DEFINE_COMPILER;
4308 struct sljit_label *start;
4309 struct sljit_jump *quit;
4310 struct sljit_jump *match;
4311 /* bytes[0] represent the number of characters between 0
4312 and MAX_N_BYTES - 1, 255 represents any character. */
4313 pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
4314 sljit_s32 offset;
4315 pcre_uchar mask;
4316 pcre_uchar *char_set, *char_set_end;
4317 int i, max, from;
4318 int range_right = -1, range_len;
4319 sljit_u8 *update_table = NULL;
4320 BOOL in_range;
4321 sljit_u32 rec_count;
4322
4323 for (i = 0; i < MAX_N_CHARS; i++)
4324 chars[i * MAX_DIFF_CHARS] = 0;
4325
4326 rec_count = 10000;
4327 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
4328
4329 if (max < 1)
4330 return FALSE;
4331
4332 in_range = FALSE;
4333 /* Prevent compiler "uninitialized" warning */
4334 from = 0;
4335 range_len = 4 /* minimum length */ - 1;
4336 for (i = 0; i <= max; i++)
4337 {
4338 if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
4339 {
4340 range_len = i - from;
4341 range_right = i - 1;
4342 }
4343
4344 if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
4345 {
4346 SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
4347 if (!in_range)
4348 {
4349 in_range = TRUE;
4350 from = i;
4351 }
4352 }
4353 else
4354 in_range = FALSE;
4355 }
4356
4357 if (range_right >= 0)
4358 {
4359 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
4360 if (update_table == NULL)
4361 return TRUE;
4362 memset(update_table, IN_UCHARS(range_len), 256);
4363
4364 for (i = 0; i < range_len; i++)
4365 {
4366 char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
4367 SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
4368 char_set_end = char_set + char_set[0];
4369 char_set++;
4370 while (char_set <= char_set_end)
4371 {
4372 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
4373 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
4374 char_set++;
4375 }
4376 }
4377 }
4378
4379 offset = -1;
4380 /* Scan forward. */
4381 for (i = 0; i < max; i++)
4382 {
4383 if (offset == -1)
4384 {
4385 if (chars[i * MAX_DIFF_CHARS] <= 2)
4386 offset = i;
4387 }
4388 else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
4389 {
4390 if (chars[i * MAX_DIFF_CHARS] == 1)
4391 offset = i;
4392 else
4393 {
4394 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4395 if (!is_powerof2(mask))
4396 {
4397 mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
4398 if (is_powerof2(mask))
4399 offset = i;
4400 }
4401 }
4402 }
4403 }
4404
4405 if (range_right < 0)
4406 {
4407 if (offset < 0)
4408 return FALSE;
4409 SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
4410 /* Works regardless the value is 1 or 2. */
4411 mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
4412 fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
4413 return TRUE;
4414 }
4415
4416 if (range_right == offset)
4417 offset = -1;
4418
4419 SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
4420
4421 max -= 1;
4422 SLJIT_ASSERT(max > 0);
4423 if (common->match_end_ptr != 0)
4424 {
4425 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4426 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4427 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4428 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4429 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4430 JUMPHERE(quit);
4431 }
4432 else
4433 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4434
4435 SLJIT_ASSERT(range_right >= 0);
4436
4437 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4438 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4439 #endif
4440
4441 start = LABEL();
4442 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4443
4444 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4445 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4446 #else
4447 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4448 #endif
4449
4450 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4451 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4452 #else
4453 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4454 #endif
4455 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4456 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4457
4458 if (offset >= 0)
4459 {
4460 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
4461 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4462
4463 if (chars[offset * MAX_DIFF_CHARS] == 1)
4464 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
4465 else
4466 {
4467 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4468 if (is_powerof2(mask))
4469 {
4470 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4471 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
4472 }
4473 else
4474 {
4475 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
4476 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
4477 JUMPHERE(match);
4478 }
4479 }
4480 }
4481
4482 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4483 if (common->utf && offset != 0)
4484 {
4485 if (offset < 0)
4486 {
4487 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4488 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4489 }
4490 else
4491 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4492 #if defined COMPILE_PCRE8
4493 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4494 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
4495 #elif defined COMPILE_PCRE16
4496 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4497 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
4498 #else
4499 #error "Unknown code width"
4500 #endif
4501 if (offset < 0)
4502 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4503 }
4504 #endif
4505
4506 if (offset >= 0)
4507 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4508
4509 JUMPHERE(quit);
4510
4511 if (common->match_end_ptr != 0)
4512 {
4513 if (range_right >= 0)
4514 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4515 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4516 if (range_right >= 0)
4517 {
4518 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4519 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4520 JUMPHERE(quit);
4521 }
4522 }
4523 else
4524 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4525 return TRUE;
4526 }
4527
4528 #undef MAX_N_CHARS
4529 #undef MAX_DIFF_CHARS
4530
4531 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4532 {
4533 pcre_uchar oc;
4534
4535 oc = first_char;
4536 if (caseless)
4537 {
4538 oc = TABLE_GET(first_char, common->fcc, first_char);
4539 #if defined SUPPORT_UTF && !defined COMPILE_PCRE8
4540 if (first_char > 127 && common->utf)
4541 oc = UCD_OTHERCASE(first_char);
4542 #endif
4543 }
4544
4545 fast_forward_first_char2(common, first_char, oc, 0);
4546 }
4547
/* Emits code that moves STR_PTR forward until it stands just after a newline
sequence, or until STR_END is reached. Nothing is skipped when STR_PTR is not
past args->str. When common->match_end_ptr is set, STR_END is temporarily
replaced by the value stored in that stack slot and restored before leaving.
Uses TMP1, TMP2 and (when match_end_ptr is set) TMP3. */
4548 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4549 {
4550 DEFINE_COMPILER;
4551 struct sljit_label *loop;
4552 struct sljit_jump *lastchar;
4553 struct sljit_jump *firstchar;
4554 struct sljit_jump *quit;
4555 struct sljit_jump *foundcr = NULL;
4556 struct sljit_jump *notfoundnl;
4557 jump_list *newline = NULL;
4558
/* Keep the real STR_END in TMP3 and search only up to the stored limit. */
4559 if (common->match_end_ptr != 0)
4560 {
4561 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4562 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4563 }
4564
/* Fixed newline made of two characters (common->newline holds both, the
first in bits 8-15 and the second in bits 0-7). */
4565 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4566 {
4567 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4568 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4569 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4570 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4571 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4572
/* Step STR_PTR back by one character, but only if at least two characters
lie between args->begin and STR_PTR: TMP2 becomes 1 or 0, is scaled to the
character size and subtracted. */
4573 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4574 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4575 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
4576 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4577 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4578 #endif
4579 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4580
/* Advance one character at a time until the two characters just before
STR_PTR are the newline pair, or STR_END is reached. */
4581 loop = LABEL();
4582 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4583 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4584 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4585 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4586 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4587 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4588
4589 JUMPHERE(quit);
4590 JUMPHERE(firstchar);
4591 JUMPHERE(lastchar);
4592
4593 if (common->match_end_ptr != 0)
4594 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4595 return;
4596 }
4597
/* All other newline conventions. */
4598 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4599 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4600 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4601 skip_char_back(common);
4602
/* The loop entry is also published in common->ff_newline_shortcut. */
4603 loop = LABEL();
4604 common->ff_newline_shortcut = loop;
4605
4606 read_char_range(common, common->nlmin, common->nlmax, TRUE);
4607 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* CR is handled separately below so that a following LF can be skipped. */
4608 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4609 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected in "newline" are bound back to the loop label;
presumably they are the "not a newline" exits of check_newlinechar - confirm
against that helper. */
4610 check_newlinechar(common, common->nltype, &newline, FALSE);
4611 set_jumps(newline, loop);
4612
4613 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4614 {
4615 quit = JUMP(SLJIT_JUMP);
4616 JUMPHERE(foundcr);
/* After a CR: if the next character exists and is NL, step over it too.
TMP1 becomes 1 or 0, is scaled to the character size and added to STR_PTR. */
4617 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4618 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4619 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4620 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4621 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4622 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4623 #endif
4624 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4625 JUMPHERE(notfoundnl);
4626 JUMPHERE(quit);
4627 }
4628 JUMPHERE(lastchar);
4629 JUMPHERE(firstchar);
4630
/* Restore the real STR_END saved at entry. */
4631 if (common->match_end_ptr != 0)
4632 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4633 }
4634
4635 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4636
/* Emits a loop that advances STR_PTR until the character it points to is
accepted by the 256-bit bitmap start_bits, or until STR_END is reached. The
test is emitted by check_class_ranges() when the bitmap is simple enough,
otherwise a bitmap lookup is generated. When common->match_end_ptr is set,
STR_END is temporarily replaced by the value in that stack slot (the real
value is kept in RETURN_ADDR). Uses TMP1, TMP2 and, in UTF mode, TMP3. */
4637 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
4638 {
4639 DEFINE_COMPILER;
4640 struct sljit_label *start;
4641 struct sljit_jump *quit;
4642 struct sljit_jump *found = NULL;
4643 jump_list *matches = NULL;
4644 #ifndef COMPILE_PCRE8
4645 struct sljit_jump *jump;
4646 #endif
4647
4648 if (common->match_end_ptr != 0)
4649 {
4650 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4651 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4652 }
4653
4654 start = LABEL();
4655 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4656 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* The test below may modify TMP1; keep a copy for the UTF length handling. */
4657 #ifdef SUPPORT_UTF
4658 if (common->utf)
4659 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4660 #endif
4661
/* check_class_ranges is called with invert set, so the jumps it adds to
"matches" are taken for accepted characters. Bit 255 of the bitmap is passed
as its nclass argument. */
4662 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4663 {
/* Fallback: bitmap lookup. In the 16/32-bit libraries every character of
255 or above is tested as 255. */
4664 #ifndef COMPILE_PCRE8
4665 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4666 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4667 JUMPHERE(jump);
4668 #endif
/* TMP2 = 1 << (c & 7), TMP1 = start_bits[c >> 3]; "found" is taken when
the bit is set. */
4669 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4670 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4671 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4672 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4673 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4674 found = JUMP(SLJIT_NOT_ZERO);
4675 }
4676
/* Not accepted: step over this character and try the next one. */
4677 #ifdef SUPPORT_UTF
4678 if (common->utf)
4679 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4680 #endif
4681 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4682 #ifdef SUPPORT_UTF
4683 #if defined COMPILE_PCRE8
4684 if (common->utf)
4685 {
/* Units below 0xc0 need no further skipping. Otherwise the value read from
utf8_table4[c - 0xc0] is added to STR_PTR (presumably the number of
additional bytes of the sequence - confirm against utf8_table4). */
4686 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4687 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4688 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4689 }
4690 #elif defined COMPILE_PCRE16
4691 if (common->utf)
4692 {
/* Units below 0xd800 need no further skipping. When (c & 0xfc00) equals
0xd800, two more bytes (one 16-bit unit) are skipped. */
4693 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4694 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4695 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4696 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4697 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4698 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4699 }
4700 #endif /* COMPILE_PCRE[8|16] */
4701 #endif /* SUPPORT_UTF */
4702 JUMPTO(SLJIT_JUMP, start);
/* All exits (character accepted, or end reached) land here. */
4703 if (found != NULL)
4704 JUMPHERE(found);
4705 if (matches != NULL)
4706 set_jumps(matches, LABEL());
4707 JUMPHERE(quit);
4708
4709 if (common->match_end_ptr != 0)
4710 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4711 }
4712
4713 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4714 {
4715 DEFINE_COMPILER;
4716 struct sljit_label *loop;
4717 struct sljit_jump *toolong;
4718 struct sljit_jump *alreadyfound;
4719 struct sljit_jump *found;
4720 struct sljit_jump *foundoc = NULL;
4721 struct sljit_jump *notfound;
4722 sljit_u32 oc, bit;
4723
4724 SLJIT_ASSERT(common->req_char_ptr != 0);
4725 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4726 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4727 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4728 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4729
4730 if (has_firstchar)
4731 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4732 else
4733 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4734
4735 loop = LABEL();
4736 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4737
4738 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4739 oc = req_char;
4740 if (caseless)
4741 {
4742 oc = TABLE_GET(req_char, common->fcc, req_char);
4743 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4744 if (req_char > 127 && common->utf)
4745 oc = UCD_OTHERCASE(req_char);
4746 #endif
4747 }
4748 if (req_char == oc)
4749 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4750 else
4751 {
4752 bit = req_char ^ oc;
4753 if (is_powerof2(bit))
4754 {
4755 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4756 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4757 }
4758 else
4759 {
4760 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4761 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4762 }
4763 }
4764 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4765 JUMPTO(SLJIT_JUMP, loop);
4766
4767 JUMPHERE(found);
4768 if (foundoc)
4769 JUMPHERE(foundoc);
4770 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4771 JUMPHERE(alreadyfound);
4772 JUMPHERE(toolong);
4773 return notfound;
4774 }
4775
/* Emits the shared "revert frames" routine (entered with a fast call, the
return address is kept in RETURN_ADDR). Starting at STACK_TOP it walks a list
of records and copies saved words back into the local area:
  value > 0: two saved words follow; they are stored at local base + value
             and the next machine word; the record is three words long.
  value < 0: one saved word follows; it is stored at local base + (-value);
             the record is two words long.
  value == 0: end of the list, the routine returns.
Uses TMP1 (cursor), TMP2 (current value / target address) and TMP3 (local
base). STACK_TOP itself is not modified here. */
4776 static void do_revertframes(compiler_common *common)
4777 {
4778 DEFINE_COMPILER;
4779 struct sljit_jump *jump;
4780 struct sljit_label *mainloop;
4781
4782 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4783 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4784 GET_LOCAL_BASE(TMP3, 0, 0);
4785
4786 /* Drop frames until we reach STACK_TOP. */
4787 mainloop = LABEL();
4788 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Signed compare of the record value with zero. */
4789 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4790 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4791
/* Positive value: restore two words. */
4792 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4793 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4794 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4795 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4796 JUMPTO(SLJIT_JUMP, mainloop);
4797
/* Zero or negative. This path is reached directly from the compare above, so
the second conditional jump tests the same flags: negative continues below,
zero falls through to the return. */
4798 JUMPHERE(jump);
4799 jump = JUMP(SLJIT_SIG_LESS);
4800 /* End of dropping frames. */
4801 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4802
/* Negative value: restore a single word at local base + (-value). */
4803 JUMPHERE(jump);
4804 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4805 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4806 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4807 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4808 JUMPTO(SLJIT_JUMP, mainloop);
4809 }
4810
/* Emits the shared word-boundary routine (entered with a fast call; the
return address is kept in LOCALS0). It computes a 0/1 "is a word character"
value for the character before STR_PTR (kept in LOCALS1, 0 when STR_PTR is at
or before args->begin) and for the character at STR_PTR (in TMP2, 0 when the
end check fails), then XORs the two with the equal/zero flag set. The XOR
result is not written to a register, so only the flag carries the outcome.
Uses TMP1, TMP2, LOCALS0 and LOCALS1. */
4811 static void check_wordboundary(compiler_common *common)
4812 {
4813 DEFINE_COMPILER;
4814 struct sljit_jump *skipread;
4815 jump_list *skipread_list = NULL;
4816 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4817 struct sljit_jump *jump;
4818 #endif
4819
/* The ctypes lookups below shift by 4 and mask with 1, which only works
while ctype_word is bit 4. */
4820 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4821
4822 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4823 /* Get type of the previous char, and put it to LOCALS1. */
4824 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4825 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4826 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* No previous character: LOCALS1 stays 0. */
4827 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4828 skip_char_back(common);
4829 check_start_used_ptr(common);
4830 read_char(common);
4831
4832 /* Testing char type. */
4833 #ifdef SUPPORT_UCP
4834 if (common->use_ucp)
4835 {
/* Underscore counts as a word character (TMP2 is preset to 1). Otherwise
the getucd routine is called; the code assumes it leaves the character type
in TMP1 (confirm against getucd). TMP2 becomes 1 when that type lies in
ucp_Ll..ucp_Lu or in ucp_Nd..ucp_No. */
4836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4837 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4838 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4839 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4840 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4841 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4842 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4843 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4844 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4845 JUMPHERE(jump);
4846 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4847 }
4848 else
4849 #endif
4850 {
/* Table lookup: characters above 255 skip the lookup and keep LOCALS1 at 0. */
4851 #ifndef COMPILE_PCRE8
4852 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4853 #elif defined SUPPORT_UTF
4854 /* Here LOCALS1 has already been zeroed. */
4855 jump = NULL;
4856 if (common->utf)
4857 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4858 #endif /* COMPILE_PCRE8 */
4859 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4860 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4861 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4862 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4863 #ifndef COMPILE_PCRE8
4864 JUMPHERE(jump);
4865 #elif defined SUPPORT_UTF
4866 if (jump != NULL)
4867 JUMPHERE(jump);
4868 #endif /* COMPILE_PCRE8 */
4869 }
4870 JUMPHERE(skipread);
4871
/* Now the character at STR_PTR. TMP2 is preset to 0 for the case where
check_str_end() branches to skipread_list. */
4872 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4873 check_str_end(common, &skipread_list);
4874 peek_char(common, READ_CHAR_MAX);
4875
4876 /* Testing char type. This is a code duplication. */
4877 #ifdef SUPPORT_UCP
4878 if (common->use_ucp)
4879 {
4880 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4881 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4882 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4883 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4884 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4885 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4886 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4887 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4888 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4889 JUMPHERE(jump);
4890 }
4891 else
4892 #endif
4893 {
4894 #ifndef COMPILE_PCRE8
4895 /* TMP2 may be destroyed by peek_char. */
4896 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4897 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4898 #elif defined SUPPORT_UTF
4899 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4900 jump = NULL;
4901 if (common->utf)
4902 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4903 #endif
4904 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4905 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4906 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4907 #ifndef COMPILE_PCRE8
4908 JUMPHERE(jump);
4909 #elif defined SUPPORT_UTF
4910 if (jump != NULL)
4911 JUMPHERE(jump);
4912 #endif /* COMPILE_PCRE8 */
4913 }
4914 set_jumps(skipread_list, LABEL());
4915
/* Flag result: equal/zero is set when both sides have the same word-ness. */
4916 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4917 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4918 }
4919
/* Tries to emit a short compare sequence on TMP1 for the 256-bit class
bitmap "bits" instead of a bitmap lookup.

The bitmap is scanned for the code points at which membership flips; these
boundaries are collected in ranges[]. "nclass" tells whether characters of
256 and above count as members: a closing boundary at 256 is added when the
state of bit 255 disagrees with it. At most four boundaries are handled.

Returns TRUE when the test was emitted; the conditional jumps are appended to
*backtracks. Without "invert" they are taken for characters outside the
class, with "invert" for characters inside it. Returns FALSE, with nothing
emitted, when the class needs more boundaries than supported. */
4920 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4921 {
4922 /* May destroy TMP1. */
4923 DEFINE_COMPILER;
4924 int ranges[MAX_RANGE_SIZE];
4925 sljit_u8 bit, cbit, all;
4926 int i, byte, length = 0;
4927
4928 bit = bits[0] & 0x1;
4929 /* All bits will be zero or one (since bit is zero or one). */
4930 all = -bit;
4931
/* Collect the flip points. A whole bitmap byte is skipped at once when the
scan is byte aligned and the byte equals "all" (no flip inside it). */
4932 for (i = 0; i < 256; )
4933 {
4934 byte = i >> 3;
4935 if ((i & 0x7) == 0 && bits[byte] == all)
4936 i += 8;
4937 else
4938 {
4939 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4940 if (cbit != bit)
4941 {
4942 if (length >= MAX_RANGE_SIZE)
4943 return FALSE;
4944 ranges[length] = i;
4945 length++;
4946 bit = cbit;
4947 all = -cbit;
4948 }
4949 i++;
4950 }
4951 }
4952
/* "bit" is now the state of character 255; close the last range at 256 if
characters >= 256 have the opposite state. */
4953 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4954 {
4955 if (length >= MAX_RANGE_SIZE)
4956 return FALSE;
4957 ranges[length] = 256;
4958 length++;
4959 }
4960
4961 if (length < 0 || length > 4)
4962 return FALSE;
4963
/* From here on "bit" is the (possibly inverted) state of character 0. */
4964 bit = bits[0] & 0x1;
4965 if (invert) bit ^= 0x1;
4966
4967 /* No character is accepted. */
4968 if (length == 0 && bit == 0)
4969 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4970
4971 switch(length)
4972 {
4973 case 0:
4974 /* When bit != 0, all characters are accepted. */
4975 return TRUE;
4976
/* One boundary: a single compare against it. */
4977 case 1:
4978 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4979 return TRUE;
4980
/* Two boundaries: one range [ranges[0], ranges[1]). It is tested with a
subtract and an unsigned compare, or with a plain equality test when the
range holds a single character. */
4981 case 2:
4982 if (ranges[0] + 1 != ranges[1])
4983 {
4984 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4985 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4986 }
4987 else
4988 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4989 return TRUE;
4990
/* Three boundaries: an open-ended part plus one range. */
4991 case 3:
4992 if (bit != 0)
4993 {
4994 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4995 if (ranges[0] + 1 != ranges[1])
4996 {
4997 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4998 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4999 }
5000 else
5001 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5002 return TRUE;
5003 }
5004
5005 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
5006 if (ranges[1] + 1 != ranges[2])
5007 {
5008 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
5009 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5010 }
5011 else
5012 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
5013 return TRUE;
5014
5015 case 4:
/* Special case: two ranges of equal length whose start points differ in
exactly one bit (and the lower range does not contain that bit). Setting the
bit in TMP1 folds the lower range onto the upper one, so a single range test
covers both. */
5016 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
5017 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
5018 && (ranges[1] & (ranges[2] - ranges[0])) == 0
5019 && is_powerof2(ranges[2] - ranges[0]))
5020 {
5021 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
5022 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
5023 if (ranges[2] + 1 != ranges[3])
5024 {
5025 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
5026 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5027 }
5028 else
5029 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5030 return TRUE;
5031 }
5032
/* General case with bit != 0: test the two ranges one after the other. "i"
remembers how much has already been subtracted from TMP1 by the first test. */
5033 if (bit != 0)
5034 {
5035 i = 0;
5036 if (ranges[0] + 1 != ranges[1])
5037 {
5038 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5039 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5040 i = ranges[0];
5041 }
5042 else
5043 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5044
5045 if (ranges[2] + 1 != ranges[3])
5046 {
5047 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
5048 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5049 }
5050 else
5051 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
5052 return TRUE;
5053 }
5054
/* General case with bit == 0: first the outer span [ranges[0], ranges[3]),
then the gap [ranges[1], ranges[2]) inside it. TMP1 has already been reduced
by ranges[0] when the gap is tested. */
5055 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5056 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
5057 if (ranges[1] + 1 != ranges[2])
5058 {
5059 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
5060 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5061 }
5062 else
5063 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5064 return TRUE;
5065
/* Not reachable: length was limited to 0..4 above. */
5066 default:
5067 SLJIT_ASSERT_STOP();
5068 return FALSE;
5069 }
5070 }
5071
/* Emits the shared "any newline" test (entered with a fast call, the return
address is kept in RETURN_ADDR). Accepted values: 0x0a-0x0d and 0x85, plus
0x2028 and 0x2029 in the 16/32-bit libraries or in 8-bit UTF mode. */
5072 static void check_anynewline(compiler_common *common)
5073 {
/* The result is accumulated in TMP2 (non-zero when accepted) and the last
instruction also sets the flags from it. TMP1 is modified as well. */
5074 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
5075 DEFINE_COMPILER;
5076
5077 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5078
/* TMP1 -= 0x0a, so one unsigned compare covers 0x0a..0x0d. */
5079 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5080 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5081 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5082 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5083 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5084 #ifdef COMPILE_PCRE8
5085 if (common->utf)
5086 {
5087 #endif
/* Setting the lowest bit makes 0x2028 and 0x2029 compare equal to the same
constant. */
5088 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5089 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5090 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5091 #ifdef COMPILE_PCRE8
5092 }
5093 #endif
5094 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5095 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5096 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5097 }
5098
/* Emits the shared horizontal-space test (entered with a fast call, the
return address is kept in RETURN_ADDR). Accepted values: 0x09, 0x20 and 0xa0,
plus 0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000 in the
16/32-bit libraries or in 8-bit UTF mode. */
5099 static void check_hspace(compiler_common *common)
5100 {
5101 /* Check whether TMP1 contains a horizontal space character. The result is
accumulated in TMP2 (non-zero when accepted), whose previous value is
destroyed; the last instruction also sets the flags from it. */
5102 DEFINE_COMPILER;
5103
5104 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5105
5106 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
5107 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5108 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
5109 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5110 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
5111 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5112 #ifdef COMPILE_PCRE8
5113 if (common->utf)
5114 {
5115 #endif
5116 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5117 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
5118 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5119 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
5120 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* From here on TMP1 holds the character minus 0x2000, so the remaining
constants are written relative to 0x2000. */
5121 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
5122 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
5123 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5124 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
5125 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5126 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
5127 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5128 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
5129 #ifdef COMPILE_PCRE8
5130 }
5131 #endif
5132 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5133 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5134
5135 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5136 }
5137
/* Emits the shared vertical-space test (entered with a fast call, the return
address is kept in RETURN_ADDR). Accepted values: 0x0a-0x0d and 0x85, plus
0x2028 and 0x2029 in the 16/32-bit libraries or in 8-bit UTF mode. */
5138 static void check_vspace(compiler_common *common)
5139 {
5140 /* Check whether TMP1 contains a vertical space character. The result is
accumulated in TMP2 (non-zero when accepted), whose previous value is
destroyed; the last instruction also sets the flags from it. TMP1 is modified
as well. */
5141 DEFINE_COMPILER;
5142
5143 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5144
/* TMP1 -= 0x0a, so one unsigned compare covers 0x0a..0x0d. */
5145 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5146 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5147 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5148 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5149 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5150 #ifdef COMPILE_PCRE8
5151 if (common->utf)
5152 {
5153 #endif
/* Setting the lowest bit makes 0x2028 and 0x2029 compare equal to the same
constant. */
5154 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5155 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5156 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5157 #ifdef COMPILE_PCRE8
5158 }
5159 #endif
5160 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5161 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5162
5163 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5164 }
5165
5166 #define CHAR1 STR_END
5167 #define CHAR2 STACK_TOP
5168
/* Emits the shared case-sensitive compare routine (entered with a fast call,
the return address is kept in RETURN_ADDR). On entry TMP1 points to one
string and TMP2 holds the length to compare in bytes; STR_PTR is first moved
back by TMP2 and the two strings are then compared character by character.
The loop ends at the first difference or when TMP2 reaches zero. CHAR1
(STR_END) and CHAR2 (STACK_TOP) are borrowed as scratch registers; their
values are saved in TMP3 and LOCALS0 and restored before returning. */
5169 static void do_casefulcmp(compiler_common *common)
5170 {
5171 DEFINE_COMPILER;
5172 struct sljit_jump *jump;
5173 struct sljit_label *label;
5174
5175 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5176 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5177 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
5178 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers start one character early; presumably MOVU_UCHAR updates the
base register by the given offset on every load - confirm against its
definition. */
5179 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5180 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5181
5182 label = LABEL();
5183 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5184 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5185 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5186 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5187 JUMPTO(SLJIT_NOT_ZERO, label);
5188
/* Common exit for both "difference found" and "all characters compared". */
5189 JUMPHERE(jump);
5190 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5191 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
5192 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5193 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5194 }
5195
5196 #define LCC_TABLE STACK_LIMIT
5197
/* Emits the shared caseless compare routine (entered with a fast call, the
return address is kept in RETURN_ADDR). It has the same structure as
do_casefulcmp, but each character is first mapped through the common->lcc
table before the comparison; in the 16/32-bit libraries characters above 255
are compared unmapped. LCC_TABLE (STACK_LIMIT), CHAR1 (STR_END) and CHAR2
(STACK_TOP) are borrowed as scratch registers; their values are saved in
TMP3, LOCALS0 and LOCALS1 and restored before returning. */
5198 static void do_caselesscmp(compiler_common *common)
5199 {
5200 DEFINE_COMPILER;
5201 struct sljit_jump *jump;
5202 struct sljit_label *label;
5203
5204 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5205 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5206
5207 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
5208 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
5209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
5210 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers start one character early; presumably MOVU_UCHAR updates the
base register by the given offset on every load - confirm against its
definition. */
5211 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5212 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5213
5214 label = LABEL();
5215 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5216 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Map each character through the lcc table; values above 255 skip it. */
5217 #ifndef COMPILE_PCRE8
5218 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
5219 #endif
5220 OP1(SLJIT_MOV_U8, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
5221 #ifndef COMPILE_PCRE8
5222 JUMPHERE(jump);
5223 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
5224 #endif
5225 OP1(SLJIT_MOV_U8, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
5226 #ifndef COMPILE_PCRE8
5227 JUMPHERE(jump);
5228 #endif
5229 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5230 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5231 JUMPTO(SLJIT_NOT_ZERO, label);
5232
/* Common exit for both "difference found" and "all characters compared". */
5233 JUMPHERE(jump);
5234 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5235 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
5236 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5237 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5238 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5239 }
5240
5241 #undef LCC_TABLE
5242 #undef CHAR1
5243 #undef CHAR2
5244
5245 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5246
5247 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
5248 {
5249 /* This function would be ineffective to do in JIT level. */
5250 sljit_u32 c1, c2;
5251 const pcre_uchar *src2 = args->uchar_ptr;
5252 const pcre_uchar *end2 = args->end;
5253 const ucd_record *ur;
5254 const sljit_u32 *pp;
5255
5256 while (src1 < end1)
5257 {
5258 if (src2 >= end2)
5259 return (pcre_uchar*)1;
5260 GETCHARINC(c1, src1);
5261 GETCHARINC(c2, src2);
5262 ur = GET_UCD(c2);
5263 if (c1 != c2 && c1 != c2 + ur->other_case)
5264 {
5265 pp = PRIV(ucd_caseless_sets) + ur->caseset;
5266 for (;;)
5267 {
5268 if (c1 < *pp) return NULL;
5269 if (c1 == *pp++) break;
5270 }
5271 }
5272 }
5273 return src2;
5274 }
5275
5276 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5277
/* Emits the code that compares one pattern character (all of its code units
in UTF mode) against the subject, and returns the pattern pointer advanced
past that character.

Arguments:
  common      compiler state
  caseless    when TRUE and the character has an other case, the comparison
              ignores the single bit in which the two cases differ
  cc          points to the first code unit of the pattern character
  context     running state shared by the consecutive calls that make up one
              character sequence: remaining length, the register that will
              receive the next load (sourcereg, -1 before the first load) and,
              when unaligned reads are used, the code units gathered so far
  backtracks  receives the jumps taken when the subject does not match

The subject is addressed relative to STR_PTR with negative offsets
(-context->length), so the caller is expected to have moved STR_PTR past the
whole sequence already - NOTE(review): confirm against the caller. */
5278 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
5279 compare_context *context, jump_list **backtracks)
5280 {
5281 DEFINE_COMPILER;
5282 unsigned int othercasebit = 0;
5283 pcre_uchar *othercasechar = NULL;
5284 #ifdef SUPPORT_UTF
5285 int utflength;
5286 #endif
5287
5288 if (caseless && char_has_othercase(common, cc))
5289 {
5290 othercasebit = char_get_othercase_bit(common, cc);
5291 SLJIT_ASSERT(othercasebit);
5292 /* Extracting bit difference info. The upper part of the value selects the
code unit (othercasechar) that carries the differing bit; the lower part is
the bit mask itself. */
5293 #if defined COMPILE_PCRE8
5294 othercasechar = cc + (othercasebit >> 8);
5295 othercasebit &= 0xff;
5296 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5297 /* Note that this code only handles characters in the BMP. If there
5298 ever are characters outside the BMP whose othercase differs in only one
5299 bit from itself (there currently are none), this code will need to be
5300 revised for COMPILE_PCRE32. */
5301 othercasechar = cc + (othercasebit >> 9);
/* Bit 0x100 says that the 8-bit mask belongs to the upper byte of the unit. */
5302 if ((othercasebit & 0x100) != 0)
5303 othercasebit = (othercasebit & 0xff) << 8;
5304 else
5305 othercasebit &= 0xff;
5306 #endif /* COMPILE_PCRE[8|16|32] */
5307 }
5308
/* First call for this sequence: load the first chunk of the subject into
TMP1. The widest load that fits the remaining length is used when unaligned
reads are available. */
5309 if (context->sourcereg == -1)
5310 {
5311 #if defined COMPILE_PCRE8
5312 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5313 if (context->length >= 4)
5314 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5315 else if (context->length >= 2)
5316 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5317 else
5318 #endif
5319 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5320 #elif defined COMPILE_PCRE16
5321 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5322 if (context->length >= 4)
5323 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5324 else
5325 #endif
5326 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5327 #elif defined COMPILE_PCRE32
5328 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5329 #endif /* COMPILE_PCRE[8|16|32] */
/* sourcereg names the register for the NEXT load; it is swapped with the
other temporary before each comparison below, so the comparison uses the
register that was loaded previously. */
5330 context->sourcereg = TMP2;
5331 }
5332
5333 #ifdef SUPPORT_UTF
/* Number of code units that make up this character. */
5334 utflength = 1;
5335 if (common->utf && HAS_EXTRALEN(*cc))
5336 utflength += GET_EXTRALEN(*cc);
5337
5338 do
5339 {
5340 #endif
5341
/* One code unit is consumed per iteration. */
5342 context->length -= IN_UCHARS(1);
5343 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5344
5345 /* Unaligned read is supported. Expected code units (c) and their case masks
(oc) are gathered in the context and compared as one chunk. */
5346 if (othercasebit != 0 && othercasechar == cc)
5347 {
5348 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
5349 context->oc.asuchars[context->ucharptr] = othercasebit;
5350 }
5351 else
5352 {
5353 context->c.asuchars[context->ucharptr] = *cc;
5354 context->oc.asuchars[context->ucharptr] = 0;
5355 }
5356 context->ucharptr++;
5357
/* Flush when a full chunk has been gathered or the sequence is finished. In
8-bit mode a 2-unit chunk is also flushed when exactly one unit remains. */
5358 #if defined COMPILE_PCRE8
5359 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
5360 #else
5361 if (context->ucharptr >= 2 || context->length == 0)
5362 #endif
5363 {
/* Start loading the next chunk (if any) before comparing the current one. */
5364 if (context->length >= 4)
5365 OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5366 else if (context->length >= 2)
5367 OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5368 #if defined COMPILE_PCRE8
5369 else if (context->length >= 1)
5370 OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5371 #endif /* COMPILE_PCRE8 */
5372 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5373
/* The case labels are expressed in code units: 4, 2 or 1 bytes worth. */
5374 switch(context->ucharptr)
5375 {
5376 case 4 / sizeof(pcre_uchar):
5377 if (context->oc.asint != 0)
5378 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
5379 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
5380 break;
5381
5382 case 2 / sizeof(pcre_uchar):
5383 if (context->oc.asushort != 0)
5384 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
5385 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
5386 break;
5387
5388 #ifdef COMPILE_PCRE8
5389 case 1:
5390 if (context->oc.asbyte != 0)
5391 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
5392 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
5393 break;
5394 #endif
5395
5396 default:
5397 SLJIT_ASSERT_STOP();
5398 break;
5399 }
5400 context->ucharptr = 0;
5401 }
5402
5403 #else
5404
5405 /* Unaligned read is unsupported or in 32 bit mode. Each code unit is loaded
and compared on its own. */
5406 if (context->length >= 1)
5407 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5408
5409 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5410
5411 if (othercasebit != 0 && othercasechar == cc)
5412 {
5413 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
5414 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
5415 }
5416 else
5417 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
5418
5419 #endif
5420
5421 cc++;
5422 #ifdef SUPPORT_UTF
5423 utflength--;
5424 }
5425 while (utflength > 0);
5426 #endif
5427
5428 return cc;
5429 }
5430
5431 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5432
/* Rebases the value held in typereg so that it is relative to (value) instead
of the current typeoffset, emitting a single ADD or SUB only when the offset
actually changes, and then records the new offset. The enclosing function must
provide typereg and typeoffset. The argument is evaluated more than once, so
it must not have side effects. The body is wrapped in do { } while (0) so that
the macro behaves as one statement, also under an unbraced if/else. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
5442
/* Rebases the character held in TMP1 so that it is relative to (value)
instead of the current charoffset, emitting a single ADD or SUB only when the
offset actually changes, and then records the new offset. The enclosing
function must provide charoffset. The argument is evaluated more than once, so
it must not have side effects. The body is wrapped in do { } while (0) so that
the macro behaves as one statement, also under an unbraced if/else. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
5452
5453 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5454
5455 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5456 {
5457 DEFINE_COMPILER;
5458 jump_list *found = NULL;
5459 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5460 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5461 struct sljit_jump *jump = NULL;
5462 pcre_uchar *ccbegin;
5463 int compares, invertcmp, numberofcmps;
5464 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5465 BOOL utf = common->utf;
5466 #endif
5467
5468 #ifdef SUPPORT_UCP
5469 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5470 BOOL charsaved = FALSE;
5471 int typereg = TMP1;
5472 const sljit_u32 *other_cases;
5473 sljit_uw typeoffset;
5474 #endif
5475
5476 /* Scanning the necessary info. */
5477 cc++;
5478 ccbegin = cc;
5479 compares = 0;
5480 if (cc[-1] & XCL_MAP)
5481 {
5482 min = 0;
5483 cc += 32 / sizeof(pcre_uchar);
5484 }
5485
5486 while (*cc != XCL_END)
5487 {
5488 compares++;
5489 if (*cc == XCL_SINGLE)
5490 {
5491 cc ++;
5492 GETCHARINCTEST(c, cc);
5493 if (c > max) max = c;
5494 if (c < min) min = c;
5495 #ifdef SUPPORT_UCP
5496 needschar = TRUE;
5497 #endif
5498 }
5499 else if (*cc == XCL_RANGE)
5500 {
5501 cc ++;
5502 GETCHARINCTEST(c, cc);
5503 if (c < min) min = c;
5504 GETCHARINCTEST(c, cc);
5505 if (c > max) max = c;
5506 #ifdef SUPPORT_UCP
5507 needschar = TRUE;
5508 #endif
5509 }
5510 #ifdef SUPPORT_UCP
5511 else
5512 {
5513 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5514 cc++;
5515 if (*cc == PT_CLIST)
5516 {
5517 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5518 while (*other_cases != NOTACHAR)
5519 {
5520 if (*other_cases > max) max = *other_cases;
5521 if (*other_cases < min) min = *other_cases;
5522 other_cases++;
5523 }
5524 }
5525 else
5526 {
5527 max = READ_CHAR_MAX;
5528 min = 0;
5529 }
5530
5531 switch(*cc)
5532 {
5533 case PT_ANY:
5534 /* Any either accepts everything or ignored. */
5535 if (cc[-1] == XCL_PROP)
5536 {
5537 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5538 if (list == backtracks)
5539 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5540 return;
5541 }
5542 break;
5543
5544 case PT_LAMP:
5545 case PT_GC:
5546 case PT_PC:
5547 case PT_ALNUM:
5548 needstype = TRUE;
5549 break;
5550
5551 case PT_SC:
5552 needsscript = TRUE;
5553 break;
5554
5555 case PT_SPACE:
5556 case PT_PXSPACE:
5557 case PT_WORD:
5558 case PT_PXGRAPH:
5559 case PT_PXPRINT:
5560 case PT_PXPUNCT:
5561 needstype = TRUE;
5562 needschar = TRUE;
5563 break;
5564
5565 case PT_CLIST:
5566 case PT_UCNC:
5567 needschar = TRUE;
5568 break;
5569
5570 default:
5571 SLJIT_ASSERT_STOP();
5572 break;
5573 }
5574 cc += 2;
5575 }
5576 #endif
5577 }
5578 SLJIT_ASSERT(compares > 0);
5579
5580 /* We are not necessary in utf mode even in 8 bit mode. */
5581 cc = ccbegin;
5582 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5583
5584 if ((cc[-1] & XCL_HASPROP) == 0)
5585 {
5586 if ((cc[-1] & XCL_MAP) != 0)
5587 {
5588 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5589 if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
5590 {
5591 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5592 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5593 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5594 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5595 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5596 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5597 }
5598
5599 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5600 JUMPHERE(jump);
5601
5602 cc += 32 / sizeof(pcre_uchar);
5603 }
5604 else
5605 {
5606 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5607 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5608 }
5609 }
5610 else if ((cc[-1] & XCL_MAP) != 0)
5611 {
5612 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5613 #ifdef SUPPORT_UCP
5614 charsaved = TRUE;
5615 #endif
5616 if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
5617 {
5618 #ifdef COMPILE_PCRE8
5619 jump = NULL;
5620 if (common->utf)
5621 #endif
5622 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5623
5624 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5625 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5626 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5627 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5628 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5629 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5630
5631 #ifdef COMPILE_PCRE8
5632 if (common->utf)
5633 #endif
5634 JUMPHERE(jump);
5635 }
5636
5637 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5638 cc += 32 / sizeof(pcre_uchar);
5639 }
5640
5641 #ifdef SUPPORT_UCP
5642 if (needstype || needsscript)
5643 {
5644 if (needschar && !charsaved)
5645 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5646
5647 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5648 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5649 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5650 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5651 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5652 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5653 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5654
5655 /* Before anything else, we deal with scripts. */
5656 if (needsscript)
5657 {
5658 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5659 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5660
5661 ccbegin = cc;
5662
5663 while (*cc != XCL_END)
5664 {
5665 if (*cc == XCL_SINGLE)
5666 {
5667 cc ++;
5668 GETCHARINCTEST(c, cc);
5669 }
5670 else if (*cc == XCL_RANGE)
5671 {
5672 cc ++;
5673 GETCHARINCTEST(c, cc);
5674 GETCHARINCTEST(c, cc);
5675 }
5676 else
5677 {
5678 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5679 cc++;
5680 if (*cc == PT_SC)
5681 {
5682 compares--;
5683 invertcmp = (compares == 0 && list != backtracks);
5684 if (cc[-1] == XCL_NOTPROP)
5685 invertcmp ^= 0x1;
5686 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5687 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5688 }
5689 cc += 2;
5690 }
5691 }
5692
5693 cc = ccbegin;
5694 }
5695
5696 if (needschar)
5697 {
5698 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5699 }
5700
5701 if (needstype)
5702 {
5703 if (!needschar)
5704 {
5705 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5706 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5707 }
5708 else
5709 {
5710 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5711 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5712 typereg = RETURN_ADDR;
5713 }
5714 }
5715 }
5716 #endif
5717
5718 /* Generating code. */
5719 charoffset = 0;
5720 numberofcmps = 0;
5721 #ifdef SUPPORT_UCP
5722 typeoffset = 0;
5723 #endif
5724
5725 while (*cc != XCL_END)
5726 {
5727 compares--;
5728 invertcmp = (compares == 0 && list != backtracks);
5729 jump = NULL;
5730
5731 if (*cc == XCL_SINGLE)
5732 {
5733 cc ++;
5734 GETCHARINCTEST(c, cc);
5735
5736 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5737 {
5738 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5739 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5740 numberofcmps++;
5741 }
5742 else if (numberofcmps > 0)
5743 {
5744 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5745 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5746 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5747 numberofcmps = 0;
5748 }
5749 else
5750 {
5751 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5752 numberofcmps = 0;
5753 }
5754 }
5755 else if (*cc == XCL_RANGE)
5756 {
5757 cc ++;
5758 GETCHARINCTEST(c, cc);
5759 SET_CHAR_OFFSET(c);
5760 GETCHARINCTEST(c, cc);
5761
5762 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5763 {
5764 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5765 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5766 numberofcmps++;
5767 }
5768 else if (numberofcmps > 0)
5769 {
5770 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5771 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5772 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5773 numberofcmps = 0;
5774 }
5775 else
5776 {
5777 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5778 numberofcmps = 0;
5779 }
5780 }
5781 #ifdef SUPPORT_UCP
5782 else
5783 {
5784 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5785 if (*cc == XCL_NOTPROP)
5786 invertcmp ^= 0x1;
5787 cc++;
5788 switch(*cc)
5789 {
5790 case PT_ANY:
5791 if (!invertcmp)
5792 jump = JUMP(SLJIT_JUMP);
5793 break;
5794
5795 case PT_LAMP:
5796 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5797 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5798 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5799 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5800 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5801 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5802 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5803 break;
5804
5805 case PT_GC:
5806 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5807 SET_TYPE_OFFSET(c);
5808 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5809 break;
5810
5811 case PT_PC:
5812 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5813 break;
5814
5815 case PT_SC:
5816 compares++;
5817 /* Do nothing. */
5818 break;
5819
5820 case PT_SPACE:
5821 case PT_PXSPACE:
5822 SET_CHAR_OFFSET(9);
5823 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5824 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5825
5826 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5827 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5828
5829 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5830 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5831
5832 SET_TYPE_OFFSET(ucp_Zl);
5833 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5834 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5835 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5836 break;
5837
5838 case PT_WORD:
5839 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5840 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5841 /* Fall through. */
5842
5843 case PT_ALNUM:
5844 SET_TYPE_OFFSET(ucp_Ll);
5845 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5846 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5847 SET_TYPE_OFFSET(ucp_Nd);
5848 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5849 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5850 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5851 break;
5852
5853 case PT_CLIST:
5854 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5855
5856 /* At least three characters are required.
5857 Otherwise this case would be handled by the normal code path. */
5858 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5859 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5860
5861 /* Optimizing character pairs, if their difference is power of 2. */
5862 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5863 {
5864 if (charoffset == 0)
5865 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5866 else
5867 {
5868 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5869 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5870 }
5871 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5872 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5873 other_cases += 2;
5874 }
5875 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5876 {
5877 if (charoffset == 0)
5878 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5879 else
5880 {
5881 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5882 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5883 }
5884 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5885 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5886
5887 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5888 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5889
5890 other_cases += 3;
5891 }
5892 else
5893 {
5894 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5895 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5896 }
5897
5898 while (*other_cases != NOTACHAR)
5899 {
5900 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5901 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5902 }
5903 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5904 break;
5905
5906 case PT_UCNC:
5907 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5908 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5909 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5910 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5911 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5912 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5913
5914 SET_CHAR_OFFSET(0xa0);
5915 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5916 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5917 SET_CHAR_OFFSET(0);
5918 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5919 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5920 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5921 break;
5922
5923 case PT_PXGRAPH:
5924 /* C and Z groups are the farthest two groups. */
5925 SET_TYPE_OFFSET(ucp_Ll);
5926 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5927 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5928
5929 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5930
5931 /* In case of ucp_Cf, we overwrite the result. */
5932 SET_CHAR_OFFSET(0x2066);
5933 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5934 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5935
5936 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5937 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5938
5939 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5940 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5941
5942 JUMPHERE(jump);
5943 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5944 break;
5945
5946 case PT_PXPRINT:
5947 /* C and Z groups are the farthest two groups. */
5948 SET_TYPE_OFFSET(ucp_Ll);
5949 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5950 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5951
5952 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5953 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5954
5955 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5956
5957 /* In case of ucp_Cf, we overwrite the result. */
5958 SET_CHAR_OFFSET(0x2066);
5959 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5960 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5961
5962 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5963 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5964
5965 JUMPHERE(jump);
5966 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5967 break;
5968
5969 case PT_PXPUNCT:
5970 SET_TYPE_OFFSET(ucp_Sc);
5971 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5972 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5973
5974 SET_CHAR_OFFSET(0);
5975 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5976 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5977
5978 SET_TYPE_OFFSET(ucp_Pc);
5979 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5980 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5981 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5982 break;
5983
5984 default:
5985 SLJIT_ASSERT_STOP();
5986 break;
5987 }
5988 cc += 2;
5989 }
5990 #endif
5991
5992 if (jump != NULL)
5993 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5994 }
5995
5996 if (found != NULL)
5997 set_jumps(found, LABEL());
5998 }
5999
6000 #undef SET_TYPE_OFFSET
6001 #undef SET_CHAR_OFFSET
6002
6003 #endif
6004
/* Emits the matching-path code for the simple assertion opcodes handled by
the switch below (start/end of subject, word boundaries, the ^ and $ anchors
in both single-line and multiline form, and OP_REVERSE).

Arguments:
  common      compiler state
  type        the opcode to compile
  cc          pattern pointer just after the opcode
  backtracks  receives the jumps taken when the assertion fails

Returns the pattern pointer to continue from: cc for every opcode except
OP_REVERSE, which returns cc + LINK_SIZE to skip its length operand. */
6005 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
6006 {
6007 DEFINE_COMPILER;
6008 int length;
6009 struct sljit_jump *jump[4];
6010 #ifdef SUPPORT_UTF
6011 struct sljit_label *label;
6012 #endif /* SUPPORT_UTF */
6013
6014 switch(type)
6015 {
/* Fails unless STR_PTR equals the "begin" field of the arguments. */
6016 case OP_SOD:
6017 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6018 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6019 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6020 return cc;
6021
/* Fails unless STR_PTR equals the "str" field of the arguments. */
6022 case OP_SOM:
6023 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6024 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6025 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6026 return cc;
6027
/* The shared wordboundary routine is called; the flag it leaves decides:
OP_NOT_WORD_BOUNDARY backtracks on not-zero, OP_WORD_BOUNDARY on zero. */
6028 case OP_NOT_WORD_BOUNDARY:
6029 case OP_WORD_BOUNDARY:
6030 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
6031 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6032 return cc;
6033
/* Succeeds at the end of the subject (jump[0]) or when only a newline
sequence remains before the end. */
6034 case OP_EODN:
6035 /* Requires rather complex checks. */
6036 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6037 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6038 {
/* Fixed newline made of two code units: exactly two units must remain. */
6039 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6040 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6041 if (common->mode == JIT_COMPILE)
6042 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6043 else
6044 {
/* Other (partial matching) modes: when fewer than two units remain and the
one available equals the first newline unit, check_partial is invoked before
backtracking. */
6045 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
6046 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6047 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
6048 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6049 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
6050 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
6051 check_partial(common, TRUE);
6052 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6053 JUMPHERE(jump[1]);
6054 }
6055 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6056 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6057 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6058 }
6059 else if (common->nltype == NLTYPE_FIXED)
6060 {
/* Fixed single-unit newline: it must be the last unit of the subject. */
6061 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6062 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6063 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6064 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
6065 }
6066 else
6067 {
/* Remaining newline types: a CR is handled first. jump[2] is taken when CR
is the last unit, jump[3] when CR NL ends the subject. */
6068 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6069 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6070 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6071 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6072 jump[2] = JUMP(SLJIT_GREATER);
6073 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
6074 /* Equal. */
6075 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6076 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6077 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6078
/* Not a CR. */
6079 JUMPHERE(jump[1]);
6080 if (common->nltype == NLTYPE_ANYCRLF)
6081 {
6082 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6083 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
6084 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
6085 }
6086 else
6087 {
/* STR_PTR is saved in LOCALS1 around the character read, which must end
exactly at STR_END, and is restored once the shared anynewline routine has
accepted the character. */
6088 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
6089 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6090 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
6091 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
6092 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6093 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
6094 }
6095 JUMPHERE(jump[2]);
6096 JUMPHERE(jump[3]);
6097 }
6098 JUMPHERE(jump[0]);
6099 check_partial(common, FALSE);
6100 return cc;
6101
/* Fails while STR_PTR is still before STR_END. */
6102 case OP_EOD:
6103 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6104 check_partial(common, FALSE);
6105 return cc;
6106
/* Fails when the noteol argument is set; otherwise behaves like OP_EODN,
or like OP_EOD when common->endonly is set. */
6107 case OP_DOLL:
6108 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6109 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6110 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6111
6112 if (!common->endonly)
6113 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
6114 else
6115 {
6116 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6117 check_partial(common, FALSE);
6118 }
6119 return cc;
6120
/* At the end of the subject the result depends on noteol only (jump[0]);
elsewhere a newline must start at STR_PTR. */
6121 case OP_DOLLM:
6122 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6123 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6124 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6125 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6126 check_partial(common, FALSE);
6127 jump[0] = JUMP(SLJIT_JUMP);
6128 JUMPHERE(jump[1]);
6129
6130 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6131 {
6132 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6133 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6134 if (common->mode == JIT_COMPILE)
6135 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
6136 else
6137 {
6138 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
6139 /* STR_PTR = STR_END - IN_UCHARS(1) */
6140 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6141 check_partial(common, TRUE);
6142 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6143 JUMPHERE(jump[1]);
6144 }
6145
6146 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6147 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6148 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6149 }
6150 else
6151 {
6152 peek_char(common, common->nlmax);
6153 check_newlinechar(common, common->nltype, backtracks, FALSE);
6154 }
6155 JUMPHERE(jump[0]);
6156 return cc;
6157
/* Fails when STR_PTR is past "begin" or when the notbol argument is set. */
6158 case OP_CIRC:
6159 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6160 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6161 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
6162 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6163 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6164 return cc;
6165
/* At "begin" the result depends on notbol only (jump[0]); elsewhere STR_PTR
must be before STR_END and be preceded by a newline. */
6166 case OP_CIRCM:
6167 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6168 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6169 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
6170 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6171 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6172 jump[0] = JUMP(SLJIT_JUMP);
6173 JUMPHERE(jump[1]);
6174
6175 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6176 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6177 {
/* Two-unit newline: both units before STR_PTR must exist (not before
"begin", still held in TMP1) and match. */
6178 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6179 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
6180 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6181 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6182 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6183 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6184 }
6185 else
6186 {
/* Step back one character and read it again, then test it for a newline. */
6187 skip_char_back(common);
6188 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6189 check_newlinechar(common, common->nltype, backtracks, FALSE);
6190 }
6191 JUMPHERE(jump[0]);
6192 return cc;
6193
/* Moves STR_PTR back by the character count stored after the opcode. */
6194 case OP_REVERSE:
6195 length = GET(cc, 0);
6196 if (length == 0)
6197 return cc + LINK_SIZE;
6198 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6199 #ifdef SUPPORT_UTF
6200 if (common->utf)
6201 {
/* UTF mode: step back one character at a time, "length" times, and
backtrack as soon as STR_PTR is not past "begin" before a step. */
6202 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6203 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
6204 label = LABEL();
6205 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
6206 skip_char_back(common);
6207 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6208 JUMPTO(SLJIT_NOT_ZERO, label);
6209 }
6210 else
6211 #endif
6212 {
/* Fixed-width units: one subtraction, then a single bounds check. */
6213 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6214 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6215 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
6216 }
6217 check_start_used_ptr(common);
6218 return cc + LINK_SIZE;
6219 }
/* Every opcode handled above returns from inside the switch. */
6220 SLJIT_ASSERT_STOP();
6221 return cc;
6222 }
6223
/* Emits the matching-path code for one single-character item.

   common         the shared compiler state
   type           the opcode to compile (one of the cases of the switch below)
   cc             the data following the opcode; compile_charn_matchingpath()
                  passes the address just after the opcode
   backtracks     jump list that collects every "no match" jump emitted here
   check_str_ptr  when TRUE, detect_partial_match() is called before the
                  subject is read (the OP_CHAR / OP_CHARI fast path emits its
                  own STR_PTR > STR_END check instead)

   Returns cc advanced past the operand data that belongs to the opcode.
   An opcode that is not handled by the switch reaches SLJIT_ASSERT_STOP(). */
6224 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
6225 {
6226 DEFINE_COMPILER;
6227 int length;
6228 unsigned int c, oc, bit;
6229 compare_context context;
6230 struct sljit_jump *jump[3];
6231 jump_list *end_list;
6232 #ifdef SUPPORT_UTF
6233 struct sljit_label *label;
6234 #ifdef SUPPORT_UCP
6235 pcre_uchar propdata[5];
6236 #endif
6237 #endif /* SUPPORT_UTF */
6238
6239 switch(type)
6240 {
6241 case OP_NOT_DIGIT:
6242 case OP_DIGIT:
6243 /* Digits are usually 0-9, so it is worth to optimize them. */
6244 if (check_str_ptr)
6245 detect_partial_match(common, backtracks);
6246 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6247 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
6248 read_char7_type(common, type == OP_NOT_DIGIT);
6249 else
6250 #endif
6251 read_char8_type(common, type == OP_NOT_DIGIT);
6252 /* Flip the starting bit in the negative case. */
/* Test the ctype_digit bit of TMP1 (presumably the character type loaded by
   read_char*_type): OP_DIGIT backtracks when it is clear, OP_NOT_DIGIT when
   it is set. */
6253 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
6254 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6255 return cc;
6256
6257 case OP_NOT_WHITESPACE:
6258 case OP_WHITESPACE:
/* Same scheme as the digit case, using the ctype_space bit. */
6259 if (check_str_ptr)
6260 detect_partial_match(common, backtracks);
6261 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6262 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
6263 read_char7_type(common, type == OP_NOT_WHITESPACE);
6264 else
6265 #endif
6266 read_char8_type(common, type == OP_NOT_WHITESPACE);
6267 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
6268 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6269 return cc;
6270
6271 case OP_NOT_WORDCHAR:
6272 case OP_WORDCHAR:
/* Same scheme as the digit case, using the ctype_word bit. */
6273 if (check_str_ptr)
6274 detect_partial_match(common, backtracks);
6275 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6276 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
6277 read_char7_type(common, type == OP_NOT_WORDCHAR);
6278 else
6279 #endif
6280 read_char8_type(common, type == OP_NOT_WORDCHAR);
6281 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
6282 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6283 return cc;
6284
6285 case OP_ANY:
6286 if (check_str_ptr)
6287 detect_partial_match(common, backtracks);
6288 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6289 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6290 {
/* Fixed two-unit newline: the character just read matches unless it equals
   the high byte of the newline AND the following unit equals the low byte.
   When the subject ends after the first unit the second test is skipped. */
6291 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6292 end_list = NULL;
6293 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6294 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6295 else
6296 check_str_end(common, &end_list);
6297
6298 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6299 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
6300 set_jumps(end_list, LABEL());
6301 JUMPHERE(jump[0]);
6302 }
6303 else
6304 check_newlinechar(common, common->nltype, backtracks, TRUE);
6305 return cc;
6306
6307 case OP_ALLANY:
6308 if (check_str_ptr)
6309 detect_partial_match(common, backtracks);
6310 #ifdef SUPPORT_UTF
6311 if (common->utf)
6312 {
/* Load the first code unit and step over it; the 8-bit and 16-bit builds
   then skip the remaining units of a multi-unit character. */
6313 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6314 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6315 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6316 #if defined COMPILE_PCRE8
/* Units below 0xc0 need no further skipping; otherwise the utf8_table4 entry
   at index (unit - 0xc0) is added to STR_PTR. */
6317 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6318 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6319 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6320 #elif defined COMPILE_PCRE16
/* Units below 0xd800 need no further skipping; otherwise TMP1 becomes 1 when
   (unit & 0xfc00) == 0xd800 and 0 if not, and (TMP1 << 1) is added to
   STR_PTR. */
6321 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
6322 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
6323 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
6324 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
6325 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6326 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6327 #endif
6328 JUMPHERE(jump[0]);
6329 #endif /* COMPILE_PCRE[8|16] */
6330 return cc;
6331 }
6332 #endif
/* Non-UTF mode: every character is exactly one code unit. */
6333 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6334 return cc;
6335
6336 case OP_ANYBYTE:
6337 if (check_str_ptr)
6338 detect_partial_match(common, backtracks);
6339 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6340 return cc;
6341
6342 #ifdef SUPPORT_UTF
6343 #ifdef SUPPORT_UCP
6344 case OP_NOTPROP:
6345 case OP_PROP:
/* Wrap the two operand units in a temporary XCL_HASPROP ... XCL_END sequence
   and let compile_xclass_matchingpath() generate the test. */
6346 propdata[0] = XCL_HASPROP;
6347 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
6348 propdata[2] = cc[0];
6349 propdata[3] = cc[1];
6350 propdata[4] = XCL_END;
6351 if (check_str_ptr)
6352 detect_partial_match(common, backtracks);
6353 compile_xclass_matchingpath(common, propdata, backtracks);
6354 return cc + 2;
6355 #endif
6356 #endif
6357
6358 case OP_ANYNL:
6359 if (check_str_ptr)
6360 detect_partial_match(common, backtracks);
6361 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
/* A CR is accepted on its own, and a directly following NL is consumed with
   it. Any other character is validated by check_newlinechar(). */
6362 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6363 /* We don't need to handle soft partial matching case. */
6364 end_list = NULL;
6365 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6366 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6367 else
6368 check_str_end(common, &end_list);
6369 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6370 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6371 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6372 jump[2] = JUMP(SLJIT_JUMP);
6373 JUMPHERE(jump[0]);
6374 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
6375 set_jumps(end_list, LABEL());
6376 JUMPHERE(jump[1]);
6377 JUMPHERE(jump[2]);
6378 return cc;
6379
6380 case OP_NOT_HSPACE:
6381 case OP_HSPACE:
6382 if (check_str_ptr)
6383 detect_partial_match(common, backtracks);
6384 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
/* The routine reached through common->hspace is entered with a fast call.
   Afterwards OP_NOT_HSPACE backtracks on NOT_ZERO and OP_HSPACE on ZERO. */
6385 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
6386 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6387 return cc;
6388
6389 case OP_NOT_VSPACE:
6390 case OP_VSPACE:
6391 if (check_str_ptr)
6392 detect_partial_match(common, backtracks);
6393 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
/* Same scheme as the horizontal space case, using common->vspace. */
6394 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
6395 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6396 return cc;
6397
6398 #ifdef SUPPORT_UCP
6399 case OP_EXTUNI:
6400 if (check_str_ptr)
6401 detect_partial_match(common, backtracks);
/* Read the first character and fetch the gbprop field of its ucd record
   (presumably getucd leaves the record index in TMP2 - confirm). */
6402 read_char(common);
6403 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6404 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6405 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and reused to hold the gbprop value of the
   previous character for the duration of the loop. */
6406 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6407 OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6408
/* Loop: stop at the end of the subject; otherwise remember STR_PTR in TMP3,
   read the next character and load its gbprop value into TMP2. */
6409 label = LABEL();
6410 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6411 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6412 read_char(common);
6413 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6414 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6415 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6416
/* TMP1 = 32-bit ucp_gbtable entry selected by the previous value (index
   scaled by 4). The new value becomes the previous one, and the loop
   continues while bit (1 << new value) is set in that entry. */
6417 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
6418 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
6419 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
6420 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6421 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6422 JUMPTO(SLJIT_NOT_ZERO, label);
6423
/* The last character read is not part of the sequence: go back to the
   position saved before it, then restore STACK_TOP. */
6424 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6425 JUMPHERE(jump[0]);
6426 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6427
6428 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
6429 {
6430 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6431 /* Since we successfully read a char above, partial matching must occur. */
6432 check_partial(common, TRUE);
6433 JUMPHERE(jump[0]);
6434 }
6435 return cc;
6436 #endif
6437
6438 case OP_CHAR:
6439 case OP_CHARI:
/* length is the number of code units occupied by the pattern character. */
6440 length = 1;
6441 #ifdef SUPPORT_UTF
6442 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
6443 #endif
/* Fast path (JIT_COMPILE mode with a length check requested): advance
   STR_PTR by the whole character, backtrack when that passes STR_END, and
   compare through byte_sequence_compare(). A caseless character takes this
   path only if it has no other case or char_get_othercase_bit() is
   non-zero for it. */
6444 if (common->mode == JIT_COMPILE && check_str_ptr
6445 && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
6446 {
6447 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6448 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6449
6450 context.length = IN_UCHARS(length);
6451 context.sourcereg = -1;
6452 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6453 context.ucharptr = 0;
6454 #endif
6455 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
6456 }
6457
/* General path: read the subject character into TMP1 and compare it with the
   pattern character c (and with its other case oc when caseless). */
6458 if (check_str_ptr)
6459 detect_partial_match(common, backtracks);
6460 #ifdef SUPPORT_UTF
6461 if (common->utf)
6462 {
6463 GETCHAR(c, cc);
6464 }
6465 else
6466 #endif
6467 c = *cc;
6468
6469 if (type == OP_CHAR || !char_has_othercase(common, cc))
6470 {
6471 read_char_range(common, c, c, FALSE);
6472 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6473 return cc + length;
6474 }
6475 oc = char_othercase(common, c);
6476 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
6477 bit = c ^ oc;
/* When the two cases differ in exactly one bit, forcing that bit on reduces
   the test to a single comparison. */
6478 if (is_powerof2(bit))
6479 {
6480 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6481 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6482 return cc + length;
6483 }
6484 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
6485 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6486 JUMPHERE(jump[0]);
6487 return cc + length;
6488
6489 case OP_NOT:
6490 case OP_NOTI:
6491 if (check_str_ptr)
6492 detect_partial_match(common, backtracks);
6493 length = 1;
6494 #ifdef SUPPORT_UTF
6495 if (common->utf)
6496 {
6497 #ifdef COMPILE_PCRE8
6498 c = *cc;
6499 if (c < 128)
6500 {
/* The excluded character is a single byte below 128, so only the first
   subject byte has to be compared; the rest of the subject character is
   skipped afterwards without being decoded. */
6501 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6502 if (type == OP_NOT || !char_has_othercase(common, cc))
6503 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6504 else
6505 {
6506 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
6507 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
6508 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
6509 }
6510 /* Skip the variable-length character. */
6511 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6512 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6513 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6514 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6515 JUMPHERE(jump[0]);
6516 return cc + 1;
6517 }
6518 else
6519 #endif /* COMPILE_PCRE8 */
6520 {
6521 GETCHARLEN(c, cc, length);
6522 }
6523 }
6524 else
6525 #endif /* SUPPORT_UTF */
6526 c = *cc;
6527
/* General path: read the subject character and backtrack when it equals the
   excluded character (or its other case when caseless). */
6528 if (type == OP_NOT || !char_has_othercase(common, cc))
6529 {
6530 read_char_range(common, c, c, TRUE);
6531 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6532 }
6533 else
6534 {
6535 oc = char_othercase(common, c);
6536 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
6537 bit = c ^ oc;
6538 if (is_powerof2(bit))
6539 {
6540 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6541 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6542 }
6543 else
6544 {
6545 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6546 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6547 }
6548 }
6549 return cc + length;
6550
6551 case OP_CLASS:
6552 case OP_NCLASS:
6553 if (check_str_ptr)
6554 detect_partial_match(common, backtracks);
6555
/* In the UTF 8-bit build, bit is reused as the upper limit of the range
   passed to read_char_range(): 127 or 255. */
6556 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6557 bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
6558 read_char_range(common, 0, bit, type == OP_NCLASS);
6559 #else
6560 read_char_range(common, 0, 255, type == OP_NCLASS);
6561 #endif
6562
/* Nothing more is emitted here when check_class_ranges() returns non-zero. */
6563 if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
6564 return cc + 32 / sizeof(pcre_uchar);
6565
/* Characters above the limit are not looked up in the bitmap: OP_CLASS
   backtracks for them, OP_NCLASS jumps past the bitmap test. */
6566 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6567 jump[0] = NULL;
6568 if (common->utf)
6569 {
6570 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
6571 if (type == OP_CLASS)
6572 {
6573 add_jump(compiler, backtracks, jump[0]);
6574 jump[0] = NULL;
6575 }
6576 }
6577 #elif !defined COMPILE_PCRE8
6578 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6579 if (type == OP_CLASS)
6580 {
6581 add_jump(compiler, backtracks, jump[0]);
6582 jump[0] = NULL;
6583 }
6584 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
6585
/* Bitmap test: load the byte at cc + (TMP1 >> 3), build the mask
   1 << (TMP1 & 7) and backtrack when the masked bit is clear. */
6586 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6587 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6588 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6589 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6590 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6591 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6592
6593 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6594 if (jump[0] != NULL)
6595 JUMPHERE(jump[0]);
6596 #endif
6597 return cc + 32 / sizeof(pcre_uchar);
6598
6599 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6600 case OP_XCLASS:
/* The class data follows a LINK_SIZE field; the value read by GET(cc, 0)
   determines how far cc is advanced. */
6601 if (check_str_ptr)
6602 detect_partial_match(common, backtracks);
6603 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
6604 return cc + GET(cc, 0) - 1;
6605 #endif
6606 }
/* Reached only for an opcode that has no case above. */
6607 SLJIT_ASSERT_STOP();
6608 return cc;
6609 }
6610
6611 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6612 {
6613 /* This function consumes at least one input character. */
6614 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6615 DEFINE_COMPILER;
6616 pcre_uchar *ccbegin = cc;
6617 compare_context context;
6618 int size;
6619
6620 context.length = 0;
6621 do
6622 {
6623 if (cc >= ccend)
6624 break;
6625
6626 if (*cc == OP_CHAR)
6627 {
6628 size = 1;
6629 #ifdef SUPPORT_UTF
6630 if (common->utf && HAS_EXTRALEN(cc[1]))
6631 size += GET_EXTRALEN(cc[1]);
6632 #endif
6633 }
6634 else if (*cc == OP_CHARI)
6635 {
6636 size = 1;
6637 #ifdef SUPPORT_UTF
6638 if (common->utf)
6639 {
6640 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6641 size = 0;
6642 else if (HAS_EXTRALEN(cc[1]))
6643 size += GET_EXTRALEN(cc[1]);
6644 }
6645 else
6646 #endif
6647 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6648 size = 0;
6649 }
6650 else
6651 size = 0;
6652
6653 cc += 1 + size;
6654 context.length += IN_UCHARS(size);
6655 }
6656 while (size > 0 && context.length <= 128);
6657
6658 cc = ccbegin;
6659 if (context.length > 0)
6660 {
6661 /* We have a fixed-length byte sequence. */
6662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6663 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6664
6665 context.sourcereg = -1;
6666 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6667 context.ucharptr = 0;
6668 #endif
6669 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6670 return cc;
6671 }
6672
6673 /* A non-fixed length character will be checked if length == 0. */
6674 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6675 }
6676
6677 /* Forward definitions. */
6678 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
6679 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
6680
/* Obtains a backtrack record of `size` bytes through sljit_alloc_memory(),
clears it, stores `ccstart` in its cc field and links it in as the new top of
parent's list (the previous top becomes its prev).

The expansion site must have `compiler`, `backtrack` and `parent` in scope.
If the compiler reports an error after the allocation, the enclosing function
returns `error`. Note that `size` is evaluated twice. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
6693
/* Variant of the backtrack push for functions that return void: obtains a
record of `size` bytes through sljit_alloc_memory(), clears it, stores
`ccstart` in its cc field and links it in as the new top of parent's list.

The expansion site must have `compiler`, `backtrack` and `parent` in scope.
If the compiler reports an error after the allocation, the enclosing function
returns immediately. Note that `size` is evaluated twice. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
6706
/* Casts the `backtrack` pointer that is in scope at the expansion site to a
pointer to `type`. */
6707 #define BACKTRACK_AS(type) ((type *)backtrack)
6708
6709 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
6710 {
6711 /* The OVECTOR offset goes to TMP2. */
6712 DEFINE_COMPILER;
6713 int count = GET2(cc, 1 + IMM2_SIZE);
6714 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
6715 unsigned int offset;
6716 jump_list *found = NULL;
6717
6718 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
6719
6720 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
6721
6722 count--;
6723 while (count-- > 0)
6724 {
6725 offset = GET2(slot, 0) << 1;
6726 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6727 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6728 slot += common->name_entry_size;
6729 }
6730
6731 offset = GET2(slot, 0) << 1;
6732 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6733 if (backtracks != NULL && !common->jscript_compat)
6734 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6735
6736 set_jumps(found, LABEL());
6737 }
6738
6739 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6740 {
6741 DEFINE_COMPILER;
6742 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6743 int offset = 0;
6744 struct sljit_jump *jump = NULL;
6745 struct sljit_jump *partial;
6746 struct sljit_jump *nopartial;
6747
6748 if (ref)
6749 {
6750 offset = GET2(cc, 1) << 1;
6751 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6752 /* OVECTOR(1) contains the "string begin - 1" constant. */
6753 if (withchecks && !common->jscript_compat)
6754 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6755 }
6756 else
6757 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6758
6759 #if defined SUPPORT_UTF && defined SUPPORT_UCP
6760 if (common->utf && *cc == OP_REFI)
6761 {
6762 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
6763 if (ref)
6764 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6765 else
6766 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6767
6768 if (withchecks)
6769 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6770
6771 /* Needed to save important temporary registers. */
6772 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6773 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6774 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6775 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6776 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6777 if (common->mode == JIT_COMPILE)
6778 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6779 else
6780 {
6781 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6782 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6783 check_partial(common, FALSE);
6784 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6785 JUMPHERE(nopartial);
6786 }
6787 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6788 }
6789 else
6790 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6791 {
6792 if (ref)
6793 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6794 else
6795 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6796
6797 if (withchecks)
6798 jump = JUMP(SLJIT_ZERO);
6799
6800 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6801 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6802 if (common->mode == JIT_COMPILE)
6803 add_jump(compiler, backtracks, partial);
6804
6805 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6806 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6807
6808 if (common->mode != JIT_COMPILE)
6809 {
6810 nopartial = JUMP(SLJIT_JUMP);
6811 JUMPHERE(partial);
6812 /* TMP2 -= STR_END - STR_PTR */
6813 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6814 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6815 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6816 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6817 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6818 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6819 JUMPHERE(partial);
6820 check_partial(common