/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1634 - (show annotations)
Fri Feb 19 08:10:18 2016 UTC (3 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 359096 byte(s)
Minor JIT fixes.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the Perl-compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (see struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 suppose we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block: functions of type jit_function take a pointer to this
structure. Pointer members are grouped ahead of the narrower scalar members.
NOTE(review): the meaning of the individual members is only suggested by their
names here; confirm against the code that fills the structure in. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Bookkeeping record for compiled code. Each of the three arrays has one slot
per compile mode (JIT_NUMBER_OF_COMPILE_MODES entries): a function pointer, the
head of that mode's read-only data, and a size. The remaining members hold a
callback with its user data and two unsigned values.
NOTE(review): who sets callback / userdata / top_bracket / limit_match is not
visible in this part of the file. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 sljit_ui top_bracket;
187 sljit_ui limit_match;
188 } executable_functions;
189
/* Node of a singly linked list of sljit jumps; 'next' links to the following
node. */
190 typedef struct jump_list {
191 struct sljit_jump *jump;
192 struct jump_list *next;
193 } jump_list;
194
/* Node of a singly linked list pairing a jump ('start') with a label ('quit').
NOTE(review): presumably 'start' enters an out-of-line stub and 'quit' is where
it resumes; confirm at the code that emits the stubs. */
195 typedef struct stub_list {
196 struct sljit_jump *start;
197 struct sljit_label *quit;
198 struct stub_list *next;
199 } stub_list;
200
/* Node of a singly linked list pairing a label with the location of a word
('update_addr').
NOTE(review): presumably the word is overwritten with the label's final address
once code generation has finished; confirm at the use site. */
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
/* Negative sentinel values.
NOTE(review): presumably used in place of a real (non-negative) frame size, in
line with the "less than 0 if a frame is not needed" notes on the framesize
members of the backtrack structures; confirm in get_framesize. */
207 enum frame_types {
208 no_frame = -1,
209 no_stack = -2
210 };
211
/* Tags for two kinds of entries: marks and then-traps.
NOTE(review): presumably these tag the entries of the control verb chain
(see control_head_ptr in compiler_common); confirm at the use sites. */
212 enum control_types {
213 type_mark = 0,
214 type_then_trap = 1
215 };
216
/* Pointer to a function that uses the SLJIT_CALL calling convention, takes a
jit_arguments block and returns an int. */
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. It must be the first member
222 of every structure derived from it (all the *_backtrack structures below
   embed it as their leading 'common' member). */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
/* Backtrack record named for assertions. Begins with backtrack_common, as that
structure requires. */
234 typedef struct assert_backtrack {
235 backtrack_common common;
/* List of jumps; NOTE(review): presumably taken when the assertion used as a
   condition fails - confirm at the use site. */
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
/* Backtrack record named for bracket groups. Begins with backtrack_common, as
that structure requires. Only one member of the union 'u' is meaningful at a
time; the comments inside say which opcodes use which member. */
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to continue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
/* Backtrack record named for the possessive bracket opcodes (the *POS
variants). Begins with backtrack_common, as that structure requires. */
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
/* Backtrack record named for OP_BRAMINZERO: backtrack_common plus one label on
the matching path. */
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
/* Backtrack record for character iterators. Begins with backtrack_common, as
that structure requires. The union holds either a jump list or the 'charpos'
data (a character, its other-case bit and an enable flag).
NOTE(review): which iterators use which union member is decided elsewhere in
the file. */
280 typedef struct char_iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 union {
285 jump_list *backtracks;
286 struct {
287 unsigned int othercasebit;
288 pcre_uchar chr;
289 BOOL enabled;
290 } charpos;
291 } u;
292 } char_iterator_backtrack;
293
/* Backtrack record named for repeated back references: backtrack_common plus
the label of the next iteration. */
294 typedef struct ref_iterator_backtrack {
295 backtrack_common common;
296 /* Next iteration. */
297 struct sljit_label *matchingpath;
298 } ref_iterator_backtrack;
299
/* Node of a singly linked list with one entry per recursion target (the list
head is kept in compiler_common.entries). */
300 typedef struct recurse_entry {
301 struct recurse_entry *next;
302 /* Contains the function entry. */
303 struct sljit_label *entry;
304 /* Collects the calls until the function is created. */
305 jump_list *calls;
306 /* Points to the starting opcode. */
307 sljit_sw start;
308 } recurse_entry;
309
/* Backtrack record named for OP_RECURSE: backtrack_common plus a flag.
NOTE(review): presumably the flag says the recursed pattern was compiled in
place rather than called; confirm at the use site. */
310 typedef struct recurse_backtrack {
311 backtrack_common common;
312 BOOL inlined_pattern;
313 } recurse_backtrack;
314
/* Pseudo opcode that reuses the value OP_TABLE_LENGTH. */
315 #define OP_THEN_TRAP OP_TABLE_LENGTH
316
/* Backtrack record for a then-trap. Begins with backtrack_common, as that
structure requires. */
317 typedef struct then_trap_backtrack {
318 backtrack_common common;
319 /* If then_trap is not NULL, this structure contains the real
320 then_trap for the backtracking path. */
321 struct then_trap_backtrack *then_trap;
322 /* Points to the starting opcode. */
323 sljit_sw start;
324 /* Exit point for the then opcodes of this alternative. */
325 jump_list *quit;
326 /* Frame size of the current alternative. */
327 int framesize;
328 } then_trap_backtrack;
329
/* NOTE(review): not referenced in the visible part of the file; presumably an
upper bound on the number of range boundaries handled by a character-range
optimization - confirm at its use sites. */
330 #define MAX_RANGE_SIZE 4
331
/* State shared by the code generator helpers in this file, which receive it as
their 'common' argument. The sljit_si members ending in _ptr / _start hold
offsets (see the OVECTOR and OVECTOR_PRIV macros, which add to ovector_start
and cbra_ptr); check_opcode_types treats a value of 0 in recursive_head_ptr,
capture_last_ptr and mark_ptr as "not assigned yet". */
332 typedef struct compiler_common {
333 /* The sljit generic compiler. */
334 struct sljit_compiler *compiler;
335 /* First byte code. */
336 pcre_uchar *start;
337 /* Maps private data offset to each opcode. */
338 sljit_si *private_data_ptrs;
339 /* Chain list of read-only data ptrs. */
340 void *read_only_data_head;
341 /* Tells whether the capturing bracket is optimized. */
342 sljit_ub *optimized_cbracket;
343 /* Tells whether the starting offset is a target of then. */
344 sljit_ub *then_offsets;
345 /* Current position where a THEN must jump. */
346 then_trap_backtrack *then_trap;
347 /* Starting offset of private data for capturing brackets. */
348 sljit_si cbra_ptr;
349 /* Output vector starting point. Must be divisible by 2. */
350 sljit_si ovector_start;
351 /* Points to the starting character of the current match. */
352 sljit_si start_ptr;
353 /* Last known position of the requested byte. */
354 sljit_si req_char_ptr;
355 /* Head of the last recursion. */
356 sljit_si recursive_head_ptr;
357 /* First inspected character for partial matching.
358 (Needed for avoiding zero length partial matches.) */
359 sljit_si start_used_ptr;
360 /* Starting pointer for partial soft matches. */
361 sljit_si hit_start;
362 /* Pointer of the match end position. */
363 sljit_si match_end_ptr;
364 /* Points to the marked string. */
365 sljit_si mark_ptr;
366 /* Recursive control verb management chain. */
367 sljit_si control_head_ptr;
368 /* Points to the last matched capture block index. */
369 sljit_si capture_last_ptr;
370 /* Fast forward skipping byte code pointer. */
371 pcre_uchar *fast_forward_bc_ptr;
372 /* Locals used by fast fail optimization. */
373 sljit_si fast_fail_start_ptr;
374 sljit_si fast_fail_end_ptr;
375
376 /* Flipped and lower case tables. */
377 const sljit_ub *fcc;
378 sljit_sw lcc;
379 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
380 int mode;
381 /* NOTE(review): originally documented as "TRUE, when minlength is greater
   than 0". The member name, and the fact that check_opcode_types sets it to
   TRUE on OP_SET_SOM, suggest the opposite (the match may be empty); confirm
   where it is initialised. */
382 BOOL might_be_empty;
383 /* \K is found in the pattern. */
384 BOOL has_set_som;
385 /* (*SKIP:arg) is found in the pattern. */
386 BOOL has_skip_arg;
387 /* (*THEN) is found in the pattern. */
388 BOOL has_then;
389 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
390 BOOL has_skip_in_assert_back;
391 /* Currently in recurse or negative assert. */
392 BOOL local_exit;
393 /* Currently in a positive assert. */
394 BOOL positive_assert;
395 /* Newline control. */
396 int nltype;
397 sljit_ui nlmax;
398 sljit_ui nlmin;
399 int newline;
400 int bsr_nltype;
401 sljit_ui bsr_nlmax;
402 sljit_ui bsr_nlmin;
403 /* Dollar endonly. */
404 int endonly;
405 /* Tables. */
406 sljit_sw ctypes;
407 /* Named capturing brackets. */
408 pcre_uchar *name_table;
409 sljit_sw name_count;
410 sljit_sw name_entry_size;
411
412 /* Labels and jump lists. */
413 struct sljit_label *partialmatchlabel;
414 struct sljit_label *quit_label;
415 struct sljit_label *forced_quit_label;
416 struct sljit_label *accept_label;
417 struct sljit_label *ff_newline_shortcut;
418 stub_list *stubs;
419 label_addr_list *label_addrs;
420 recurse_entry *entries;
421 recurse_entry *currententry;
422 jump_list *partialmatch;
423 jump_list *quit;
424 jump_list *positive_assert_quit;
425 jump_list *forced_quit;
426 jump_list *accept;
427 jump_list *calllimit;
428 jump_list *stackalloc;
429 jump_list *revertframes;
430 jump_list *wordboundary;
431 jump_list *anynewline;
432 jump_list *hspace;
433 jump_list *vspace;
434 jump_list *casefulcmp;
435 jump_list *caselesscmp;
436 jump_list *reset_match;
437 BOOL jscript_compat;
438 #ifdef SUPPORT_UTF
439 BOOL utf;
440 #ifdef SUPPORT_UCP
441 BOOL use_ucp;
442 jump_list *getucd;
443 #endif
444 #ifdef COMPILE_PCRE8
445 jump_list *utfreadchar;
446 jump_list *utfreadchar16;
447 jump_list *utfreadtype8;
448 #endif
449 #endif /* SUPPORT_UTF */
450 } compiler_common;
451
452 /* For byte_sequence_compare. */
453
/* 'length' and 'sourcereg' are always present. The remaining members exist
only when SLJIT_UNALIGNED is set: 'ucharptr' plus two unions of identical
layout ('c' and 'oc') that overlay an int, an unsigned short and an array of
code units whose element type and count depend on the PCRE code unit width
(4 x 8 bit, 2 x 16 bit or 1 x 32 bit).
NOTE(review): presumably 'c' collects the characters to compare and 'oc' the
matching other-case bits; confirm in byte_sequence_compare. */
454 typedef struct compare_context {
455 int length;
456 int sourcereg;
457 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
458 int ucharptr;
459 union {
460 sljit_si asint;
461 sljit_uh asushort;
462 #if defined COMPILE_PCRE8
463 sljit_ub asbyte;
464 sljit_ub asuchars[4];
465 #elif defined COMPILE_PCRE16
466 sljit_uh asuchars[2];
467 #elif defined COMPILE_PCRE32
468 sljit_ui asuchars[1];
469 #endif
470 } c;
471 union {
472 sljit_si asint;
473 sljit_uh asushort;
474 #if defined COMPILE_PCRE8
475 sljit_ub asbyte;
476 sljit_ub asuchars[4];
477 #elif defined COMPILE_PCRE16
478 sljit_uh asuchars[2];
479 #elif defined COMPILE_PCRE32
480 sljit_ui asuchars[1];
481 #endif
482 } oc;
483 #endif
484 } compare_context;
485
486 /* Undefine sljit macros. CMP is defined again further down in this file as
   a shortcut around sljit_emit_cmp. */
487 #undef CMP
488
489 /* Used for accessing the elements of the stack. Yields a negative byte
   offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw),
   and so on. */
490 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
491
/* Symbolic names for the sljit registers used throughout this file.
NOTE(review): in sljit the SLJIT_R* registers are scratch registers and the
SLJIT_S* registers are saved across calls; confirm in sljitLir.h. */
492 #define TMP1 SLJIT_R0
493 #define TMP2 SLJIT_R2
494 #define TMP3 SLJIT_R3
495 #define STR_PTR SLJIT_S0
496 #define STR_END SLJIT_S1
497 #define STACK_TOP SLJIT_R1
498 #define STACK_LIMIT SLJIT_S2
499 #define COUNT_MATCH SLJIT_S3
500 #define ARGUMENTS SLJIT_S4
501 #define RETURN_ADDR SLJIT_R4
502
503 /* Local space layout. The first five machine words (indexes 0 to 4) have
   fixed roles, listed below. */
504 /* These two locals can be used by the current opcode. */
505 #define LOCALS0 (0 * sizeof(sljit_sw))
506 #define LOCALS1 (1 * sizeof(sljit_sw))
507 /* Two local variables for possessive quantifiers (char1 cannot use them). */
508 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
509 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
510 /* Max limit of recursions. */
511 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
512 /* The output vector is stored on the stack, and contains pointers
513 to characters. The vector data is divided into two groups: the first
514 group contains the start / end character pointers, and the second is
515 the start pointers when the end of the capturing group has not yet been
   reached. All four macros below expect a compiler_common pointer named
   'common' to be in scope. */
516 #define OVECTOR_START (common->ovector_start)
/* Offset of the i-th word of the first group. */
517 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
/* Offset of the i-th word of the second group (based at common->cbra_ptr). */
518 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data slot of the opcode at cc, indexed by its distance from the
   first byte code. */
519 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
520
/* Selects the sljit move opcodes that match the code unit width of the library
being built: unsigned byte for the 8 bit library, unsigned half for 16 bit and
unsigned int for 32 bit. Building without any COMPILE_PCRE* macro is an error.
NOTE(review): the MOVU_ forms are the sljit "move with update" variants;
confirm their exact semantics in sljitLir.h. */
521 #if defined COMPILE_PCRE8
522 #define MOV_UCHAR SLJIT_MOV_UB
523 #define MOVU_UCHAR SLJIT_MOVU_UB
524 #elif defined COMPILE_PCRE16
525 #define MOV_UCHAR SLJIT_MOV_UH
526 #define MOVU_UCHAR SLJIT_MOVU_UH
527 #elif defined COMPILE_PCRE32
528 #define MOV_UCHAR SLJIT_MOV_UI
529 #define MOVU_UCHAR SLJIT_MOVU_UI
530 #else
531 #error Unsupported compiling mode
532 #endif
533
534 /* Shortcuts. Apart from DEFINE_COMPILER itself, each of the following
   wraps one or two sljit calls and expects a local variable named 'compiler'
   to be in scope. */
/* Declares that local 'compiler' from a compiler_common pointer named
   'common'. */
535 #define DEFINE_COMPILER \
536 struct sljit_compiler *compiler = common->compiler
537 #define OP1(op, dst, dstw, src, srcw) \
538 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
539 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
540 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
/* Emits a label at the current position. */
541 #define LABEL() \
542 sljit_emit_label(compiler)
/* Emits a jump whose target is set later. */
543 #define JUMP(type) \
544 sljit_emit_jump(compiler, (type))
/* Emits a jump to an already existing label. */
545 #define JUMPTO(type, label) \
546 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a label at the current position and makes a previously emitted jump
   target it. */
547 #define JUMPHERE(jump) \
548 sljit_set_label((jump), sljit_emit_label(compiler))
549 #define SET_LABEL(jump, label) \
550 sljit_set_label((jump), (label))
/* Emits a compare-and-jump whose target is set later. */
551 #define CMP(type, src1, src1w, src2, src2w) \
552 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump to an already existing label. */
553 #define CMPTO(type, src1, src1w, src2, src2w, label) \
554 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
555 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
556 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
557 #define GET_LOCAL_BASE(dst, dstw, offset) \
558 sljit_get_local_base(compiler, (dst), (dstw), (offset))
559
/* NOTE(review): presumably the "no upper limit" value for the character
   reading helpers; its use is outside the visible part of the file. */
560 #define READ_CHAR_MAX 0x7fffffff
561
562 static pcre_uchar *bracketend(pcre_uchar *cc)
563 {
564 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
565 do cc += GET(cc, 1); while (*cc == OP_ALT);
566 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
567 cc += 1 + LINK_SIZE;
568 return cc;
569 }
570
571 static int no_alternatives(pcre_uchar *cc)
572 {
573 int count = 0;
574 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
575 do
576 {
577 cc += GET(cc, 1);
578 count++;
579 }
580 while (*cc == OP_ALT);
581 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
582 return count;
583 }
584
585 /* Functions which might need modification for every newly supported opcode:
586 next_opcode
587 check_opcode_types
588 set_private_data_ptrs
589 get_framesize
590 init_frame
591 get_private_data_copy_length
592 copy_private_data
593 compile_matchingpath
594 compile_backtrackingpath
595 */
596
597 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
598 {
599 SLJIT_UNUSED_ARG(common);
600 switch(*cc)
601 {
602 case OP_SOD:
603 case OP_SOM:
604 case OP_SET_SOM:
605 case OP_NOT_WORD_BOUNDARY:
606 case OP_WORD_BOUNDARY:
607 case OP_NOT_DIGIT:
608 case OP_DIGIT:
609 case OP_NOT_WHITESPACE:
610 case OP_WHITESPACE:
611 case OP_NOT_WORDCHAR:
612 case OP_WORDCHAR:
613 case OP_ANY:
614 case OP_ALLANY:
615 case OP_NOTPROP:
616 case OP_PROP:
617 case OP_ANYNL:
618 case OP_NOT_HSPACE:
619 case OP_HSPACE:
620 case OP_NOT_VSPACE:
621 case OP_VSPACE:
622 case OP_EXTUNI:
623 case OP_EODN:
624 case OP_EOD:
625 case OP_CIRC:
626 case OP_CIRCM:
627 case OP_DOLL:
628 case OP_DOLLM:
629 case OP_CRSTAR:
630 case OP_CRMINSTAR:
631 case OP_CRPLUS:
632 case OP_CRMINPLUS:
633 case OP_CRQUERY:
634 case OP_CRMINQUERY:
635 case OP_CRRANGE:
636 case OP_CRMINRANGE:
637 case OP_CRPOSSTAR:
638 case OP_CRPOSPLUS:
639 case OP_CRPOSQUERY:
640 case OP_CRPOSRANGE:
641 case OP_CLASS:
642 case OP_NCLASS:
643 case OP_REF:
644 case OP_REFI:
645 case OP_DNREF:
646 case OP_DNREFI:
647 case OP_RECURSE:
648 case OP_CALLOUT:
649 case OP_ALT:
650 case OP_KET:
651 case OP_KETRMAX:
652 case OP_KETRMIN:
653 case OP_KETRPOS:
654 case OP_REVERSE:
655 case OP_ASSERT:
656 case OP_ASSERT_NOT:
657 case OP_ASSERTBACK:
658 case OP_ASSERTBACK_NOT:
659 case OP_ONCE:
660 case OP_ONCE_NC:
661 case OP_BRA:
662 case OP_BRAPOS:
663 case OP_CBRA:
664 case OP_CBRAPOS:
665 case OP_COND:
666 case OP_SBRA:
667 case OP_SBRAPOS:
668 case OP_SCBRA:
669 case OP_SCBRAPOS:
670 case OP_SCOND:
671 case OP_CREF:
672 case OP_DNCREF:
673 case OP_RREF:
674 case OP_DNRREF:
675 case OP_DEF:
676 case OP_BRAZERO:
677 case OP_BRAMINZERO:
678 case OP_BRAPOSZERO:
679 case OP_PRUNE:
680 case OP_SKIP:
681 case OP_THEN:
682 case OP_COMMIT:
683 case OP_FAIL:
684 case OP_ACCEPT:
685 case OP_ASSERT_ACCEPT:
686 case OP_CLOSE:
687 case OP_SKIPZERO:
688 return cc + PRIV(OP_lengths)[*cc];
689
690 case OP_CHAR:
691 case OP_CHARI:
692 case OP_NOT:
693 case OP_NOTI:
694 case OP_STAR:
695 case OP_MINSTAR:
696 case OP_PLUS:
697 case OP_MINPLUS:
698 case OP_QUERY:
699 case OP_MINQUERY:
700 case OP_UPTO:
701 case OP_MINUPTO:
702 case OP_EXACT:
703 case OP_POSSTAR:
704 case OP_POSPLUS:
705 case OP_POSQUERY:
706 case OP_POSUPTO:
707 case OP_STARI:
708 case OP_MINSTARI:
709 case OP_PLUSI:
710 case OP_MINPLUSI:
711 case OP_QUERYI:
712 case OP_MINQUERYI:
713 case OP_UPTOI:
714 case OP_MINUPTOI:
715 case OP_EXACTI:
716 case OP_POSSTARI:
717 case OP_POSPLUSI:
718 case OP_POSQUERYI:
719 case OP_POSUPTOI:
720 case OP_NOTSTAR:
721 case OP_NOTMINSTAR:
722 case OP_NOTPLUS:
723 case OP_NOTMINPLUS:
724 case OP_NOTQUERY:
725 case OP_NOTMINQUERY:
726 case OP_NOTUPTO:
727 case OP_NOTMINUPTO:
728 case OP_NOTEXACT:
729 case OP_NOTPOSSTAR:
730 case OP_NOTPOSPLUS:
731 case OP_NOTPOSQUERY:
732 case OP_NOTPOSUPTO:
733 case OP_NOTSTARI:
734 case OP_NOTMINSTARI:
735 case OP_NOTPLUSI:
736 case OP_NOTMINPLUSI:
737 case OP_NOTQUERYI:
738 case OP_NOTMINQUERYI:
739 case OP_NOTUPTOI:
740 case OP_NOTMINUPTOI:
741 case OP_NOTEXACTI:
742 case OP_NOTPOSSTARI:
743 case OP_NOTPOSPLUSI:
744 case OP_NOTPOSQUERYI:
745 case OP_NOTPOSUPTOI:
746 cc += PRIV(OP_lengths)[*cc];
747 #ifdef SUPPORT_UTF
748 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
749 #endif
750 return cc;
751
752 /* Special cases. */
753 case OP_TYPESTAR:
754 case OP_TYPEMINSTAR:
755 case OP_TYPEPLUS:
756 case OP_TYPEMINPLUS:
757 case OP_TYPEQUERY:
758 case OP_TYPEMINQUERY:
759 case OP_TYPEUPTO:
760 case OP_TYPEMINUPTO:
761 case OP_TYPEEXACT:
762 case OP_TYPEPOSSTAR:
763 case OP_TYPEPOSPLUS:
764 case OP_TYPEPOSQUERY:
765 case OP_TYPEPOSUPTO:
766 return cc + PRIV(OP_lengths)[*cc] - 1;
767
768 case OP_ANYBYTE:
769 #ifdef SUPPORT_UTF
770 if (common->utf) return NULL;
771 #endif
772 return cc + 1;
773
774 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
775 case OP_XCLASS:
776 return cc + GET(cc, 1);
777 #endif
778
779 case OP_MARK:
780 case OP_PRUNE_ARG:
781 case OP_SKIP_ARG:
782 case OP_THEN_ARG:
783 return cc + 1 + 2 + cc[1];
784
785 default:
786 /* All opcodes are supported now! */
787 SLJIT_ASSERT_STOP();
788 return NULL;
789 }
790 }
791
792 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
793 {
794 int count;
795 pcre_uchar *slot;
796 pcre_uchar *assert_back_end = cc - 1;
797
798 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
799 while (cc < ccend)
800 {
801 switch(*cc)
802 {
803 case OP_SET_SOM:
804 common->has_set_som = TRUE;
805 common->might_be_empty = TRUE;
806 cc += 1;
807 break;
808
809 case OP_REF:
810 case OP_REFI:
811 common->optimized_cbracket[GET2(cc, 1)] = 0;
812 cc += 1 + IMM2_SIZE;
813 break;
814
815 case OP_CBRAPOS:
816 case OP_SCBRAPOS:
817 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
818 cc += 1 + LINK_SIZE + IMM2_SIZE;
819 break;
820
821 case OP_COND:
822 case OP_SCOND:
823 /* Only AUTO_CALLOUT can insert this opcode. We do
824 not intend to support this case. */
825 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
826 return FALSE;
827 cc += 1 + LINK_SIZE;
828 break;
829
830 case OP_CREF:
831 common->optimized_cbracket[GET2(cc, 1)] = 0;
832 cc += 1 + IMM2_SIZE;
833 break;
834
835 case OP_DNREF:
836 case OP_DNREFI:
837 case OP_DNCREF:
838 count = GET2(cc, 1 + IMM2_SIZE);
839 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
840 while (count-- > 0)
841 {
842 common->optimized_cbracket[GET2(slot, 0)] = 0;
843 slot += common->name_entry_size;
844 }
845 cc += 1 + 2 * IMM2_SIZE;
846 break;
847
848 case OP_RECURSE:
849 /* Set its value only once. */
850 if (common->recursive_head_ptr == 0)
851 {
852 common->recursive_head_ptr = common->ovector_start;
853 common->ovector_start += sizeof(sljit_sw);
854 }
855 cc += 1 + LINK_SIZE;
856 break;
857
858 case OP_CALLOUT:
859 if (common->capture_last_ptr == 0)
860 {
861 common->capture_last_ptr = common->ovector_start;
862 common->ovector_start += sizeof(sljit_sw);
863 }
864 cc += 2 + 2 * LINK_SIZE;
865 break;
866
867 case OP_ASSERTBACK:
868 slot = bracketend(cc);
869 if (slot > assert_back_end)
870 assert_back_end = slot;
871 cc += 1 + LINK_SIZE;
872 break;
873
874 case OP_THEN_ARG:
875 common->has_then = TRUE;
876 common->control_head_ptr = 1;
877 /* Fall through. */
878
879 case OP_PRUNE_ARG:
880 case OP_MARK:
881 if (common->mark_ptr == 0)
882 {
883 common->mark_ptr = common->ovector_start;
884 common->ovector_start += sizeof(sljit_sw);
885 }
886 cc += 1 + 2 + cc[1];
887 break;
888
889 case OP_THEN:
890 common->has_then = TRUE;
891 common->control_head_ptr = 1;
892 cc += 1;
893 break;
894
895 case OP_SKIP:
896 if (cc < assert_back_end)
897 common->has_skip_in_assert_back = TRUE;
898 cc += 1;
899 break;
900
901 case OP_SKIP_ARG:
902 common->control_head_ptr = 1;
903 common->has_skip_arg = TRUE;
904 if (cc < assert_back_end)
905 common->has_skip_in_assert_back = TRUE;
906 cc += 1 + 2 + cc[1];
907 break;
908
909 default:
910 cc = next_opcode(common, cc);
911 if (cc == NULL)
912 return FALSE;
913 break;
914 }
915 }
916 return TRUE;
917 }
918
919 static BOOL is_accelerated_repeat(pcre_uchar *cc)
920 {
921 switch(*cc)
922 {
923 case OP_TYPESTAR:
924 case OP_TYPEMINSTAR:
925 case OP_TYPEPLUS:
926 case OP_TYPEMINPLUS:
927 case OP_TYPEPOSSTAR:
928 case OP_TYPEPOSPLUS:
929 return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
930
931 case OP_STAR:
932 case OP_MINSTAR:
933 case OP_PLUS:
934 case OP_MINPLUS:
935 case OP_POSSTAR:
936 case OP_POSPLUS:
937
938 case OP_STARI:
939 case OP_MINSTARI:
940 case OP_PLUSI:
941 case OP_MINPLUSI:
942 case OP_POSSTARI:
943 case OP_POSPLUSI:
944
945 case OP_NOTSTAR:
946 case OP_NOTMINSTAR:
947 case OP_NOTPLUS:
948 case OP_NOTMINPLUS:
949 case OP_NOTPOSSTAR:
950 case OP_NOTPOSPLUS:
951
952 case OP_NOTSTARI:
953 case OP_NOTMINSTARI:
954 case OP_NOTPLUSI:
955 case OP_NOTMINPLUSI:
956 case OP_NOTPOSSTARI:
957 case OP_NOTPOSPLUSI:
958 return TRUE;
959
960 case OP_CLASS:
961 case OP_NCLASS:
962 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
963 case OP_XCLASS:
964 cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
965 #else
966 cc += (1 + (32 / sizeof(pcre_uchar)));
967 #endif
968
969 switch(*cc)
970 {
971 case OP_CRSTAR:
972 case OP_CRMINSTAR:
973 case OP_CRPLUS:
974 case OP_CRMINPLUS:
975 case OP_CRPOSSTAR:
976 case OP_CRPOSPLUS:
977 return TRUE;
978 }
979 break;
980 }
981 return FALSE;
982 }
983
984 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
985 {
986 pcre_uchar *cc = common->start;
987 pcre_uchar *end;
988
989 /* Skip not repeated brackets. */
990 while (TRUE)
991 {
992 switch(*cc)
993 {
994 case OP_SOD:
995 case OP_SOM:
996 case OP_SET_SOM:
997 case OP_NOT_WORD_BOUNDARY:
998 case OP_WORD_BOUNDARY:
999 case OP_EODN:
1000 case OP_EOD:
1001 case OP_CIRC:
1002 case OP_CIRCM:
1003 case OP_DOLL:
1004 case OP_DOLLM:
1005 /* Zero width assertions. */
1006 cc++;
1007 continue;
1008 }
1009
1010 if (*cc != OP_BRA && *cc != OP_CBRA)
1011 break;
1012
1013 end = cc + GET(cc, 1);
1014 if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1015 return FALSE;
1016 if (*cc == OP_CBRA)
1017 {
1018 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1019 return FALSE;
1020 cc += IMM2_SIZE;
1021 }
1022 cc += 1 + LINK_SIZE;
1023 }
1024
1025 if (is_accelerated_repeat(cc))
1026 {
1027 common->fast_forward_bc_ptr = cc;
1028 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1029 *private_data_start += sizeof(sljit_sw);
1030 return TRUE;
1031 }
1032 return FALSE;
1033 }
1034
1035 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_si depth)
1036 {
1037 pcre_uchar *next_alt;
1038
1039 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1040
1041 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1042 return;
1043
1044 next_alt = bracketend(cc) - (1 + LINK_SIZE);
1045 if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1046 return;
1047
1048 do
1049 {
1050 next_alt = cc + GET(cc, 1);
1051
1052 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1053
1054 while (TRUE)
1055 {
1056 switch(*cc)
1057 {
1058 case OP_SOD:
1059 case OP_SOM:
1060 case OP_SET_SOM:
1061 case OP_NOT_WORD_BOUNDARY:
1062 case OP_WORD_BOUNDARY:
1063 case OP_EODN:
1064 case OP_EOD:
1065 case OP_CIRC:
1066 case OP_CIRCM:
1067 case OP_DOLL:
1068 case OP_DOLLM:
1069 /* Zero width assertions. */
1070 cc++;
1071 continue;
1072 }
1073 break;
1074 }
1075
1076 if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1077 detect_fast_fail(common, cc, private_data_start, depth - 1);
1078
1079 if (is_accelerated_repeat(cc))
1080 {
1081 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1082
1083 if (common->fast_fail_start_ptr == 0)
1084 common->fast_fail_start_ptr = *private_data_start;
1085
1086 *private_data_start += sizeof(sljit_sw);
1087 common->fast_fail_end_ptr = *private_data_start;
1088
1089 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1090 return;
1091 }
1092
1093 cc = next_alt;
1094 }
1095 while (*cc == OP_ALT);
1096 }
1097
1098 static int get_class_iterator_size(pcre_uchar *cc)
1099 {
1100 sljit_ui min;
1101 sljit_ui max;
1102 switch(*cc)
1103 {
1104 case OP_CRSTAR:
1105 case OP_CRPLUS:
1106 return 2;
1107
1108 case OP_CRMINSTAR:
1109 case OP_CRMINPLUS:
1110 case OP_CRQUERY:
1111 case OP_CRMINQUERY:
1112 return 1;
1113
1114 case OP_CRRANGE:
1115 case OP_CRMINRANGE:
1116 min = GET2(cc, 1);
1117 max = GET2(cc, 1 + IMM2_SIZE);
1118 if (max == 0)
1119 return (*cc == OP_CRRANGE) ? 2 : 1;
1120 max -= min;
1121 if (max > 2)
1122 max = 2;
1123 return max;
1124
1125 default:
1126 return 0;
1127 }
1128 }
1129
/* Detects a bracket that is immediately followed by identical copies of itself
and records the run as a repeat.

A descriptor is written into common->private_data_ptrs in the three entries
that follow the OP_KET opcode closing the bracket: the first entry holds the
length of the copies that follow (so they can be skipped), the second the
repeat kind (OP_EXACT, OP_UPTO or OP_MINUPTO) and the third the repeat count.
set_private_data_ptrs() tests the first entry to recognise such an OP_KET.

begin: the opening opcode of the bracket.
Returns TRUE when a repeat has been recorded by this call or was already
recorded earlier, FALSE otherwise. */
1130 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
1131 {
1132 pcre_uchar *end = bracketend(begin);
1133 pcre_uchar *next;
1134 pcre_uchar *next_end;
1135 pcre_uchar *max_end;
1136 pcre_uchar type;
1137 sljit_sw length = end - begin;
1138 int min, max, i;
1139
1140 /* Detect fixed iterations first. */
/* Only brackets closed by a plain OP_KET are candidates. */
1141 if (end[-(1 + LINK_SIZE)] != OP_KET)
1142 return FALSE;
1143
1144 /* Already detected repeat. */
1145 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1146 return TRUE;
1147
/* Count the consecutive copies; min includes the original bracket. */
1148 next = end;
1149 min = 1;
1150 while (1)
1151 {
1152 if (*next != *begin)
1153 break;
1154 next_end = bracketend(next);
1155 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1156 break;
1157 next = next_end;
1158 min++;
1159 }
1160
/* A run of exactly two copies is never converted. */
1161 if (min == 2)
1162 return FALSE;
1163
/* Look for optional copies after the fixed ones. Every level but the last has
the form <type> OP_BRA <copy>, the last one is <type> <copy>, and the run must
be closed by one OP_KET per OP_BRA level. */
1164 max = 0;
1165 max_end = next;
1166 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1167 {
1168 type = *next;
1169 while (1)
1170 {
1171 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1172 break;
1173 next_end = bracketend(next + 2 + LINK_SIZE);
1174 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1175 break;
1176 next = next_end;
1177 max++;
1178 }
1179
1180 if (next[0] == type && next[1] == *begin && max >= 1)
1181 {
1182 next_end = bracketend(next + 1);
1183 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1184 {
1185 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1186 if (*next_end != OP_KET)
1187 break;
1188
1189 if (i == max)
1190 {
/* The bounded repeat is attached to the OP_KET of the last fixed copy. */
1191 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1192 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1193 /* +2 the original and the last. */
1194 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1195 if (min == 1)
1196 return TRUE;
/* The last fixed copy now belongs to the bounded repeat: take it out of the
fixed count and move max_end back over it. */
1197 min--;
1198 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1199 }
1200 }
1201 }
1202 }
1203
/* Three or more remaining fixed copies are recorded as an exact repeat. */
1204 if (min >= 3)
1205 {
1206 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1207 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1208 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1209 return TRUE;
1210 }
1211
1212 return FALSE;
1213 }
1214
/* The macros below expand to groups of case labels, so that the functions
which walk the compiled pattern can share the same opcode classification. */

/* Single character iterators for which set_private_data_ptrs() reserves one
private data word. */
1215 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1216 case OP_MINSTAR: \
1217 case OP_MINPLUS: \
1218 case OP_QUERY: \
1219 case OP_MINQUERY: \
1220 case OP_MINSTARI: \
1221 case OP_MINPLUSI: \
1222 case OP_QUERYI: \
1223 case OP_MINQUERYI: \
1224 case OP_NOTMINSTAR: \
1225 case OP_NOTMINPLUS: \
1226 case OP_NOTQUERY: \
1227 case OP_NOTMINQUERY: \
1228 case OP_NOTMINSTARI: \
1229 case OP_NOTMINPLUSI: \
1230 case OP_NOTQUERYI: \
1231 case OP_NOTMINQUERYI:
1232
/* Single character iterators for which two private data words are reserved. */
1233 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1234 case OP_STAR: \
1235 case OP_PLUS: \
1236 case OP_STARI: \
1237 case OP_PLUSI: \
1238 case OP_NOTSTAR: \
1239 case OP_NOTPLUS: \
1240 case OP_NOTSTARI: \
1241 case OP_NOTPLUSI:
1242
/* Single character iterators with two private data words whose opcode is
followed by an additional IMM2_SIZE operand. */
1243 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1244 case OP_UPTO: \
1245 case OP_MINUPTO: \
1246 case OP_UPTOI: \
1247 case OP_MINUPTOI: \
1248 case OP_NOTUPTO: \
1249 case OP_NOTMINUPTO: \
1250 case OP_NOTUPTOI: \
1251 case OP_NOTMINUPTOI:
1252
/* Character type iterators with one private data word. */
1253 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1254 case OP_TYPEMINSTAR: \
1255 case OP_TYPEMINPLUS: \
1256 case OP_TYPEQUERY: \
1257 case OP_TYPEMINQUERY:
1258
/* Character type iterators with two private data words. set_private_data_ptrs()
reserves none when the repeated type is OP_ANYNL or OP_EXTUNI. */
1259 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1260 case OP_TYPESTAR: \
1261 case OP_TYPEPLUS:
1262
/* Character type iterators with two private data words whose opcode is
followed by an additional IMM2_SIZE operand. */
1263 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1264 case OP_TYPEUPTO: \
1265 case OP_TYPEMINUPTO:
1266
/* Walks the compiled pattern from common->start up to ccend and assigns private
data offsets to the opcodes that need them.

The offset of every reserved slot is stored in common->private_data_ptrs at the
index of the owning opcode; slots are sizeof(sljit_sw) bytes each. Allocation
starts at *private_data_start and the first free offset is written back to
*private_data_start on return. The walk stops early once the running offset
exceeds SLJIT_MAX_LOCAL_SIZE. */
1267 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1268 {
1269 pcre_uchar *cc = common->start;
1270 pcre_uchar *alternative;
/* Character iterators located before `end` do not get a private slot. */
1271 pcre_uchar *end = NULL;
1272 int private_data_ptr = *private_data_start;
1273 int space, size, bracketlen;
/* Cleared for the single opcode that follows OP_BRAZERO and friends. */
1274 BOOL repeat_check = TRUE;
1275
1276 while (cc < ccend)
1277 {
/* space: number of slots to reserve for a character iterator.
size: distance to advance cc; a negative value marks an opcode followed by
a character that may carry extra UTF code units.
bracketlen: length of a bracket header to step over. */
1278 space = 0;
1279 size = 0;
1280 bracketlen = 0;
1281 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1282 break;
1283
1284 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1285 {
1286 if (detect_repeat(common, cc))
1287 {
1288 /* These brackets are converted to repeats, so no global
1289 based single character repeat is allowed. */
1290 if (cc >= end)
1291 end = bracketend(cc);
1292 }
1293 }
1294 repeat_check = TRUE;
1295
1296 switch(*cc)
1297 {
1298 case OP_KET:
/* An OP_KET marked by detect_repeat() gets one slot, and the copies of the
bracket that follow it are skipped. */
1299 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1300 {
1301 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1302 private_data_ptr += sizeof(sljit_sw);
1303 cc += common->private_data_ptrs[cc + 1 - common->start];
1304 }
1305 cc += 1 + LINK_SIZE;
1306 break;
1307
1308 case OP_ASSERT:
1309 case OP_ASSERT_NOT:
1310 case OP_ASSERTBACK:
1311 case OP_ASSERTBACK_NOT:
1312 case OP_ONCE:
1313 case OP_ONCE_NC:
1314 case OP_BRAPOS:
1315 case OP_SBRA:
1316 case OP_SBRAPOS:
1317 case OP_SCOND:
1318 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1319 private_data_ptr += sizeof(sljit_sw);
1320 bracketlen = 1 + LINK_SIZE;
1321 break;
1322
1323 case OP_CBRAPOS:
1324 case OP_SCBRAPOS:
1325 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1326 private_data_ptr += sizeof(sljit_sw);
1327 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1328 break;
1329
1330 case OP_COND:
1331 /* Might be a hidden SCOND. */
1332 alternative = cc + GET(cc, 1);
1333 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1334 {
1335 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1336 private_data_ptr += sizeof(sljit_sw);
1337 }
1338 bracketlen = 1 + LINK_SIZE;
1339 break;
1340
1341 case OP_BRA:
1342 bracketlen = 1 + LINK_SIZE;
1343 break;
1344
1345 case OP_CBRA:
1346 case OP_SCBRA:
1347 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1348 break;
1349
1350 case OP_BRAZERO:
1351 case OP_BRAMINZERO:
1352 case OP_BRAPOSZERO:
/* The bracket that follows is not offered to detect_repeat(). */
1353 repeat_check = FALSE;
1354 size = 1;
1355 break;
1356
1357 CASE_ITERATOR_PRIVATE_DATA_1
1358 space = 1;
1359 size = -2;
1360 break;
1361
1362 CASE_ITERATOR_PRIVATE_DATA_2A
1363 space = 2;
1364 size = -2;
1365 break;
1366
1367 CASE_ITERATOR_PRIVATE_DATA_2B
1368 space = 2;
1369 size = -(2 + IMM2_SIZE);
1370 break;
1371
1372 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1373 space = 1;
1374 size = 1;
1375 break;
1376
1377 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1378 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1379 space = 2;
1380 size = 1;
1381 break;
1382
1383 case OP_TYPEUPTO:
1384 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1385 space = 2;
1386 size = 1 + IMM2_SIZE;
1387 break;
1388
1389 case OP_TYPEMINUPTO:
1390 space = 2;
1391 size = 1 + IMM2_SIZE;
1392 break;
1393
1394 case OP_CLASS:
1395 case OP_NCLASS:
/* The slot count depends on the repeat opcode that follows the class. */
1396 size += 1 + 32 / sizeof(pcre_uchar);
1397 space = get_class_iterator_size(cc + size);
1398 break;
1399
1400 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1401 case OP_XCLASS:
1402 size = GET(cc, 1);
1403 space = get_class_iterator_size(cc + size);
1404 break;
1405 #endif
1406
1407 default:
1408 cc = next_opcode(common, cc);
1409 SLJIT_ASSERT(cc != NULL);
1410 break;
1411 }
1412
1413 /* Character iterators, which are not inside a repeated bracket,
1414 get a private slot instead of allocating it on the stack. */
1415 if (space > 0 && cc >= end)
1416 {
1417 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1418 private_data_ptr += sizeof(sljit_sw) * space;
1419 }
1420
1421 if (size != 0)
1422 {
1423 if (size < 0)
1424 {
1425 cc += -size;
1426 #ifdef SUPPORT_UTF
1427 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1428 #endif
1429 }
1430 else
1431 cc += size;
1432 }
1433
1434 if (bracketlen > 0)
1435 {
/* Entering a bracket outside the current restricted region: the bracket
becomes the new restricted region unless it is closed by a plain OP_KET. */
1436 if (cc >= end)
1437 {
1438 end = bracketend(cc);
1439 if (end[-1 - LINK_SIZE] == OP_KET)
1440 end = NULL;
1441 }
1442 cc += bracketlen;
1443 }
1444 }
1445 *private_data_start = private_data_ptr;
1446 }
1447
/* Computes the size of the frame that init_frame() stores for the code range
[cc, ccend). The size is counted in stack words, the unit by which
init_frame() advances its stack position for every stored value.

cc, ccend:          the range to scan; when ccend is NULL, cc must point to a
                    bracket and its contents are scanned.
recursive:          when TRUE the start-of-match and mark values are treated
                    as already accounted for.
needs_control_head: output flag, set when a MARK / PRUNE_ARG / THEN_ARG /
                    THEN opcode is found and common->control_head_ptr is
                    not zero.

Returns the number of stack words plus one when a frame is required. Otherwise
returns no_frame if any opcode outside the fall-through list below was seen,
and no_stack if there was none. */
1448 /* Returns with a frame_types (always < 0) if no need for frame. */
1449 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1450 {
1451 int length = 0;
1452 int possessive = 0;
1453 BOOL stack_restore = FALSE;
1454 BOOL setsom_found = recursive;
1455 BOOL setmark_found = recursive;
1456 /* The last capture is a local variable even for recursions. */
1457 BOOL capture_last_found = FALSE;
1458
1459 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1460 SLJIT_ASSERT(common->control_head_ptr != 0);
1461 *needs_control_head = TRUE;
1462 #else
1463 *needs_control_head = FALSE;
1464 #endif
1465
1466 if (ccend == NULL)
1467 {
1468 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive possessive capturing bracket starts with a fixed number of
words; if nothing else is added the special case at the end applies. */
1469 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1470 {
1471 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1472 /* This is correct regardless of common->capture_last_ptr. */
1473 capture_last_found = TRUE;
1474 }
1475 cc = next_opcode(common, cc);
1476 }
1477
1478 SLJIT_ASSERT(cc != NULL);
1479 while (cc < ccend)
1480 switch(*cc)
1481 {
1482 case OP_SET_SOM:
1483 SLJIT_ASSERT(common->has_set_som);
1484 stack_restore = TRUE;
1485 if (!setsom_found)
1486 {
1487 length += 2;
1488 setsom_found = TRUE;
1489 }
1490 cc += 1;
1491 break;
1492
1493 case OP_MARK:
1494 case OP_PRUNE_ARG:
1495 case OP_THEN_ARG:
1496 SLJIT_ASSERT(common->mark_ptr != 0);
1497 stack_restore = TRUE;
1498 if (!setmark_found)
1499 {
1500 length += 2;
1501 setmark_found = TRUE;
1502 }
1503 if (common->control_head_ptr != 0)
1504 *needs_control_head = TRUE;
1505 cc += 1 + 2 + cc[1];
1506 break;
1507
1508 case OP_RECURSE:
/* A recursion may change any of the three values, so each one that is in use
and not yet counted is added. */
1509 stack_restore = TRUE;
1510 if (common->has_set_som && !setsom_found)
1511 {
1512 length += 2;
1513 setsom_found = TRUE;
1514 }
1515 if (common->mark_ptr != 0 && !setmark_found)
1516 {
1517 length += 2;
1518 setmark_found = TRUE;
1519 }
1520 if (common->capture_last_ptr != 0 && !capture_last_found)
1521 {
1522 length += 2;
1523 capture_last_found = TRUE;
1524 }
1525 cc += 1 + LINK_SIZE;
1526 break;
1527
1528 case OP_CBRA:
1529 case OP_CBRAPOS:
1530 case OP_SCBRA:
1531 case OP_SCBRAPOS:
1532 stack_restore = TRUE;
1533 if (common->capture_last_ptr != 0 && !capture_last_found)
1534 {
1535 length += 2;
1536 capture_last_found = TRUE;
1537 }
/* Every capturing bracket adds three words. */
1538 length += 3;
1539 cc += 1 + LINK_SIZE + IMM2_SIZE;
1540 break;
1541
1542 case OP_THEN:
1543 stack_restore = TRUE;
1544 if (common->control_head_ptr != 0)
1545 *needs_control_head = TRUE;
1546 cc ++;
1547 break;
1548
1549 default:
1550 stack_restore = TRUE;
1551 /* Fall through. */
1552
/* The opcodes listed below only advance cc; they leave stack_restore alone. */
1553 case OP_NOT_WORD_BOUNDARY:
1554 case OP_WORD_BOUNDARY:
1555 case OP_NOT_DIGIT:
1556 case OP_DIGIT:
1557 case OP_NOT_WHITESPACE:
1558 case OP_WHITESPACE:
1559 case OP_NOT_WORDCHAR:
1560 case OP_WORDCHAR:
1561 case OP_ANY:
1562 case OP_ALLANY:
1563 case OP_ANYBYTE:
1564 case OP_NOTPROP:
1565 case OP_PROP:
1566 case OP_ANYNL:
1567 case OP_NOT_HSPACE:
1568 case OP_HSPACE:
1569 case OP_NOT_VSPACE:
1570 case OP_VSPACE:
1571 case OP_EXTUNI:
1572 case OP_EODN:
1573 case OP_EOD:
1574 case OP_CIRC:
1575 case OP_CIRCM:
1576 case OP_DOLL:
1577 case OP_DOLLM:
1578 case OP_CHAR:
1579 case OP_CHARI:
1580 case OP_NOT:
1581 case OP_NOTI:
1582
1583 case OP_EXACT:
1584 case OP_POSSTAR:
1585 case OP_POSPLUS:
1586 case OP_POSQUERY:
1587 case OP_POSUPTO:
1588
1589 case OP_EXACTI:
1590 case OP_POSSTARI:
1591 case OP_POSPLUSI:
1592 case OP_POSQUERYI:
1593 case OP_POSUPTOI:
1594
1595 case OP_NOTEXACT:
1596 case OP_NOTPOSSTAR:
1597 case OP_NOTPOSPLUS:
1598 case OP_NOTPOSQUERY:
1599 case OP_NOTPOSUPTO:
1600
1601 case OP_NOTEXACTI:
1602 case OP_NOTPOSSTARI:
1603 case OP_NOTPOSPLUSI:
1604 case OP_NOTPOSQUERYI:
1605 case OP_NOTPOSUPTOI:
1606
1607 case OP_TYPEEXACT:
1608 case OP_TYPEPOSSTAR:
1609 case OP_TYPEPOSPLUS:
1610 case OP_TYPEPOSQUERY:
1611 case OP_TYPEPOSUPTO:
1612
1613 case OP_CLASS:
1614 case OP_NCLASS:
1615 case OP_XCLASS:
1616 case OP_CALLOUT:
1617
1618 cc = next_opcode(common, cc);
1619 SLJIT_ASSERT(cc != NULL);
1620 break;
1621 }
1622
1623 /* Possessive quantifiers can use a special case. */
1624 if (SLJIT_UNLIKELY(possessive == length))
1625 return stack_restore ? no_frame : no_stack;
1626
/* NOTE(review): the extra word presumably holds the terminating zero that
init_frame() stores after the last record - confirm. */
1627 if (length > 0)
1628 return length + 1;
1629 return stack_restore ? no_frame : no_stack;
1630 }
1631
/* Emits the code that stores a frame for the code range [cc, ccend) on the
stack addressed through STACK_TOP.

The frame is a sequence of records followed by a zero word. A record is an
identifier word followed by the current value(s):
  -OVECTOR(0)                 and the value stored at OVECTOR(0),
  -common->mark_ptr           and the value stored at mark_ptr,
  -common->capture_last_ptr   and the value stored at capture_last_ptr,
  OVECTOR(offset)             and the two values of a capturing bracket.

cc, ccend: the range to scan; when ccend is NULL, cc must point to a bracket.
stackpos:  index of the first word to write (converted with STACK()).
stacktop:  index at which the frame must end; checked by the final assertion.
recursive: when TRUE the start-of-match and mark values are not stored here.

The opcode walk records the same items that get_framesize() counts. */
1632 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1633 {
1634 DEFINE_COMPILER;
1635 BOOL setsom_found = recursive;
1636 BOOL setmark_found = recursive;
1637 /* The last capture is a local variable even for recursions. */
1638 BOOL capture_last_found = FALSE;
1639 int offset;
1640
1641 /* >= 1 + shortest item size (2) */
1642 SLJIT_UNUSED_ARG(stacktop);
1643 SLJIT_ASSERT(stackpos >= stacktop + 2);
1644
1645 stackpos = STACK(stackpos);
1646 if (ccend == NULL)
1647 {
1648 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive possessive capturing bracket is not skipped: its own
capture is stored by the OP_CBRAPOS / OP_SCBRAPOS case below. */
1649 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1650 cc = next_opcode(common, cc);
1651 }
1652
1653 SLJIT_ASSERT(cc != NULL);
1654 while (cc < ccend)
1655 switch(*cc)
1656 {
1657 case OP_SET_SOM:
1658 SLJIT_ASSERT(common->has_set_som);
1659 if (!setsom_found)
1660 {
1661 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1662 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1663 stackpos += (int)sizeof(sljit_sw);
1664 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1665 stackpos += (int)sizeof(sljit_sw);
1666 setsom_found = TRUE;
1667 }
1668 cc += 1;
1669 break;
1670
1671 case OP_MARK:
1672 case OP_PRUNE_ARG:
1673 case OP_THEN_ARG:
1674 SLJIT_ASSERT(common->mark_ptr != 0);
1675 if (!setmark_found)
1676 {
1677 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1678 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1679 stackpos += (int)sizeof(sljit_sw);
1680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1681 stackpos += (int)sizeof(sljit_sw);
1682 setmark_found = TRUE;
1683 }
1684 cc += 1 + 2 + cc[1];
1685 break;
1686
1687 case OP_RECURSE:
/* Store each of the three values that is in use and not stored yet. */
1688 if (common->has_set_som && !setsom_found)
1689 {
1690 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1691 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1692 stackpos += (int)sizeof(sljit_sw);
1693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1694 stackpos += (int)sizeof(sljit_sw);
1695 setsom_found = TRUE;
1696 }
1697 if (common->mark_ptr != 0 && !setmark_found)
1698 {
1699 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1700 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1701 stackpos += (int)sizeof(sljit_sw);
1702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1703 stackpos += (int)sizeof(sljit_sw);
1704 setmark_found = TRUE;
1705 }
1706 if (common->capture_last_ptr != 0 && !capture_last_found)
1707 {
1708 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1709 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1710 stackpos += (int)sizeof(sljit_sw);
1711 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1712 stackpos += (int)sizeof(sljit_sw);
1713 capture_last_found = TRUE;
1714 }
1715 cc += 1 + LINK_SIZE;
1716 break;
1717
1718 case OP_CBRA:
1719 case OP_CBRAPOS:
1720 case OP_SCBRA:
1721 case OP_SCBRAPOS:
1722 if (common->capture_last_ptr != 0 && !capture_last_found)
1723 {
1724 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1726 stackpos += (int)sizeof(sljit_sw);
1727 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1728 stackpos += (int)sizeof(sljit_sw);
1729 capture_last_found = TRUE;
1730 }
/* Capture record: the identifier followed by both ovector values of the group. */
1731 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1732 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1733 stackpos += (int)sizeof(sljit_sw);
1734 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1735 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1736 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1737 stackpos += (int)sizeof(sljit_sw);
1738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1739 stackpos += (int)sizeof(sljit_sw);
1740
1741 cc += 1 + LINK_SIZE + IMM2_SIZE;
1742 break;
1743
1744 default:
1745 cc = next_opcode(common, cc);
1746 SLJIT_ASSERT(cc != NULL);
1747 break;
1748 }
1749
/* A zero word terminates the frame. */
1750 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1751 SLJIT_ASSERT(stackpos == STACK(stacktop));
1752 }
1753
/* Returns the number of words reserved for copying the private data of the
code range [cc, ccend) with copy_private_data().

The count starts at 2, or 3 when needs_control_head is set, and grows by one
for every private data word owned by an opcode in the range, including the
private ovector entry of each capturing bracket whose optimized_cbracket entry
is zero. The cases follow the same opcode classification as the switch in
copy_private_data(). */
1754 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1755 {
1756 int private_data_length = needs_control_head ? 3 : 2;
1757 int size;
1758 pcre_uchar *alternative;
1759 /* Calculate the sum of the private machine words. */
1760 while (cc < ccend)
1761 {
1762 size = 0;
1763 switch(*cc)
1764 {
1765 case OP_KET:
/* An OP_KET with private data closes a detected repeat: count its word and
skip the copies of the bracket that follow. */
1766 if (PRIVATE_DATA(cc) != 0)
1767 {
1768 private_data_length++;
1769 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1770 cc += PRIVATE_DATA(cc + 1);
1771 }
1772 cc += 1 + LINK_SIZE;
1773 break;
1774
1775 case OP_ASSERT:
1776 case OP_ASSERT_NOT:
1777 case OP_ASSERTBACK:
1778 case OP_ASSERTBACK_NOT:
1779 case OP_ONCE:
1780 case OP_ONCE_NC:
1781 case OP_BRAPOS:
1782 case OP_SBRA:
1783 case OP_SBRAPOS:
1784 case OP_SCOND:
1785 private_data_length++;
1786 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1787 cc += 1 + LINK_SIZE;
1788 break;
1789
1790 case OP_CBRA:
1791 case OP_SCBRA:
1792 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1793 private_data_length++;
1794 cc += 1 + LINK_SIZE + IMM2_SIZE;
1795 break;
1796
1797 case OP_CBRAPOS:
1798 case OP_SCBRAPOS:
/* Two words: the private data of the bracket and its private ovector entry. */
1799 private_data_length += 2;
1800 cc += 1 + LINK_SIZE + IMM2_SIZE;
1801 break;
1802
1803 case OP_COND:
1804 /* Might be a hidden SCOND. */
1805 alternative = cc + GET(cc, 1);
1806 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1807 private_data_length++;
1808 cc += 1 + LINK_SIZE;
1809 break;
1810
1811 CASE_ITERATOR_PRIVATE_DATA_1
1812 if (PRIVATE_DATA(cc))
1813 private_data_length++;
1814 cc += 2;
1815 #ifdef SUPPORT_UTF
1816 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1817 #endif
1818 break;
1819
1820 CASE_ITERATOR_PRIVATE_DATA_2A
1821 if (PRIVATE_DATA(cc))
1822 private_data_length += 2;
1823 cc += 2;
1824 #ifdef SUPPORT_UTF
1825 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1826 #endif
1827 break;
1828
1829 CASE_ITERATOR_PRIVATE_DATA_2B
1830 if (PRIVATE_DATA(cc))
1831 private_data_length += 2;
1832 cc += 2 + IMM2_SIZE;
1833 #ifdef SUPPORT_UTF
1834 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1835 #endif
1836 break;
1837
1838 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1839 if (PRIVATE_DATA(cc))
1840 private_data_length++;
1841 cc += 1;
1842 break;
1843
1844 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1845 if (PRIVATE_DATA(cc))
1846 private_data_length += 2;
1847 cc += 1;
1848 break;
1849
1850 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1851 if (PRIVATE_DATA(cc))
1852 private_data_length += 2;
1853 cc += 1 + IMM2_SIZE;
1854 break;
1855
1856 case OP_CLASS:
1857 case OP_NCLASS:
1858 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1859 case OP_XCLASS:
1860 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1861 #else
1862 size = 1 + 32 / (int)sizeof(pcre_uchar);
1863 #endif
/* The word count of a class depends on the repeat opcode located after it. */
1864 if (PRIVATE_DATA(cc))
1865 private_data_length += get_class_iterator_size(cc + size);
1866 cc += size;
1867 break;
1868
1869 default:
1870 cc = next_opcode(common, cc);
1871 SLJIT_ASSERT(cc != NULL);
1872 break;
1873 }
1874 }
1875 SLJIT_ASSERT(cc == ccend);
1876 return private_data_length;
1877 }
1878
/* Emits the code that copies the private data of the code range [cc, ccend)
between the locals (addressed through SLJIT_SP) and the stack addressed
through STACK_TOP.

save:               TRUE emits the code that stores the values on the stack,
                    FALSE emits the code that loads them back.
stackptr, stacktop: stack word indexes, converted with STACK(); the final
                    assertion requires the copy to end exactly at stacktop.
needs_control_head: when saving, common->control_head_ptr is stored as well.

Every value travels through TMP1 or TMP2. The two registers are used in turn,
so the store of one value is emitted only after the load of the next one. */
1879 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1880 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1881 {
1882 DEFINE_COMPILER;
/* Offsets of the locals selected by the current opcode (at most two). */
1883 int srcw[2];
1884 int count, size;
/* tmp1next tells which register is used for the next value; the empty flags
tell whether a register currently holds a value that still has to be stored. */
1885 BOOL tmp1next = TRUE;
1886 BOOL tmp1empty = TRUE;
1887 BOOL tmp2empty = TRUE;
1888 pcre_uchar *alternative;
1889 enum {
1890 start,
1891 loop,
1892 end
1893 } status;
1894
/* The start state, which handles the head pointers, is only used when saving. */
1895 status = save ? start : loop;
1896 stackptr = STACK(stackptr - 2);
1897 stacktop = STACK(stacktop - 1);
1898
1899 if (!save)
1900 {
/* Step over the head word(s) written in the start state; they are not loaded
back here. Then preload up to two values into TMP1 and TMP2. */
1901 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1902 if (stackptr < stacktop)
1903 {
1904 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1905 stackptr += sizeof(sljit_sw);
1906 tmp1empty = FALSE;
1907 }
1908 if (stackptr < stacktop)
1909 {
1910 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1911 stackptr += sizeof(sljit_sw);
1912 tmp2empty = FALSE;
1913 }
1914 /* The tmp1next must be TRUE in either way. */
1915 }
1916
1917 do
1918 {
/* Each pass selects the locals of one opcode (count entries in srcw), which
are then transferred by the inner loop at the bottom. */
1919 count = 0;
1920 switch(status)
1921 {
1922 case start:
1923 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1924 count = 1;
1925 srcw[0] = common->recursive_head_ptr;
1926 if (needs_control_head)
1927 {
1928 SLJIT_ASSERT(common->control_head_ptr != 0);
1929 count = 2;
1930 srcw[1] = common->control_head_ptr;
1931 }
1932 status = loop;
1933 break;
1934
1935 case loop:
1936 if (cc >= ccend)
1937 {
1938 status = end;
1939 break;
1940 }
1941
1942 switch(*cc)
1943 {
1944 case OP_KET:
1945 if (PRIVATE_DATA(cc) != 0)
1946 {
1947 count = 1;
1948 srcw[0] = PRIVATE_DATA(cc);
1949 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1950 cc += PRIVATE_DATA(cc + 1);
1951 }
1952 cc += 1 + LINK_SIZE;
1953 break;
1954
1955 case OP_ASSERT:
1956 case OP_ASSERT_NOT:
1957 case OP_ASSERTBACK:
1958 case OP_ASSERTBACK_NOT:
1959 case OP_ONCE:
1960 case OP_ONCE_NC:
1961 case OP_BRAPOS:
1962 case OP_SBRA:
1963 case OP_SBRAPOS:
1964 case OP_SCOND:
1965 count = 1;
1966 srcw[0] = PRIVATE_DATA(cc);
1967 SLJIT_ASSERT(srcw[0] != 0);
1968 cc += 1 + LINK_SIZE;
1969 break;
1970
1971 case OP_CBRA:
1972 case OP_SCBRA:
1973 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1974 {
1975 count = 1;
1976 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1977 }
1978 cc += 1 + LINK_SIZE + IMM2_SIZE;
1979 break;
1980
1981 case OP_CBRAPOS:
1982 case OP_SCBRAPOS:
1983 count = 2;
1984 srcw[0] = PRIVATE_DATA(cc);
1985 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1986 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1987 cc += 1 + LINK_SIZE + IMM2_SIZE;
1988 break;
1989
1990 case OP_COND:
1991 /* Might be a hidden SCOND. */
1992 alternative = cc + GET(cc, 1);
1993 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1994 {
1995 count = 1;
1996 srcw[0] = PRIVATE_DATA(cc);
1997 SLJIT_ASSERT(srcw[0] != 0);
1998 }
1999 cc += 1 + LINK_SIZE;
2000 break;
2001
2002 CASE_ITERATOR_PRIVATE_DATA_1
2003 if (PRIVATE_DATA(cc))
2004 {
2005 count = 1;
2006 srcw[0] = PRIVATE_DATA(cc);
2007 }
2008 cc += 2;
2009 #ifdef SUPPORT_UTF
2010 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2011 #endif
2012 break;
2013
2014 CASE_ITERATOR_PRIVATE_DATA_2A
2015 if (PRIVATE_DATA(cc))
2016 {
2017 count = 2;
2018 srcw[0] = PRIVATE_DATA(cc);
2019 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2020 }
2021 cc += 2;
2022 #ifdef SUPPORT_UTF
2023 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2024 #endif
2025 break;
2026
2027 CASE_ITERATOR_PRIVATE_DATA_2B
2028 if (PRIVATE_DATA(cc))
2029 {
2030 count = 2;
2031 srcw[0] = PRIVATE_DATA(cc);
2032 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2033 }
2034 cc += 2 + IMM2_SIZE;
2035 #ifdef SUPPORT_UTF
2036 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2037 #endif
2038 break;
2039
2040 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2041 if (PRIVATE_DATA(cc))
2042 {
2043 count = 1;
2044 srcw[0] = PRIVATE_DATA(cc);
2045 }
2046 cc += 1;
2047 break;
2048
2049 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2050 if (PRIVATE_DATA(cc))
2051 {
2052 count = 2;
2053 srcw[0] = PRIVATE_DATA(cc);
2054 srcw[1] = srcw[0] + sizeof(sljit_sw);
2055 }
2056 cc += 1;
2057 break;
2058
2059 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2060 if (PRIVATE_DATA(cc))
2061 {
2062 count = 2;
2063 srcw[0] = PRIVATE_DATA(cc);
2064 srcw[1] = srcw[0] + sizeof(sljit_sw);
2065 }
2066 cc += 1 + IMM2_SIZE;
2067 break;
2068
2069 case OP_CLASS:
2070 case OP_NCLASS:
2071 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2072 case OP_XCLASS:
2073 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
2074 #else
2075 size = 1 + 32 / (int)sizeof(pcre_uchar);
2076 #endif
2077 if (PRIVATE_DATA(cc))
2078 switch(get_class_iterator_size(cc + size))
2079 {
2080 case 1:
2081 count = 1;
2082 srcw[0] = PRIVATE_DATA(cc);
2083 break;
2084
2085 case 2:
2086 count = 2;
2087 srcw[0] = PRIVATE_DATA(cc);
2088 srcw[1] = srcw[0] + sizeof(sljit_sw);
2089 break;
2090
2091 default:
2092 SLJIT_ASSERT_STOP();
2093 break;
2094 }
2095 cc += size;
2096 break;
2097
2098 default:
2099 cc = next_opcode(common, cc);
2100 SLJIT_ASSERT(cc != NULL);
2101 break;
2102 }
2103 break;
2104
2105 case end:
2106 SLJIT_ASSERT_STOP();
2107 break;
2108 }
2109
/* Transfer the selected locals; srcw[] is consumed from the highest index
down. */
2110 while (count > 0)
2111 {
2112 count--;
2113 if (save)
2114 {
/* Saving: store the value held by the chosen register (if any), then load
the next local into it. */
2115 if (tmp1next)
2116 {
2117 if (!tmp1empty)
2118 {
2119 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2120 stackptr += sizeof(sljit_sw);
2121 }
2122 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2123 tmp1empty = FALSE;
2124 tmp1next = FALSE;
2125 }
2126 else
2127 {
2128 if (!tmp2empty)
2129 {
2130 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2131 stackptr += sizeof(sljit_sw);
2132 }
2133 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2134 tmp2empty = FALSE;
2135 tmp1next = TRUE;
2136 }
2137 }
2138 else
2139 {
/* Restoring: write the preloaded register into the local, then refill the
register from the stack while words remain below stacktop. */
2140 if (tmp1next)
2141 {
2142 SLJIT_ASSERT(!tmp1empty);
2143 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
2144 tmp1empty = stackptr >= stacktop;
2145 if (!tmp1empty)
2146 {
2147 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2148 stackptr += sizeof(sljit_sw);
2149 }
2150 tmp1next = FALSE;
2151 }
2152 else
2153 {
2154 SLJIT_ASSERT(!tmp2empty);
2155 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
2156 tmp2empty = stackptr >= stacktop;
2157 if (!tmp2empty)
2158 {
2159 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2160 stackptr += sizeof(sljit_sw);
2161 }
2162 tmp1next = TRUE;
2163 }
2164 }
2165 }
2166 }
2167 while (status != end);
2168
2169 if (save)
2170 {
/* Flush the values still held in the registers, oldest first. */
2171 if (tmp1next)
2172 {
2173 if (!tmp1empty)
2174 {
2175 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2176 stackptr += sizeof(sljit_sw);
2177 }
2178 if (!tmp2empty)
2179 {
2180 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2181 stackptr += sizeof(sljit_sw);
2182 }
2183 }
2184 else
2185 {
2186 if (!tmp2empty)
2187 {
2188 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2189 stackptr += sizeof(sljit_sw);
2190 }
2191 if (!tmp1empty)
2192 {
2193 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2194 stackptr += sizeof(sljit_sw);
2195 }
2196 }
2197 }
2198 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2199 }
2200
2201 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
2202 {
2203 pcre_uchar *end = bracketend(cc);
2204 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2205
2206 /* Assert captures then. */
2207 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2208 current_offset = NULL;
2209 /* Conditional block does not. */
2210 if (*cc == OP_COND || *cc == OP_SCOND)
2211 has_alternatives = FALSE;
2212
2213 cc = next_opcode(common, cc);
2214 if (has_alternatives)
2215 current_offset = common->then_offsets + (cc - common->start);
2216
2217 while (cc < end)
2218 {
2219 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2220 cc = set_then_offsets(common, cc, current_offset);
2221 else
2222 {
2223 if (*cc == OP_ALT && has_alternatives)
2224 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2225 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2226 *current_offset = 1;
2227 cc = next_opcode(common, cc);
2228 }
2229 }
2230
2231 return end;
2232 }
2233
/* The case label helper macros are not available past this point. */
2234 #undef CASE_ITERATOR_PRIVATE_DATA_1
2235 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2236 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2237 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2238 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2239 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2240
2241 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2242 {
2243 return (value & (value - 1)) == 0;
2244 }
2245
2246 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2247 {
2248 while (list)
2249 {
2250 /* sljit_set_label is clever enough to do nothing
2251 if either the jump or the label is NULL. */
2252 SET_LABEL(list->jump, label);
2253 list = list->next;
2254 }
2255 }
2256
2257 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2258 {
2259 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2260 if (list_item)
2261 {
2262 list_item->next = *list;
2263 list_item->jump = jump;
2264 *list = list_item;
2265 }
2266 }
2267
2268 static void add_stub(compiler_common *common, struct sljit_jump *start)
2269 {
2270 DEFINE_COMPILER;
2271 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2272
2273 if (list_item)
2274 {
2275 list_item->start = start;
2276 list_item->quit = LABEL();
2277 list_item->next = common->stubs;
2278 common->stubs = list_item;
2279 }
2280 }
2281
2282 static void flush_stubs(compiler_common *common)
2283 {
2284 DEFINE_COMPILER;
2285 stub_list *list_item = common->stubs;
2286
2287 while (list_item)
2288 {
2289 JUMPHERE(list_item->start);
2290 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2291 JUMPTO(SLJIT_JUMP, list_item->quit);
2292 list_item = list_item->next;
2293 }
2294 common->stubs = NULL;
2295 }
2296
2297 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2298 {
2299 DEFINE_COMPILER;
2300 label_addr_list *label_addr;
2301
2302 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2303 if (label_addr == NULL)
2304 return;
2305 label_addr->label = LABEL();
2306 label_addr->update_addr = update_addr;
2307 label_addr->next = common->label_addrs;
2308 common->label_addrs = label_addr;
2309 }
2310
/* Emits code that decrements COUNT_MATCH by one and takes a jump, added to the
common->calllimit jump list, when the result is zero. */
2311 static SLJIT_INLINE void count_match(compiler_common *common)
2312 {
2313 DEFINE_COMPILER;
2314
2315 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2316 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2317 }
2318
/* Emits code that advances STACK_TOP by size words (sizeof(sljit_sw) bytes
each) and registers a stub that is entered when the new STACK_TOP is greater
than STACK_LIMIT. size must be positive. */
2319 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2320 {
2321 /* May destroy all locals and registers except TMP2. */
2322 DEFINE_COMPILER;
2323
2324 SLJIT_ASSERT(size > 0);
2325 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
/* With DESTROY_REGISTERS defined, the registers and locals that may be
destroyed are overwritten with a recognisable constant. */
2326 #ifdef DESTROY_REGISTERS
2327 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2328 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2329 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2330 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2331 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2332 #endif
2333 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2334 }
2335
/* Emits code that moves STACK_TOP back by size words (sizeof(sljit_sw) bytes
each), the inverse of the adjustment made by allocate_stack(). size must be
positive. */
2336 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2337 {
2338 DEFINE_COMPILER;
2339
2340 SLJIT_ASSERT(size > 0);
2341 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2342 }
2343
2344 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2345 {
2346 DEFINE_COMPILER;
2347 sljit_uw *result;
2348
2349 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2350 return NULL;
2351
2352 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2353 if (SLJIT_UNLIKELY(result == NULL))
2354 {
2355 sljit_set_compiler_memory_error(compiler);
2356 return NULL;
2357 }
2358
2359 *(void**)result = common->read_only_data_head;
2360 common->read_only_data_head = (void *)result;
2361 return result + 1;
2362 }
2363
2364 static void free_read_only_data(void *current, void *allocator_data)
2365 {
2366 void *next;
2367
2368 SLJIT_UNUSED_ARG(allocator_data);
2369
2370 while (current != NULL)
2371 {
2372 next = *(void**)current;
2373 SLJIT_FREE(current, allocator_data);
2374 current = next;
2375 }
2376 }
2377
2378 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2379 {
2380 DEFINE_COMPILER;
2381 struct sljit_label *loop;
2382 int i;
2383
2384 /* At this point we can freely use all temporary registers. */
2385 SLJIT_ASSERT(length > 1);
2386 /* TMP1 returns with begin - 1. */
2387 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2388 if (length < 8)
2389 {
2390 for (i = 1; i < length; i++)
2391 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2392 }
2393 else
2394 {
2395 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2396 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2397 loop = LABEL();
2398 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2399 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2400 JUMPTO(SLJIT_NOT_ZERO, loop);
2401 }
2402 }
2403
2404 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2405 {
2406 DEFINE_COMPILER;
2407 sljit_si i;
2408
2409 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2410
2411 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2412 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2413 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2414 }
2415
2416 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2417 {
2418 DEFINE_COMPILER;
2419 struct sljit_label *loop;
2420 int i;
2421
2422 SLJIT_ASSERT(length > 1);
2423 /* OVECTOR(1) contains the "string begin - 1" constant. */
2424 if (length > 2)
2425 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2426 if (length < 8)
2427 {
2428 for (i = 2; i < length; i++)
2429 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2430 }
2431 else
2432 {
2433 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2434 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2435 loop = LABEL();
2436 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2437 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2438 JUMPTO(SLJIT_NOT_ZERO, loop);
2439 }
2440
2441 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2442 if (common->mark_ptr != 0)
2443 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2444 if (common->control_head_ptr != 0)
2445 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2446 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2447 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2448 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2449 }
2450
2451 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2452 {
2453 while (current != NULL)
2454 {
2455 switch (current[-2])
2456 {
2457 case type_then_trap:
2458 break;
2459
2460 case type_mark:
2461 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2462 return current[-4];
2463 break;
2464
2465 default:
2466 SLJIT_ASSERT_STOP();
2467 break;
2468 }
2469 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2470 current = (sljit_sw*)current[-1];
2471 }
2472 return -1;
2473 }
2474
2475 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2476 {
2477 DEFINE_COMPILER;
2478 struct sljit_label *loop;
2479 struct sljit_jump *early_quit;
2480
2481 /* At this point we can freely use all registers. */
2482 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2483 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2484
2485 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2486 if (common->mark_ptr != 0)
2487 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2488 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2489 if (common->mark_ptr != 0)
2490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2491 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2492 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2493 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2494 /* Unlikely, but possible */
2495 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2496 loop = LABEL();
2497 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2498 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2499 /* Copy the integer value to the output buffer */
2500 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2501 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2502 #endif
2503 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2504 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2505 JUMPTO(SLJIT_NOT_ZERO, loop);
2506 JUMPHERE(early_quit);
2507
2508 /* Calculate the return value, which is the maximum ovector value. */
2509 if (topbracket > 1)
2510 {
2511 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2512 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2513
2514 /* OVECTOR(0) is never equal to SLJIT_S2. */
2515 loop = LABEL();
2516 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2517 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2518 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2519 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2520 }
2521 else
2522 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2523 }
2524
2525 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2526 {
2527 DEFINE_COMPILER;
2528 struct sljit_jump *jump;
2529
2530 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2531 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2532 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2533
2534 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2535 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2536 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2537 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2538
2539 /* Store match begin and end. */
2540 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2541 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2542
2543 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2544 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2545 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2546 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2547 #endif
2548 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2549 JUMPHERE(jump);
2550
2551 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2552 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2553 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2554 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2555 #endif
2556 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2557
2558 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2559 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2560 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2561 #endif
2562 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2563
2564 JUMPTO(SLJIT_JUMP, quit);
2565 }
2566
2567 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2568 {
2569 /* May destroy TMP1. */
2570 DEFINE_COMPILER;
2571 struct sljit_jump *jump;
2572
2573 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2574 {
2575 /* The value of -1 must be kept for start_used_ptr! */
2576 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2577 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2578 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2579 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2580 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2581 JUMPHERE(jump);
2582 }
2583 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2584 {
2585 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2586 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2587 JUMPHERE(jump);
2588 }
2589 }
2590
2591 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2592 {
2593 /* Detects if the character has an othercase. */
2594 unsigned int c;
2595
2596 #ifdef SUPPORT_UTF
2597 if (common->utf)
2598 {
2599 GETCHAR(c, cc);
2600 if (c > 127)
2601 {
2602 #ifdef SUPPORT_UCP
2603 return c != UCD_OTHERCASE(c);
2604 #else
2605 return FALSE;
2606 #endif
2607 }
2608 #ifndef COMPILE_PCRE8
2609 return common->fcc[c] != c;
2610 #endif
2611 }
2612 else
2613 #endif
2614 c = *cc;
2615 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2616 }
2617
2618 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2619 {
2620 /* Returns with the othercase. */
2621 #ifdef SUPPORT_UTF
2622 if (common->utf && c > 127)
2623 {
2624 #ifdef SUPPORT_UCP
2625 return UCD_OTHERCASE(c);
2626 #else
2627 return c;
2628 #endif
2629 }
2630 #endif
2631 return TABLE_GET(c, common->fcc, c);
2632 }
2633
2634 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2635 {
2636 /* Detects if the character and its othercase has only 1 bit difference. */
2637 unsigned int c, oc, bit;
2638 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2639 int n;
2640 #endif
2641
2642 #ifdef SUPPORT_UTF
2643 if (common->utf)
2644 {
2645 GETCHAR(c, cc);
2646 if (c <= 127)
2647 oc = common->fcc[c];
2648 else
2649 {
2650 #ifdef SUPPORT_UCP
2651 oc = UCD_OTHERCASE(c);
2652 #else
2653 oc = c;
2654 #endif
2655 }
2656 }
2657 else
2658 {
2659 c = *cc;
2660 oc = TABLE_GET(c, common->fcc, c);
2661 }
2662 #else
2663 c = *cc;
2664 oc = TABLE_GET(c, common->fcc, c);
2665 #endif
2666
2667 SLJIT_ASSERT(c != oc);
2668
2669 bit = c ^ oc;
2670 /* Optimized for English alphabet. */
2671 if (c <= 127 && bit == 0x20)
2672 return (0 << 8) | 0x20;
2673
2674 /* Since c != oc, they must have at least 1 bit difference. */
2675 if (!is_powerof2(bit))
2676 return 0;
2677
2678 #if defined COMPILE_PCRE8
2679
2680 #ifdef SUPPORT_UTF
2681 if (common->utf && c > 127)
2682 {
2683 n = GET_EXTRALEN(*cc);
2684 while ((bit & 0x3f) == 0)
2685 {
2686 n--;
2687 bit >>= 6;
2688 }
2689 return (n << 8) | bit;
2690 }
2691 #endif /* SUPPORT_UTF */
2692 return (0 << 8) | bit;
2693
2694 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2695
2696 #ifdef SUPPORT_UTF
2697 if (common->utf && c > 65535)
2698 {
2699 if (bit >= (1 << 10))
2700 bit >>= 10;
2701 else
2702 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2703 }
2704 #endif /* SUPPORT_UTF */
2705 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2706
2707 #endif /* COMPILE_PCRE[8|16|32] */
2708 }
2709
2710 static void check_partial(compiler_common *common, BOOL force)
2711 {
2712 /* Checks whether a partial matching is occurred. Does not modify registers. */
2713 DEFINE_COMPILER;
2714 struct sljit_jump *jump = NULL;
2715
2716 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2717
2718 if (common->mode == JIT_COMPILE)
2719 return;
2720
2721 if (!force)
2722 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2723 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2724 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2725
2726 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2727 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2728 else
2729 {
2730 if (common->partialmatchlabel != NULL)
2731 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2732 else
2733 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2734 }
2735
2736 if (jump != NULL)
2737 JUMPHERE(jump);
2738 }
2739
2740 static void check_str_end(compiler_common *common, jump_list **end_reached)
2741 {
2742 /* Does not affect registers. Usually used in a tight spot. */
2743 DEFINE_COMPILER;
2744 struct sljit_jump *jump;
2745
2746 if (common->mode == JIT_COMPILE)
2747 {
2748 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2749 return;
2750 }
2751
2752 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2753 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2754 {
2755 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2756 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2757 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2758 }
2759 else
2760 {
2761 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2762 if (common->partialmatchlabel != NULL)
2763 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2764 else
2765 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2766 }
2767 JUMPHERE(jump);
2768 }
2769
2770 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2771 {
2772 DEFINE_COMPILER;
2773 struct sljit_jump *jump;
2774
2775 if (common->mode == JIT_COMPILE)
2776 {
2777 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2778 return;
2779 }
2780
2781 /* Partial matching mode. */
2782 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2783 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2784 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2785 {
2786 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2787 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2788 }
2789 else
2790 {
2791 if (common->partialmatchlabel != NULL)
2792 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2793 else
2794 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2795 }
2796 JUMPHERE(jump);
2797 }
2798
2799 static void peek_char(compiler_common *common, sljit_ui max)
2800 {
2801 /* Reads the character into TMP1, keeps STR_PTR.
2802 Does not check STR_END. TMP2 Destroyed. */
2803 DEFINE_COMPILER;
2804 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2805 struct sljit_jump *jump;
2806 #endif
2807
2808 SLJIT_UNUSED_ARG(max);
2809
2810 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2811 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2812 if (common->utf)
2813 {
2814 if (max < 128) return;
2815
2816 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2817 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2818 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2819 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2820 JUMPHERE(jump);
2821 }
2822 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2823
2824 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2825 if (common->utf)
2826 {
2827 if (max < 0xd800) return;
2828
2829 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2830 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2831 /* TMP2 contains the high surrogate. */
2832 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2833 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2834 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2835 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2836 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2837 JUMPHERE(jump);
2838 }
2839 #endif
2840 }
2841
2842 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2843
2844 static BOOL is_char7_bitset(const sljit_ub *bitset, BOOL nclass)
2845 {
2846 /* Tells whether the character codes below 128 are enough
2847 to determine a match. */
2848 const sljit_ub value = nclass ? 0xff : 0;
2849 const sljit_ub *end = bitset + 32;
2850
2851 bitset += 16;
2852 do
2853 {
2854 if (*bitset++ != value)
2855 return FALSE;
2856 }
2857 while (bitset < end);
2858 return TRUE;
2859 }
2860
2861 static void read_char7_type(compiler_common *common, BOOL full_read)
2862 {
2863 /* Reads the precise character type of a character into TMP1, if the character
2864 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2865 full_read argument tells whether characters above max are accepted or not. */
2866 DEFINE_COMPILER;
2867 struct sljit_jump *jump;
2868
2869 SLJIT_ASSERT(common->utf);
2870
2871 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2872 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2873
2874 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2875
2876 if (full_read)
2877 {
2878 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2879 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2880 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2881 JUMPHERE(jump);
2882 }
2883 }
2884
2885 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2886
2887 static void read_char_range(compiler_common *common, sljit_ui min, sljit_ui max, BOOL update_str_ptr)
2888 {
2889 /* Reads the precise value of a character into TMP1, if the character is
2890 between min and max (c >= min && c <= max). Otherwise it returns with a value
2891 outside the range. Does not check STR_END. */
2892 DEFINE_COMPILER;
2893 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2894 struct sljit_jump *jump;
2895 #endif
2896 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2897 struct sljit_jump *jump2;
2898 #endif
2899
2900 SLJIT_UNUSED_ARG(update_str_ptr);
2901 SLJIT_UNUSED_ARG(min);
2902 SLJIT_UNUSED_ARG(max);
2903 SLJIT_ASSERT(min <= max);
2904
2905 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2906 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2907
2908 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2909 if (common->utf)
2910 {
2911 if (max < 128 && !update_str_ptr) return;
2912
2913 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2914 if (min >= 0x10000)
2915 {
2916 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2917 if (update_str_ptr)
2918 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2919 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2920 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2921 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2922 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2923 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2924 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2925 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2926 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2927 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2928 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2929 if (!update_str_ptr)
2930 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2931 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2932 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2933 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2934 JUMPHERE(jump2);
2935 if (update_str_ptr)
2936 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2937 }
2938 else if (min >= 0x800 && max <= 0xffff)
2939 {
2940 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2941 if (update_str_ptr)
2942 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2943 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2944 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2945 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2946 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2947 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2948 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2949 if (!update_str_ptr)
2950 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2951 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2952 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2953 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2954 JUMPHERE(jump2);
2955 if (update_str_ptr)
2956 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2957 }
2958 else if (max >= 0x800)
2959 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2960 else if (max < 128)
2961 {
2962 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2963 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2964 }
2965 else
2966 {
2967 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2968 if (!update_str_ptr)
2969 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2970 else
2971 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2972 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2973 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2974 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2975 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2976 if (update_str_ptr)
2977 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2978 }
2979 JUMPHERE(jump);
2980 }
2981 #endif
2982
2983 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2984 if (common->utf)
2985 {
2986 if (max >= 0x10000)
2987 {
2988 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2989 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2990 /* TMP2 contains the high surrogate. */
2991 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2992 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2993 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2994 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2995 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2996 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2997 JUMPHERE(jump);
2998 return;
2999 }
3000
3001 if (max < 0xd800 && !update_str_ptr) return;
3002
3003 /* Skip low surrogate if necessary. */
3004 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3005 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3006 if (update_str_ptr)
3007 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3008 if (max >= 0xd800)
3009 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
3010 JUMPHERE(jump);
3011 }
3012 #endif
3013 }
3014
3015 static SLJIT_INLINE void read_char(compiler_common *common)
3016 {
3017 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
3018 }
3019
3020 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
3021 {
3022 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
3023 DEFINE_COMPILER;
3024 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3025 struct sljit_jump *jump;
3026 #endif
3027 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3028 struct sljit_jump *jump2;
3029 #endif
3030
3031 SLJIT_UNUSED_ARG(update_str_ptr);
3032
3033 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
3034 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3035
3036 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3037 if (common->utf)
3038 {
3039 /* This can be an extra read in some situations, but hopefully
3040 it is needed in most cases. */
3041 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3042 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
3043 if (!update_str_ptr)
3044 {
3045 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3046 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3047 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3048 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3049 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3050 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3051 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3052 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3053 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3054 JUMPHERE(jump2);
3055 }
3056 else
3057 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
3058 JUMPHERE(jump);
3059 return;
3060 }
3061 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
3062
3063 #if !defined COMPILE_PCRE8
3064 /* The ctypes array contains only 256 values. */
3065 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3066 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3067 #endif
3068 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3069 #if !defined COMPILE_PCRE8
3070 JUMPHERE(jump);
3071 #endif
3072
3073 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3074 if (common->utf && update_str_ptr)
3075 {
3076 /* Skip low surrogate if necessary. */
3077 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
3078 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3079 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3080 JUMPHERE(jump);
3081 }
3082 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
3083 }
3084
3085 static void skip_char_back(compiler_common *common)
3086 {
3087 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
3088 DEFINE_COMPILER;
3089 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3090 #if defined COMPILE_PCRE8
3091 struct sljit_label *label;
3092
3093 if (common->utf)
3094 {
3095 label = LABEL();
3096 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3097 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3098 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3099 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
3100 return;
3101 }
3102 #elif defined COMPILE_PCRE16
3103 if (common->utf)
3104 {
3105 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3106 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3107 /* Skip low surrogate if necessary. */
3108 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3109 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3110 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3111 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3112 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3113 return;
3114 }
3115 #endif /* COMPILE_PCRE[8|16] */
3116 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3117 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3118 }
3119
3120 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
3121 {
3122 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
3123 DEFINE_COMPILER;
3124 struct sljit_jump *jump;
3125
3126 if (nltype == NLTYPE_ANY)
3127 {
3128 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3129 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
3130 }
3131 else if (nltype == NLTYPE_ANYCRLF)
3132 {
3133 if (jumpifmatch)
3134 {
3135 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
3136 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3137 }
3138 else
3139 {
3140 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3141 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3142 JUMPHERE(jump);
3143 }
3144 }
3145 else
3146 {
3147 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
3148 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3149 }
3150 }
3151
3152 #ifdef SUPPORT_UTF
3153
3154 #if defined COMPILE_PCRE8
3155 static void do_utfreadchar(compiler_common *common)
3156 {
3157 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3158 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
3159 DEFINE_COMPILER;
3160 struct sljit_jump *jump;
3161
3162 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3163 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3164 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3165 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3166 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3167 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3168
3169 /* Searching for the first zero. */
3170 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3171 jump = JUMP(SLJIT_NOT_ZERO);
3172 /* Two byte sequence. */
3173 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3174 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
3175 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3176
3177 JUMPHERE(jump);
3178 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3179 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3180 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3181 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3182 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3183
3184 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3185 jump = JUMP(SLJIT_NOT_ZERO);
3186 /* Three byte sequence. */
3187 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3188 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
3189 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3190
3191 /* Four byte sequence. */
3192 JUMPHERE(jump);
3193 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3194 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3195 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3196 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3197 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3198 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3199 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
3200 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3201 }
3202
/* Emits an out-of-line helper (entered with sljit_emit_fast_enter, left with
sljit_emit_fast_return through RETURN_ADDR) which finishes decoding a
multi-byte UTF-8 character whose first byte is already in TMP1. STR_PTR is
advanced past the remaining bytes of the character and TMP2 is clobbered.
NOTE(review): the "16" suffix suggests the helper is meant for callers which
only care about values below 0x10000 - confirm against the call sites. */
3203 static void do_utfreadchar16(compiler_common *common)
3204 {
3205 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3206 of the character (>= 0xc0). Return value in TMP1. */
3207 DEFINE_COMPILER;
3208 struct sljit_jump *jump;
3209
3210 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first byte & 0x3f) << 6) | (byte at STR_PTR & 0x3f). */
3211 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3212 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3213 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3214 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3215 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3216
3217 /* Searching for the first zero. */
/* 0x800 is bit 0x20 of the first byte after the shift by 6 above. */
3218 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3219 jump = JUMP(SLJIT_NOT_ZERO);
3220 /* Two byte sequence. */
3221 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3222 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3223
3224 JUMPHERE(jump);
/* 0x400 is bit 0x10 of the first byte. The test result is stored in TMP2
(expected to be 1 or 0, see sljit's op_flags) and added to STR_PTR, so one
extra byte is stepped over when the bit is set. That byte is not merged into
the result: in this case TMP1 is not the real code point, but the 0x400 bit
survives the XOR below and becomes 0x10000 after the shift. */
3225 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3226 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3227 /* This code runs only in 8 bit mode. No need to shift the value. */
3228 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3229 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Clear the bit tested first (known to be set here), then append the low
six bits of the byte just loaded. */
3230 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3231 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3232 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3233 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3234 /* Three byte sequence. */
3235 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3236 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3237 }
3238
/* Emits an out-of-line helper (fast enter / fast return through RETURN_ADDR)
which produces the ctypes entry of a multi-byte UTF-8 character. The first
byte is expected in TMP2; the result is left in TMP1 and is 0 for every
character >= 256. STR_PTR is advanced over the rest of the character and
TMP2 is clobbered. */
3239 static void do_utfreadtype8(compiler_common *common)
3240 {
3241 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3242 of the character (>= 0xc0). Return value in TMP1. */
3243 DEFINE_COMPILER;
3244 struct sljit_jump *jump;
3245 struct sljit_jump *compare;
3246
3247 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3248
/* Bit 0x20 set in the first byte: the sequence is longer than two bytes. */
3249 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3250 jump = JUMP(SLJIT_NOT_ZERO);
3251 /* Two byte sequence. */
3252 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3253 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3254 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3255 /* The upper 5 bits are known at this point. */
/* A value above 3 here means the character is (4 << 6) == 256 or more. */
3256 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
/* TMP2 = character value (< 256); use it as index into common->ctypes. */
3257 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3258 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3259 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3260 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3261 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3262
3263 JUMPHERE(compare);
3264 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3265 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3266
3267 /* We only have types for characters less than 256. */
3268 JUMPHERE(jump);
/* TMP2 = utf8_table4[first byte - 0xc0], which is then added to STR_PTR
(presumably the number of bytes following the first one - TODO confirm
against the table definition). */
3269 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3270 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3271 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3272 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3273 }
3274
3275 #endif /* COMPILE_PCRE8 */
3276
3277 #endif /* SUPPORT_UTF */
3278
3279 #ifdef SUPPORT_UCP
3280
3281 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3282 #define UCD_BLOCK_MASK 127
3283 #define UCD_BLOCK_SHIFT 7
3284
/* Emits an out-of-line helper (fast enter / fast return through RETURN_ADDR)
which performs the two stage Unicode property table lookup:
  record index = ucd_stage2[ucd_stage1[c >> 7] * 128 + (c & 127)]
  chartype     = ucd_records[record index].chartype */
3285 static void do_getucd(compiler_common *common)
3286 {
3287 /* Looks up the UCD record of the character passed in TMP1.
3288 Returns chartype in TMP1 and UCD offset in TMP2. */
3289 DEFINE_COMPILER;
3290
/* The shifts used below (7 for the block size, 3 for the record size)
depend on these values. */
3291 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3292
3293 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[TMP1 >> 7] (byte load). */
3294 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3295 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (TMP2 << 7) + (TMP1 & 127): index into ucd_stage2. */
3296 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3297 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3298 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1]; half word load, the index is scaled by 2. */
3299 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3300 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = ucd_records[TMP2].chartype; byte load, the index is scaled by 8. */
3301 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3302 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3303 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3304 }
3305 #endif
3306
/* Emits the code which moves STR_PTR to the next starting position and
returns the label of that code (mainloop), so that other emitted code can
jump back to it. The first pass through the emitted code skips the advance
(see the 'start' jump). When common->match_end_ptr is set, the emitted code
first scans for the end of the first line and stores its position in the
stack slot common->match_end_ptr. 'hascrorlf' disables the special handling
of two unit newlines (newlinecheck) below. */
3307 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
3308 {
3309 DEFINE_COMPILER;
3310 struct sljit_label *mainloop;
3311 struct sljit_label *newlinelabel = NULL;
3312 struct sljit_jump *start;
3313 struct sljit_jump *end = NULL;
3314 struct sljit_jump *end2 = NULL;
3315 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3316 struct sljit_jump *singlechar;
3317 #endif
3318 jump_list *newline = NULL;
3319 BOOL newlinecheck = FALSE;
3320 BOOL readuchar = FALSE;
3321
3322 if (!(hascrorlf || (common->match_end_ptr != 0)) &&
3323 (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3324 newlinecheck = TRUE;
3325
3326 if (common->match_end_ptr != 0)
3327 {
3328 /* Search for the end of the first line. */
/* STR_PTR is used as the scan pointer; its value is kept in TMP3 and
restored once the end of the line is known. */
3329 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3330
3331 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3332 {
/* Two unit fixed newline: compare the units at STR_PTR - 1 and STR_PTR
with the high and low byte of common->newline. */
3333 mainloop = LABEL();
3334 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3335 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3336 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3337 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3338 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3339 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3340 JUMPHERE(end);
3341 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3342 }
3343 else
3344 {
3345 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3346 mainloop = LABEL();
3347 /* Continual stores does not cause data dependency. */
3348 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3349 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3350 check_newlinechar(common, common->nltype, &newline, TRUE);
3351 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3352 JUMPHERE(end);
3353 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
/* Jumps collected by check_newlinechar land here, after the store above. */
3354 set_jumps(newline, LABEL());
3355 }
3356
3357 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3358 }
3359
/* The first entry jumps over the advance code emitted below; the jump is
resolved at the end of this function. */
3360 start = JUMP(SLJIT_JUMP);
3361
3362 if (newlinecheck)
3363 {
/* Reached from the main loop when the current unit equals the high byte
of common->newline: step over it, and also over the next unit when that
one equals the low byte of common->newline. */
3364 newlinelabel = LABEL();
3365 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3366 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3367 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3368 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3369 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3370 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Convert the stored comparison result into a byte offset. */
3371 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3372 #endif
3373 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3374 end2 = JUMP(SLJIT_JUMP);
3375 }
3376
3377 mainloop = LABEL();
3378
3379 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3380 #ifdef SUPPORT_UTF
3381 if (common->utf) readuchar = TRUE;
3382 #endif
3383 if (newlinecheck) readuchar = TRUE;
3384
3385 if (readuchar)
3386 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3387
3388 if (newlinecheck)
3389 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3390
3391 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3392 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3393 #if defined COMPILE_PCRE8
3394 if (common->utf)
3395 {
/* First byte >= 0xc0: additionally advance by utf8_table4[byte - 0xc0]. */
3396 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3397 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3398 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3399 JUMPHERE(singlechar);
3400 }
3401 #elif defined COMPILE_PCRE16
3402 if (common->utf)
3403 {
/* Unit with (unit & 0xfc00) == 0xd800 (first half of a surrogate pair):
advance by one more 16 bit unit, i.e. by (1 << 1) bytes. */
3404 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3405 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3406 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3407 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3408 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3409 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3410 JUMPHERE(singlechar);
3411 }
3412 #endif /* COMPILE_PCRE[8|16] */
3413 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3414 JUMPHERE(start);
3415
3416 if (newlinecheck)
3417 {
3418 JUMPHERE(end);
3419 JUMPHERE(end2);
3420 }
3421
3422 return mainloop;
3423 }
3424
3425 #define MAX_N_CHARS 16
3426 #define MAX_DIFF_CHARS 6
3427
3428 static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
3429 {
3430 pcre_uchar i, len;
3431
3432 len = chars[0];
3433 if (len == 255)
3434 return;
3435
3436 if (len == 0)
3437 {
3438 chars[0] = 1;
3439 chars[1] = chr;
3440 return;
3441 }
3442
3443 for (i = len; i > 0; i--)
3444 if (chars[i] == chr)
3445 return;
3446
3447 if (len >= MAX_DIFF_CHARS - 1)
3448 {
3449 chars[0] = 255;
3450 return;
3451 }
3452
3453 len++;
3454 chars[len] = chr;
3455 chars[0] = len;
3456 }
3457
/* Collects, for the leading character positions of a pattern, the set of
code units which can appear at each position.

Arguments:
  common     compiler state (utf flag, ctypes and fcc tables are read)
  cc         position in the compiled pattern where scanning starts
  chars      output array; every position owns MAX_DIFF_CHARS entries: entry 0
             is the number of stored characters or 255 for "any character",
             the characters themselves follow (see add_prefix_char)
  max_chars  number of positions which may still be filled
  rec_count  shared iteration budget, decremented once per loop iteration

Returns the number of positions recorded by this invocation, or 0 when the
iteration budget is exhausted. Note that the value returned by the recursive
calls below is stored back into max_chars. */
3458 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, pcre_uint32 *rec_count)
3459 {
3460 /* Recursive function, which scans prefix literals. */
3461 BOOL last, any, class, caseless;
3462 int len, repeat, len_save, consumed = 0;
3463 sljit_ui chr;
3464 sljit_ub *bytes, *bytes_end, byte;
3465 pcre_uchar *alternative, *cc_save, *oc;
3466 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3467 pcre_uchar othercase[8];
3468 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3469 pcre_uchar othercase[2];
3470 #else
3471 pcre_uchar othercase[1];
3472 #endif
3473
3474 repeat = 1;
3475 while (TRUE)
3476 {
3477 if (*rec_count == 0)
3478 return 0;
3479 (*rec_count)--;
3480
/* Per item state: 'last' stops the scan after the item, 'any' and 'class'
select the handling after the switch, 'caseless' also records the other
case of a literal. */
3481 last = TRUE;
3482 any = FALSE;
3483 class = FALSE;
3484 caseless = FALSE;
3485
3486 switch (*cc)
3487 {
3488 case OP_CHARI:
3489 caseless = TRUE;
3490 case OP_CHAR:
3491 last = FALSE;
3492 cc++;
3493 break;
3494
3495 case OP_SOD:
3496 case OP_SOM:
3497 case OP_SET_SOM:
3498 case OP_NOT_WORD_BOUNDARY:
3499 case OP_WORD_BOUNDARY:
3500 case OP_EODN:
3501 case OP_EOD:
3502 case OP_CIRC:
3503 case OP_CIRCM:
3504 case OP_DOLL:
3505 case OP_DOLLM:
3506 /* Zero width assertions. */
3507 cc++;
3508 continue;
3509
3510 case OP_ASSERT:
3511 case OP_ASSERT_NOT:
3512 case OP_ASSERTBACK:
3513 case OP_ASSERTBACK_NOT:
3514 cc = bracketend(cc);
3515 continue;
3516
3517 case OP_PLUSI:
3518 case OP_MINPLUSI:
3519 case OP_POSPLUSI:
3520 caseless = TRUE;
3521 case OP_PLUS:
3522 case OP_MINPLUS:
3523 case OP_POSPLUS:
3524 cc++;
3525 break;
3526
3527 case OP_EXACTI:
3528 caseless = TRUE;
3529 case OP_EXACT:
3530 repeat = GET2(cc, 1);
3531 last = FALSE;
3532 cc += 1 + IMM2_SIZE;
3533 break;
3534
3535 case OP_QUERYI:
3536 case OP_MINQUERYI:
3537 case OP_POSQUERYI:
3538 caseless = TRUE;
3539 case OP_QUERY:
3540 case OP_MINQUERY:
3541 case OP_POSQUERY:
3542 len = 1;
3543 cc++;
3544 #ifdef SUPPORT_UTF
3545 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3546 #endif
/* The character is optional: first scan the code which follows it into
the same positions, then record the character itself below. */
3547 max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
3548 if (max_chars == 0)
3549 return consumed;
3550 last = FALSE;
3551 break;
3552
3553 case OP_KET:
3554 cc += 1 + LINK_SIZE;
3555 continue;
3556
3557 case OP_ALT:
3558 cc += GET(cc, 1);
3559 continue;
3560
3561 case OP_ONCE:
3562 case OP_ONCE_NC:
3563 case OP_BRA:
3564 case OP_BRAPOS:
3565 case OP_CBRA:
3566 case OP_CBRAPOS:
/* Scan every further alternative of the group recursively, then continue
with the first alternative in this loop. */
3567 alternative = cc + GET(cc, 1);
3568 while (*alternative == OP_ALT)
3569 {
3570 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
3571 if (max_chars == 0)
3572 return consumed;
3573 alternative += GET(alternative, 1);
3574 }
3575
3576 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3577 cc += IMM2_SIZE;
3578 cc += 1 + LINK_SIZE;
3579 continue;
3580
3581 case OP_CLASS:
3582 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3583 if (common->utf && !is_char7_bitset((const sljit_ub *)(cc + 1), FALSE))
3584 return consumed;
3585 #endif
3586 class = TRUE;
3587 break;
3588
3589 case OP_NCLASS:
3590 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3591 if (common->utf) return consumed;
3592 #endif
3593 class = TRUE;
3594 break;
3595
3596 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3597 case OP_XCLASS:
3598 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3599 if (common->utf) return consumed;
3600 #endif
3601 any = TRUE;
3602 cc += GET(cc, 1);
3603 break;
3604 #endif
3605
3606 case OP_DIGIT:
3607 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3608 if (common->utf && !is_char7_bitset((const sljit_ub *)common->ctypes - cbit_length + cbit_digit, FALSE))
3609 return consumed;
3610 #endif
3611 any = TRUE;
3612 cc++;
3613 break;
3614
3615 case OP_WHITESPACE:
3616 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3617 if (common->utf && !is_char7_bitset((const sljit_ub *)common->ctypes - cbit_length + cbit_space, FALSE))
3618 return consumed;
3619 #endif
3620 any = TRUE;
3621 cc++;
3622 break;
3623
3624 case OP_WORDCHAR:
3625 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3626 if (common->utf && !is_char7_bitset((const sljit_ub *)common->ctypes - cbit_length + cbit_word, FALSE))
3627 return consumed;
3628 #endif
3629 any = TRUE;
3630 cc++;
3631 break;
3632
3633 case OP_NOT:
3634 case OP_NOTI:
3635 cc++;
3636 /* Fall through. */
3637 case OP_NOT_DIGIT:
3638 case OP_NOT_WHITESPACE:
3639 case OP_NOT_WORDCHAR:
3640 case OP_ANY:
3641 case OP_ALLANY:
3642 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3643 if (common->utf) return consumed;
3644 #endif
3645 any = TRUE;
3646 cc++;
3647 break;
3648
3649 #ifdef SUPPORT_UTF
3650 case OP_NOTPROP:
3651 case OP_PROP:
3652 #ifndef COMPILE_PCRE32
3653 if (common->utf) return consumed;
3654 #endif
3655 any = TRUE;
3656 cc += 1 + 2;
3657 break;
3658 #endif
3659
3660 case OP_TYPEEXACT:
/* Only the repeat count is taken here; the type opcode which follows is
processed by the next iteration. */
3661 repeat = GET2(cc, 1);
3662 cc += 1 + IMM2_SIZE;
3663 continue;
3664
3665 case OP_NOTEXACT:
3666 case OP_NOTEXACTI:
3667 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3668 if (common->utf) return consumed;
3669 #endif
3670 any = TRUE;
3671 repeat = GET2(cc, 1);
3672 cc += 1 + IMM2_SIZE + 1;
3673 break;
3674
3675 default:
3676 return consumed;
3677 }
3678
3679 if (any)
3680 {
/* The item is not tracked precisely: mark 'repeat' positions (at least
one) as "any character". */
3681 do
3682 {
3683 chars[0] = 255;
3684
3685 consumed++;
3686 if (--max_chars == 0)
3687 return consumed;
3688 chars += MAX_DIFF_CHARS;
3689 }
3690 while (--repeat > 0);
3691
3692 repeat = 1;
3693 continue;
3694 }
3695
3696 if (class)
3697 {
/* The 32 byte bitmap of the class follows the opcode; cc is moved to the
item after the bitmap, which may be a repeat opcode. */
3698 bytes = (sljit_ub*) (cc + 1);
3699 cc += 1 + 32 / sizeof(pcre_uchar);
3700
3701 switch (*cc)
3702 {
3703 case OP_CRSTAR:
3704 case OP_CRMINSTAR:
3705 case OP_CRPOSSTAR:
3706 case OP_CRQUERY:
3707 case OP_CRMINQUERY:
3708 case OP_CRPOSQUERY:
/* The class may match nothing: scan the code after the repeat opcode
into the same positions first. */
3709 max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
3710 if (max_chars == 0)
3711 return consumed;
3712 break;
3713
3714 default:
3715 case OP_CRPLUS:
3716 case OP_CRMINPLUS:
3717 case OP_CRPOSPLUS:
3718 break;
3719
3720 case OP_CRRANGE:
3721 case OP_CRMINRANGE:
3722 case OP_CRPOSRANGE:
3723 repeat = GET2(cc, 1);
3724 if (repeat <= 0)
3725 return consumed;
3726 break;
3727 }
3728
3729 do
3730 {
/* Bit 7 of the last bitmap byte belongs to character 255. */
3731 if (bytes[31] & 0x80)
3732 chars[0] = 255;
3733 else if (chars[0] != 255)
3734 {
/* Add every character whose bit is set; stop early once the set
degrades to "any character". chr is kept a multiple of 8 at the start
of each bitmap byte. */
3735 bytes_end = bytes + 32;
3736 chr = 0;
3737 do
3738 {
3739 byte = *bytes++;
3740 SLJIT_ASSERT((chr & 0x7) == 0);
3741 if (byte == 0)
3742 chr += 8;
3743 else
3744 {
3745 do
3746 {
3747 if ((byte & 0x1) != 0)
3748 add_prefix_char(chr, chars);
3749 byte >>= 1;
3750 chr++;
3751 }
3752 while (byte != 0);
3753 chr = (chr + 7) & ~7;
3754 }
3755 }
3756 while (chars[0] != 255 && bytes < bytes_end);
/* Rewind to the start of the bitmap for the next repetition. */
3757 bytes = bytes_end - 32;
3758 }
3759
3760 consumed++;
3761 if (--max_chars == 0)
3762 return consumed;
3763 chars += MAX_DIFF_CHARS;
3764 }
3765 while (--repeat > 0);
3766
3767 switch (*cc)
3768 {
3769 case OP_CRSTAR:
3770 case OP_CRMINSTAR:
3771 case OP_CRPOSSTAR:
3772 return consumed;
3773
3774 case OP_CRQUERY:
3775 case OP_CRMINQUERY:
3776 case OP_CRPOSQUERY:
3777 cc++;
3778 break;
3779
3780 case OP_CRRANGE:
3781 case OP_CRMINRANGE:
3782 case OP_CRPOSRANGE:
/* Scanning continues only when both counts of the range are equal. */
3783 if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
3784 return consumed;
3785 cc += 1 + 2 * IMM2_SIZE;
3786 break;
3787 }
3788
3789 repeat = 1;
3790 continue;
3791 }
3792
/* Literal character at cc; len is its length in code units. */
3793 len = 1;
3794 #ifdef SUPPORT_UTF
3795 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3796 #endif
3797
3798 if (caseless && char_has_othercase(common, cc))
3799 {
3800 #ifdef SUPPORT_UTF
3801 if (common->utf)
3802 {
/* Give up unless the other case is encoded in the same number of code
units as the character itself. */
3803 GETCHAR(chr, cc);
3804 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3805 return consumed;
3806 }
3807 else
3808 #endif
3809 {
3810 chr = *cc;
3811 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3812 }
3813 }
3814 else
3815 {
3816 caseless = FALSE;
3817 othercase[0] = 0; /* Stops compiler warning - PH */
3818 }
3819
/* Record every code unit of the character (and of its other case) at
consecutive positions, 'repeat' times. */
3820 len_save = len;
3821 cc_save = cc;
3822 while (TRUE)
3823 {
3824 oc = othercase;
3825 do
3826 {
3827 chr = *cc;
3828 add_prefix_char(*cc, chars);
3829
3830 if (caseless)
3831 add_prefix_char(*oc, chars);
3832
3833 len--;
3834 consumed++;
3835 if (--max_chars == 0)
3836 return consumed;
3837 chars += MAX_DIFF_CHARS;
3838 cc++;
3839 oc++;
3840 }
3841 while (len > 0);
3842
3843 if (--repeat == 0)
3844 break;
3845
3846 len = len_save;
3847 cc = cc_save;
3848 }
3849
3850 repeat = 1;
3851 if (last)
3852 return consumed;
3853 }
3854 }
3855
3856 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
3857
3858 static sljit_si character_to_int32(pcre_uchar chr)
3859 {
3860 sljit_si value = (sljit_si)chr;
3861 #if defined COMPILE_PCRE8
3862 #define SSE2_COMPARE_TYPE_INDEX 0
3863 return (value << 24) | (value << 16) | (value << 8) | value;
3864 #elif defined COMPILE_PCRE16
3865 #define SSE2_COMPARE_TYPE_INDEX 1
3866 return (value << 16) | value;
3867 #elif defined COMPILE_PCRE32
3868 #define SSE2_COMPARE_TYPE_INDEX 2
3869 return value;
3870 #else
3871 #error "Unsupported unit width"
3872 #endif
3873 }
3874
/* Emits an SSE2 based search which moves STR_PTR forward to the first code
unit equal to char1 or char2. The emitted code processes the subject in 16
byte aligned blocks. On exit STR_PTR either points to the unit found or is
>= STR_END; it may be beyond STR_END when the hit lies in the last block, so
the caller has to compare STR_PTR with STR_END afterwards (see
fast_forward_first_char2). TMP1 and TMP2 are clobbered, TMP3 as well when two
compares per block are needed.

Three variants are generated:
  char1 == char2          one compare per block
  bit != 0                char1 ^ char2 has a single bit set: that bit is ORed
                          into the data, then one compare with (char1 | bit)
  load_twice              the block is loaded twice, compared with char1 and
                          char2, and the two result masks are ORed

XMM register usage, as encoded in the ModRM bytes (0xc0 | reg << 3 | rm):
xmm0 and xmm1 hold the data, xmm2 the first pattern, xmm3 the second pattern
or the bit to be ORed in. */
3875 static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
3876 {
3877 DEFINE_COMPILER;
3878 struct sljit_label *start;
3879 struct sljit_jump *quit[3];
3880 struct sljit_jump *nomatch;
3881 sljit_ub instruction[8];
3882 sljit_si tmp1_ind = sljit_get_register_index(TMP1);
3883 sljit_si tmp2_ind = sljit_get_register_index(TMP2);
3884 sljit_si str_ptr_ind = sljit_get_register_index(STR_PTR);
3885 BOOL load_twice = FALSE;
3886 pcre_uchar bit;
3887
3888 bit = char1 ^ char2;
3889 if (!is_powerof2(bit))
3890 bit = 0;
3891
3892 if ((char1 != char2) && bit == 0)
3893 load_twice = TRUE;
3894
3895 quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3896
3897 /* First part (unaligned start) */
3898
3899 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
3900
3901 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
3902
3903 /* MOVD xmm, r/m32 */
/* xmm2 = TMP1 */
3904 instruction[0] = 0x66;
3905 instruction[1] = 0x0f;
3906 instruction[2] = 0x6e;
3907 instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
3908 sljit_emit_op_custom(compiler, instruction, 4);
3909
3910 if (char1 != char2)
3911 {
3912 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
3913
3914 /* MOVD xmm, r/m32 */
/* xmm3 = TMP1 */
3915 instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
3916 sljit_emit_op_custom(compiler, instruction, 4);
3917 }
3918
3919 /* PSHUFD xmm1, xmm2/m128, imm8 */
/* Broadcast the low dword of xmm2 to all four dwords (imm8 == 0). */
3920 instruction[2] = 0x70;
3921 instruction[3] = 0xc0 | (2 << 3) | 2;
3922 instruction[4] = 0;
3923 sljit_emit_op_custom(compiler, instruction, 5);
3924
3925 if (char1 != char2)
3926 {
3927 /* PSHUFD xmm1, xmm2/m128, imm8 */
/* Same broadcast for xmm3. */
3928 instruction[3] = 0xc0 | (3 << 3) | 3;
3929 instruction[4] = 0;
3930 sljit_emit_op_custom(compiler, instruction, 5);
3931 }
3932
/* TMP2 = misalignment of STR_PTR in bytes; STR_PTR is rounded down to a
16 byte boundary for the aligned loads. */
3933 OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
3934 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
3935
3936 /* MOVDQA xmm1, xmm2/m128 */
/* xmm0 = [STR_PTR], and xmm1 = [STR_PTR] when load_twice is set. */
3937 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3938
3939 if (str_ptr_ind < 8)
3940 {
3941 instruction[2] = 0x6f;
3942 instruction[3] = (0 << 3) | str_ptr_ind;
3943 sljit_emit_op_custom(compiler, instruction, 4);
3944
3945 if (load_twice)
3946 {
3947 instruction[3] = (1 << 3) | str_ptr_ind;
3948 sljit_emit_op_custom(compiler, instruction, 4);
3949 }
3950 }
3951 else
3952 {
/* Register index >= 8: a 0x41 prefix byte (REX.B) is inserted and only
the low three bits of the index go into the ModRM byte. */
3953 instruction[1] = 0x41;
3954 instruction[2] = 0x0f;
3955 instruction[3] = 0x6f;
3956 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
3957 sljit_emit_op_custom(compiler, instruction, 5);
3958
3959 if (load_twice)
3960 {
/* NOTE(review): str_ptr_ind is not masked with 0x7 here, unlike above.
In this branch str_ptr_ind >= 8, so (assuming indexes stay below 16) its
bit 3 coincides with (1 << 3) and the byte is the same as with the mask;
masking explicitly would be clearer. */
3961 instruction[4] = (1 << 3) | str_ptr_ind;
3962 sljit_emit_op_custom(compiler, instruction, 5);
3963 }
/* Restore the two byte opcode prefix layout used by the code below. */
3964 instruction[1] = 0x0f;
3965 }
3966
3967 #else
3968
3969 instruction[2] = 0x6f;
3970 instruction[3] = (0 << 3) | str_ptr_ind;
3971 sljit_emit_op_custom(compiler, instruction, 4);
3972
3973 if (load_twice)
3974 {
3975 instruction[3] = (1 << 3) | str_ptr_ind;
3976 sljit_emit_op_custom(compiler, instruction, 4);
3977 }
3978
3979 #endif
3980
3981 if (bit != 0)
3982 {
3983 /* POR xmm1, xmm2/m128 */
/* xmm0 |= xmm3 */
3984 instruction[2] = 0xeb;
3985 instruction[3] = 0xc0 | (0 << 3) | 3;
3986 sljit_emit_op_custom(compiler, instruction, 4);
3987 }
3988
3989 /* PCMPEQB/W/D xmm1, xmm2/m128 */
/* xmm0 = (xmm0 == xmm2), per code unit. */
3990 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
3991 instruction[3] = 0xc0 | (0 << 3) | 2;
3992 sljit_emit_op_custom(compiler, instruction, 4);
3993
3994 if (load_twice)
3995 {
/* xmm1 = (xmm1 == xmm3) */
3996 instruction[3] = 0xc0 | (1 << 3) | 3;
3997 sljit_emit_op_custom(compiler, instruction, 4);
3998 }
3999
4000 /* PMOVMSKB reg, xmm */
/* TMP1 = byte mask of xmm0 */
4001 instruction[2] = 0xd7;
4002 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4003 sljit_emit_op_custom(compiler, instruction, 4);
4004
4005 if (load_twice)
4006 {
/* TMP2 receives the byte mask of xmm1, so the misalignment it holds is
parked in TMP3 and restored after the two masks are combined. */
4007 OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
4008 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4009 sljit_emit_op_custom(compiler, instruction, 4);
4010
4011 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4012 OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
4013 }
4014
/* Drop the mask bits of the bytes which precede the original STR_PTR. */
4015 OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
4016
4017 /* BSF r32, r/m32 */
/* TMP1 = index of the lowest set bit of TMP1; the jump emitted below is
taken on the zero flag, i.e. when the mask was empty. */
4018 instruction[0] = 0x0f;
4019 instruction[1] = 0xbc;
4020 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4021 sljit_emit_op_custom(compiler, instruction, 3);
4022
4023 nomatch = JUMP(SLJIT_ZERO);
4024
/* Hit in the first block: STR_PTR = aligned address + misalignment + index. */
4025 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4026 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4027 quit[1] = JUMP(SLJIT_JUMP);
4028
4029 JUMPHERE(nomatch);
4030
4031 start = LABEL();
4032 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
4033 quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4034
4035 /* Second part (aligned) */
4036
/* Same load / compare / mask sequence as above, now without any
misalignment handling. */
4037 instruction[0] = 0x66;
4038 instruction[1] = 0x0f;
4039
4040 /* MOVDQA xmm1, xmm2/m128 */
4041 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4042
4043 if (str_ptr_ind < 8)
4044 {
4045 instruction[2] = 0x6f;
4046 instruction[3] = (0 << 3) | str_ptr_ind;
4047 sljit_emit_op_custom(compiler, instruction, 4);
4048
4049 if (load_twice)
4050 {
4051 instruction[3] = (1 << 3) | str_ptr_ind;
4052 sljit_emit_op_custom(compiler, instruction, 4);
4053 }
4054 }
4055 else
4056 {
4057 instruction[1] = 0x41;
4058 instruction[2] = 0x0f;
4059 instruction[3] = 0x6f;
4060 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
4061 sljit_emit_op_custom(compiler, instruction, 5);
4062
4063 if (load_twice)
4064 {
/* NOTE(review): unmasked str_ptr_ind, see the same line in the first part. */
4065 instruction[4] = (1 << 3) | str_ptr_ind;
4066 sljit_emit_op_custom(compiler, instruction, 5);
4067 }
4068 instruction[1] = 0x0f;
4069 }
4070
4071 #else
4072
4073 instruction[2] = 0x6f;
4074 instruction[3] = (0 << 3) | str_ptr_ind;
4075 sljit_emit_op_custom(compiler, instruction, 4);
4076
4077 if (load_twice)
4078 {
4079 instruction[3] = (1 << 3) | str_ptr_ind;
4080 sljit_emit_op_custom(compiler, instruction, 4);
4081 }
4082
4083 #endif
4084
4085 if (bit != 0)
4086 {
4087 /* POR xmm1, xmm2/m128 */
4088 instruction[2] = 0xeb;
4089 instruction[3] = 0xc0 | (0 << 3) | 3;
4090 sljit_emit_op_custom(compiler, instruction, 4);
4091 }
4092
4093 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4094 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4095 instruction[3] = 0xc0 | (0 << 3) | 2;
4096 sljit_emit_op_custom(compiler, instruction, 4);
4097
4098 if (load_twice)
4099 {
4100 instruction[3] = 0xc0 | (1 << 3) | 3;
4101 sljit_emit_op_custom(compiler, instruction, 4);
4102 }
4103
4104 /* PMOVMSKB reg, xmm */
4105 instruction[2] = 0xd7;
4106 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4107 sljit_emit_op_custom(compiler, instruction, 4);
4108
4109 if (load_twice)
4110 {
/* TMP2 no longer holds anything needed, so it is not saved here. */
4111 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4112 sljit_emit_op_custom(compiler, instruction, 4);
4113
4114 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4115 }
4116
4117 /* BSF r32, r/m32 */
4118 instruction[0] = 0x0f;
4119 instruction[1] = 0xbc;
4120 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4121 sljit_emit_op_custom(compiler, instruction, 3);
4122
/* Empty mask: continue with the next 16 byte block. */
4123 JUMPTO(SLJIT_ZERO, start);
4124
4125 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4126
/* Common exit of all three quit jumps. */
4127 start = LABEL();
4128 SET_LABEL(quit[0], start);
4129 SET_LABEL(quit[1], start);
4130 SET_LABEL(quit[2], start);
4131 }
4132
4133 #undef SSE2_COMPARE_TYPE_INDEX
4134
4135 #endif
4136
4137 static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_si offset)
4138 {
4139 DEFINE_COMPILER;
4140 struct sljit_label *start;
4141 struct sljit_jump *quit;
4142 struct sljit_jump *found;
4143 pcre_uchar mask;
4144 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4145 struct sljit_label *utf_start = NULL;
4146 struct sljit_jump *utf_quit = NULL;
4147 #endif
4148 BOOL has_match_end_ptr = (common->match_end_ptr != 0);
4149
4150 if (offset > 0)
4151 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4152
4153 if (has_match_end_ptr)
4154 {
4155 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4156
4157 OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
4158 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
4159 if (sljit_x86_is_cmov_available())
4160 {
4161 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
4162 sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
4163 }
4164 #endif
4165 {
4166 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
4167 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4168 JUMPHERE(quit);
4169 }
4170 }
4171
4172 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4173 if (common->utf && offset > 0)
4174 utf_start = LABEL();
4175 #endif
4176
4177 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
4178
4179 /* SSE2 accelerated first character search. */
4180
4181 if (sljit_x86_is_sse2_available())
4182 {
4183 fast_forward_first_char2_sse2(common, char1, char2);
4184
4185 SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
4186 if (common->mode == JIT_COMPILE)
4187 {
4188 /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
4189 SLJIT_ASSERT(common->forced_quit_label == NULL);
4190 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
4191 add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4192
4193 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4194 if (common->utf && offset > 0)
4195 {
4196 SLJIT_ASSERT(common->mode == JIT_COMPILE);
4197
4198 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4199 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4200 #if defined COMPILE_PCRE8
4201 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4202 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4203 #elif defined COMPILE_PCRE16
4204 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4205 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4206 #else
4207 #error "Unknown code width"
4208 #endif
4209 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4210 }
4211 #endif
4212
4213 if (offset > 0)
4214 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4215 }
4216 else if (sljit_x86_is_cmov_available())
4217 {
4218 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
4219 sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end_ptr ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end_ptr ? common->match_end_ptr : 0);
4220 }
4221 else
4222 {
4223 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4224 OP1(SLJIT_MOV, STR_PTR, 0, has_match_end_ptr ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_match_end_ptr ? common->match_end_ptr : 0);
4225 JUMPHERE(quit);
4226 }
4227
4228 if (has_match_end_ptr)
4229 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4230 return;
4231 }
4232
4233 #endif
4234
4235 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4236
4237 start = LABEL();
4238 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4239
4240 if (char1 == char2)
4241 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
4242 else
4243 {
4244 mask = char1 ^ char2;
4245 if (is_powerof2(mask))
4246 {
4247 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4248 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
4249 }
4250 else
4251 {
4252 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
4253 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4254 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
4255 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4256 found = JUMP(SLJIT_NOT_ZERO);
4257 }
4258 }
4259
4260 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4261 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
4262
4263 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4264 if (common->utf && offset > 0)
4265 utf_quit = JUMP(SLJIT_JUMP);
4266 #endif
4267
4268 JUMPHERE(found);
4269
4270 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4271 if (common->utf && offset > 0)
4272 {
4273 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4274 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4275 #if defined COMPILE_PCRE8
4276 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4277 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4278 #elif defined COMPILE_PCRE16
4279 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4280 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4281 #else
4282 #error "Unknown code width"
4283 #endif
4284 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4285 JUMPHERE(utf_quit);
4286 }
4287 #endif
4288
4289 JUMPHERE(quit);
4290
4291 if (has_match_end_ptr)
4292 {
4293 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4294 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4295 if (offset > 0)
4296 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4297 JUMPHERE(quit);
4298 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4299 }
4300
4301 if (offset > 0)
4302 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4303 }
4304
4305 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
4306 {
4307 DEFINE_COMPILER;
4308 struct sljit_label *start;
4309 struct sljit_jump *quit;
4310 struct sljit_jump *match;
4311 /* bytes[0] represent the number of characters between 0
4312 and MAX_N_BYTES - 1, 255 represents any character. */
4313 pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
4314 sljit_si offset;
4315 pcre_uchar mask;
4316 pcre_uchar *char_set, *char_set_end;
4317 int i, max, from;
4318 int range_right = -1, range_len;
4319 sljit_ub *update_table = NULL;
4320 BOOL in_range;
4321 sljit_ui rec_count;
4322
4323 for (i = 0; i < MAX_N_CHARS; i++)
4324 chars[i * MAX_DIFF_CHARS] = 0;
4325
4326 rec_count = 10000;
4327 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
4328
4329 if (max < 1)
4330 return FALSE;
4331
4332 in_range = FALSE;
4333 /* Prevent compiler "uninitialized" warning */
4334 from = 0;
4335 range_len = 4 /* minimum length */ - 1;
4336 for (i = 0; i <= max; i++)
4337 {
4338 if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
4339 {
4340 range_len = i - from;
4341 range_right = i - 1;
4342 }
4343
4344 if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
4345 {
4346 SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
4347 if (!in_range)
4348 {
4349 in_range = TRUE;
4350 from = i;
4351 }
4352 }
4353 else
4354 in_range = FALSE;
4355 }
4356
4357 if (range_right >= 0)
4358 {
4359 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
4360 if (update_table == NULL)
4361 return TRUE;
4362 memset(update_table, IN_UCHARS(range_len), 256);
4363
4364 for (i = 0; i < range_len; i++)
4365 {
4366 char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
4367 SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
4368 char_set_end = char_set + char_set[0];
4369 char_set++;
4370 while (char_set <= char_set_end)
4371 {
4372 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
4373 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
4374 char_set++;
4375 }
4376 }
4377 }
4378
4379 offset = -1;
4380 /* Scan forward. */
4381 for (i = 0; i < max; i++)
4382 {
4383 if (offset == -1)
4384 {
4385 if (chars[i * MAX_DIFF_CHARS] <= 2)
4386 offset = i;
4387 }
4388 else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
4389 {
4390 if (chars[i * MAX_DIFF_CHARS] == 1)
4391 offset = i;
4392 else
4393 {
4394 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4395 if (!is_powerof2(mask))
4396 {
4397 mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
4398 if (is_powerof2(mask))
4399 offset = i;
4400 }
4401 }
4402 }
4403 }
4404
4405 if (range_right < 0)
4406 {
4407 if (offset < 0)
4408 return FALSE;
4409 SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
4410 /* Works regardless the value is 1 or 2. */
4411 mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
4412 fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
4413 return TRUE;
4414 }
4415
4416 if (range_right == offset)
4417 offset = -1;
4418
4419 SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
4420
4421 max -= 1;
4422 SLJIT_ASSERT(max > 0);
4423 if (common->match_end_ptr != 0)
4424 {
4425 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4426 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4427 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4428 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4429 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4430 JUMPHERE(quit);
4431 }
4432 else
4433 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4434
4435 SLJIT_ASSERT(range_right >= 0);
4436
4437 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4438 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4439 #endif
4440
4441 start = LABEL();
4442 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4443
4444 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4445 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4446 #else
4447 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4448 #endif
4449
4450 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4451 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4452 #else
4453 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4454 #endif
4455 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4456 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4457
4458 if (offset >= 0)
4459 {
4460 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
4461 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4462
4463 if (chars[offset * MAX_DIFF_CHARS] == 1)
4464 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
4465 else
4466 {
4467 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4468 if (is_powerof2(mask))
4469 {
4470 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4471 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
4472 }
4473 else
4474 {
4475 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
4476 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
4477 JUMPHERE(match);
4478 }
4479 }
4480 }
4481
4482 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4483 if (common->utf && offset != 0)
4484 {
4485 if (offset < 0)
4486 {
4487 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4488 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4489 }
4490 else
4491 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4492 #if defined COMPILE_PCRE8
4493 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4494 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
4495 #elif defined COMPILE_PCRE16
4496 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4497 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
4498 #else
4499 #error "Unknown code width"
4500 #endif
4501 if (offset < 0)
4502 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4503 }
4504 #endif
4505
4506 if (offset >= 0)
4507 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4508
4509 JUMPHERE(quit);
4510
4511 if (common->match_end_ptr != 0)
4512 {
4513 if (range_right >= 0)
4514 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4515 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4516 if (range_right >= 0)
4517 {
4518 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4519 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4520 JUMPHERE(quit);
4521 }
4522 }
4523 else
4524 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4525 return TRUE;
4526 }
4527
4528 #undef MAX_N_CHARS
4529 #undef MAX_DIFF_CHARS
4530
4531 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4532 {
4533 pcre_uchar oc;
4534
4535 oc = first_char;
4536 if (caseless)
4537 {
4538 oc = TABLE_GET(first_char, common->fcc, first_char);
4539 #if defined SUPPORT_UTF && !defined COMPILE_PCRE8
4540 if (first_char > 127 && common->utf)
4541 oc = UCD_OTHERCASE(first_char);
4542 #endif
4543 }
4544
4545 fast_forward_first_char2(common, first_char, oc, 0);
4546 }
4547
/* Emits code that advances STR_PTR until it stands just after a newline
sequence or until STR_END is reached. Two code paths are generated: a
hand-written loop for a fixed two-character newline, and a general loop built
on read_char_range / check_newlinechar (whose exact contracts are defined
elsewhere in the file). When common->match_end_ptr is in use, STR_END is
temporarily replaced by the value stored in that stack slot. */
4548 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4549 {
4550 DEFINE_COMPILER;
4551 struct sljit_label *loop;
4552 struct sljit_jump *lastchar;
4553 struct sljit_jump *firstchar;
4554 struct sljit_jump *quit;
4555 struct sljit_jump *foundcr = NULL;
4556 struct sljit_jump *notfoundnl;
4557 jump_list *newline = NULL;
4558
4559 if (common->match_end_ptr != 0)
4560 {
/* Keep the real STR_END in TMP3; it is restored before returning. */
4561 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4562 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4563 }
4564
/* Fixed newline made of two characters (both packed into common->newline). */
4565 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4566 {
4567 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4568 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4569 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4570 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
/* Nothing to do when STR_PTR is not beyond the "str" argument. */
4571 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4572
/* Step STR_PTR back by one character, but only if it is at least two
characters after "begin": TMP2 becomes 0 or 1, scaled to the character size. */
4573 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4574 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4575 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
4576 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4577 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4578 #endif
4579 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4580
/* Advance one character at a time until the two characters just before
STR_PTR are the newline pair, or STR_END is reached. */
4581 loop = LABEL();
4582 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4583 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4584 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4585 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4586 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4587 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4588
4589 JUMPHERE(quit);
4590 JUMPHERE(firstchar);
4591 JUMPHERE(lastchar);
4592
4593 if (common->match_end_ptr != 0)
4594 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4595 return;
4596 }
4597
/* General case: any other newline convention. */
4598 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4599 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4600 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4601 skip_char_back(common);
4602
4603 loop = LABEL();
/* The loop entry is recorded in common so that other code can jump to it. */
4604 common->ff_newline_shortcut = loop;
4605
4606 read_char_range(common, common->nlmin, common->nlmax, TRUE);
4607 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* CR needs special treatment in these modes because it may be followed by LF. */
4608 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4609 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4610 check_newlinechar(common, common->nltype, &newline, FALSE);
/* Every jump collected by check_newlinechar continues the search loop. */
4611 set_jumps(newline, loop);
4612
4613 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4614 {
4615 quit = JUMP(SLJIT_JUMP);
4616 JUMPHERE(foundcr);
/* After a CR: if the next character exists and is NL, skip it as well.
TMP1 becomes 0 or 1 and is scaled to the character size before being added. */
4617 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4618 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4619 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4620 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4621 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4622 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4623 #endif
4624 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4625 JUMPHERE(notfoundnl);
4626 JUMPHERE(quit);
4627 }
4628 JUMPHERE(lastchar);
4629 JUMPHERE(firstchar);
4630
/* Restore the real end of the subject saved at the top. */
4631 if (common->match_end_ptr != 0)
4632 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4633 }
4634
/* Forward declaration: the definition follows further below, but
fast_forward_start_bits already needs it. */
4635 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4636
/* Emits a loop that advances STR_PTR until the character at STR_PTR has its
bit set in the start_bits bitmap, or until STR_END is reached. The test is
generated by check_class_ranges when the bitmap is simple enough; otherwise a
direct bitmap lookup is emitted. When common->match_end_ptr is in use,
STR_END is temporarily replaced by the value stored in that stack slot. */
4637 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_ub *start_bits)
4638 {
4639 DEFINE_COMPILER;
4640 struct sljit_label *start;
4641 struct sljit_jump *quit;
4642 struct sljit_jump *found = NULL;
4643 jump_list *matches = NULL;
4644 #ifndef COMPILE_PCRE8
4645 struct sljit_jump *jump;
4646 #endif
4647
4648 if (common->match_end_ptr != 0)
4649 {
/* RETURN_ADDR holds the real STR_END until the end of this sequence. */
4650 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4651 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4652 }
4653
4654 start = LABEL();
4655 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4656 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4657 #ifdef SUPPORT_UTF
/* The membership test below overwrites TMP1, but the UTF skipping code
further down needs the original code unit, so keep a copy in TMP3. */
4658 if (common->utf)
4659 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4660 #endif
4661
/* The bit of code point 255 (top bit of byte 31) is passed as nclass, and the
test is inverted: the jumps gathered in "matches" are bound after the loop. */
4662 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4663 {
/* Fallback: direct bitmap lookup. */
4664 #ifndef COMPILE_PCRE8
/* The bitmap covers only 0..255: larger characters are treated as 255. */
4665 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4666 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4667 JUMPHERE(jump);
4668 #endif
/* TMP2 = bit index (c & 7), TMP1 = start_bits[c >> 3]; test bit 1 << TMP2. */
4669 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4670 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4671 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4672 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4673 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4674 found = JUMP(SLJIT_NOT_ZERO);
4675 }
4676
/* Not a possible starting character: move to the next one. */
4677 #ifdef SUPPORT_UTF
4678 if (common->utf)
4679 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4680 #endif
4681 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4682 #ifdef SUPPORT_UTF
4683 #if defined COMPILE_PCRE8
4684 if (common->utf)
4685 {
/* For code units of 0xc0 and above, add the extra length that is read from
utf8_table4 (indexed by the code unit minus 0xc0). */
4686 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4687 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4688 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4689 }
4690 #elif defined COMPILE_PCRE16
4691 if (common->utf)
4692 {
/* When (unit & 0xfc00) == 0xd800, skip one more 16-bit unit: TMP1 becomes
0 or 1 and is shifted left by one to get a byte offset of 0 or 2. */
4693 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4694 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4695 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4696 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4697 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4698 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4699 }
4700 #endif /* COMPILE_PCRE[8|16] */
4701 #endif /* SUPPORT_UTF */
4702 JUMPTO(SLJIT_JUMP, start);
/* All exits of the loop (found, matched, end of subject) meet here. */
4703 if (found != NULL)
4704 JUMPHERE(found);
4705 if (matches != NULL)
4706 set_jumps(matches, LABEL());
4707 JUMPHERE(quit);
4708
4709 if (common->match_end_ptr != 0)
4710 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4711 }
4712
/* Emits a forward scan for the required character req_char (or its other
case when caseless is set) between STR_PTR and STR_END.

The scan is skipped when STR_PTR + REQ_BYTE_MAX is still below STR_END, or
when STR_PTR is below the position cached in the common->req_char_ptr stack
slot. On success the position where the character was found is stored in that
slot. When has_firstchar is TRUE the scan starts one character after STR_PTR.

Returns: the jump that is taken when STR_END is reached without finding the
character; the caller has to bind it. */
4713 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4714 {
4715 DEFINE_COMPILER;
4716 struct sljit_label *loop;
4717 struct sljit_jump *toolong;
4718 struct sljit_jump *alreadyfound;
4719 struct sljit_jump *found;
4720 struct sljit_jump *foundoc = NULL;
4721 struct sljit_jump *notfound;
4722 pcre_uint32 oc, bit;
4723
4724 SLJIT_ASSERT(common->req_char_ptr != 0);
4725 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4726 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4727 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4728 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4729
/* TMP1 is the scanning pointer from here on. */
4730 if (has_firstchar)
4731 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4732 else
4733 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4734
4735 loop = LABEL();
4736 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4737
4738 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Work out the other-case character at compile time. */
4739 oc = req_char;
4740 if (caseless)
4741 {
4742 oc = TABLE_GET(req_char, common->fcc, req_char);
4743 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4744 if (req_char > 127 && common->utf)
4745 oc = UCD_OTHERCASE(req_char);
4746 #endif
4747 }
4748 if (req_char == oc)
4749 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4750 else
4751 {
4752 bit = req_char ^ oc;
4753 if (is_powerof2(bit))
4754 {
/* The two cases differ in a single bit: force that bit on and compare once. */
4755 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4756 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4757 }
4758 else
4759 {
/* Otherwise both characters are compared separately. */
4760 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4761 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4762 }
4763 }
4764 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4765 JUMPTO(SLJIT_JUMP, loop);
4766
4767 JUMPHERE(found);
4768 if (foundoc)
4769 JUMPHERE(foundoc);
/* Remember where the character was found. */
4770 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4771 JUMPHERE(alreadyfound);
4772 JUMPHERE(toolong);
4773 return notfound;
4774 }
4775
/* Emits a fast-call helper routine (return address kept in RETURN_ADDR) that
walks the records starting at STACK_TOP and copies the saved words back into
the local area. Each record starts with a control word:
  > 0  offset from the local base; the next two stack words are copied to
       that location, and the record is three words long
  < 0  negated offset from the local base; the next single stack word is
       copied to that location, and the record is two words long
  = 0  end of the records: the routine returns
TMP1 is the walking pointer, TMP2 the control word / target address and TMP3
the local base address. */
4776 static void do_revertframes(compiler_common *common)
4777 {
4778 DEFINE_COMPILER;
4779 struct sljit_jump *jump;
4780 struct sljit_label *mainloop;
4781
4782 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4783 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4784 GET_LOCAL_BASE(TMP3, 0, 0);
4785
4786 /* Drop frames until we reach STACK_TOP. */
4787 mainloop = LABEL();
4788 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
4789 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4790 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4791
/* Positive control word: restore two consecutive words. */
4792 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4793 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4794 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4795 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4796 JUMPTO(SLJIT_JUMP, mainloop);
4797
4798 JUMPHERE(jump);
/* No instruction was emitted since the comparison with zero above, so this
jump tests the same flags: it separates the negative case from zero. */
4799 jump = JUMP(SLJIT_SIG_LESS);
4800 /* End of dropping frames. */
4801 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4802
4803 JUMPHERE(jump);
/* Negative control word: restore a single word. */
4804 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4805 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4806 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4807 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4808 JUMPTO(SLJIT_JUMP, mainloop);
4809 }
4810
/* Emits a fast-call helper routine (return address kept in LOCALS0) that
computes a "word character" flag (0 or 1) for the character before STR_PTR
and for the character at STR_PTR, and finally XORs the two flags with the
equal/zero flag set from the result. The flag of the previous character is
kept in LOCALS1 and is 0 when STR_PTR is at the beginning of the subject; the
flag of the current character is kept in TMP2 and is 0 at the end of the
subject. */
4811 static void check_wordboundary(compiler_common *common)
4812 {
4813 DEFINE_COMPILER;
4814 struct sljit_jump *skipread;
4815 jump_list *skipread_list = NULL;
4816 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4817 struct sljit_jump *jump;
4818 #endif
4819
/* The ctype lookups below shift by 4 to move the ctype_word bit to bit 0. */
4820 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4821
4822 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4823 /* Get type of the previous char, and put it to LOCALS1. */
4824 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4825 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4826 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* No previous character at the start of the subject: LOCALS1 stays 0. */
4827 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4828 skip_char_back(common);
4829 check_start_used_ptr(common);
4830 read_char(common);
4831
4832 /* Testing char type. */
4833 #ifdef SUPPORT_UCP
4834 if (common->use_ucp)
4835 {
/* Underscore counts as a word character without consulting the tables.
Otherwise the value left in TMP1 by the getucd helper (presumably the
character type - confirm against that helper) is tested against the ranges
[ucp_Ll, ucp_Lu] and [ucp_Nd, ucp_No] with unsigned range compares. */
4836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4837 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4838 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4839 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4840 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4841 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4842 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4843 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4844 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4845 JUMPHERE(jump);
4846 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4847 }
4848 else
4849 #endif
4850 {
/* Table based test: characters above 255 are never word characters. */
4851 #ifndef COMPILE_PCRE8
4852 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4853 #elif defined SUPPORT_UTF
4854 /* Here LOCALS1 has already been zeroed. */
4855 jump = NULL;
4856 if (common->utf)
4857 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4858 #endif /* COMPILE_PCRE8 */
4859 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4860 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4861 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4862 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4863 #ifndef COMPILE_PCRE8
4864 JUMPHERE(jump);
4865 #elif defined SUPPORT_UTF
4866 if (jump != NULL)
4867 JUMPHERE(jump);
4868 #endif /* COMPILE_PCRE8 */
4869 }
4870 JUMPHERE(skipread);
4871
/* Now the character at STR_PTR: its flag is computed into TMP2, which stays
0 when the jumps collected by check_str_end are taken. */
4872 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4873 check_str_end(common, &skipread_list);
4874 peek_char(common, READ_CHAR_MAX);
4875
4876 /* Testing char type. This is a code duplication. */
4877 #ifdef SUPPORT_UCP
4878 if (common->use_ucp)
4879 {
4880 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4881 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4882 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4883 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4884 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4885 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4886 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4887 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4888 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4889 JUMPHERE(jump);
4890 }
4891 else
4892 #endif
4893 {
4894 #ifndef COMPILE_PCRE8
4895 /* TMP2 may be destroyed by peek_char. */
4896 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4897 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4898 #elif defined SUPPORT_UTF
4899 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4900 jump = NULL;
4901 if (common->utf)
4902 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4903 #endif
4904 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4905 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4906 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4907 #ifndef COMPILE_PCRE8
4908 JUMPHERE(jump);
4909 #elif defined SUPPORT_UTF
4910 if (jump != NULL)
4911 JUMPHERE(jump);
4912 #endif /* COMPILE_PCRE8 */
4913 }
4914 set_jumps(skipread_list, LABEL());
4915
/* Flags are set from (current flag XOR previous flag); the result itself is
discarded, only the equal/zero flag is left for the caller. */
4916 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4917 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4918 }
4919
/* Tries to express a 256-bit class bitmap as a few compare instructions on
the character held in TMP1.

Arguments:
  common      the compiler state
  bits        the class bitmap (one bit per code point 0..255)
  nclass      membership assumed for characters above 255
  invert      when set, the sense of the test is inverted
  backtracks  jump list receiving the generated jumps

The bitmap is scanned for the code points at which membership flips; these
are collected in ranges[]. Only bitmaps with at most four such points are
handled. A jump is added to backtracks for every character whose membership
(after the optional inversion) is 0. TMP1 may be modified by the emitted code.

Returns: TRUE when code was generated, FALSE when the bitmap is too complex
(nothing is emitted in that case). */
4920 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4921 {
4922 DEFINE_COMPILER;
4923 int ranges[MAX_RANGE_SIZE];
4924 pcre_uint8 bit, cbit, all;
4925 int i, byte, length = 0;
4926
4927 bit = bits[0] & 0x1;
4928 /* All bits will be zero or one (since bit is zero or one). */
4929 all = -bit;
4930
/* Collect the flip points. A whole byte equal to "all" contains no flip, so
eight code points are skipped at once when i is byte aligned. */
4931 for (i = 0; i < 256; )
4932 {
4933 byte = i >> 3;
4934 if ((i & 0x7) == 0 && bits[byte] == all)
4935 i += 8;
4936 else
4937 {
4938 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4939 if (cbit != bit)
4940 {
4941 if (length >= MAX_RANGE_SIZE)
4942 return FALSE;
4943 ranges[length] = i;
4944 length++;
4945 bit = cbit;
4946 all = -cbit;
4947 }
4948 i++;
4949 }
4950 }
4951
/* If the membership of the last code point differs from the one selected by
nclass for larger characters, 256 becomes one more flip point. */
4952 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4953 {
4954 if (length >= MAX_RANGE_SIZE)
4955 return FALSE;
4956 ranges[length] = 256;
4957 length++;
4958 }
4959
4960 if (length < 0 || length > 4)
4961 return FALSE;
4962
/* From here on "bit" is the (possibly inverted) membership of code point 0. */
4963 bit = bits[0] & 0x1;
4964 if (invert) bit ^= 0x1;
4965
4966 /* No character is accepted. */
4967 if (length == 0 && bit == 0)
4968 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4969
4970 switch(length)
4971 {
4972 case 0:
4973 /* When bit != 0, all characters are accepted. */
4974 return TRUE;
4975
4976 case 1:
/* One flip point: a single compare with ranges[0]. */
4977 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4978 return TRUE;
4979
4980 case 2:
/* One interval [ranges[0], ranges[1]): a rebased unsigned range compare, or
an equality test when the interval is a single character. */
4981 if (ranges[0] + 1 != ranges[1])
4982 {
4983 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4984 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4985 }
4986 else
4987 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4988 return TRUE;
4989
4990 case 3:
/* Three flip points: one open-ended compare plus one interval test. */
4991 if (bit != 0)
4992 {
4993 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4994 if (ranges[0] + 1 != ranges[1])
4995 {
4996 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4997 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4998 }
4999 else
5000 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5001 return TRUE;
5002 }
5003
5004 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
5005 if (ranges[1] + 1 != ranges[2])
5006 {
5007 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
5008 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5009 }
5010 else
5011 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
5012 return TRUE;
5013
5014 case 4:
/* Two intervals of equal length whose start points differ in exactly one
bit: set that bit in TMP1 and test the upper interval only. */
5015 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
5016 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
5017 && (ranges[1] & (ranges[2] - ranges[0])) == 0
5018 && is_powerof2(ranges[2] - ranges[0]))
5019 {
5020 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
5021 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
5022 if (ranges[2] + 1 != ranges[3])
5023 {
5024 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
5025 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5026 }
5027 else
5028 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5029 return TRUE;
5030 }
5031
5032 if (bit != 0)
5033 {
/* "i" remembers how much has already been subtracted from TMP1 by the first
test, so that the second test can be rebased relative to it. */
5034 i = 0;
5035 if (ranges[0] + 1 != ranges[1])
5036 {
5037 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5038 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5039 i = ranges[0];
5040 }
5041 else
5042 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5043
5044 if (ranges[2] + 1 != ranges[3])
5045 {
5046 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
5047 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5048 }
5049 else
5050 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
5051 return TRUE;
5052 }
5053
/* bit == 0: jump for everything outside [ranges[0], ranges[3]), then for the
gap [ranges[1], ranges[2]); TMP1 is rebased step by step. */
5054 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5055 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
5056 if (ranges[1] + 1 != ranges[2])
5057 {
5058 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
5059 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5060 }
5061 else
5062 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5063 return TRUE;
5064
5065 default:
5066 SLJIT_ASSERT_STOP();
5067 return FALSE;
5068 }
5069 }
5070
/* Emits a fast-call helper routine (return address in RETURN_ADDR) that tests
the character in TMP1 against 0x0a-0x0d and 0x85, plus 0x2028/0x2029 in the
16/32-bit libraries and in 8-bit UTF mode. The 0/1 result is accumulated in
TMP2 and the last instruction sets the equal/zero flag from it. */
5071 static void check_anynewline(compiler_common *common)
5072 {
5073 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* Note that TMP1 is modified as well (it is rebased by 0x0a below). */
5074 DEFINE_COMPILER;
5075
5076 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5077
/* Rebase to 0x0a: one unsigned compare then covers 0x0a..0x0d. */
5078 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5079 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5080 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5081 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5082 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5083 #ifdef COMPILE_PCRE8
5084 if (common->utf)
5085 {
5086 #endif
/* Setting the lowest bit maps 0x2028 onto 0x2029 (after rebasing), so a
single compare handles both characters. */
5087 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5088 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5089 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5090 #ifdef COMPILE_PCRE8
5091 }
5092 #endif
5093 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare and set the flags from TMP2. */
5094 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5095 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5096 }
5097
/* Emits a fast-call helper routine (return address in RETURN_ADDR) that tests
the character in TMP1 against 0x09, 0x20 and 0xa0, plus - in the 16/32-bit
libraries and in 8-bit UTF mode - 0x1680, 0x180e, 0x2000-0x200A, 0x202f,
0x205f and 0x3000. The 0/1 result is accumulated in TMP2 and the last
instruction sets the equal/zero flag from it. */
5098 static void check_hspace(compiler_common *common)
5099 {
5100 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
5101 DEFINE_COMPILER;
5102
5103 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5104
5105 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
5106 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5107 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
5108 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5109 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
5110 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5111 #ifdef COMPILE_PCRE8
5112 if (common->utf)
5113 {
5114 #endif
5115 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5116 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
5117 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5118 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
5119 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* TMP1 is rebased to 0x2000 here (and therefore modified), so the range
0x2000..0x200A needs only one unsigned compare; the remaining constants are
given relative to 0x2000. */
5120 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
5121 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
5122 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5123 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
5124 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5125 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
5126 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5127 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
5128 #ifdef COMPILE_PCRE8
5129 }
5130 #endif
5131 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare and set the flags from TMP2. */
5132 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5133
5134 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5135 }
5136
/* Emits a fast-call helper routine (return address in RETURN_ADDR) that tests
the character in TMP1 against 0x0a-0x0d and 0x85, plus 0x2028/0x2029 in the
16/32-bit libraries and in 8-bit UTF mode. The 0/1 result is accumulated in
TMP2 and the last instruction sets the equal/zero flag from it. */
5137 static void check_vspace(compiler_common *common)
5138 {
5139 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
/* Note that TMP1 is modified as well (it is rebased by 0x0a below). */
5140 DEFINE_COMPILER;
5141
5142 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5143
/* Rebase to 0x0a: one unsigned compare then covers 0x0a..0x0d. */
5144 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5145 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5146 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5147 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5148 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5149 #ifdef COMPILE_PCRE8
5150 if (common->utf)
5151 {
5152 #endif
/* Setting the lowest bit maps 0x2028 onto 0x2029 (after rebasing), so a
single compare handles both characters. */
5153 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5154 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5155 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5156 #ifdef COMPILE_PCRE8
5157 }
5158 #endif
5159 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the result of the last compare and set the flags from TMP2. */
5160 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5161
5162 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5163 }
5164
/* Inside the two compare helpers below the STR_END and STACK_TOP registers
are borrowed to hold the two characters being compared; their original
contents are saved on entry and restored before returning. */
5165 #define CHAR1 STR_END
5166 #define CHAR2 STACK_TOP
5167
/* Emits a fast-call helper routine (return address in RETURN_ADDR) that
compares two character sequences exactly. TMP1 and STR_PTR are used as the
two read pointers and TMP2 as the remaining length in bytes (it is decreased
by IN_UCHARS(1) per character). STR_PTR is first moved back by TMP2. The loop
ends when TMP2 reaches zero or at the first differing pair of characters. */
5168 static void do_casefulcmp(compiler_common *common)
5169 {
5170 DEFINE_COMPILER;
5171 struct sljit_jump *jump;
5172 struct sljit_label *label;
5173
5174 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5175 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the borrowed registers: CHAR1 in TMP3, CHAR2 in LOCALS0. */
5176 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
5177 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one character because the loads in the
loop address IN_UCHARS(1) past the base register (MOVU_UCHAR presumably
updates the base register as well - confirm against its definition). */
5178 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5179 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5180
5181 label = LABEL();
5182 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5183 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5184 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5185 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5186 JUMPTO(SLJIT_NOT_ZERO, label);
5187
5188 JUMPHERE(jump);
/* Undo the initial one character adjustment and restore the registers. */
5189 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5190 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
5191 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5192 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5193 }
5194
/* The STACK_LIMIT register is borrowed to hold the address of the lower
casing table while do_caselesscmp runs. */
5195 #define LCC_TABLE STACK_LIMIT
5196
/* Emits a fast-call helper routine (return address in RETURN_ADDR) that
compares two character sequences after mapping each character through the
common->lcc table. TMP1 and STR_PTR are used as the two read pointers and
TMP2 as the remaining length in bytes (it is decreased by IN_UCHARS(1) per
character). STR_PTR is first moved back by TMP2. The loop ends when TMP2
reaches zero or at the first differing pair of characters. */
5197 static void do_caselesscmp(compiler_common *common)
5198 {
5199 DEFINE_COMPILER;
5200 struct sljit_jump *jump;
5201 struct sljit_label *label;
5202
5203 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5204 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5205
/* Save the borrowed registers: LCC_TABLE in TMP3, CHAR1 in LOCALS0 and
CHAR2 in LOCALS1. */
5206 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
5207 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
5208 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
5209 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one character because the loads in the
loop address IN_UCHARS(1) past the base register (MOVU_UCHAR presumably
updates the base register as well - confirm against its definition). */
5210 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5211 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5212
5213 label = LABEL();
5214 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5215 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* In the 16/32-bit libraries characters above 255 bypass the table lookup
and are compared unchanged. */
5216 #ifndef COMPILE_PCRE8
5217 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
5218 #endif
5219 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
5220 #ifndef COMPILE_PCRE8
5221 JUMPHERE(jump);
5222 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
5223 #endif
5224 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
5225 #ifndef COMPILE_PCRE8
5226 JUMPHERE(jump);
5227 #endif
5228 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5229 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5230 JUMPTO(SLJIT_NOT_ZERO, label);
5231
5232 JUMPHERE(jump);
/* Undo the initial one character adjustment and restore the registers. */
5233 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5234 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
5235 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5236 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5237 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5238 }
5239
5240 #undef LCC_TABLE
5241 #undef CHAR1
5242 #undef CHAR2
5243
5244 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5245
5246 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
5247 {
5248 /* This function would be ineffective to do in JIT level. */
5249 pcre_uint32 c1, c2;
5250 const pcre_uchar *src2 = args->uchar_ptr;
5251 const pcre_uchar *end2 = args->end;
5252 const ucd_record *ur;
5253 const pcre_uint32 *pp;
5254
5255 while (src1 < end1)
5256 {
5257 if (src2 >= end2)
5258 return (pcre_uchar*)1;
5259 GETCHARINC(c1, src1);
5260 GETCHARINC(c2, src2);
5261 ur = GET_UCD(c2);
5262 if (c1 != c2 && c1 != c2 + ur->other_case)
5263 {
5264 pp = PRIV(ucd_caseless_sets) + ur->caseset;
5265 for (;;)
5266 {
5267 if (c1 < *pp) return NULL;
5268 if (c1 == *pp++) break;
5269 }
5270 }
5271 }
5272 return src2;
5273 }
5274
5275 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5276
5277 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
5278 compare_context *context, jump_list **backtracks)
5279 {
5280 DEFINE_COMPILER;
5281 unsigned int othercasebit = 0;
5282 pcre_uchar *othercasechar = NULL;
5283 #ifdef SUPPORT_UTF
5284 int utflength;
5285 #endif
5286
5287 if (caseless && char_has_othercase(common, cc))
5288 {
5289 othercasebit = char_get_othercase_bit(common, cc);
5290 SLJIT_ASSERT(othercasebit);
5291 /* Extracting bit difference info. */
5292 #if defined COMPILE_PCRE8
5293 othercasechar = cc + (othercasebit >> 8);
5294 othercasebit &= 0xff;
5295 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5296 /* Note that this code only handles characters in the BMP. If there
5297 ever are characters outside the BMP whose othercase differs in only one
5298 bit from itself (there currently are none), this code will need to be
5299 revised for COMPILE_PCRE32. */
5300 othercasechar = cc + (othercasebit >> 9);
5301 if ((othercasebit & 0x100) != 0)
5302 othercasebit = (othercasebit & 0xff) << 8;
5303 else
5304 othercasebit &= 0xff;
5305 #endif /* COMPILE_PCRE[8|16|32] */
5306 }
5307
5308 if (context->sourcereg == -1)
5309 {
5310 #if defined COMPILE_PCRE8
5311 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5312 if (context->length >= 4)
5313 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5314 else if (context->length >= 2)
5315 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5316 else
5317 #endif
5318 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5319 #elif defined COMPILE_PCRE16
5320 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5321 if (context->length >= 4)
5322 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5323 else
5324 #endif
5325 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5326 #elif defined COMPILE_PCRE32
5327 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5328 #endif /* COMPILE_PCRE[8|16|32] */
5329 context->sourcereg = TMP2;
5330 }
5331
5332 #ifdef SUPPORT_UTF
5333 utflength = 1;
5334 if (common->utf && HAS_EXTRALEN(*cc))
5335 utflength += GET_EXTRALEN(*cc);
5336
5337 do
5338 {
5339 #endif
5340
5341 context->length -= IN_UCHARS(1);
5342 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5343
5344 /* Unaligned read is supported. */
5345 if (othercasebit != 0 && othercasechar == cc)
5346 {
5347 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
5348 context->oc.asuchars[context->ucharptr] = othercasebit;
5349 }
5350 else
5351 {
5352 context->c.asuchars[context->ucharptr] = *cc;
5353 context->oc.asuchars[context->ucharptr] = 0;
5354 }
5355 context->ucharptr++;
5356
5357 #if defined COMPILE_PCRE8
5358 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
5359 #else
5360 if (context->ucharptr >= 2 || context->length == 0)
5361 #endif
5362 {
5363 if (context->length >= 4)
5364 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5365 else if (context->length >= 2)
5366 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5367 #if defined COMPILE_PCRE8
5368 else if (context->length >= 1)
5369 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5370 #endif /* COMPILE_PCRE8 */
5371 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5372
5373 switch(context->ucharptr)
5374 {
5375 case 4 / sizeof(pcre_uchar):
5376 if (context->oc.asint != 0)
5377 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
5378 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
5379 break;
5380
5381 case 2 / sizeof(pcre_uchar):
5382 if (context->oc.asushort != 0)
5383 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
5384 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
5385 break;
5386
5387 #ifdef COMPILE_PCRE8
5388 case 1:
5389 if (context->oc.asbyte != 0)
5390 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
5391 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
5392 break;
5393 #endif
5394
5395 default:
5396 SLJIT_ASSERT_STOP();
5397 break;
5398 }
5399 context->ucharptr = 0;
5400 }
5401
5402 #else
5403
5404 /* Unaligned read is unsupported or in 32 bit mode. */
5405 if (context->length >= 1)
5406 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5407
5408 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5409
5410 if (othercasebit != 0 && othercasechar == cc)
5411 {
5412 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
5413 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
5414 }
5415 else
5416 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
5417
5418 #endif
5419
5420 cc++;
5421 #ifdef SUPPORT_UTF
5422 utflength--;
5423 }
5424 while (utflength > 0);
5425 #endif
5426
5427 return cc;
5428 }
5429
5430 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5431
/* Both macros rely on names that must be in scope at the point of use:
OP2, SLJIT_ADD, SLJIT_SUB, SLJIT_IMM, plus typereg / typeoffset or
TMP1 / charoffset respectively. They are wrapped in do { } while (0) so
that each use behaves as a single statement, even as the body of an
unbraced if / else. The value argument is evaluated more than once and
must not have side effects. */

/* If value differs from typeoffset, emits an OP2 that adds
(typeoffset - value) to typereg or subtracts (value - typeoffset) from it.
Afterwards typeoffset is set to value. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)

/* Same adjustment for the character held in TMP1, tracked by charoffset. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
5451
5452 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5453
5454 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5455 {
5456 DEFINE_COMPILER;
5457 jump_list *found = NULL;
5458 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5459 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5460 struct sljit_jump *jump = NULL;
5461 pcre_uchar *ccbegin;
5462 int compares, invertcmp, numberofcmps;
5463 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5464 BOOL utf = common->utf;
5465 #endif
5466
5467 #ifdef SUPPORT_UCP
5468 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5469 BOOL charsaved = FALSE;
5470 int typereg = TMP1;
5471 const sljit_ui *other_cases;
5472 sljit_uw typeoffset;
5473 #endif
5474
5475 /* Scanning the necessary info. */
5476 cc++;
5477 ccbegin = cc;
5478 compares = 0;
5479 if (cc[-1] & XCL_MAP)
5480 {
5481 min = 0;
5482 cc += 32 / sizeof(pcre_uchar);
5483 }
5484
5485 while (*cc != XCL_END)
5486 {
5487 compares++;
5488 if (*cc == XCL_SINGLE)
5489 {
5490 cc ++;
5491 GETCHARINCTEST(c, cc);
5492 if (c > max) max = c;
5493 if (c < min) min = c;
5494 #ifdef SUPPORT_UCP
5495 needschar = TRUE;
5496 #endif
5497 }
5498 else if (*cc == XCL_RANGE)
5499 {
5500 cc ++;
5501 GETCHARINCTEST(c, cc);
5502 if (c < min) min = c;
5503 GETCHARINCTEST(c, cc);
5504 if (c > max) max = c;
5505 #ifdef SUPPORT_UCP
5506 needschar = TRUE;
5507 #endif
5508 }
5509 #ifdef SUPPORT_UCP
5510 else
5511 {
5512 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5513 cc++;
5514 if (*cc == PT_CLIST)
5515 {
5516 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5517 while (*other_cases != NOTACHAR)
5518 {
5519 if (*other_cases > max) max = *other_cases;
5520 if (*other_cases < min) min = *other_cases;
5521 other_cases++;
5522 }
5523 }
5524 else
5525 {
5526 max = READ_CHAR_MAX;
5527 min = 0;
5528 }
5529
5530 switch(*cc)
5531 {
5532 case PT_ANY:
5533 /* Any either accepts everything or ignored. */
5534 if (cc[-1] == XCL_PROP)
5535 {
5536 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5537 if (list == backtracks)
5538 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5539 return;
5540 }
5541 break;
5542
5543 case PT_LAMP:
5544 case PT_GC:
5545 case PT_PC:
5546 case PT_ALNUM:
5547 needstype = TRUE;
5548 break;
5549
5550 case PT_SC:
5551 needsscript = TRUE;
5552 break;
5553
5554 case PT_SPACE:
5555 case PT_PXSPACE:
5556 case PT_WORD:
5557 case PT_PXGRAPH:
5558 case PT_PXPRINT:
5559 case PT_PXPUNCT:
5560 needstype = TRUE;
5561 needschar = TRUE;
5562 break;
5563
5564 case PT_CLIST:
5565 case PT_UCNC:
5566 needschar = TRUE;
5567 break;
5568
5569 default:
5570 SLJIT_ASSERT_STOP();
5571 break;
5572 }
5573 cc += 2;
5574 }
5575 #endif
5576 }
5577 SLJIT_ASSERT(compares > 0);
5578
5579 /* We are not necessary in utf mode even in 8 bit mode. */
5580 cc = ccbegin;
5581 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5582
5583 if ((cc[-1] & XCL_HASPROP) == 0)
5584 {
5585 if ((cc[-1] & XCL_MAP) != 0)
5586 {
5587 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5588 if (!check_class_ranges(common, (const sljit_ub *)cc, (((const sljit_ub *)cc)[31] & 0x80) != 0, TRUE, &found))
5589 {
5590 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5591 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5592 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5593 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5594 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5595 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5596 }
5597
5598 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5599 JUMPHERE(jump);
5600
5601 cc += 32 / sizeof(pcre_uchar);
5602 }
5603 else
5604 {
5605 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5606 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5607 }
5608 }
5609 else if ((cc[-1] & XCL_MAP) != 0)
5610 {
5611 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5612 #ifdef SUPPORT_UCP
5613 charsaved = TRUE;
5614 #endif
5615 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
5616 {
5617 #ifdef COMPILE_PCRE8
5618 jump = NULL;
5619 if (common->utf)
5620 #endif
5621 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5622
5623 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5624 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5625 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5626 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5627 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5628 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5629
5630 #ifdef COMPILE_PCRE8
5631 if (common->utf)
5632 #endif
5633 JUMPHERE(jump);
5634 }
5635
5636 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5637 cc += 32 / sizeof(pcre_uchar);
5638 }
5639
5640 #ifdef SUPPORT_UCP
5641 if (needstype || needsscript)
5642 {
5643 if (needschar && !charsaved)
5644 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5645
5646 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5647 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5648 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5649 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5650 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5651 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5652 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5653
5654 /* Before anything else, we deal with scripts. */
5655 if (needsscript)
5656 {
5657 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5658 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5659
5660 ccbegin = cc;
5661
5662 while (*cc != XCL_END)
5663 {
5664 if (*cc == XCL_SINGLE)
5665 {
5666 cc ++;
5667 GETCHARINCTEST(c, cc);
5668 }
5669 else if (*cc == XCL_RANGE)
5670 {
5671 cc ++;
5672 GETCHARINCTEST(c, cc);
5673 GETCHARINCTEST(c, cc);
5674 }
5675 else
5676 {
5677 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5678 cc++;
5679 if (*cc == PT_SC)
5680 {
5681 compares--;
5682 invertcmp = (compares == 0 && list != backtracks);
5683 if (cc[-1] == XCL_NOTPROP)
5684 invertcmp ^= 0x1;
5685 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5686 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5687 }
5688 cc += 2;
5689 }
5690 }
5691
5692 cc = ccbegin;
5693 }
5694
5695 if (needschar)
5696 {
5697 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5698 }
5699
5700 if (needstype)
5701 {
5702 if (!needschar)
5703 {
5704 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5705 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5706 }
5707 else
5708 {
5709 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5710 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5711 typereg = RETURN_ADDR;
5712 }
5713 }
5714 }
5715 #endif
5716
5717 /* Generating code. */
5718 charoffset = 0;
5719 numberofcmps = 0;
5720 #ifdef SUPPORT_UCP
5721 typeoffset = 0;
5722 #endif
5723
5724 while (*cc != XCL_END)
5725 {
5726 compares--;
5727 invertcmp = (compares == 0 && list != backtracks);
5728 jump = NULL;
5729
5730 if (*cc == XCL_SINGLE)
5731 {
5732 cc ++;
5733 GETCHARINCTEST(c, cc);
5734
5735 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5736 {
5737 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5738 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5739 numberofcmps++;
5740 }
5741 else if (numberofcmps > 0)
5742 {
5743 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5744 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5745 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5746 numberofcmps = 0;
5747 }
5748 else
5749 {
5750 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5751 numberofcmps = 0;
5752 }
5753 }
5754 else if (*cc == XCL_RANGE)
5755 {
5756 cc ++;
5757 GETCHARINCTEST(c, cc);
5758 SET_CHAR_OFFSET(c);
5759 GETCHARINCTEST(c, cc);
5760
5761 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5762 {
5763 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5764 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5765 numberofcmps++;
5766 }
5767 else if (numberofcmps > 0)
5768 {
5769 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5770 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5771 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5772 numberofcmps = 0;
5773 }
5774 else
5775 {
5776 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5777 numberofcmps = 0;
5778 }
5779 }
5780 #ifdef SUPPORT_UCP
5781 else
5782 {
5783 if (*cc == XCL_NOTPROP)
5784 invertcmp ^= 0x1;
5785 cc++;
5786 switch(*cc)
5787 {
5788 case PT_ANY:
5789 if (!invertcmp)
5790 jump = JUMP(SLJIT_JUMP);
5791 break;
5792
5793 case PT_LAMP:
5794 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5795 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5796 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5797 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5798 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5799 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5800 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5801 break;
5802
5803 case PT_GC:
5804 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5805 SET_TYPE_OFFSET(c);
5806 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5807 break;
5808
5809 case PT_PC:
5810 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5811 break;
5812
5813 case PT_SC:
5814 compares++;
5815 /* Do nothing. */
5816 break;
5817
5818 case PT_SPACE:
5819 case PT_PXSPACE:
5820 SET_CHAR_OFFSET(9);
5821 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5822 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5823
5824 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5825 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5826
5827 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5828 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5829
5830 SET_TYPE_OFFSET(ucp_Zl);
5831 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5832 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5833 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5834 break;
5835
5836 case PT_WORD:
5837 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5838 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5839 /* Fall through. */
5840
5841 case PT_ALNUM:
5842 SET_TYPE_OFFSET(ucp_Ll);
5843 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5844 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5845 SET_TYPE_OFFSET(ucp_Nd);
5846 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5847 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5848 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5849 break;
5850
5851 case PT_CLIST:
5852 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5853
5854 /* At least three characters are required.
5855 Otherwise this case would be handled by the normal code path. */
5856 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5857 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5858
5859 /* Optimizing character pairs, if their difference is power of 2. */
5860 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5861 {
5862 if (charoffset == 0)
5863 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5864 else
5865 {
5866 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5867 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5868 }
5869 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5870 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5871 other_cases += 2;
5872 }
5873 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5874 {
5875 if (charoffset == 0)
5876 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5877 else
5878 {
5879 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5880 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5881 }
5882 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5883 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5884
5885 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5886 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5887
5888 other_cases += 3;
5889 }
5890 else
5891 {
5892 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5893 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5894 }
5895
5896 while (*other_cases != NOTACHAR)
5897 {
5898 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5899 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5900 }
5901 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5902 break;
5903
5904 case PT_UCNC:
5905 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5906 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5907 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5908 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5909 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5910 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5911
5912 SET_CHAR_OFFSET(0xa0);
5913 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5914 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5915 SET_CHAR_OFFSET(0);
5916 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5917 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5918 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5919 break;
5920
5921 case PT_PXGRAPH:
5922 /* C and Z groups are the farthest two groups. */
5923 SET_TYPE_OFFSET(ucp_Ll);
5924 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5925 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5926
5927 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5928
5929 /* In case of ucp_Cf, we overwrite the result. */
5930 SET_CHAR_OFFSET(0x2066);
5931 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5932 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5933
5934 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5935 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5936
5937 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5938 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5939
5940 JUMPHERE(jump);
5941 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5942 break;
5943
5944 case PT_PXPRINT:
5945 /* C and Z groups are the farthest two groups. */
5946 SET_TYPE_OFFSET(ucp_Ll);
5947 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5948 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5949
5950 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5951 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5952
5953 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5954
5955 /* In case of ucp_Cf, we overwrite the result. */
5956 SET_CHAR_OFFSET(0x2066);
5957 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5958 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5959
5960 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5961 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5962
5963 JUMPHERE(jump);
5964 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5965 break;
5966
5967 case PT_PXPUNCT:
5968 SET_TYPE_OFFSET(ucp_Sc);
5969 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5970 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5971
5972 SET_CHAR_OFFSET(0);
5973 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5974 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5975
5976 SET_TYPE_OFFSET(ucp_Pc);
5977 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5978 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5979 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5980 break;
5981
5982 default:
5983 SLJIT_ASSERT_STOP();
5984 break;
5985 }
5986 cc += 2;
5987 }
5988 #endif
5989
5990 if (jump != NULL)
5991 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5992 }
5993
5994 if (found != NULL)
5995 set_jumps(found, LABEL());
5996 }
5997
5998 #undef SET_TYPE_OFFSET
5999 #undef SET_CHAR_OFFSET
6000
6001 #endif
6002
6003 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
6004 {
6005 DEFINE_COMPILER;
6006 int length;
6007 struct sljit_jump *jump[4];
6008 #ifdef SUPPORT_UTF
6009 struct sljit_label *label;
6010 #endif /* SUPPORT_UTF */
6011
6012 switch(type)
6013 {
6014 case OP_SOD:
6015 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6016 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6017 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6018 return cc;
6019
6020 case OP_SOM:
6021 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6022 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6023 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6024 return cc;
6025
6026 case OP_NOT_WORD_BOUNDARY:
6027 case OP_WORD_BOUNDARY:
6028 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
6029 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6030 return cc;
6031
6032 case OP_EODN:
6033 /* Requires rather complex checks. */
6034 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6035 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6036 {
6037 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6038 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6039 if (common->mode == JIT_COMPILE)
6040 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6041 else
6042 {
6043 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
6044 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6045 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
6046 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6047 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
6048 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
6049 check_partial(common, TRUE);
6050 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6051 JUMPHERE(jump[1]);
6052 }
6053 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6054 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6055 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6056 }
6057 else if (common->nltype == NLTYPE_FIXED)
6058 {
6059 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6060 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6061 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6062 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
6063 }
6064 else
6065 {
6066 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6067 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6068 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6069 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6070 jump[2] = JUMP(SLJIT_GREATER);
6071 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
6072 /* Equal. */
6073 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6074 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6075 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6076
6077 JUMPHERE(jump[1]);
6078 if (common->nltype == NLTYPE_ANYCRLF)
6079 {
6080 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6081 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
6082 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
6083 }
6084 else
6085 {
6086 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
6087 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6088 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
6089 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
6090 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6091 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
6092 }
6093 JUMPHERE(jump[2]);
6094 JUMPHERE(jump[3]);
6095 }
6096 JUMPHERE(jump[0]);
6097 check_partial(common, FALSE);
6098 return cc;
6099
6100 case OP_EOD:
6101 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6102 check_partial(common, FALSE);
6103 return cc;
6104
6105 case OP_DOLL:
6106 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6107 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6108 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6109
6110 if (!common->endonly)
6111 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
6112 else
6113 {
6114 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6115 check_partial(common, FALSE);
6116 }
6117 return cc;
6118
6119 case OP_DOLLM:
6120 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6121 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6122 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6123 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6124 check_partial(common, FALSE);
6125 jump[0] = JUMP(SLJIT_JUMP);
6126 JUMPHERE(jump[1]);
6127
6128 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6129 {
6130 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6131 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6132 if (common->mode == JIT_COMPILE)
6133 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
6134 else
6135 {
6136 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
6137 /* STR_PTR = STR_END - IN_UCHARS(1) */
6138 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6139 check_partial(common, TRUE);
6140 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6141 JUMPHERE(jump[1]);
6142 }
6143
6144 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6145 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6146 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6147 }
6148 else
6149 {
6150 peek_char(common, common->nlmax);
6151 check_newlinechar(common, common->nltype, backtracks, FALSE);
6152 }
6153 JUMPHERE(jump[0]);
6154 return cc;
6155
6156 case OP_CIRC:
6157 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6158 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6159 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
6160 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6161 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6162 return cc;
6163
6164 case OP_CIRCM:
6165 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6166 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6167 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
6168 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6169 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6170 jump[0] = JUMP(SLJIT_JUMP);
6171 JUMPHERE(jump[1]);
6172
6173 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6174 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6175 {
6176 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6177 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
6178 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6179 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6180 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6181 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6182 }
6183 else
6184 {
6185 skip_char_back(common);
6186 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6187 check_newlinechar(common, common->nltype, backtracks, FALSE);
6188 }
6189 JUMPHERE(jump[0]);
6190 return cc;
6191
6192 case OP_REVERSE:
6193 length = GET(cc, 0);
6194 if (length == 0)
6195 return cc + LINK_SIZE;
6196 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6197 #ifdef SUPPORT_UTF
6198 if (common->utf)
6199 {
6200 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6201 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
6202 label = LABEL();
6203 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
6204 skip_char_back(common);
6205 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6206 JUMPTO(SLJIT_NOT_ZERO, label);
6207 }
6208 else
6209 #endif
6210 {
6211 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6212 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6213 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
6214 }
6215 check_start_used_ptr(common);
6216 return cc + LINK_SIZE;
6217 }
6218 SLJIT_ASSERT_STOP();
6219 return cc;
6220 }
6221
6222 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
6223 {
6224 DEFINE_COMPILER;
6225 int length;
6226 unsigned int c, oc, bit;
6227 compare_context context;
6228 struct sljit_jump *jump[3];
6229 jump_list *end_list;
6230 #ifdef SUPPORT_UTF
6231 struct sljit_label *label;
6232 #ifdef SUPPORT_UCP
6233 pcre_uchar propdata[5];
6234 #endif
6235 #endif /* SUPPORT_UTF */
6236
6237 switch(type)
6238 {
6239 case OP_NOT_DIGIT:
6240 case OP_DIGIT:
6241 /* Digits are usually 0-9, so it is worth to optimize them. */
6242 if (check_str_ptr)
6243 detect_partial_match(common, backtracks);
6244 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6245 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
6246 read_char7_type(common, type == OP_NOT_DIGIT);
6247 else
6248 #endif
6249 read_char8_type(common, type == OP_NOT_DIGIT);
6250 /* Flip the starting bit in the negative case. */
6251 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
6252 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6253 return cc;
6254
6255 case OP_NOT_WHITESPACE:
6256 case OP_WHITESPACE:
6257 if (check_str_ptr)
6258 detect_partial_match(common, backtracks);
6259 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6260 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
6261 read_char7_type(common, type == OP_NOT_WHITESPACE);
6262 else
6263 #endif
6264 read_char8_type(common, type == OP_NOT_WHITESPACE);
6265 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
6266 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6267 return cc;
6268
6269 case OP_NOT_WORDCHAR:
6270 case OP_WORDCHAR:
6271 if (check_str_ptr)
6272 detect_partial_match(common, backtracks);
6273 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6274 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
6275 read_char7_type(common, type == OP_NOT_WORDCHAR);
6276 else
6277 #endif
6278 read_char8_type(common, type == OP_NOT_WORDCHAR);
6279 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
6280 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6281 return cc;
6282
6283 case OP_ANY:
6284 if (check_str_ptr)
6285 detect_partial_match(common, backtracks);
6286 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6287 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6288 {
6289 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6290 end_list = NULL;
6291 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6292 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6293 else
6294 check_str_end(common, &end_list);
6295
6296 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6297 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
6298 set_jumps(end_list, LABEL());
6299 JUMPHERE(jump[0]);
6300 }
6301 else
6302 check_newlinechar(common, common->nltype, backtracks, TRUE);
6303 return cc;
6304
6305 case OP_ALLANY:
6306 if (check_str_ptr)
6307 detect_partial_match(common, backtracks);
6308 #ifdef SUPPORT_UTF
6309 if (common->utf)
6310 {
6311 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6312 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6313 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6314 #if defined COMPILE_PCRE8
6315 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6316 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6317 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6318 #elif defined COMPILE_PCRE16
6319 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
6320 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
6321 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
6322 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
6323 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6324 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6325 #endif
6326 JUMPHERE(jump[0]);
6327 #endif /* COMPILE_PCRE[8|16] */
6328 return cc;
6329 }
6330 #endif
6331 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6332 return cc;
6333
6334 case OP_ANYBYTE:
6335 if (check_str_ptr)
6336 detect_partial_match(common, backtracks);
6337 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6338 return cc;
6339
6340 #ifdef SUPPORT_UTF
6341 #ifdef SUPPORT_UCP
6342 case OP_NOTPROP:
6343 case OP_PROP:
6344 propdata[0] = XCL_HASPROP;
6345 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
6346 propdata[2] = cc[0];
6347 propdata[3] = cc[1];
6348 propdata[4] = XCL_END;
6349 if (check_str_ptr)
6350 detect_partial_match(common, backtracks);
6351 compile_xclass_matchingpath(common, propdata, backtracks);
6352 return cc + 2;
6353 #endif
6354 #endif
6355
6356 case OP_ANYNL:
6357 if (check_str_ptr)
6358 detect_partial_match(common, backtracks);
6359 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
6360 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6361 /* We don't need to handle soft partial matching case. */
6362 end_list = NULL;
6363 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6364 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6365 else
6366 check_str_end(common, &end_list);
6367 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6368 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6369 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6370 jump[2] = JUMP(SLJIT_JUMP);
6371 JUMPHERE(jump[0]);
6372 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
6373 set_jumps(end_list, LABEL());
6374 JUMPHERE(jump[1]);
6375 JUMPHERE(jump[2]);
6376 return cc;
6377
6378 case OP_NOT_HSPACE:
6379 case OP_HSPACE:
6380 if (check_str_ptr)
6381 detect_partial_match(common, backtracks);
6382 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
6383 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
6384 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6385 return cc;
6386
6387 case OP_NOT_VSPACE:
6388 case OP_VSPACE:
6389 if (check_str_ptr)
6390 detect_partial_match(common, backtracks);
6391 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
6392 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
6393 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6394 return cc;
6395
6396 #ifdef SUPPORT_UCP
6397 case OP_EXTUNI:
6398 if (check_str_ptr)
6399 detect_partial_match(common, backtracks);
6400 read_char(common);
6401 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6402 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6403 /* Optimize register allocation: use a real register. */
6404 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6405 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6406
6407 label = LABEL();
6408 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6409 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6410 read_char(common);
6411 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6412 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6413 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6414
6415 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
6416 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
6417 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
6418 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6419 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6420 JUMPTO(SLJIT_NOT_ZERO, label);
6421
6422 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6423 JUMPHERE(jump[0]);
6424 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6425
6426 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
6427 {
6428 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6429 /* Since we successfully read a char above, partial matching must occure. */
6430 check_partial(common, TRUE);
6431 JUMPHERE(jump[0]);
6432 }
6433 return cc;
6434 #endif
6435
6436 case OP_CHAR:
6437 case OP_CHARI:
6438 length = 1;
6439 #ifdef SUPPORT_UTF
6440 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
6441 #endif
6442 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
6443 {
6444 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6445 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6446
6447 context.length = IN_UCHARS(length);
6448 context.sourcereg = -1;
6449 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6450 context.ucharptr = 0;
6451 #endif
6452 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
6453 }
6454
6455 if (check_str_ptr)
6456 detect_partial_match(common, backtracks);
6457 #ifdef SUPPORT_UTF
6458 if (common->utf)
6459 {
6460 GETCHAR(c, cc);
6461 }
6462 else
6463 #endif
6464 c = *cc;
6465
6466 if (type == OP_CHAR || !char_has_othercase(common, cc))
6467 {
6468 read_char_range(common, c, c, FALSE);
6469 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6470 return cc + length;
6471 }
6472 oc = char_othercase(common, c);
6473 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
6474 bit = c ^ oc;
6475 if (is_powerof2(bit))
6476 {
6477 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6478 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6479 return cc + length;
6480 }
6481 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
6482 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6483 JUMPHERE(jump[0]);
6484 return cc + length;
6485
6486 case OP_NOT:
6487 case OP_NOTI:
6488 if (check_str_ptr)
6489 detect_partial_match(common, backtracks);
6490 length = 1;
6491 #ifdef SUPPORT_UTF
6492 if (common->utf)
6493 {
6494 #ifdef COMPILE_PCRE8
6495 c = *cc;
6496 if (c < 128)
6497 {
6498 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6499 if (type == OP_NOT || !char_has_othercase(common, cc))
6500 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6501 else
6502 {
6503 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
6504 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
6505 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
6506 }
6507 /* Skip the variable-length character. */
6508 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6509 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6510 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6511 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6512 JUMPHERE(jump[0]);
6513 return cc + 1;
6514 }
6515 else
6516 #endif /* COMPILE_PCRE8 */
6517 {
6518 GETCHARLEN(c, cc, length);
6519 }
6520 }
6521 else
6522 #endif /* SUPPORT_UTF */
6523 c = *cc;
6524
6525 if (type == OP_NOT || !char_has_othercase(common, cc))
6526 {
6527 read_char_range(common, c, c, TRUE);
6528 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6529 }
6530 else
6531 {
6532 oc = char_othercase(common, c);
6533 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
6534 bit = c ^ oc;
6535 if (is_powerof2(bit))
6536 {
6537 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6538 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6539 }
6540 else
6541 {
6542 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6543 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6544 }
6545 }
6546 return cc + length;
6547
6548 case OP_CLASS:
6549 case OP_NCLASS:
6550 if (check_str_ptr)
6551 detect_partial_match(common, backtracks);
6552
6553 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6554 bit = (common->utf && is_char7_bitset((const sljit_ub *)cc, type == OP_NCLASS)) ? 127 : 255;
6555 read_char_range(common, 0, bit, type == OP_NCLASS);
6556 #else
6557 read_char_range(common, 0, 255, type == OP_NCLASS);
6558 #endif
6559
6560 if (check_class_ranges(common, (const sljit_ub *)cc, type == OP_NCLASS, FALSE, backtracks))
6561 return cc + 32 / sizeof(pcre_uchar);
6562
6563 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6564 jump[0] = NULL;
6565 if (common->utf)
6566 {
6567 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
6568 if (type == OP_CLASS)
6569 {
6570 add_jump(compiler, backtracks, jump[0]);
6571 jump[0] = NULL;
6572 }
6573 }
6574 #elif !defined COMPILE_PCRE8
6575 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6576 if (type == OP_CLASS)
6577 {
6578 add_jump(compiler, backtracks, jump[0]);
6579 jump[0] = NULL;
6580 }
6581 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
6582
6583 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6584 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6585 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6586 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6587 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6588 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6589
6590 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6591 if (jump[0] != NULL)
6592 JUMPHERE(jump[0]);
6593 #endif
6594 return cc + 32 / sizeof(pcre_uchar);
6595
6596 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6597 case OP_XCLASS:
6598 if (check_str_ptr)
6599 detect_partial_match(common, backtracks);
6600 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
6601 return cc + GET(cc, 0) - 1;
6602 #endif
6603 }
6604 SLJIT_ASSERT_STOP();
6605 return cc;
6606 }
6607
6608 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6609 {
6610 /* This function consumes at least one input character. */
6611 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6612 DEFINE_COMPILER;
6613 pcre_uchar *ccbegin = cc;
6614 compare_context context;
6615 int size;
6616
6617 context.length = 0;
6618 do
6619 {
6620 if (cc >= ccend)
6621 break;
6622
6623 if (*cc == OP_CHAR)
6624 {
6625 size = 1;
6626 #ifdef SUPPORT_UTF
6627 if (common->utf && HAS_EXTRALEN(cc[1]))
6628 size += GET_EXTRALEN(cc[1]);
6629 #endif
6630 }
6631 else if (*cc == OP_CHARI)
6632 {
6633 size = 1;
6634 #ifdef SUPPORT_UTF
6635 if (common->utf)
6636 {
6637 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6638 size = 0;
6639 else if (HAS_EXTRALEN(cc[1]))
6640 size += GET_EXTRALEN(cc[1]);
6641 }
6642 else
6643 #endif
6644 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6645 size = 0;
6646 }
6647 else
6648 size = 0;
6649
6650 cc += 1 + size;
6651 context.length += IN_UCHARS(size);
6652 }
6653 while (size > 0 && context.length <= 128);
6654
6655 cc = ccbegin;
6656 if (context.length > 0)
6657 {
6658 /* We have a fixed-length byte sequence. */
6659 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6660 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6661
6662 context.sourcereg = -1;
6663 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6664 context.ucharptr = 0;
6665 #endif
6666 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6667 return cc;
6668 }
6669
6670 /* A non-fixed length character will be checked if length == 0. */
6671 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6672 }
6673
6674 /* Forward definitions. */
6675 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
6676 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
6677
/* Allocates a zero-filled backtrack record of `size` bytes from the compiler's
   memory, stores `ccstart` in its `cc` field and pushes it on the list headed
   by `parent->top` (the previous head becomes `backtrack->prev`).

   The expansion uses the names `compiler`, `parent` and `backtrack` from the
   enclosing scope and leaves the new record in `backtrack`. When the compiler
   reports an error after the allocation, the enclosing function returns
   `error` before the record is touched.

   `size` is evaluated twice, so it must not have side effects. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return (error); \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
6690
/* Variant of the backtrack push for functions returning void: allocates a
   zero-filled backtrack record of `size` bytes from the compiler's memory,
   stores `ccstart` in its `cc` field and pushes it on the list headed by
   `parent->top` (the previous head becomes `backtrack->prev`).

   The expansion uses the names `compiler`, `parent` and `backtrack` from the
   enclosing scope and leaves the new record in `backtrack`. When the compiler
   reports an error after the allocation, the enclosing function returns
   before the record is touched.

   `size` is evaluated twice, so it must not have side effects. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
6703
/* Gives access to the `backtrack` variable of the enclosing scope as a pointer
   to the record type named by the argument. */
#define BACKTRACK_AS(record_type) ((record_type *)backtrack)
6705
6706 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
6707 {
6708 /* The OVECTOR offset goes to TMP2. */
6709 DEFINE_COMPILER;
6710 int count = GET2(cc, 1 + IMM2_SIZE);
6711 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
6712 unsigned int offset;
6713 jump_list *found = NULL;
6714
6715 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
6716
6717 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
6718
6719 count--;
6720 while (count-- > 0)
6721 {
6722 offset = GET2(slot, 0) << 1;
6723 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6724 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6725 slot += common->name_entry_size;
6726 }
6727
6728 offset = GET2(slot, 0) << 1;
6729 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6730 if (backtracks != NULL && !common->jscript_compat)
6731 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6732
6733 set_jumps(found, LABEL());
6734 }
6735
6736 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6737 {
6738 DEFINE_COMPILER;
6739 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6740 int offset = 0;
6741 struct sljit_jump *jump = NULL;
6742 struct sljit_jump *partial;
6743 struct sljit_jump *nopartial;
6744
6745 if (ref)
6746 {
6747 offset = GET2(cc, 1) << 1;
6748 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6749 /* OVECTOR(1) contains the "string begin - 1" constant. */
6750 if (withchecks && !common->jscript_compat)
6751 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6752 }
6753 else
6754 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6755
6756 #if defined SUPPORT_UTF && defined SUPPORT_UCP
6757 if (common->utf && *cc == OP_REFI)
6758 {
6759 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
6760 if (ref)
6761 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6762 else
6763 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6764
6765 if (withchecks)
6766 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6767
6768 /* Needed to save important temporary registers. */
6769 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6770 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6772 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6773 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6774 if (common->mode == JIT_COMPILE)
6775 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6776 else
6777 {
6778 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6779 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6780 check_partial(common, FALSE);
6781 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6782 JUMPHERE(nopartial);
6783 }
6784 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6785 }
6786 else
6787 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6788 {
6789 if (ref)
6790 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6791 else
6792 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6793
6794 if (withchecks)
6795 jump = JUMP(SLJIT_ZERO);
6796
6797 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6798 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6799 if (common->mode == JIT_COMPILE)
6800 add_jump(compiler, backtracks, partial);
6801
6802 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6803 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6804
6805 if (common->mode != JIT_COMPILE)
6806 {
6807 nopartial = JUMP(SLJIT_JUMP);
6808 JUMPHERE(partial);
6809 /* TMP2 -= STR_END - STR_PTR */
6810 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6811 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6812 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6813 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6814 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6815 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6816 JUMPHERE(partial);
6817 check_partial(common, FALSE);
6818 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6819 JUMPHERE(nopartial);
6820 }