/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1632 - (show annotations)
Fri Feb 12 14:43:22 2016 UTC (3 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 356832 byte(s)
Error occurred while calculating annotation data.
Migrate fast-fail support from PCRE2-JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
183 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
184 PUBL(jit_callback) callback;
185 void *userdata;
186 pcre_uint32 top_bracket;
187 pcre_uint32 limit_match;
188 } executable_functions;
189
190 typedef struct jump_list {
191 struct sljit_jump *jump;
192 struct jump_list *next;
193 } jump_list;
194
195 typedef struct stub_list {
196 struct sljit_jump *start;
197 struct sljit_label *quit;
198 struct stub_list *next;
199 } stub_list;
200
201 typedef struct label_addr_list {
202 struct sljit_label *label;
203 sljit_uw *update_addr;
204 struct label_addr_list *next;
205 } label_addr_list;
206
207 enum frame_types {
208 no_frame = -1,
209 no_stack = -2
210 };
211
212 enum control_types {
213 type_mark = 0,
214 type_then_trap = 1
215 };
216
217 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
218
219 /* The following structure is the key data type for the recursive
220 code generator. It is allocated by compile_matchingpath, and contains
221 the arguments for compile_backtrackingpath. Must be the first member
222 of its descendants. */
223 typedef struct backtrack_common {
224 /* Concatenation stack. */
225 struct backtrack_common *prev;
226 jump_list *nextbacktracks;
227 /* Internal stack (for component operators). */
228 struct backtrack_common *top;
229 jump_list *topbacktracks;
230 /* Opcode pointer. */
231 pcre_uchar *cc;
232 } backtrack_common;
233
234 typedef struct assert_backtrack {
235 backtrack_common common;
236 jump_list *condfailed;
237 /* Less than 0 if a frame is not needed. */
238 int framesize;
239 /* Points to our private memory word on the stack. */
240 int private_data_ptr;
241 /* For iterators. */
242 struct sljit_label *matchingpath;
243 } assert_backtrack;
244
245 typedef struct bracket_backtrack {
246 backtrack_common common;
247 /* Where to continue if an alternative is successfully matched. */
248 struct sljit_label *alternative_matchingpath;
249 /* For rmin and rmax iterators. */
250 struct sljit_label *recursive_matchingpath;
251 /* For greedy ? operator. */
252 struct sljit_label *zero_matchingpath;
253 /* Contains the branches of a failed condition. */
254 union {
255 /* Both for OP_COND, OP_SCOND. */
256 jump_list *condfailed;
257 assert_backtrack *assert;
258 /* For OP_ONCE. Less than 0 if not needed. */
259 int framesize;
260 } u;
261 /* Points to our private memory word on the stack. */
262 int private_data_ptr;
263 } bracket_backtrack;
264
265 typedef struct bracketpos_backtrack {
266 backtrack_common common;
267 /* Points to our private memory word on the stack. */
268 int private_data_ptr;
269 /* Reverting stack is needed. */
270 int framesize;
271 /* Allocated stack size. */
272 int stacksize;
273 } bracketpos_backtrack;
274
275 typedef struct braminzero_backtrack {
276 backtrack_common common;
277 struct sljit_label *matchingpath;
278 } braminzero_backtrack;
279
280 typedef struct char_iterator_backtrack {
281 backtrack_common common;
282 /* Next iteration. */
283 struct sljit_label *matchingpath;
284 union {
285 jump_list *backtracks;
286 struct {
287 unsigned int othercasebit;
288 pcre_uchar chr;
289 BOOL enabled;
290 } charpos;
291 } u;
292 } char_iterator_backtrack;
293
294 typedef struct ref_iterator_backtrack {
295 backtrack_common common;
296 /* Next iteration. */
297 struct sljit_label *matchingpath;
298 } ref_iterator_backtrack;
299
300 typedef struct recurse_entry {
301 struct recurse_entry *next;
302 /* Contains the function entry. */
303 struct sljit_label *entry;
304 /* Collects the calls until the function is created. */
305 jump_list *calls;
306 /* Points to the starting opcode. */
307 sljit_sw start;
308 } recurse_entry;
309
310 typedef struct recurse_backtrack {
311 backtrack_common common;
312 BOOL inlined_pattern;
313 } recurse_backtrack;
314
315 #define OP_THEN_TRAP OP_TABLE_LENGTH
316
317 typedef struct then_trap_backtrack {
318 backtrack_common common;
319 /* If then_trap is not NULL, this structure contains the real
320 then_trap for the backtracking path. */
321 struct then_trap_backtrack *then_trap;
322 /* Points to the starting opcode. */
323 sljit_sw start;
324 /* Exit point for the then opcodes of this alternative. */
325 jump_list *quit;
326 /* Frame size of the current alternative. */
327 int framesize;
328 } then_trap_backtrack;
329
330 #define MAX_RANGE_SIZE 4
331
332 typedef struct compiler_common {
333 /* The sljit generic compiler. */
334 struct sljit_compiler *compiler;
335 /* First byte code. */
336 pcre_uchar *start;
337 /* Maps private data offset to each opcode. */
338 sljit_si *private_data_ptrs;
339 /* Chain list of read-only data ptrs. */
340 void *read_only_data_head;
341 /* Tells whether the capturing bracket is optimized. */
342 sljit_ub *optimized_cbracket;
343 /* Tells whether the starting offset is a target of then. */
344 sljit_ub *then_offsets;
345 /* Current position where a THEN must jump. */
346 then_trap_backtrack *then_trap;
347 /* Starting offset of private data for capturing brackets. */
348 sljit_si cbra_ptr;
349 /* Output vector starting point. Must be divisible by 2. */
350 sljit_si ovector_start;
351 /* Points to the starting character of the current match. */
352 sljit_si start_ptr;
353 /* Last known position of the requested byte. */
354 sljit_si req_char_ptr;
355 /* Head of the last recursion. */
356 sljit_si recursive_head_ptr;
357 /* First inspected character for partial matching.
358 (Needed for avoiding zero length partial matches.) */
359 sljit_si start_used_ptr;
360 /* Starting pointer for partial soft matches. */
361 sljit_si hit_start;
362 /* End pointer of the first line. */
363 sljit_si first_line_end;
364 /* Points to the marked string. */
365 sljit_si mark_ptr;
366 /* Recursive control verb management chain. */
367 sljit_si control_head_ptr;
368 /* Points to the last matched capture block index. */
369 sljit_si capture_last_ptr;
370 /* Fast forward skipping byte code pointer. */
371 pcre_uchar *fast_forward_bc_ptr;
372 /* Locals used by fast fail optimization. */
373 sljit_si fast_fail_start_ptr;
374 sljit_si fast_fail_end_ptr;
375
376 /* Flipped and lower case tables. */
377 const sljit_ub *fcc;
378 sljit_sw lcc;
379 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
380 int mode;
381 /* TRUE, when the match might be empty (minlength is 0). */
382 BOOL might_be_empty;
383 /* \K is found in the pattern. */
384 BOOL has_set_som;
385 /* (*SKIP:arg) is found in the pattern. */
386 BOOL has_skip_arg;
387 /* (*THEN) is found in the pattern. */
388 BOOL has_then;
389 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
390 BOOL has_skip_in_assert_back;
391 /* Currently in recurse or negative assert. */
392 BOOL local_exit;
393 /* Currently in a positive assert. */
394 BOOL positive_assert;
395 /* Newline control. */
396 int nltype;
397 sljit_ui nlmax;
398 sljit_ui nlmin;
399 int newline;
400 int bsr_nltype;
401 sljit_ui bsr_nlmax;
402 sljit_ui bsr_nlmin;
403 /* Dollar endonly. */
404 int endonly;
405 /* Tables. */
406 sljit_sw ctypes;
407 /* Named capturing brackets. */
408 pcre_uchar *name_table;
409 sljit_sw name_count;
410 sljit_sw name_entry_size;
411
412 /* Labels and jump lists. */
413 struct sljit_label *partialmatchlabel;
414 struct sljit_label *quit_label;
415 struct sljit_label *forced_quit_label;
416 struct sljit_label *accept_label;
417 struct sljit_label *ff_newline_shortcut;
418 stub_list *stubs;
419 label_addr_list *label_addrs;
420 recurse_entry *entries;
421 recurse_entry *currententry;
422 jump_list *partialmatch;
423 jump_list *quit;
424 jump_list *positive_assert_quit;
425 jump_list *forced_quit;
426 jump_list *accept;
427 jump_list *calllimit;
428 jump_list *stackalloc;
429 jump_list *revertframes;
430 jump_list *wordboundary;
431 jump_list *anynewline;
432 jump_list *hspace;
433 jump_list *vspace;
434 jump_list *casefulcmp;
435 jump_list *caselesscmp;
436 jump_list *reset_match;
437 BOOL jscript_compat;
438 #ifdef SUPPORT_UTF
439 BOOL utf;
440 #ifdef SUPPORT_UCP
441 BOOL use_ucp;
442 jump_list *getucd;
443 #endif
444 #ifdef COMPILE_PCRE8
445 jump_list *utfreadchar;
446 jump_list *utfreadchar16;
447 jump_list *utfreadtype8;
448 #endif
449 #endif /* SUPPORT_UTF */
450 } compiler_common;
451
452 /* For byte_sequence_compare. */
453
454 typedef struct compare_context {
455 int length;
456 int sourcereg;
457 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
458 int ucharptr;
459 union {
460 sljit_si asint;
461 sljit_uh asushort;
462 #if defined COMPILE_PCRE8
463 sljit_ub asbyte;
464 sljit_ub asuchars[4];
465 #elif defined COMPILE_PCRE16
466 sljit_uh asuchars[2];
467 #elif defined COMPILE_PCRE32
468 sljit_ui asuchars[1];
469 #endif
470 } c;
471 union {
472 sljit_si asint;
473 sljit_uh asushort;
474 #if defined COMPILE_PCRE8
475 sljit_ub asbyte;
476 sljit_ub asuchars[4];
477 #elif defined COMPILE_PCRE16
478 sljit_uh asuchars[2];
479 #elif defined COMPILE_PCRE32
480 sljit_ui asuchars[1];
481 #endif
482 } oc;
483 #endif
484 } compare_context;
485
486 /* Undefine sljit macros. */
487 #undef CMP
488
489 /* Used for accessing the elements of the stack. */
490 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
491
492 #define TMP1 SLJIT_R0
493 #define TMP2 SLJIT_R2
494 #define TMP3 SLJIT_R3
495 #define STR_PTR SLJIT_S0
496 #define STR_END SLJIT_S1
497 #define STACK_TOP SLJIT_R1
498 #define STACK_LIMIT SLJIT_S2
499 #define COUNT_MATCH SLJIT_S3
500 #define ARGUMENTS SLJIT_S4
501 #define RETURN_ADDR SLJIT_R4
502
503 /* Local space layout. */
504 /* These two locals can be used by the current opcode. */
505 #define LOCALS0 (0 * sizeof(sljit_sw))
506 #define LOCALS1 (1 * sizeof(sljit_sw))
507 /* Two local variables for possessive quantifiers (char1 cannot use them). */
508 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
509 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
510 /* Max limit of recursions. */
511 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
512 /* The output vector is stored on the stack, and contains pointers
513 to characters. The vector data is divided into two groups: the first
514 group contains the start / end character pointers, and the second is
515 the start pointers when the end of the capturing group has not yet been reached. */
516 #define OVECTOR_START (common->ovector_start)
517 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
518 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
519 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
520
521 #if defined COMPILE_PCRE8
522 #define MOV_UCHAR SLJIT_MOV_UB
523 #define MOVU_UCHAR SLJIT_MOVU_UB
524 #elif defined COMPILE_PCRE16
525 #define MOV_UCHAR SLJIT_MOV_UH
526 #define MOVU_UCHAR SLJIT_MOVU_UH
527 #elif defined COMPILE_PCRE32
528 #define MOV_UCHAR SLJIT_MOV_UI
529 #define MOVU_UCHAR SLJIT_MOVU_UI
530 #else
531 #error Unsupported compiling mode
532 #endif
533
534 /* Shortcuts. */
535 #define DEFINE_COMPILER \
536 struct sljit_compiler *compiler = common->compiler
537 #define OP1(op, dst, dstw, src, srcw) \
538 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
539 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
540 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
541 #define LABEL() \
542 sljit_emit_label(compiler)
543 #define JUMP(type) \
544 sljit_emit_jump(compiler, (type))
545 #define JUMPTO(type, label) \
546 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
547 #define JUMPHERE(jump) \
548 sljit_set_label((jump), sljit_emit_label(compiler))
549 #define SET_LABEL(jump, label) \
550 sljit_set_label((jump), (label))
551 #define CMP(type, src1, src1w, src2, src2w) \
552 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
553 #define CMPTO(type, src1, src1w, src2, src2w, label) \
554 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
555 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
556 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
557 #define GET_LOCAL_BASE(dst, dstw, offset) \
558 sljit_get_local_base(compiler, (dst), (dstw), (offset))
559
560 #define READ_CHAR_MAX 0x7fffffff
561
562 static pcre_uchar *bracketend(pcre_uchar *cc)
563 {
564 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
565 do cc += GET(cc, 1); while (*cc == OP_ALT);
566 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
567 cc += 1 + LINK_SIZE;
568 return cc;
569 }
570
571 static int no_alternatives(pcre_uchar *cc)
572 {
573 int count = 0;
574 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
575 do
576 {
577 cc += GET(cc, 1);
578 count++;
579 }
580 while (*cc == OP_ALT);
581 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
582 return count;
583 }
584
/* Number of set bits in each 4 bit value (index 0..15). */
static int ones_in_half_byte[16] = {
  0, 1, 1, 2,  /* 0000 0001 0010 0011 */
  1, 2, 2, 3,  /* 0100 0101 0110 0111 */
  1, 2, 2, 3,  /* 1000 1001 1010 1011 */
  2, 3, 3, 4   /* 1100 1101 1110 1111 */
};
589
590 /* Functions which might need modification for every newly supported opcode:
591 next_opcode
592 check_opcode_types
593 set_private_data_ptrs
594 get_framesize
595 init_frame
596 get_private_data_copy_length
597 copy_private_data
598 compile_matchingpath
599 compile_backtrackingpath
600 */
601
602 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
603 {
604 SLJIT_UNUSED_ARG(common);
605 switch(*cc)
606 {
607 case OP_SOD:
608 case OP_SOM:
609 case OP_SET_SOM:
610 case OP_NOT_WORD_BOUNDARY:
611 case OP_WORD_BOUNDARY:
612 case OP_NOT_DIGIT:
613 case OP_DIGIT:
614 case OP_NOT_WHITESPACE:
615 case OP_WHITESPACE:
616 case OP_NOT_WORDCHAR:
617 case OP_WORDCHAR:
618 case OP_ANY:
619 case OP_ALLANY:
620 case OP_NOTPROP:
621 case OP_PROP:
622 case OP_ANYNL:
623 case OP_NOT_HSPACE:
624 case OP_HSPACE:
625 case OP_NOT_VSPACE:
626 case OP_VSPACE:
627 case OP_EXTUNI:
628 case OP_EODN:
629 case OP_EOD:
630 case OP_CIRC:
631 case OP_CIRCM:
632 case OP_DOLL:
633 case OP_DOLLM:
634 case OP_CRSTAR:
635 case OP_CRMINSTAR:
636 case OP_CRPLUS:
637 case OP_CRMINPLUS:
638 case OP_CRQUERY:
639 case OP_CRMINQUERY:
640 case OP_CRRANGE:
641 case OP_CRMINRANGE:
642 case OP_CRPOSSTAR:
643 case OP_CRPOSPLUS:
644 case OP_CRPOSQUERY:
645 case OP_CRPOSRANGE:
646 case OP_CLASS:
647 case OP_NCLASS:
648 case OP_REF:
649 case OP_REFI:
650 case OP_DNREF:
651 case OP_DNREFI:
652 case OP_RECURSE:
653 case OP_CALLOUT:
654 case OP_ALT:
655 case OP_KET:
656 case OP_KETRMAX:
657 case OP_KETRMIN:
658 case OP_KETRPOS:
659 case OP_REVERSE:
660 case OP_ASSERT:
661 case OP_ASSERT_NOT:
662 case OP_ASSERTBACK:
663 case OP_ASSERTBACK_NOT:
664 case OP_ONCE:
665 case OP_ONCE_NC:
666 case OP_BRA:
667 case OP_BRAPOS:
668 case OP_CBRA:
669 case OP_CBRAPOS:
670 case OP_COND:
671 case OP_SBRA:
672 case OP_SBRAPOS:
673 case OP_SCBRA:
674 case OP_SCBRAPOS:
675 case OP_SCOND:
676 case OP_CREF:
677 case OP_DNCREF:
678 case OP_RREF:
679 case OP_DNRREF:
680 case OP_DEF:
681 case OP_BRAZERO:
682 case OP_BRAMINZERO:
683 case OP_BRAPOSZERO:
684 case OP_PRUNE:
685 case OP_SKIP:
686 case OP_THEN:
687 case OP_COMMIT:
688 case OP_FAIL:
689 case OP_ACCEPT:
690 case OP_ASSERT_ACCEPT:
691 case OP_CLOSE:
692 case OP_SKIPZERO:
693 return cc + PRIV(OP_lengths)[*cc];
694
695 case OP_CHAR:
696 case OP_CHARI:
697 case OP_NOT:
698 case OP_NOTI:
699 case OP_STAR:
700 case OP_MINSTAR:
701 case OP_PLUS:
702 case OP_MINPLUS:
703 case OP_QUERY:
704 case OP_MINQUERY:
705 case OP_UPTO:
706 case OP_MINUPTO:
707 case OP_EXACT:
708 case OP_POSSTAR:
709 case OP_POSPLUS:
710 case OP_POSQUERY:
711 case OP_POSUPTO:
712 case OP_STARI:
713 case OP_MINSTARI:
714 case OP_PLUSI:
715 case OP_MINPLUSI:
716 case OP_QUERYI:
717 case OP_MINQUERYI:
718 case OP_UPTOI:
719 case OP_MINUPTOI:
720 case OP_EXACTI:
721 case OP_POSSTARI:
722 case OP_POSPLUSI:
723 case OP_POSQUERYI:
724 case OP_POSUPTOI:
725 case OP_NOTSTAR:
726 case OP_NOTMINSTAR:
727 case OP_NOTPLUS:
728 case OP_NOTMINPLUS:
729 case OP_NOTQUERY:
730 case OP_NOTMINQUERY:
731 case OP_NOTUPTO:
732 case OP_NOTMINUPTO:
733 case OP_NOTEXACT:
734 case OP_NOTPOSSTAR:
735 case OP_NOTPOSPLUS:
736 case OP_NOTPOSQUERY:
737 case OP_NOTPOSUPTO:
738 case OP_NOTSTARI:
739 case OP_NOTMINSTARI:
740 case OP_NOTPLUSI:
741 case OP_NOTMINPLUSI:
742 case OP_NOTQUERYI:
743 case OP_NOTMINQUERYI:
744 case OP_NOTUPTOI:
745 case OP_NOTMINUPTOI:
746 case OP_NOTEXACTI:
747 case OP_NOTPOSSTARI:
748 case OP_NOTPOSPLUSI:
749 case OP_NOTPOSQUERYI:
750 case OP_NOTPOSUPTOI:
751 cc += PRIV(OP_lengths)[*cc];
752 #ifdef SUPPORT_UTF
753 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
754 #endif
755 return cc;
756
757 /* Special cases. */
758 case OP_TYPESTAR:
759 case OP_TYPEMINSTAR:
760 case OP_TYPEPLUS:
761 case OP_TYPEMINPLUS:
762 case OP_TYPEQUERY:
763 case OP_TYPEMINQUERY:
764 case OP_TYPEUPTO:
765 case OP_TYPEMINUPTO:
766 case OP_TYPEEXACT:
767 case OP_TYPEPOSSTAR:
768 case OP_TYPEPOSPLUS:
769 case OP_TYPEPOSQUERY:
770 case OP_TYPEPOSUPTO:
771 return cc + PRIV(OP_lengths)[*cc] - 1;
772
773 case OP_ANYBYTE:
774 #ifdef SUPPORT_UTF
775 if (common->utf) return NULL;
776 #endif
777 return cc + 1;
778
779 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
780 case OP_XCLASS:
781 return cc + GET(cc, 1);
782 #endif
783
784 case OP_MARK:
785 case OP_PRUNE_ARG:
786 case OP_SKIP_ARG:
787 case OP_THEN_ARG:
788 return cc + 1 + 2 + cc[1];
789
790 default:
791 /* All opcodes are supported now! */
792 SLJIT_ASSERT_STOP();
793 return NULL;
794 }
795 }
796
797 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
798 {
799 int count;
800 pcre_uchar *slot;
801 pcre_uchar *assert_back_end = cc - 1;
802
803 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
804 while (cc < ccend)
805 {
806 switch(*cc)
807 {
808 case OP_SET_SOM:
809 common->has_set_som = TRUE;
810 common->might_be_empty = TRUE;
811 cc += 1;
812 break;
813
814 case OP_REF:
815 case OP_REFI:
816 common->optimized_cbracket[GET2(cc, 1)] = 0;
817 cc += 1 + IMM2_SIZE;
818 break;
819
820 case OP_CBRAPOS:
821 case OP_SCBRAPOS:
822 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
823 cc += 1 + LINK_SIZE + IMM2_SIZE;
824 break;
825
826 case OP_COND:
827 case OP_SCOND:
828 /* Only AUTO_CALLOUT can insert this opcode. We do
829 not intend to support this case. */
830 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
831 return FALSE;
832 cc += 1 + LINK_SIZE;
833 break;
834
835 case OP_CREF:
836 common->optimized_cbracket[GET2(cc, 1)] = 0;
837 cc += 1 + IMM2_SIZE;
838 break;
839
840 case OP_DNREF:
841 case OP_DNREFI:
842 case OP_DNCREF:
843 count = GET2(cc, 1 + IMM2_SIZE);
844 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
845 while (count-- > 0)
846 {
847 common->optimized_cbracket[GET2(slot, 0)] = 0;
848 slot += common->name_entry_size;
849 }
850 cc += 1 + 2 * IMM2_SIZE;
851 break;
852
853 case OP_RECURSE:
854 /* Set its value only once. */
855 if (common->recursive_head_ptr == 0)
856 {
857 common->recursive_head_ptr = common->ovector_start;
858 common->ovector_start += sizeof(sljit_sw);
859 }
860 cc += 1 + LINK_SIZE;
861 break;
862
863 case OP_CALLOUT:
864 if (common->capture_last_ptr == 0)
865 {
866 common->capture_last_ptr = common->ovector_start;
867 common->ovector_start += sizeof(sljit_sw);
868 }
869 cc += 2 + 2 * LINK_SIZE;
870 break;
871
872 case OP_ASSERTBACK:
873 slot = bracketend(cc);
874 if (slot > assert_back_end)
875 assert_back_end = slot;
876 cc += 1 + LINK_SIZE;
877 break;
878
879 case OP_THEN_ARG:
880 common->has_then = TRUE;
881 common->control_head_ptr = 1;
882 /* Fall through. */
883
884 case OP_PRUNE_ARG:
885 case OP_MARK:
886 if (common->mark_ptr == 0)
887 {
888 common->mark_ptr = common->ovector_start;
889 common->ovector_start += sizeof(sljit_sw);
890 }
891 cc += 1 + 2 + cc[1];
892 break;
893
894 case OP_THEN:
895 common->has_then = TRUE;
896 common->control_head_ptr = 1;
897 cc += 1;
898 break;
899
900 case OP_SKIP:
901 if (cc < assert_back_end)
902 common->has_skip_in_assert_back = TRUE;
903 cc += 1;
904 break;
905
906 case OP_SKIP_ARG:
907 common->control_head_ptr = 1;
908 common->has_skip_arg = TRUE;
909 if (cc < assert_back_end)
910 common->has_skip_in_assert_back = TRUE;
911 cc += 1 + 2 + cc[1];
912 break;
913
914 default:
915 cc = next_opcode(common, cc);
916 if (cc == NULL)
917 return FALSE;
918 break;
919 }
920 }
921 return TRUE;
922 }
923
924 static BOOL is_accelerated_repeat(pcre_uchar *cc)
925 {
926 switch(*cc)
927 {
928 case OP_TYPESTAR:
929 case OP_TYPEMINSTAR:
930 case OP_TYPEPLUS:
931 case OP_TYPEMINPLUS:
932 case OP_TYPEPOSSTAR:
933 case OP_TYPEPOSPLUS:
934 return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
935
936 case OP_STAR:
937 case OP_MINSTAR:
938 case OP_PLUS:
939 case OP_MINPLUS:
940 case OP_POSSTAR:
941 case OP_POSPLUS:
942
943 case OP_STARI:
944 case OP_MINSTARI:
945 case OP_PLUSI:
946 case OP_MINPLUSI:
947 case OP_POSSTARI:
948 case OP_POSPLUSI:
949
950 case OP_NOTSTAR:
951 case OP_NOTMINSTAR:
952 case OP_NOTPLUS:
953 case OP_NOTMINPLUS:
954 case OP_NOTPOSSTAR:
955 case OP_NOTPOSPLUS:
956
957 case OP_NOTSTARI:
958 case OP_NOTMINSTARI:
959 case OP_NOTPLUSI:
960 case OP_NOTMINPLUSI:
961 case OP_NOTPOSSTARI:
962 case OP_NOTPOSPLUSI:
963 return TRUE;
964
965 case OP_CLASS:
966 case OP_NCLASS:
967 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
968 case OP_XCLASS:
969 cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
970 #else
971 cc += (1 + (32 / sizeof(pcre_uchar)));
972 #endif
973
974 switch(*cc)
975 {
976 case OP_CRSTAR:
977 case OP_CRMINSTAR:
978 case OP_CRPLUS:
979 case OP_CRMINPLUS:
980 case OP_CRPOSSTAR:
981 case OP_CRPOSPLUS:
982 return TRUE;
983 }
984 break;
985 }
986 return FALSE;
987 }
988
989 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_si depth)
990 {
991 pcre_uchar *next_alt;
992
993 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
994
995 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
996 return;
997
998 next_alt = bracketend(cc) - (1 + LINK_SIZE);
999 if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1000 return;
1001
1002 do
1003 {
1004 next_alt = cc + GET(cc, 1);
1005
1006 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1007
1008 while (TRUE)
1009 {
1010 switch(*cc)
1011 {
1012 case OP_SOD:
1013 case OP_SOM:
1014 case OP_SET_SOM:
1015 case OP_NOT_WORD_BOUNDARY:
1016 case OP_WORD_BOUNDARY:
1017 case OP_EODN:
1018 case OP_EOD:
1019 case OP_CIRC:
1020 case OP_CIRCM:
1021 case OP_DOLL:
1022 case OP_DOLLM:
1023 /* Zero width assertions. */
1024 cc++;
1025 continue;
1026 }
1027 break;
1028 }
1029
1030 if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1031 detect_fast_fail(common, cc, private_data_start, depth - 1);
1032
1033 if (is_accelerated_repeat(cc))
1034 {
1035 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1036
1037 if (common->fast_fail_start_ptr == 0)
1038 common->fast_fail_start_ptr = *private_data_start;
1039
1040 *private_data_start += sizeof(sljit_sw);
1041 common->fast_fail_end_ptr = *private_data_start;
1042
1043 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1044 return;
1045 }
1046
1047 cc = next_alt;
1048 }
1049 while (*cc == OP_ALT);
1050 }
1051
1052 static int get_class_iterator_size(pcre_uchar *cc)
1053 {
1054 sljit_ui min;
1055 sljit_ui max;
1056 switch(*cc)
1057 {
1058 case OP_CRSTAR:
1059 case OP_CRPLUS:
1060 return 2;
1061
1062 case OP_CRMINSTAR:
1063 case OP_CRMINPLUS:
1064 case OP_CRQUERY:
1065 case OP_CRMINQUERY:
1066 return 1;
1067
1068 case OP_CRRANGE:
1069 case OP_CRMINRANGE:
1070 min = GET2(cc, 1);
1071 max = GET2(cc, 1 + IMM2_SIZE);
1072 if (max == 0)
1073 return (*cc == OP_CRRANGE) ? 2 : 1;
1074 max -= min;
1075 if (max > 2)
1076 max = 2;
1077 return max;
1078
1079 default:
1080 return 0;
1081 }
1082 }
1083
1084 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
1085 {
1086 pcre_uchar *end = bracketend(begin);
1087 pcre_uchar *next;
1088 pcre_uchar *next_end;
1089 pcre_uchar *max_end;
1090 pcre_uchar type;
1091 sljit_sw length = end - begin;
1092 int min, max, i;
1093
1094 /* Detect fixed iterations first. */
1095 if (end[-(1 + LINK_SIZE)] != OP_KET)
1096 return FALSE;
1097
1098 /* Already detected repeat. */
1099 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1100 return TRUE;
1101
1102 next = end;
1103 min = 1;
1104 while (1)
1105 {
1106 if (*next != *begin)
1107 break;
1108 next_end = bracketend(next);
1109 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1110 break;
1111 next = next_end;
1112 min++;
1113 }
1114
1115 if (min == 2)
1116 return FALSE;
1117
1118 max = 0;
1119 max_end = next;
1120 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1121 {
1122 type = *next;
1123 while (1)
1124 {
1125 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1126 break;
1127 next_end = bracketend(next + 2 + LINK_SIZE);
1128 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1129 break;
1130 next = next_end;
1131 max++;
1132 }
1133
1134 if (next[0] == type && next[1] == *begin && max >= 1)
1135 {
1136 next_end = bracketend(next + 1);
1137 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1138 {
1139 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1140 if (*next_end != OP_KET)
1141 break;
1142
1143 if (i == max)
1144 {
1145 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1146 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1147 /* +2 the original and the last. */
1148 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1149 if (min == 1)
1150 return TRUE;
1151 min--;
1152 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1153 }
1154 }
1155 }
1156 }
1157
1158 if (min >= 3)
1159 {
1160 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1161 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1162 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1163 return TRUE;
1164 }
1165
1166 return FALSE;
1167 }
1168
/* Case-label groups for the single character and character type
iterators. Each macro expands to a run of "case" labels and must be
followed directly by the statements which handle the group. */

/* Single character iterators handled with one private word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINSTARI: \
    case OP_MINPLUS: case OP_MINPLUSI: \
    case OP_QUERY: case OP_QUERYI: \
    case OP_MINQUERY: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINSTARI: \
    case OP_NOTMINPLUS: case OP_NOTMINPLUSI: \
    case OP_NOTQUERY: case OP_NOTQUERYI: \
    case OP_NOTMINQUERY: case OP_NOTMINQUERYI:

/* Single character iterators handled with two private words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_STARI: \
    case OP_PLUS: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTSTARI: \
    case OP_NOTPLUS: case OP_NOTPLUSI:

/* As above, but the opcode is followed by an IMM2_SIZE immediate. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_UPTOI: \
    case OP_MINUPTO: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTUPTOI: \
    case OP_NOTMINUPTO: case OP_NOTMINUPTOI:

/* Character type iterators handled with one private word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Character type iterators handled with two private words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* As above, but the opcode is followed by an IMM2_SIZE immediate. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1220
1221 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1222 {
1223 pcre_uchar *cc = common->start;
1224 pcre_uchar *alternative;
1225 pcre_uchar *end = NULL;
1226 int private_data_ptr = *private_data_start;
1227 int space, size, bracketlen;
1228 BOOL repeat_check = TRUE;
1229
1230 while (cc < ccend)
1231 {
1232 space = 0;
1233 size = 0;
1234 bracketlen = 0;
1235 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1236 break;
1237
1238 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1239 {
1240 if (detect_repeat(common, cc))
1241 {
1242 /* These brackets are converted to repeats, so no global
1243 based single character repeat is allowed. */
1244 if (cc >= end)
1245 end = bracketend(cc);
1246 }
1247 }
1248 repeat_check = TRUE;
1249
1250 switch(*cc)
1251 {
1252 case OP_KET:
1253 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1254 {
1255 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1256 private_data_ptr += sizeof(sljit_sw);
1257 cc += common->private_data_ptrs[cc + 1 - common->start];
1258 }
1259 cc += 1 + LINK_SIZE;
1260 break;
1261
1262 case OP_ASSERT:
1263 case OP_ASSERT_NOT:
1264 case OP_ASSERTBACK:
1265 case OP_ASSERTBACK_NOT:
1266 case OP_ONCE:
1267 case OP_ONCE_NC:
1268 case OP_BRAPOS:
1269 case OP_SBRA:
1270 case OP_SBRAPOS:
1271 case OP_SCOND:
1272 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1273 private_data_ptr += sizeof(sljit_sw);
1274 bracketlen = 1 + LINK_SIZE;
1275 break;
1276
1277 case OP_CBRAPOS:
1278 case OP_SCBRAPOS:
1279 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1280 private_data_ptr += sizeof(sljit_sw);
1281 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1282 break;
1283
1284 case OP_COND:
1285 /* Might be a hidden SCOND. */
1286 alternative = cc + GET(cc, 1);
1287 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1288 {
1289 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1290 private_data_ptr += sizeof(sljit_sw);
1291 }
1292 bracketlen = 1 + LINK_SIZE;
1293 break;
1294
1295 case OP_BRA:
1296 bracketlen = 1 + LINK_SIZE;
1297 break;
1298
1299 case OP_CBRA:
1300 case OP_SCBRA:
1301 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1302 break;
1303
1304 case OP_BRAZERO:
1305 case OP_BRAMINZERO:
1306 case OP_BRAPOSZERO:
1307 repeat_check = FALSE;
1308 size = 1;
1309 break;
1310
1311 CASE_ITERATOR_PRIVATE_DATA_1
1312 space = 1;
1313 size = -2;
1314 break;
1315
1316 CASE_ITERATOR_PRIVATE_DATA_2A
1317 space = 2;
1318 size = -2;
1319 break;
1320
1321 CASE_ITERATOR_PRIVATE_DATA_2B
1322 space = 2;
1323 size = -(2 + IMM2_SIZE);
1324 break;
1325
1326 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1327 space = 1;
1328 size = 1;
1329 break;
1330
1331 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1332 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1333 space = 2;
1334 size = 1;
1335 break;
1336
1337 case OP_TYPEUPTO:
1338 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1339 space = 2;
1340 size = 1 + IMM2_SIZE;
1341 break;
1342
1343 case OP_TYPEMINUPTO:
1344 space = 2;
1345 size = 1 + IMM2_SIZE;
1346 break;
1347
1348 case OP_CLASS:
1349 case OP_NCLASS:
1350 size += 1 + 32 / sizeof(pcre_uchar);
1351 space = get_class_iterator_size(cc + size);
1352 break;
1353
1354 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1355 case OP_XCLASS:
1356 size = GET(cc, 1);
1357 space = get_class_iterator_size(cc + size);
1358 break;
1359 #endif
1360
1361 default:
1362 cc = next_opcode(common, cc);
1363 SLJIT_ASSERT(cc != NULL);
1364 break;
1365 }
1366
1367 /* Character iterators, which are not inside a repeated bracket,
1368 gets a private slot instead of allocating it on the stack. */
1369 if (space > 0 && cc >= end)
1370 {
1371 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1372 private_data_ptr += sizeof(sljit_sw) * space;
1373 }
1374
1375 if (size != 0)
1376 {
1377 if (size < 0)
1378 {
1379 cc += -size;
1380 #ifdef SUPPORT_UTF
1381 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1382 #endif
1383 }
1384 else
1385 cc += size;
1386 }
1387
1388 if (bracketlen > 0)
1389 {
1390 if (cc >= end)
1391 {
1392 end = bracketend(cc);
1393 if (end[-1 - LINK_SIZE] == OP_KET)
1394 end = NULL;
1395 }
1396 cc += bracketlen;
1397 }
1398 }
1399 *private_data_start = private_data_ptr;
1400 }
1401
1402 /* Returns with a frame_types (always < 0) if no need for frame. */
1403 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1404 {
1405 int length = 0;
1406 int possessive = 0;
1407 BOOL stack_restore = FALSE;
1408 BOOL setsom_found = recursive;
1409 BOOL setmark_found = recursive;
1410 /* The last capture is a local variable even for recursions. */
1411 BOOL capture_last_found = FALSE;
1412
1413 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1414 SLJIT_ASSERT(common->control_head_ptr != 0);
1415 *needs_control_head = TRUE;
1416 #else
1417 *needs_control_head = FALSE;
1418 #endif
1419
1420 if (ccend == NULL)
1421 {
1422 ccend = bracketend(cc) - (1 + LINK_SIZE);
1423 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1424 {
1425 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1426 /* This is correct regardless of common->capture_last_ptr. */
1427 capture_last_found = TRUE;
1428 }
1429 cc = next_opcode(common, cc);
1430 }
1431
1432 SLJIT_ASSERT(cc != NULL);
1433 while (cc < ccend)
1434 switch(*cc)
1435 {
1436 case OP_SET_SOM:
1437 SLJIT_ASSERT(common->has_set_som);
1438 stack_restore = TRUE;
1439 if (!setsom_found)
1440 {
1441 length += 2;
1442 setsom_found = TRUE;
1443 }
1444 cc += 1;
1445 break;
1446
1447 case OP_MARK:
1448 case OP_PRUNE_ARG:
1449 case OP_THEN_ARG:
1450 SLJIT_ASSERT(common->mark_ptr != 0);
1451 stack_restore = TRUE;
1452 if (!setmark_found)
1453 {
1454 length += 2;
1455 setmark_found = TRUE;
1456 }
1457 if (common->control_head_ptr != 0)
1458 *needs_control_head = TRUE;
1459 cc += 1 + 2 + cc[1];
1460 break;
1461
1462 case OP_RECURSE:
1463 stack_restore = TRUE;
1464 if (common->has_set_som && !setsom_found)
1465 {
1466 length += 2;
1467 setsom_found = TRUE;
1468 }
1469 if (common->mark_ptr != 0 && !setmark_found)
1470 {
1471 length += 2;
1472 setmark_found = TRUE;
1473 }
1474 if (common->capture_last_ptr != 0 && !capture_last_found)
1475 {
1476 length += 2;
1477 capture_last_found = TRUE;
1478 }
1479 cc += 1 + LINK_SIZE;
1480 break;
1481
1482 case OP_CBRA:
1483 case OP_CBRAPOS:
1484 case OP_SCBRA:
1485 case OP_SCBRAPOS:
1486 stack_restore = TRUE;
1487 if (common->capture_last_ptr != 0 && !capture_last_found)
1488 {
1489 length += 2;
1490 capture_last_found = TRUE;
1491 }
1492 length += 3;
1493 cc += 1 + LINK_SIZE + IMM2_SIZE;
1494 break;
1495
1496 case OP_THEN:
1497 stack_restore = TRUE;
1498 if (common->control_head_ptr != 0)
1499 *needs_control_head = TRUE;
1500 cc ++;
1501 break;
1502
1503 default:
1504 stack_restore = TRUE;
1505 /* Fall through. */
1506
1507 case OP_NOT_WORD_BOUNDARY:
1508 case OP_WORD_BOUNDARY:
1509 case OP_NOT_DIGIT:
1510 case OP_DIGIT:
1511 case OP_NOT_WHITESPACE:
1512 case OP_WHITESPACE:
1513 case OP_NOT_WORDCHAR:
1514 case OP_WORDCHAR:
1515 case OP_ANY:
1516 case OP_ALLANY:
1517 case OP_ANYBYTE:
1518 case OP_NOTPROP:
1519 case OP_PROP:
1520 case OP_ANYNL:
1521 case OP_NOT_HSPACE:
1522 case OP_HSPACE:
1523 case OP_NOT_VSPACE:
1524 case OP_VSPACE:
1525 case OP_EXTUNI:
1526 case OP_EODN:
1527 case OP_EOD:
1528 case OP_CIRC:
1529 case OP_CIRCM:
1530 case OP_DOLL:
1531 case OP_DOLLM:
1532 case OP_CHAR:
1533 case OP_CHARI:
1534 case OP_NOT:
1535 case OP_NOTI:
1536
1537 case OP_EXACT:
1538 case OP_POSSTAR:
1539 case OP_POSPLUS:
1540 case OP_POSQUERY:
1541 case OP_POSUPTO:
1542
1543 case OP_EXACTI:
1544 case OP_POSSTARI:
1545 case OP_POSPLUSI:
1546 case OP_POSQUERYI:
1547 case OP_POSUPTOI:
1548
1549 case OP_NOTEXACT:
1550 case OP_NOTPOSSTAR:
1551 case OP_NOTPOSPLUS:
1552 case OP_NOTPOSQUERY:
1553 case OP_NOTPOSUPTO:
1554
1555 case OP_NOTEXACTI:
1556 case OP_NOTPOSSTARI:
1557 case OP_NOTPOSPLUSI:
1558 case OP_NOTPOSQUERYI:
1559 case OP_NOTPOSUPTOI:
1560
1561 case OP_TYPEEXACT:
1562 case OP_TYPEPOSSTAR:
1563 case OP_TYPEPOSPLUS:
1564 case OP_TYPEPOSQUERY:
1565 case OP_TYPEPOSUPTO:
1566
1567 case OP_CLASS:
1568 case OP_NCLASS:
1569 case OP_XCLASS:
1570 case OP_CALLOUT:
1571
1572 cc = next_opcode(common, cc);
1573 SLJIT_ASSERT(cc != NULL);
1574 break;
1575 }
1576
1577 /* Possessive quantifiers can use a special case. */
1578 if (SLJIT_UNLIKELY(possessive == length))
1579 return stack_restore ? no_frame : no_stack;
1580
1581 if (length > 0)
1582 return length + 1;
1583 return stack_restore ? no_frame : no_stack;
1584 }
1585
1586 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1587 {
1588 DEFINE_COMPILER;
1589 BOOL setsom_found = recursive;
1590 BOOL setmark_found = recursive;
1591 /* The last capture is a local variable even for recursions. */
1592 BOOL capture_last_found = FALSE;
1593 int offset;
1594
1595 /* >= 1 + shortest item size (2) */
1596 SLJIT_UNUSED_ARG(stacktop);
1597 SLJIT_ASSERT(stackpos >= stacktop + 2);
1598
1599 stackpos = STACK(stackpos);
1600 if (ccend == NULL)
1601 {
1602 ccend = bracketend(cc) - (1 + LINK_SIZE);
1603 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1604 cc = next_opcode(common, cc);
1605 }
1606
1607 SLJIT_ASSERT(cc != NULL);
1608 while (cc < ccend)
1609 switch(*cc)
1610 {
1611 case OP_SET_SOM:
1612 SLJIT_ASSERT(common->has_set_som);
1613 if (!setsom_found)
1614 {
1615 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1616 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1617 stackpos += (int)sizeof(sljit_sw);
1618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1619 stackpos += (int)sizeof(sljit_sw);
1620 setsom_found = TRUE;
1621 }
1622 cc += 1;
1623 break;
1624
1625 case OP_MARK:
1626 case OP_PRUNE_ARG:
1627 case OP_THEN_ARG:
1628 SLJIT_ASSERT(common->mark_ptr != 0);
1629 if (!setmark_found)
1630 {
1631 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1632 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1633 stackpos += (int)sizeof(sljit_sw);
1634 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1635 stackpos += (int)sizeof(sljit_sw);
1636 setmark_found = TRUE;
1637 }
1638 cc += 1 + 2 + cc[1];
1639 break;
1640
1641 case OP_RECURSE:
1642 if (common->has_set_som && !setsom_found)
1643 {
1644 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1645 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1646 stackpos += (int)sizeof(sljit_sw);
1647 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1648 stackpos += (int)sizeof(sljit_sw);
1649 setsom_found = TRUE;
1650 }
1651 if (common->mark_ptr != 0 && !setmark_found)
1652 {
1653 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1654 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1655 stackpos += (int)sizeof(sljit_sw);
1656 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1657 stackpos += (int)sizeof(sljit_sw);
1658 setmark_found = TRUE;
1659 }
1660 if (common->capture_last_ptr != 0 && !capture_last_found)
1661 {
1662 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1663 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1664 stackpos += (int)sizeof(sljit_sw);
1665 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1666 stackpos += (int)sizeof(sljit_sw);
1667 capture_last_found = TRUE;
1668 }
1669 cc += 1 + LINK_SIZE;
1670 break;
1671
1672 case OP_CBRA:
1673 case OP_CBRAPOS:
1674 case OP_SCBRA:
1675 case OP_SCBRAPOS:
1676 if (common->capture_last_ptr != 0 && !capture_last_found)
1677 {
1678 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1679 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1680 stackpos += (int)sizeof(sljit_sw);
1681 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1682 stackpos += (int)sizeof(sljit_sw);
1683 capture_last_found = TRUE;
1684 }
1685 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1686 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1687 stackpos += (int)sizeof(sljit_sw);
1688 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1689 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1690 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1691 stackpos += (int)sizeof(sljit_sw);
1692 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1693 stackpos += (int)sizeof(sljit_sw);
1694
1695 cc += 1 + LINK_SIZE + IMM2_SIZE;
1696 break;
1697
1698 default:
1699 cc = next_opcode(common, cc);
1700 SLJIT_ASSERT(cc != NULL);
1701 break;
1702 }
1703
1704 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1705 SLJIT_ASSERT(stackpos == STACK(stacktop));
1706 }
1707
1708 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1709 {
1710 int private_data_length = needs_control_head ? 3 : 2;
1711 int size;
1712 pcre_uchar *alternative;
1713 /* Calculate the sum of the private machine words. */
1714 while (cc < ccend)
1715 {
1716 size = 0;
1717 switch(*cc)
1718 {
1719 case OP_KET:
1720 if (PRIVATE_DATA(cc) != 0)
1721 {
1722 private_data_length++;
1723 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1724 cc += PRIVATE_DATA(cc + 1);
1725 }
1726 cc += 1 + LINK_SIZE;
1727 break;
1728
1729 case OP_ASSERT:
1730 case OP_ASSERT_NOT:
1731 case OP_ASSERTBACK:
1732 case OP_ASSERTBACK_NOT:
1733 case OP_ONCE:
1734 case OP_ONCE_NC:
1735 case OP_BRAPOS:
1736 case OP_SBRA:
1737 case OP_SBRAPOS:
1738 case OP_SCOND:
1739 private_data_length++;
1740 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1741 cc += 1 + LINK_SIZE;
1742 break;
1743
1744 case OP_CBRA:
1745 case OP_SCBRA:
1746 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1747 private_data_length++;
1748 cc += 1 + LINK_SIZE + IMM2_SIZE;
1749 break;
1750
1751 case OP_CBRAPOS:
1752 case OP_SCBRAPOS:
1753 private_data_length += 2;
1754 cc += 1 + LINK_SIZE + IMM2_SIZE;
1755 break;
1756
1757 case OP_COND:
1758 /* Might be a hidden SCOND. */
1759 alternative = cc + GET(cc, 1);
1760 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1761 private_data_length++;
1762 cc += 1 + LINK_SIZE;
1763 break;
1764
1765 CASE_ITERATOR_PRIVATE_DATA_1
1766 if (PRIVATE_DATA(cc))
1767 private_data_length++;
1768 cc += 2;
1769 #ifdef SUPPORT_UTF
1770 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1771 #endif
1772 break;
1773
1774 CASE_ITERATOR_PRIVATE_DATA_2A
1775 if (PRIVATE_DATA(cc))
1776 private_data_length += 2;
1777 cc += 2;
1778 #ifdef SUPPORT_UTF
1779 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1780 #endif
1781 break;
1782
1783 CASE_ITERATOR_PRIVATE_DATA_2B
1784 if (PRIVATE_DATA(cc))
1785 private_data_length += 2;
1786 cc += 2 + IMM2_SIZE;
1787 #ifdef SUPPORT_UTF
1788 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1789 #endif
1790 break;
1791
1792 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1793 if (PRIVATE_DATA(cc))
1794 private_data_length++;
1795 cc += 1;
1796 break;
1797
1798 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1799 if (PRIVATE_DATA(cc))
1800 private_data_length += 2;
1801 cc += 1;
1802 break;
1803
1804 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1805 if (PRIVATE_DATA(cc))
1806 private_data_length += 2;
1807 cc += 1 + IMM2_SIZE;
1808 break;
1809
1810 case OP_CLASS:
1811 case OP_NCLASS:
1812 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1813 case OP_XCLASS:
1814 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1815 #else
1816 size = 1 + 32 / (int)sizeof(pcre_uchar);
1817 #endif
1818 if (PRIVATE_DATA(cc))
1819 private_data_length += get_class_iterator_size(cc + size);
1820 cc += size;
1821 break;
1822
1823 default:
1824 cc = next_opcode(common, cc);
1825 SLJIT_ASSERT(cc != NULL);
1826 break;
1827 }
1828 }
1829 SLJIT_ASSERT(cc == ccend);
1830 return private_data_length;
1831 }
1832
1833 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1834 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1835 {
1836 DEFINE_COMPILER;
1837 int srcw[2];
1838 int count, size;
1839 BOOL tmp1next = TRUE;
1840 BOOL tmp1empty = TRUE;
1841 BOOL tmp2empty = TRUE;
1842 pcre_uchar *alternative;
1843 enum {
1844 start,
1845 loop,
1846 end
1847 } status;
1848
1849 status = save ? start : loop;
1850 stackptr = STACK(stackptr - 2);
1851 stacktop = STACK(stacktop - 1);
1852
1853 if (!save)
1854 {
1855 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1856 if (stackptr < stacktop)
1857 {
1858 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1859 stackptr += sizeof(sljit_sw);
1860 tmp1empty = FALSE;
1861 }
1862 if (stackptr < stacktop)
1863 {
1864 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1865 stackptr += sizeof(sljit_sw);
1866 tmp2empty = FALSE;
1867 }
1868 /* The tmp1next must be TRUE in either way. */
1869 }
1870
1871 do
1872 {
1873 count = 0;
1874 switch(status)
1875 {
1876 case start:
1877 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1878 count = 1;
1879 srcw[0] = common->recursive_head_ptr;
1880 if (needs_control_head)
1881 {
1882 SLJIT_ASSERT(common->control_head_ptr != 0);
1883 count = 2;
1884 srcw[1] = common->control_head_ptr;
1885 }
1886 status = loop;
1887 break;
1888
1889 case loop:
1890 if (cc >= ccend)
1891 {
1892 status = end;
1893 break;
1894 }
1895
1896 switch(*cc)
1897 {
1898 case OP_KET:
1899 if (PRIVATE_DATA(cc) != 0)
1900 {
1901 count = 1;
1902 srcw[0] = PRIVATE_DATA(cc);
1903 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1904 cc += PRIVATE_DATA(cc + 1);
1905 }
1906 cc += 1 + LINK_SIZE;
1907 break;
1908
1909 case OP_ASSERT:
1910 case OP_ASSERT_NOT:
1911 case OP_ASSERTBACK:
1912 case OP_ASSERTBACK_NOT:
1913 case OP_ONCE:
1914 case OP_ONCE_NC:
1915 case OP_BRAPOS:
1916 case OP_SBRA:
1917 case OP_SBRAPOS:
1918 case OP_SCOND:
1919 count = 1;
1920 srcw[0] = PRIVATE_DATA(cc);
1921 SLJIT_ASSERT(srcw[0] != 0);
1922 cc += 1 + LINK_SIZE;
1923 break;
1924
1925 case OP_CBRA:
1926 case OP_SCBRA:
1927 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1928 {
1929 count = 1;
1930 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1931 }
1932 cc += 1 + LINK_SIZE + IMM2_SIZE;
1933 break;
1934
1935 case OP_CBRAPOS:
1936 case OP_SCBRAPOS:
1937 count = 2;
1938 srcw[0] = PRIVATE_DATA(cc);
1939 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1940 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1941 cc += 1 + LINK_SIZE + IMM2_SIZE;
1942 break;
1943
1944 case OP_COND:
1945 /* Might be a hidden SCOND. */
1946 alternative = cc + GET(cc, 1);
1947 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1948 {
1949 count = 1;
1950 srcw[0] = PRIVATE_DATA(cc);
1951 SLJIT_ASSERT(srcw[0] != 0);
1952 }
1953 cc += 1 + LINK_SIZE;
1954 break;
1955
1956 CASE_ITERATOR_PRIVATE_DATA_1
1957 if (PRIVATE_DATA(cc))
1958 {
1959 count = 1;
1960 srcw[0] = PRIVATE_DATA(cc);
1961 }
1962 cc += 2;
1963 #ifdef SUPPORT_UTF
1964 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1965 #endif
1966 break;
1967
1968 CASE_ITERATOR_PRIVATE_DATA_2A
1969 if (PRIVATE_DATA(cc))
1970 {
1971 count = 2;
1972 srcw[0] = PRIVATE_DATA(cc);
1973 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1974 }
1975 cc += 2;
1976 #ifdef SUPPORT_UTF
1977 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1978 #endif
1979 break;
1980
1981 CASE_ITERATOR_PRIVATE_DATA_2B
1982 if (PRIVATE_DATA(cc))
1983 {
1984 count = 2;
1985 srcw[0] = PRIVATE_DATA(cc);
1986 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1987 }
1988 cc += 2 + IMM2_SIZE;
1989 #ifdef SUPPORT_UTF
1990 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1991 #endif
1992 break;
1993
1994 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1995 if (PRIVATE_DATA(cc))
1996 {
1997 count = 1;
1998 srcw[0] = PRIVATE_DATA(cc);
1999 }
2000 cc += 1;
2001 break;
2002
2003 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2004 if (PRIVATE_DATA(cc))
2005 {
2006 count = 2;
2007 srcw[0] = PRIVATE_DATA(cc);
2008 srcw[1] = srcw[0] + sizeof(sljit_sw);
2009 }
2010 cc += 1;
2011 break;
2012
2013 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2014 if (PRIVATE_DATA(cc))
2015 {
2016 count = 2;
2017 srcw[0] = PRIVATE_DATA(cc);
2018 srcw[1] = srcw[0] + sizeof(sljit_sw);
2019 }
2020 cc += 1 + IMM2_SIZE;
2021 break;
2022
2023 case OP_CLASS:
2024 case OP_NCLASS:
2025 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2026 case OP_XCLASS:
2027 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
2028 #else
2029 size = 1 + 32 / (int)sizeof(pcre_uchar);
2030 #endif
2031 if (PRIVATE_DATA(cc))
2032 switch(get_class_iterator_size(cc + size))
2033 {
2034 case 1:
2035 count = 1;
2036 srcw[0] = PRIVATE_DATA(cc);
2037 break;
2038
2039 case 2:
2040 count = 2;
2041 srcw[0] = PRIVATE_DATA(cc);
2042 srcw[1] = srcw[0] + sizeof(sljit_sw);
2043 break;
2044
2045 default:
2046 SLJIT_ASSERT_STOP();
2047 break;
2048 }
2049 cc += size;
2050 break;
2051
2052 default:
2053 cc = next_opcode(common, cc);
2054 SLJIT_ASSERT(cc != NULL);
2055 break;
2056 }
2057 break;
2058
2059 case end:
2060 SLJIT_ASSERT_STOP();
2061 break;
2062 }
2063
2064 while (count > 0)
2065 {
2066 count--;
2067 if (save)
2068 {
2069 if (tmp1next)
2070 {
2071 if (!tmp1empty)
2072 {
2073 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2074 stackptr += sizeof(sljit_sw);
2075 }
2076 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2077 tmp1empty = FALSE;
2078 tmp1next = FALSE;
2079 }
2080 else
2081 {
2082 if (!tmp2empty)
2083 {
2084 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2085 stackptr += sizeof(sljit_sw);
2086 }
2087 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2088 tmp2empty = FALSE;
2089 tmp1next = TRUE;
2090 }
2091 }
2092 else
2093 {
2094 if (tmp1next)
2095 {
2096 SLJIT_ASSERT(!tmp1empty);
2097 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
2098 tmp1empty = stackptr >= stacktop;
2099 if (!tmp1empty)
2100 {
2101 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2102 stackptr += sizeof(sljit_sw);
2103 }
2104 tmp1next = FALSE;
2105 }
2106 else
2107 {
2108 SLJIT_ASSERT(!tmp2empty);
2109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
2110 tmp2empty = stackptr >= stacktop;
2111 if (!tmp2empty)
2112 {
2113 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2114 stackptr += sizeof(sljit_sw);
2115 }
2116 tmp1next = TRUE;
2117 }
2118 }
2119 }
2120 }
2121 while (status != end);
2122
2123 if (save)
2124 {
2125 if (tmp1next)
2126 {
2127 if (!tmp1empty)
2128 {
2129 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2130 stackptr += sizeof(sljit_sw);
2131 }
2132 if (!tmp2empty)
2133 {
2134 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2135 stackptr += sizeof(sljit_sw);
2136 }
2137 }
2138 else
2139 {
2140 if (!tmp2empty)
2141 {
2142 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2143 stackptr += sizeof(sljit_sw);
2144 }
2145 if (!tmp1empty)
2146 {
2147 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2148 stackptr += sizeof(sljit_sw);
2149 }
2150 }
2151 }
2152 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2153 }
2154
2155 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
2156 {
2157 pcre_uchar *end = bracketend(cc);
2158 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2159
2160 /* Assert captures then. */
2161 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2162 current_offset = NULL;
2163 /* Conditional block does not. */
2164 if (*cc == OP_COND || *cc == OP_SCOND)
2165 has_alternatives = FALSE;
2166
2167 cc = next_opcode(common, cc);
2168 if (has_alternatives)
2169 current_offset = common->then_offsets + (cc - common->start);
2170
2171 while (cc < end)
2172 {
2173 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2174 cc = set_then_offsets(common, cc, current_offset);
2175 else
2176 {
2177 if (*cc == OP_ALT && has_alternatives)
2178 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2179 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2180 *current_offset = 1;
2181 cc = next_opcode(common, cc);
2182 }
2183 }
2184
2185 return end;
2186 }
2187
/* The case-label groups are not needed beyond this point. */
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
#undef CASE_ITERATOR_PRIVATE_DATA_2B
#undef CASE_ITERATOR_PRIVATE_DATA_2A
#undef CASE_ITERATOR_PRIVATE_DATA_1
2194
2195 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2196 {
2197 return (value & (value - 1)) == 0;
2198 }
2199
2200 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2201 {
2202 while (list)
2203 {
2204 /* sljit_set_label is clever enough to do nothing
2205 if either the jump or the label is NULL. */
2206 SET_LABEL(list->jump, label);
2207 list = list->next;
2208 }
2209 }
2210
2211 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2212 {
2213 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2214 if (list_item)
2215 {
2216 list_item->next = *list;
2217 list_item->jump = jump;
2218 *list = list_item;
2219 }
2220 }
2221
2222 static void add_stub(compiler_common *common, struct sljit_jump *start)
2223 {
2224 DEFINE_COMPILER;
2225 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2226
2227 if (list_item)
2228 {
2229 list_item->start = start;
2230 list_item->quit = LABEL();
2231 list_item->next = common->stubs;
2232 common->stubs = list_item;
2233 }
2234 }
2235
2236 static void flush_stubs(compiler_common *common)
2237 {
2238 DEFINE_COMPILER;
2239 stub_list *list_item = common->stubs;
2240
2241 while (list_item)
2242 {
2243 JUMPHERE(list_item->start);
2244 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2245 JUMPTO(SLJIT_JUMP, list_item->quit);
2246 list_item = list_item->next;
2247 }
2248 common->stubs = NULL;
2249 }
2250
2251 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2252 {
2253 DEFINE_COMPILER;
2254 label_addr_list *label_addr;
2255
2256 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2257 if (label_addr == NULL)
2258 return;
2259 label_addr->label = LABEL();
2260 label_addr->update_addr = update_addr;
2261 label_addr->next = common->label_addrs;
2262 common->label_addrs = label_addr;
2263 }
2264
2265 static SLJIT_INLINE void count_match(compiler_common *common)
2266 {
2267 DEFINE_COMPILER;
2268
2269 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2270 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2271 }
2272
2273 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2274 {
2275 /* May destroy all locals and registers except TMP2. */
2276 DEFINE_COMPILER;
2277
2278 SLJIT_ASSERT(size > 0);
2279 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2280 #ifdef DESTROY_REGISTERS
2281 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2282 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2283 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2284 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2285 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2286 #endif
2287 add_stub(common, CMP(SLJIT_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2288 }
2289
2290 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2291 {
2292 DEFINE_COMPILER;
2293
2294 SLJIT_ASSERT(size > 0);
2295 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2296 }
2297
2298 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2299 {
2300 DEFINE_COMPILER;
2301 sljit_uw *result;
2302
2303 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2304 return NULL;
2305
2306 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2307 if (SLJIT_UNLIKELY(result == NULL))
2308 {
2309 sljit_set_compiler_memory_error(compiler);
2310 return NULL;
2311 }
2312
2313 *(void**)result = common->read_only_data_head;
2314 common->read_only_data_head = (void *)result;
2315 return result + 1;
2316 }
2317
2318 static void free_read_only_data(void *current, void *allocator_data)
2319 {
2320 void *next;
2321
2322 SLJIT_UNUSED_ARG(allocator_data);
2323
2324 while (current != NULL)
2325 {
2326 next = *(void**)current;
2327 SLJIT_FREE(current, allocator_data);
2328 current = next;
2329 }
2330 }
2331
2332 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2333 {
2334 DEFINE_COMPILER;
2335 struct sljit_label *loop;
2336 int i;
2337
2338 /* At this point we can freely use all temporary registers. */
2339 SLJIT_ASSERT(length > 1);
2340 /* TMP1 returns with begin - 1. */
2341 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2342 if (length < 8)
2343 {
2344 for (i = 1; i < length; i++)
2345 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2346 }
2347 else
2348 {
2349 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2350 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2351 loop = LABEL();
2352 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw), SLJIT_R0, 0);
2353 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2354 JUMPTO(SLJIT_NOT_ZERO, loop);
2355 }
2356 }
2357
2358 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2359 {
2360 DEFINE_COMPILER;
2361 sljit_si i;
2362
2363 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2364
2365 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2366 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2367 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2368 }
2369
2370 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2371 {
2372 DEFINE_COMPILER;
2373 struct sljit_label *loop;
2374 int i;
2375
2376 SLJIT_ASSERT(length > 1);
2377 /* OVECTOR(1) contains the "string begin - 1" constant. */
2378 if (length > 2)
2379 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2380 if (length < 8)
2381 {
2382 for (i = 2; i < length; i++)
2383 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2384 }
2385 else
2386 {
2387 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2388 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2389 loop = LABEL();
2390 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2391 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2392 JUMPTO(SLJIT_NOT_ZERO, loop);
2393 }
2394
2395 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2396 if (common->mark_ptr != 0)
2397 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2398 if (common->control_head_ptr != 0)
2399 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2400 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2401 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2402 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2403 }
2404
2405 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2406 {
2407 while (current != NULL)
2408 {
2409 switch (current[-2])
2410 {
2411 case type_then_trap:
2412 break;
2413
2414 case type_mark:
2415 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2416 return current[-4];
2417 break;
2418
2419 default:
2420 SLJIT_ASSERT_STOP();
2421 break;
2422 }
2423 SLJIT_ASSERT(current > (sljit_sw*)current[-1]);
2424 current = (sljit_sw*)current[-1];
2425 }
2426 return -1;
2427 }
2428
2429 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2430 {
2431 DEFINE_COMPILER;
2432 struct sljit_label *loop;
2433 struct sljit_jump *early_quit;
2434
2435 /* At this point we can freely use all registers. */
2436 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2437 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2438
2439 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2440 if (common->mark_ptr != 0)
2441 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2442 OP1(SLJIT_MOV_SI, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2443 if (common->mark_ptr != 0)
2444 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2445 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2446 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2447 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START);
2448 /* Unlikely, but possible */
2449 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2450 loop = LABEL();
2451 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0, SLJIT_R0, 0);
2452 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2453 /* Copy the integer value to the output buffer */
2454 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2455 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2456 #endif
2457 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_R2), sizeof(int), SLJIT_S1, 0);
2458 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2459 JUMPTO(SLJIT_NOT_ZERO, loop);
2460 JUMPHERE(early_quit);
2461
2462 /* Calculate the return value, which is the maximum ovector value. */
2463 if (topbracket > 1)
2464 {
2465 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2466 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2467
2468 /* OVECTOR(0) is never equal to SLJIT_S2. */
2469 loop = LABEL();
2470 OP1(SLJIT_MOVU, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2471 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2472 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2473 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2474 }
2475 else
2476 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2477 }
2478
2479 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2480 {
2481 DEFINE_COMPILER;
2482 struct sljit_jump *jump;
2483
2484 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2485 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2486 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2487
2488 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2489 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2490 OP1(SLJIT_MOV_SI, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2491 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2492
2493 /* Store match begin and end. */
2494 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2495 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2496
2497 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2498 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2499 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2500 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2501 #endif
2502 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2503 JUMPHERE(jump);
2504
2505 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2506 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2507 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2508 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2509 #endif
2510 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2511
2512 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2513 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2514 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2515 #endif
2516 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2517
2518 JUMPTO(SLJIT_JUMP, quit);
2519 }
2520
2521 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2522 {
2523 /* May destroy TMP1. */
2524 DEFINE_COMPILER;
2525 struct sljit_jump *jump;
2526
2527 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2528 {
2529 /* The value of -1 must be kept for start_used_ptr! */
2530 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2531 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2532 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2533 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2534 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2535 JUMPHERE(jump);
2536 }
2537 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2538 {
2539 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2540 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2541 JUMPHERE(jump);
2542 }
2543 }
2544
2545 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2546 {
2547 /* Detects if the character has an othercase. */
2548 unsigned int c;
2549
2550 #ifdef SUPPORT_UTF
2551 if (common->utf)
2552 {
2553 GETCHAR(c, cc);
2554 if (c > 127)
2555 {
2556 #ifdef SUPPORT_UCP
2557 return c != UCD_OTHERCASE(c);
2558 #else
2559 return FALSE;
2560 #endif
2561 }
2562 #ifndef COMPILE_PCRE8
2563 return common->fcc[c] != c;
2564 #endif
2565 }
2566 else
2567 #endif
2568 c = *cc;
2569 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2570 }
2571
2572 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2573 {
2574 /* Returns with the othercase. */
2575 #ifdef SUPPORT_UTF
2576 if (common->utf && c > 127)
2577 {
2578 #ifdef SUPPORT_UCP
2579 return UCD_OTHERCASE(c);
2580 #else
2581 return c;
2582 #endif
2583 }
2584 #endif
2585 return TABLE_GET(c, common->fcc, c);
2586 }
2587
2588 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2589 {
2590 /* Detects if the character and its othercase has only 1 bit difference. */
2591 unsigned int c, oc, bit;
2592 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2593 int n;
2594 #endif
2595
2596 #ifdef SUPPORT_UTF
2597 if (common->utf)
2598 {
2599 GETCHAR(c, cc);
2600 if (c <= 127)
2601 oc = common->fcc[c];
2602 else
2603 {
2604 #ifdef SUPPORT_UCP
2605 oc = UCD_OTHERCASE(c);
2606 #else
2607 oc = c;
2608 #endif
2609 }
2610 }
2611 else
2612 {
2613 c = *cc;
2614 oc = TABLE_GET(c, common->fcc, c);
2615 }
2616 #else
2617 c = *cc;
2618 oc = TABLE_GET(c, common->fcc, c);
2619 #endif
2620
2621 SLJIT_ASSERT(c != oc);
2622
2623 bit = c ^ oc;
2624 /* Optimized for English alphabet. */
2625 if (c <= 127 && bit == 0x20)
2626 return (0 << 8) | 0x20;
2627
2628 /* Since c != oc, they must have at least 1 bit difference. */
2629 if (!is_powerof2(bit))
2630 return 0;
2631
2632 #if defined COMPILE_PCRE8
2633
2634 #ifdef SUPPORT_UTF
2635 if (common->utf && c > 127)
2636 {
2637 n = GET_EXTRALEN(*cc);
2638 while ((bit & 0x3f) == 0)
2639 {
2640 n--;
2641 bit >>= 6;
2642 }
2643 return (n << 8) | bit;
2644 }
2645 #endif /* SUPPORT_UTF */
2646 return (0 << 8) | bit;
2647
2648 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2649
2650 #ifdef SUPPORT_UTF
2651 if (common->utf && c > 65535)
2652 {
2653 if (bit >= (1 << 10))
2654 bit >>= 10;
2655 else
2656 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2657 }
2658 #endif /* SUPPORT_UTF */
2659 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2660
2661 #endif /* COMPILE_PCRE[8|16|32] */
2662 }
2663
2664 static void check_partial(compiler_common *common, BOOL force)
2665 {
2666 /* Checks whether a partial matching is occurred. Does not modify registers. */
2667 DEFINE_COMPILER;
2668 struct sljit_jump *jump = NULL;
2669
2670 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2671
2672 if (common->mode == JIT_COMPILE)
2673 return;
2674
2675 if (!force)
2676 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2677 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2678 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2679
2680 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2681 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2682 else
2683 {
2684 if (common->partialmatchlabel != NULL)
2685 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2686 else
2687 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2688 }
2689
2690 if (jump != NULL)
2691 JUMPHERE(jump);
2692 }
2693
2694 static void check_str_end(compiler_common *common, jump_list **end_reached)
2695 {
2696 /* Does not affect registers. Usually used in a tight spot. */
2697 DEFINE_COMPILER;
2698 struct sljit_jump *jump;
2699
2700 if (common->mode == JIT_COMPILE)
2701 {
2702 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2703 return;
2704 }
2705
2706 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2707 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2708 {
2709 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2710 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2711 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2712 }
2713 else
2714 {
2715 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2716 if (common->partialmatchlabel != NULL)
2717 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2718 else
2719 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2720 }
2721 JUMPHERE(jump);
2722 }
2723
2724 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2725 {
2726 DEFINE_COMPILER;
2727 struct sljit_jump *jump;
2728
2729 if (common->mode == JIT_COMPILE)
2730 {
2731 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2732 return;
2733 }
2734
2735 /* Partial matching mode. */
2736 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2737 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2738 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2739 {
2740 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2741 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2742 }
2743 else
2744 {
2745 if (common->partialmatchlabel != NULL)
2746 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2747 else
2748 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2749 }
2750 JUMPHERE(jump);
2751 }
2752
2753 static void peek_char(compiler_common *common, sljit_ui max)
2754 {
2755 /* Reads the character into TMP1, keeps STR_PTR.
2756 Does not check STR_END. TMP2 Destroyed. */
2757 DEFINE_COMPILER;
2758 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2759 struct sljit_jump *jump;
2760 #endif
2761
2762 SLJIT_UNUSED_ARG(max);
2763
2764 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2765 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2766 if (common->utf)
2767 {
2768 if (max < 128) return;
2769
2770 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2771 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2772 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2773 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2774 JUMPHERE(jump);
2775 }
2776 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2777
2778 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2779 if (common->utf)
2780 {
2781 if (max < 0xd800) return;
2782
2783 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2784 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2785 /* TMP2 contains the high surrogate. */
2786 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2787 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2788 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2789 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2790 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2791 JUMPHERE(jump);
2792 }
2793 #endif
2794 }
2795
2796 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2797
2798 static BOOL is_char7_bitset(const sljit_ub *bitset, BOOL nclass)
2799 {
2800 /* Tells whether the character codes below 128 are enough
2801 to determine a match. */
2802 const sljit_ub value = nclass ? 0xff : 0;
2803 const sljit_ub *end = bitset + 32;
2804
2805 bitset += 16;
2806 do
2807 {
2808 if (*bitset++ != value)
2809 return FALSE;
2810 }
2811 while (bitset < end);
2812 return TRUE;
2813 }
2814
2815 static void read_char7_type(compiler_common *common, BOOL full_read)
2816 {
2817 /* Reads the precise character type of a character into TMP1, if the character
2818 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2819 full_read argument tells whether characters above max are accepted or not. */
2820 DEFINE_COMPILER;
2821 struct sljit_jump *jump;
2822
2823 SLJIT_ASSERT(common->utf);
2824
2825 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2826 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2827
2828 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2829
2830 if (full_read)
2831 {
2832 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2833 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2834 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2835 JUMPHERE(jump);
2836 }
2837 }
2838
2839 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2840
2841 static void read_char_range(compiler_common *common, sljit_ui min, sljit_ui max, BOOL update_str_ptr)
2842 {
2843 /* Reads the precise value of a character into TMP1, if the character is
2844 between min and max (c >= min && c <= max). Otherwise it returns with a value
2845 outside the range. Does not check STR_END. */
2846 DEFINE_COMPILER;
2847 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2848 struct sljit_jump *jump;
2849 #endif
2850 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2851 struct sljit_jump *jump2;
2852 #endif
2853
2854 SLJIT_UNUSED_ARG(update_str_ptr);
2855 SLJIT_UNUSED_ARG(min);
2856 SLJIT_UNUSED_ARG(max);
2857 SLJIT_ASSERT(min <= max);
2858
2859 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2860 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2861
2862 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2863 if (common->utf)
2864 {
2865 if (max < 128 && !update_str_ptr) return;
2866
2867 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2868 if (min >= 0x10000)
2869 {
2870 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2871 if (update_str_ptr)
2872 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2873 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2874 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2875 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2876 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2877 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2878 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2879 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2880 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2881 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2882 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2883 if (!update_str_ptr)
2884 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2885 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2886 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2887 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2888 JUMPHERE(jump2);
2889 if (update_str_ptr)
2890 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2891 }
2892 else if (min >= 0x800 && max <= 0xffff)
2893 {
2894 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2895 if (update_str_ptr)
2896 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2897 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2898 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
2899 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2900 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2901 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2902 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2903 if (!update_str_ptr)
2904 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2905 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2906 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2907 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2908 JUMPHERE(jump2);
2909 if (update_str_ptr)
2910 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2911 }
2912 else if (max >= 0x800)
2913 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2914 else if (max < 128)
2915 {
2916 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2917 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2918 }
2919 else
2920 {
2921 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2922 if (!update_str_ptr)
2923 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2924 else
2925 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2926 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2927 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2928 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2929 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2930 if (update_str_ptr)
2931 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2932 }
2933 JUMPHERE(jump);
2934 }
2935 #endif
2936
2937 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2938 if (common->utf)
2939 {
2940 if (max >= 0x10000)
2941 {
2942 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2943 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2944 /* TMP2 contains the high surrogate. */
2945 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2946 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2947 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2948 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2949 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2950 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2951 JUMPHERE(jump);
2952 return;
2953 }
2954
2955 if (max < 0xd800 && !update_str_ptr) return;
2956
2957 /* Skip low surrogate if necessary. */
2958 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2959 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2960 if (update_str_ptr)
2961 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2962 if (max >= 0xd800)
2963 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2964 JUMPHERE(jump);
2965 }
2966 #endif
2967 }
2968
2969 static SLJIT_INLINE void read_char(compiler_common *common)
2970 {
2971 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2972 }
2973
2974 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2975 {
2976 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2977 DEFINE_COMPILER;
2978 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2979 struct sljit_jump *jump;
2980 #endif
2981 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2982 struct sljit_jump *jump2;
2983 #endif
2984
2985 SLJIT_UNUSED_ARG(update_str_ptr);
2986
2987 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2988 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2989
2990 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2991 if (common->utf)
2992 {
2993 /* This can be an extra read in some situations, but hopefully
2994 it is needed in most cases. */
2995 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2996 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2997 if (!update_str_ptr)
2998 {
2999 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3000 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3001 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3002 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3003 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3004 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3005 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3006 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3007 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3008 JUMPHERE(jump2);
3009 }
3010 else
3011 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
3012 JUMPHERE(jump);
3013 return;
3014 }
3015 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
3016
3017 #if !defined COMPILE_PCRE8
3018 /* The ctypes array contains only 256 values. */
3019 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3020 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3021 #endif
3022 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3023 #if !defined COMPILE_PCRE8
3024 JUMPHERE(jump);
3025 #endif
3026
3027 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3028 if (common->utf && update_str_ptr)
3029 {
3030 /* Skip low surrogate if necessary. */
3031 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
3032 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3033 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3034 JUMPHERE(jump);
3035 }
3036 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
3037 }
3038
3039 static void skip_char_back(compiler_common *common)
3040 {
3041 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
3042 DEFINE_COMPILER;
3043 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3044 #if defined COMPILE_PCRE8
3045 struct sljit_label *label;
3046
3047 if (common->utf)
3048 {
3049 label = LABEL();
3050 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3051 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3052 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3053 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
3054 return;
3055 }
3056 #elif defined COMPILE_PCRE16
3057 if (common->utf)
3058 {
3059 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3060 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3061 /* Skip low surrogate if necessary. */
3062 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3063 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3064 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3065 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3066 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3067 return;
3068 }
3069 #endif /* COMPILE_PCRE[8|16] */
3070 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3071 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3072 }
3073
3074 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
3075 {
3076 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
3077 DEFINE_COMPILER;
3078 struct sljit_jump *jump;
3079
3080 if (nltype == NLTYPE_ANY)
3081 {
3082 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3083 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
3084 }
3085 else if (nltype == NLTYPE_ANYCRLF)
3086 {
3087 if (jumpifmatch)
3088 {
3089 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
3090 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3091 }
3092 else
3093 {
3094 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3095 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3096 JUMPHERE(jump);
3097 }
3098 }
3099 else
3100 {
3101 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
3102 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3103 }
3104 }
3105
3106 #ifdef SUPPORT_UTF
3107
3108 #if defined COMPILE_PCRE8
3109 static void do_utfreadchar(compiler_common *common)
3110 {
3111 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3112 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
3113 DEFINE_COMPILER;
3114 struct sljit_jump *jump;
3115
3116 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3117 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3118 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3119 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3120 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3121 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3122
3123 /* Searching for the first zero. */
3124 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3125 jump = JUMP(SLJIT_NOT_ZERO);
3126 /* Two byte sequence. */
3127 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3128 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
3129 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3130
3131 JUMPHERE(jump);
3132 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3133 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3134 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3135 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3136 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3137
3138 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3139 jump = JUMP(SLJIT_NOT_ZERO);
3140 /* Three byte sequence. */
3141 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3142 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
3143 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3144
3145 /* Four byte sequence. */
3146 JUMPHERE(jump);
3147 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3148 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3149 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3150 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3151 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3152 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3153 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
3154 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3155 }
3156
3157 static void do_utfreadchar16(compiler_common *common)
3158 {
3159 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3160 of the character (>= 0xc0). Return value in TMP1. */
3161 DEFINE_COMPILER;
3162 struct sljit_jump *jump;
3163
3164 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3165 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3166 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3167 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3168 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3169 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3170
3171 /* Searching for the first zero. */
3172 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3173 jump = JUMP(SLJIT_NOT_ZERO);
3174 /* Two byte sequence. */
3175 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3176 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3177
3178 JUMPHERE(jump);
3179 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3180 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_NOT_ZERO);
3181 /* This code runs only in 8 bit mode. No need to shift the value. */
3182 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3183 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3184 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3185 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3186 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3187 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3188 /* Three byte sequence. */
3189 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3190 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3191 }
3192
/* Emits the out-of-line helper (entered with sljit_emit_fast_enter and left
with sljit_emit_fast_return) that produces the ctypes entry of a multi-byte
UTF-8 character. Only code points below 256 have an entry in common->ctypes;
for everything else the helper returns 0 in TMP1. */
3193 static void do_utfreadtype8(compiler_common *common)
3194 {
3195 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3196 of the character (>= 0xc0). Return value in TMP1. */
3197 DEFINE_COMPILER;
3198 struct sljit_jump *jump;
3199 struct sljit_jump *compare;
3200
3201 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3202
/* Bit 0x20 of the first byte is clear only for a two byte sequence;
longer sequences are handled at the 'jump' target below. */
3203 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3204 jump = JUMP(SLJIT_NOT_ZERO);
3205 /* Two byte sequence. */
/* Load the second byte into TMP1 and step STR_PTR over it. */
3206 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3207 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3208 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3209 /* The upper 5 bits are known at this point. */
/* A payload above 3 in the first byte gives a code point >= 256 once it is
shifted left by 6, so there is no ctypes entry for it. */
3210 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
/* Code point = (first byte payload << 6) | (second byte & 0x3f); it is used
as the index into common->ctypes. */
3211 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3212 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3213 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3214 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3215 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3216
/* Two byte sequence whose code point is >= 256: the type is 0. */
3217 JUMPHERE(compare);
3218 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3219 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3220
3221 /* We only have types for characters less than 256. */
/* Longer sequences: the utf8_table4 entry selected by the first byte is
added to STR_PTR (presumably the number of trailing bytes - confirm against
the table definition) and 0 is returned as the type. */
3222 JUMPHERE(jump);
3223 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3225 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3226 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3227 }
3228
3229 #endif /* COMPILE_PCRE8 */
3230
3231 #endif /* SUPPORT_UTF */
3232
3233 #ifdef SUPPORT_UCP
3234
3235 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift that split a character into a block number (the bits above
the low 7) and an offset inside the block (the low 7 bits). */
3236 #define UCD_BLOCK_MASK 127
3237 #define UCD_BLOCK_SHIFT 7
3238
/* Emits the out-of-line helper (fast_enter / fast_return) that performs the
two stage ucd_stage1 / ucd_stage2 table lookup for the character in TMP1. */
3239 static void do_getucd(compiler_common *common)
3240 {
3241 /* Searches for the UCD record of the character passed in TMP1.
3242 Returns chartype in TMP1 and UCD offset in TMP2. */
3243 DEFINE_COMPILER;
3244
/* The scaled addressing below relies on both of these sizes. */
3245 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3246
3247 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[chr >> 7], loaded as an unsigned byte. */
3248 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3249 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (stage1 value << 7) + (chr & 127): the index into ucd_stage2. */
3250 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3251 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3252 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1], a 16 bit load (the index is scaled by 1 << 1). */
3253 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3254 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = ucd_records[TMP2].chartype: the index is scaled by 1 << 3, which
matches the asserted 8 byte record size. */
3255 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3256 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3257 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3258 }
3259 #endif
3260
/* Emits the code that moves STR_PTR forward by one character and returns the
label of that code ("mainloop").

hascrorlf and firstline disable the two-unit newline check (see newlinecheck
below). When firstline is set, code that locates the end of the first line
and stores it in the common->first_line_end stack slot is emitted first.

On first entry the emitted code jumps over the advance sequence through the
'start' jump, so STR_PTR is only moved when control reaches the returned
label (presumably from the callers' retry path - confirm at the call
sites). */
3261 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
3262 {
3263 DEFINE_COMPILER;
3264 struct sljit_label *mainloop;
3265 struct sljit_label *newlinelabel = NULL;
3266 struct sljit_jump *start;
3267 struct sljit_jump *end = NULL;
3268 struct sljit_jump *nl = NULL;
3269 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3270 struct sljit_jump *singlechar;
3271 #endif
3272 jump_list *newline = NULL;
3273 BOOL newlinecheck = FALSE;
3274 BOOL readuchar = FALSE;
3275
/* The extra newline handling is only emitted when neither flag is set and
the newline type is ANY, ANYCRLF, or a fixed newline above 255 (two units
packed as high byte / low byte). */
3276 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
3277 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3278 newlinecheck = TRUE;
3279
3280 if (firstline)
3281 {
3282 /* Search for the end of the first line. */
3283 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is saved in TMP3 and restored after the search. */
3284 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3285
3286 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3287 {
/* Fixed two-unit newline: compare the previous and the current unit with
the high and low byte of common->newline until both match or STR_END is
reached, then store STR_PTR - 1 unit as the first line end. */
3288 mainloop = LABEL();
3289 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3290 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3291 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3292 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3293 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3294 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3295 JUMPHERE(end);
3296 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3297 }
3298 else
3299 {
/* Other newline types: store the current position, read a character and
leave through the 'newline' jump list when it is a newline; reaching
STR_END stores STR_END itself as the first line end. */
3300 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3301 mainloop = LABEL();
3302 /* Continual stores does not cause data dependency. */
3303 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3304 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3305 check_newlinechar(common, common->nltype, &newline, TRUE);
3306 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3307 JUMPHERE(end);
3308 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->first_line_end, STR_PTR, 0);
3309 set_jumps(newline, LABEL());
3310 }
3311
3312 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3313 }
3314
/* First entry: skip the advance code emitted below. */
3315 start = JUMP(SLJIT_JUMP);
3316
3317 if (newlinecheck)
3318 {
/* Reached from the main loop when the current unit equals the high byte of
common->newline: step over it and, unless STR_END is reached, step over the
next unit as well when it equals the low byte. */
3319 newlinelabel = LABEL();
3320 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3321 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3322 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3323 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3324 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3325 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Convert the 0/1 flag into a byte offset of one code unit. */
3326 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3327 #endif
3328 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3329 nl = JUMP(SLJIT_JUMP);
3330 }
3331
/* This label is the return value of the function. */
3332 mainloop = LABEL();
3333
3334 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is only loaded when the UTF or newline code needs it. */
3335 #ifdef SUPPORT_UTF
3336 if (common->utf) readuchar = TRUE;
3337 #endif
3338 if (newlinecheck) readuchar = TRUE;
3339
3340 if (readuchar)
3341 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3342
3343 if (newlinecheck)
3344 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3345
3346 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3347 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3348 #if defined COMPILE_PCRE8
3349 if (common->utf)
3350 {
/* UTF-8: for a first byte >= 0xc0 also add the utf8_table4 entry that
belongs to it, so the whole character is skipped. */
3351 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3352 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3353 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3354 JUMPHERE(singlechar);
3355 }
3356 #elif defined COMPILE_PCRE16
3357 if (common->utf)
3358 {
/* UTF-16: when (unit & 0xfc00) == 0xd800 (a high surrogate) add one more
16 bit unit, i.e. 1 << 1 bytes. */
3359 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3360 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3361 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3362 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
3363 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3364 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3365 JUMPHERE(singlechar);
3366 }
3367 #endif /* COMPILE_PCRE[8|16] */
3368 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Target of the first-entry jump. */
3369 JUMPHERE(start);
3370
3371 if (newlinecheck)
3372 {
/* Both exits of the two-unit newline path continue here. */
3373 JUMPHERE(end);
3374 JUMPHERE(nl);
3375 }
3376
3377 return mainloop;
3378 }
3379
/* Maximum number of leading character positions collected by scan_prefix;
fast_forward_first_n_chars sizes its chars[] and bytes[] arrays with it. */
3380 #define MAX_N_CHARS 16
/* Size of the per-position byte set used by add_prefix_byte: element 0 is the
count (255 marks a saturated / unconstrained set), the rest hold the bytes. */
3381 #define MAX_N_BYTES 8
3382
3383 static SLJIT_INLINE void add_prefix_byte(pcre_uint8 byte, pcre_uint8 *bytes)
3384 {
3385 pcre_uint8 len = bytes[0];
3386 int i;
3387
3388 if (len == 255)
3389 return;
3390
3391 if (len == 0)
3392 {
3393 bytes[0] = 1;
3394 bytes[1] = byte;
3395 return;
3396 }
3397
3398 for (i = len; i > 0; i--)
3399 if (bytes[i] == byte)
3400 return;
3401
3402 if (len >= MAX_N_BYTES - 1)
3403 {
3404 bytes[0] = 255;
3405 return;
3406 }
3407
3408 len++;
3409 bytes[len] = byte;
3410 bytes[0] = len;
3411 }
3412
/* Walks the compiled pattern from cc and describes the code units that can
appear at the leading positions of a match.

chars     : two entries per position. chars[0] is the expected value with
            all "don't care" bits set, chars[1] is the mask of those bits.
            An all-ones value and mask mean that any unit is accepted.
bytes     : MAX_N_BYTES entries per position, a byte set maintained by
            add_prefix_byte; bytes[0] == 255 marks an unconstrained position.
max_chars : number of positions that may still be filled.
rec_count : shared work budget, decremented on every loop iteration.

Returns the number of positions described by this invocation, or 0 when the
budget is exhausted. Alternatives and optional characters are handled by
recursion over the same chars / bytes position, so their constraints are
merged, and the value returned by the recursive call becomes the new
max_chars limit. */
3413 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, pcre_uint8 *bytes, int max_chars, pcre_uint32 *rec_count)
3414 {
3415 /* Recursive function, which scans prefix literals. */
3416 BOOL last, any, caseless;
3417 int len, repeat, len_save, consumed = 0;
3418 pcre_uint32 chr, mask;
3419 pcre_uchar *alternative, *cc_save, *oc;
/* Buffer for the other-case form of one character. */
3420 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3421 pcre_uchar othercase[8];
3422 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3423 pcre_uchar othercase[2];
3424 #else
3425 pcre_uchar othercase[1];
3426 #endif
3427
3428 repeat = 1;
3429 while (TRUE)
3430 {
/* Stop with 0 once the shared budget is used up. */
3431 if (*rec_count == 0)
3432 return 0;
3433 (*rec_count)--;
3434
/* last: scanning stops after this item. any: the item accepts any code
unit. caseless: the item is a case-insensitive literal. */
3435 last = TRUE;
3436 any = FALSE;
3437 caseless = FALSE;
3438
3439 switch (*cc)
3440 {
3441 case OP_CHARI:
3442 caseless = TRUE;
/* Fall through. */
3443 case OP_CHAR:
3444 last = FALSE;
3445 cc++;
3446 break;
3447
3448 case OP_SOD:
3449 case OP_SOM:
3450 case OP_SET_SOM:
3451 case OP_NOT_WORD_BOUNDARY:
3452 case OP_WORD_BOUNDARY:
3453 case OP_EODN:
3454 case OP_EOD:
3455 case OP_CIRC:
3456 case OP_CIRCM:
3457 case OP_DOLL:
3458 case OP_DOLLM:
3459 /* Zero width assertions. */
3460 cc++;
3461 continue;
3462
/* Assertions are skipped as a whole. */
3463 case OP_ASSERT:
3464 case OP_ASSERT_NOT:
3465 case OP_ASSERTBACK:
3466 case OP_ASSERTBACK_NOT:
3467 cc = bracketend(cc);
3468 continue;
3469
3470 case OP_PLUSI:
3471 case OP_MINPLUSI:
3472 case OP_POSPLUSI:
3473 caseless = TRUE;
/* Fall through. One occurrence is recorded and, because 'last' stays TRUE,
scanning stops after it. */
3474 case OP_PLUS:
3475 case OP_MINPLUS:
3476 case OP_POSPLUS:
3477 cc++;
3478 break;
3479
3480 case OP_EXACTI:
3481 caseless = TRUE;
/* Fall through. The character is recorded 'repeat' times. */
3482 case OP_EXACT:
3483 repeat = GET2(cc, 1);
3484 last = FALSE;
3485 cc += 1 + IMM2_SIZE;
3486 break;
3487
3488 case OP_QUERYI:
3489 case OP_MINQUERYI:
3490 case OP_POSQUERYI:
3491 caseless = TRUE;
/* Fall through. Optional character: what follows it is scanned first into
the same positions (the "absent" case), then the character itself is merged
in by the literal code after the switch. */
3492 case OP_QUERY:
3493 case OP_MINQUERY:
3494 case OP_POSQUERY:
3495 len = 1;
3496 cc++;
3497 #ifdef SUPPORT_UTF
3498 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3499 #endif
3500 max_chars = scan_prefix(common, cc + len, chars, bytes, max_chars, rec_count);
3501 if (max_chars == 0)
3502 return consumed;
3503 last = FALSE;
3504 break;
3505
3506 case OP_KET:
3507 cc += 1 + LINK_SIZE;
3508 continue;
3509
/* Reached at the end of the alternative being scanned inline: jump by the
link stored in the OP_ALT. */
3510 case OP_ALT:
3511 cc += GET(cc, 1);
3512 continue;
3513
/* Groups: every alternative after the first is scanned recursively into
the same positions, then the first alternative is scanned inline. */
3514 case OP_ONCE:
3515 case OP_ONCE_NC:
3516 case OP_BRA:
3517 case OP_BRAPOS:
3518 case OP_CBRA:
3519 case OP_CBRAPOS:
3520 alternative = cc + GET(cc, 1);
3521 while (*alternative == OP_ALT)
3522 {
3523 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, bytes, max_chars, rec_count);
3524 if (max_chars == 0)
3525 return consumed;
3526 alternative += GET(alternative, 1);
3527 }
3528
3529 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3530 cc += IMM2_SIZE;
3531 cc += 1 + LINK_SIZE;
3532 continue;
3533
/* Classes and character types are recorded as "any unit". In UTF mode they
stop the scan in the cases guarded below.
NOTE(review): the position is counted before the opcode that follows the
class bitmap is examined, so a class followed by a repeat opcode is still
recorded as one consumed position - confirm that this is intended. */
3534 case OP_CLASS:
3535 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3536 if (common->utf && !is_char7_bitset((const pcre_uint8 *)(cc + 1), FALSE)) return consumed;
3537 #endif
3538 any = TRUE;
3539 cc += 1 + 32 / sizeof(pcre_uchar);
3540 break;
3541
3542 case OP_NCLASS:
3543 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3544 if (common->utf) return consumed;
3545 #endif
3546 any = TRUE;
3547 cc += 1 + 32 / sizeof(pcre_uchar);
3548 break;
3549
3550 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3551 case OP_XCLASS:
3552 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3553 if (common->utf) return consumed;
3554 #endif
3555 any = TRUE;
3556 cc += GET(cc, 1);
3557 break;
3558 #endif
3559
3560 case OP_DIGIT:
3561 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3562 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3563 return consumed;
3564 #endif
3565 any = TRUE;
3566 cc++;
3567 break;
3568
3569 case OP_WHITESPACE:
3570 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3571 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3572 return consumed;
3573 #endif
3574 any = TRUE;
3575 cc++;
3576 break;
3577
3578 case OP_WORDCHAR:
3579 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3580 if (common->utf && !is_char7_bitset((const pcre_uint8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3581 return consumed;
3582 #endif
3583 any = TRUE;
3584 cc++;
3585 break;
3586
/* OP_NOT / OP_NOTI: the first cc++ steps over the opcode, the shared cc++
below steps over the single code unit that follows it (UTF mode returns
before that in 8 and 16 bit builds). */
3587 case OP_NOT:
3588 case OP_NOTI:
3589 cc++;
3590 /* Fall through. */
3591 case OP_NOT_DIGIT:
3592 case OP_NOT_WHITESPACE:
3593 case OP_NOT_WORDCHAR:
3594 case OP_ANY:
3595 case OP_ALLANY:
3596 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3597 if (common->utf) return consumed;
3598 #endif
3599 any = TRUE;
3600 cc++;
3601 break;
3602
3603 #ifdef SUPPORT_UCP
3604 case OP_NOTPROP:
3605 case OP_PROP:
3606 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3607 if (common->utf) return consumed;
3608 #endif
3609 any = TRUE;
3610 cc += 1 + 2;
3611 break;
3612 #endif
3613
/* Only the repeat count is taken here; the type opcode that follows is
handled by the next loop iteration. */
3614 case OP_TYPEEXACT:
3615 repeat = GET2(cc, 1);
3616 cc += 1 + IMM2_SIZE;
3617 continue;
3618
3619 case OP_NOTEXACT:
3620 case OP_NOTEXACTI:
3621 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3622 if (common->utf) return consumed;
3623 #endif
3624 any = TRUE;
3625 repeat = GET2(cc, 1);
3626 cc += 1 + IMM2_SIZE + 1;
3627 break;
3628
/* Every other opcode ends the prefix. */
3629 default:
3630 return consumed;
3631 }
3632
3633 if (any)
3634 {
/* Unconstrained item: mark 'repeat' positions as "any unit" (value and
mask all ones for the unit width, byte set saturated). */
3635 #if defined COMPILE_PCRE8
3636 mask = 0xff;
3637 #elif defined COMPILE_PCRE16
3638 mask = 0xffff;
3639 #elif defined COMPILE_PCRE32
3640 mask = 0xffffffff;
3641 #else
3642 SLJIT_ASSERT_STOP();
3643 #endif
3644
3645 do
3646 {
3647 chars[0] = mask;
3648 chars[1] = mask;
3649 bytes[0] = 255;
3650
3651 consumed++;
3652 if (--max_chars == 0)
3653 return consumed;
3654 chars += 2;
3655 bytes += MAX_N_BYTES;
3656 }
3657 while (--repeat > 0);
3658
3659 repeat = 1;
3660 continue;
3661 }
3662
/* Literal character: len is its length in code units. */
3663 len = 1;
3664 #ifdef SUPPORT_UTF
3665 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3666 #endif
3667
3668 if (caseless && char_has_othercase(common, cc))
3669 {
3670 #ifdef SUPPORT_UTF
3671 if (common->utf)
3672 {
/* The scan stops when the other case is encoded with a different number of
code units. */
3673 GETCHAR(chr, cc);
3674 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3675 return consumed;
3676 }
3677 else
3678 #endif
3679 {
3680 chr = *cc;
3681 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3682 }
3683 }
3684 else
3685 caseless = FALSE;
3686
/* The character is recorded 'repeat' times; cc and len are restored from
the saved values before every further repetition. */
3687 len_save = len;
3688 cc_save = cc;
3689 while (TRUE)
3690 {
3691 oc = othercase;
3692 do
3693 {
3694 chr = *cc;
3695 #ifdef COMPILE_PCRE32
3696 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3697 return consumed;
3698 #endif
/* The low 8 bits of the unit (and of its other case) go into the byte set. */
3699 add_prefix_byte((pcre_uint8)chr, bytes);
3700
3701 mask = 0;
3702 if (caseless)
3703 {
3704 add_prefix_byte((pcre_uint8)*oc, bytes);
3705 mask = *cc ^ *oc;
3706 chr |= mask;
3707 }
3708
/* An unset position (NOTACHAR value, and a zero mask in 32 bit builds) takes
the value directly; otherwise every bit that differs from the stored value
is added to the mask. */
3709 #ifdef COMPILE_PCRE32
3710 if (chars[0] == NOTACHAR && chars[1] == 0)
3711 #else
3712 if (chars[0] == NOTACHAR)
3713 #endif
3714 {
3715 chars[0] = chr;
3716 chars[1] = mask;
3717 }
3718 else
3719 {
3720 mask |= chars[0] ^ chr;
3721 chr |= mask;
3722 chars[0] = chr;
3723 chars[1] |= mask;
3724 }
3725
3726 len--;
3727 consumed++;
3728 if (--max_chars == 0)
3729 return consumed;
3730 chars += 2;
3731 bytes += MAX_N_BYTES;
3732 cc++;
3733 oc++;
3734 }
3735 while (len > 0);
3736
3737 if (--repeat == 0)
3738 break;
3739
3740 len = len_save;
3741 cc = cc_save;
3742 }
3743
3744 repeat = 1;
3745 if (last)
3746 return consumed;
3747 }
3748 }
3749
3750 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
3751
3752 static sljit_si character_to_int32(pcre_uchar chr)
3753 {
3754 sljit_si value = (sljit_si)chr;
3755 #if defined COMPILE_PCRE8
3756 #define SSE2_COMPARE_TYPE_INDEX 0
3757 return (value << 24) | (value << 16) | (value << 8) | value;
3758 #elif defined COMPILE_PCRE16
3759 #define SSE2_COMPARE_TYPE_INDEX 1
3760 return (value << 16) | value;
3761 #elif defined COMPILE_PCRE32
3762 #define SSE2_COMPARE_TYPE_INDEX 2
3763 return value;
3764 #else
3765 #error "Unsupported unit width"
3766 #endif
3767 }
3768
/* Emits an SSE2 based search (x86 only) that moves STR_PTR forward to the
first code unit equal to char1 or char2, examining 16 bytes per step.

Register use, as encoded in the hand-written instruction bytes below:
  xmm2 - char1 (OR-ed with 'bit' when the two characters differ in exactly
         one bit) replicated into every lane
  xmm3 - 'bit', or char2 when two separate compares are needed, replicated
  xmm0 - the 16 bytes of subject data; xmm1 - a second copy (load_twice)

The first block is read from STR_PTR rounded down to a 16 byte boundary and
the compare mask bits that belong to the bytes before the real STR_PTR are
shifted out. When the emitted code is left, STR_PTR is either at a matching
unit or at / beyond STR_END; the loads cover whole 16 byte blocks, so a hit
may lie beyond STR_END and the caller has to compare STR_PTR with STR_END. */
3769 static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
3770 {
3771 DEFINE_COMPILER;
3772 struct sljit_label *start;
3773 struct sljit_jump *quit[3];
3774 struct sljit_jump *nomatch;
3775 sljit_ub instruction[8];
3776 sljit_si tmp1_ind = sljit_get_register_index(TMP1);
3777 sljit_si tmp2_ind = sljit_get_register_index(TMP2);
3778 sljit_si str_ptr_ind = sljit_get_register_index(STR_PTR);
3779 BOOL load_twice = FALSE;
3780 pcre_uchar bit;
3781
/* If the characters differ in exactly one bit, a single compare is enough
after that bit is forced to 1 in the data (POR). Otherwise 'bit' is 0. */
3782 bit = char1 ^ char2;
3783 if (!is_powerof2(bit))
3784 bit = 0;
3785
/* Two different characters that cannot share a compare: the data is loaded
twice and compared with each character separately. */
3786 if ((char1 != char2) && bit == 0)
3787 load_twice = TRUE;
3788
3789 quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3790
3791 /* First part (unaligned start) */
3792
3793 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
3794
/* The ModRM bytes below encode the register indexes directly. */
3795 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
3796
3797 /* MOVD xmm, r/m32 */
/* xmm2 = TMP1 */
3798 instruction[0] = 0x66;
3799 instruction[1] = 0x0f;
3800 instruction[2] = 0x6e;
3801 instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
3802 sljit_emit_op_custom(compiler, instruction, 4);
3803
3804 if (char1 != char2)
3805 {
3806 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
3807
3808 /* MOVD xmm, r/m32 */
/* xmm3 = TMP1 */
3809 instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
3810 sljit_emit_op_custom(compiler, instruction, 4);
3811 }
3812
3813 /* PSHUFD xmm1, xmm2/m128, imm8 */
/* Broadcast the low dword of xmm2 to all four dwords (imm8 == 0). */
3814 instruction[2] = 0x70;
3815 instruction[3] = 0xc0 | (2 << 3) | 2;
3816 instruction[4] = 0;
3817 sljit_emit_op_custom(compiler, instruction, 5);
3818
3819 if (char1 != char2)
3820 {
3821 /* PSHUFD xmm1, xmm2/m128, imm8 */
/* The same broadcast for xmm3. */
3822 instruction[3] = 0xc0 | (3 << 3) | 3;
3823 instruction[4] = 0;
3824 sljit_emit_op_custom(compiler, instruction, 5);
3825 }
3826
/* TMP2 = offset of STR_PTR inside its 16 byte block; STR_PTR is rounded
down to the block start so that the aligned load can be used. */
3827 OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
3828 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
3829
3830 /* MOVDQA xmm1, xmm2/m128 */
/* xmm0 (and xmm1 when load_twice) = 16 bytes at [STR_PTR]. */
3831 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3832
3833 if (str_ptr_ind < 8)
3834 {
3835 instruction[2] = 0x6f;
3836 instruction[3] = (0 << 3) | str_ptr_ind;
3837 sljit_emit_op_custom(compiler, instruction, 4);
3838
3839 if (load_twice)
3840 {
3841 instruction[3] = (1 << 3) | str_ptr_ind;
3842 sljit_emit_op_custom(compiler, instruction, 4);
3843 }
3844 }
3845 else
3846 {
/* Register index 8 or above: a 0x41 prefix byte is inserted and only the
low three bits of the index go into the ModRM byte. */
3847 instruction[1] = 0x41;
3848 instruction[2] = 0x0f;
3849 instruction[3] = 0x6f;
3850 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
3851 sljit_emit_op_custom(compiler, instruction, 5);
3852
3853 if (load_twice)
3854 {
/* NOTE(review): str_ptr_ind is not masked with 0x7 here, unlike above. For
indexes 8..15 bit 3 coincides with the (1 << 3) register field, so the byte
is the same - confirm, and consider masking for consistency. */
3855 instruction[4] = (1 << 3) | str_ptr_ind;
3856 sljit_emit_op_custom(compiler, instruction, 5);
3857 }
/* Restore the opcode escape byte for the instructions that follow. */
3858 instruction[1] = 0x0f;
3859 }
3860
3861 #else
3862
3863 instruction[2] = 0x6f;
3864 instruction[3] = (0 << 3) | str_ptr_ind;
3865 sljit_emit_op_custom(compiler, instruction, 4);
3866
3867 if (load_twice)
3868 {
3869 instruction[3] = (1 << 3) | str_ptr_ind;
3870 sljit_emit_op_custom(compiler, instruction, 4);
3871 }
3872
3873 #endif
3874
3875 if (bit != 0)
3876 {
3877 /* POR xmm1, xmm2/m128 */
/* xmm0 |= xmm3: force the differing bit to 1 in every data lane. */
3878 instruction[2] = 0xeb;
3879 instruction[3] = 0xc0 | (0 << 3) | 3;
3880 sljit_emit_op_custom(compiler, instruction, 4);
3881 }
3882
3883 /* PCMPEQB/W/D xmm1, xmm2/m128 */
/* xmm0 = lane-wise (xmm0 == xmm2); the lane width follows the unit width. */
3884 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
3885 instruction[3] = 0xc0 | (0 << 3) | 2;
3886 sljit_emit_op_custom(compiler, instruction, 4);
3887
3888 if (load_twice)
3889 {
/* xmm1 = lane-wise (xmm1 == xmm3) */
3890 instruction[3] = 0xc0 | (1 << 3) | 3;
3891 sljit_emit_op_custom(compiler, instruction, 4);
3892 }
3893
3894 /* PMOVMSKB reg, xmm */
/* TMP1 = byte mask of the compare result in xmm0. */
3895 instruction[2] = 0xd7;
3896 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
3897 sljit_emit_op_custom(compiler, instruction, 4);
3898
3899 if (load_twice)
3900 {
/* TMP2 still holds the misalignment: keep it in TMP3 while TMP2 receives
the second mask, then OR the two masks together. */
3901 OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
3902 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
3903 sljit_emit_op_custom(compiler, instruction, 4);
3904
3905 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3906 OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
3907 }
3908
/* Shift out the mask bits of the bytes that precede the original STR_PTR. */
3909 OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
3910
3911 /* BSF r32, r/m32 */
/* TMP1 = index of the lowest set bit; the zero flag is set when the mask
is empty (no hit in this block). */
3912 instruction[0] = 0x0f;
3913 instruction[1] = 0xbc;
3914 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
3915 sljit_emit_op_custom(compiler, instruction, 3);
3916
3917 nomatch = JUMP(SLJIT_ZERO);
3918
/* Hit in the first block: block start + misalignment + bit index. */
3919 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3920 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3921 quit[1] = JUMP(SLJIT_JUMP);
3922
3923 JUMPHERE(nomatch);
3924
/* Main loop: step to the next 16 byte block until a hit or STR_END. */
3925 start = LABEL();
3926 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
3927 quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3928
3929 /* Second part (aligned) */
3930
/* instruction[0..1] were overwritten by the BSF encoding: restore them. */
3931 instruction[0] = 0x66;
3932 instruction[1] = 0x0f;
3933
3934 /* MOVDQA xmm1, xmm2/m128 */
3935 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3936
3937 if (str_ptr_ind < 8)
3938 {
3939 instruction[2] = 0x6f;
3940 instruction[3] = (0 << 3) | str_ptr_ind;
3941 sljit_emit_op_custom(compiler, instruction, 4);
3942
3943 if (load_twice)
3944 {
3945 instruction[3] = (1 << 3) | str_ptr_ind;
3946 sljit_emit_op_custom(compiler, instruction, 4);
3947 }
3948 }
3949 else
3950 {
3951 instruction[1] = 0x41;
3952 instruction[2] = 0x0f;
3953 instruction[3] = 0x6f;
3954 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
3955 sljit_emit_op_custom(compiler, instruction, 5);
3956
3957 if (load_twice)
3958 {
/* NOTE(review): same unmasked str_ptr_ind as in the first part. */
3959 instruction[4] = (1 << 3) | str_ptr_ind;
3960 sljit_emit_op_custom(compiler, instruction, 5);
3961 }
3962 instruction[1] = 0x0f;
3963 }
3964
3965 #else
3966
3967 instruction[2] = 0x6f;
3968 instruction[3] = (0 << 3) | str_ptr_ind;
3969 sljit_emit_op_custom(compiler, instruction, 4);
3970
3971 if (load_twice)
3972 {
3973 instruction[3] = (1 << 3) | str_ptr_ind;
3974 sljit_emit_op_custom(compiler, instruction, 4);
3975 }
3976
3977 #endif
3978
3979 if (bit != 0)
3980 {
3981 /* POR xmm1, xmm2/m128 */
3982 instruction[2] = 0xeb;
3983 instruction[3] = 0xc0 | (0 << 3) | 3;
3984 sljit_emit_op_custom(compiler, instruction, 4);
3985 }
3986
3987 /* PCMPEQB/W/D xmm1, xmm2/m128 */
3988 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
3989 instruction[3] = 0xc0 | (0 << 3) | 2;
3990 sljit_emit_op_custom(compiler, instruction, 4);
3991
3992 if (load_twice)
3993 {
3994 instruction[3] = 0xc0 | (1 << 3) | 3;
3995 sljit_emit_op_custom(compiler, instruction, 4);
3996 }
3997
3998 /* PMOVMSKB reg, xmm */
3999 instruction[2] = 0xd7;
4000 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4001 sljit_emit_op_custom(compiler, instruction, 4);
4002
4003 if (load_twice)
4004 {
/* No misalignment has to be preserved in the aligned loop, so TMP2 can
receive the second mask directly. */
4005 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4006 sljit_emit_op_custom(compiler, instruction, 4);
4007
4008 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4009 }
4010
4011 /* BSF r32, r/m32 */
4012 instruction[0] = 0x0f;
4013 instruction[1] = 0xbc;
4014 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4015 sljit_emit_op_custom(compiler, instruction, 3);
4016
/* Empty mask: continue with the next block. */
4017 JUMPTO(SLJIT_ZERO, start);
4018
/* Hit: block start + bit index. */
4019 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4020
/* Common exit for all three quit jumps ('start' is reused as the label). */
4021 start = LABEL();
4022 SET_LABEL(quit[0], start);
4023 SET_LABEL(quit[1], start);
4024 SET_LABEL(quit[2], start);
4025 }
4026
4027 #undef SSE2_COMPARE_TYPE_INDEX
4028
4029 #endif
4030
4031 static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_si offset)
4032 {
4033 DEFINE_COMPILER;
4034 struct sljit_label *start;
4035 struct sljit_jump *quit;
4036 struct sljit_jump *found;
4037 pcre_uchar mask;
4038 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4039 struct sljit_label *utf_start = NULL;
4040 struct sljit_jump *utf_quit = NULL;
4041 #endif
4042 BOOL has_first_line_end = (common->first_line_end != 0);
4043
4044 if (offset > 0)
4045 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4046
4047 if (has_first_line_end)
4048 {
4049 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4050
4051 OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end, SLJIT_IMM, IN_UCHARS(offset + 1));
4052 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
4053 if (sljit_x86_is_cmov_available())
4054 {
4055 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
4056 sljit_x86_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
4057 }
4058 #endif
4059 {
4060 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP3, 0);
4061 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4062 JUMPHERE(quit);
4063 }
4064 }
4065
4066 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4067 if (common->utf && offset > 0)
4068 utf_start = LABEL();
4069 #endif
4070
4071 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
4072
4073 /* SSE2 accelerated first character search. */
4074
4075 if (sljit_x86_is_sse2_available())
4076 {
4077 fast_forward_first_char2_sse2(common, char1, char2);
4078
4079 SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
4080 if (common->mode == JIT_COMPILE)
4081 {
4082 /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
4083 SLJIT_ASSERT(common->forced_quit_label == NULL);
4084 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
4085 add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4086
4087 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4088 if (common->utf && offset > 0)
4089 {
4090 SLJIT_ASSERT(common->mode == JIT_COMPILE);
4091
4092 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4093 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4094 #if defined COMPILE_PCRE8
4095 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4096 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4097 #elif defined COMPILE_PCRE16
4098 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4099 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4100 #else
4101 #error "Unknown code width"
4102 #endif
4103 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4104 }
4105 #endif
4106
4107 if (offset > 0)
4108 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4109 }
4110 else if (sljit_x86_is_cmov_available())
4111 {
4112 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
4113 sljit_x86_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_first_line_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_first_line_end ? common->first_line_end : 0);
4114 }
4115 else
4116 {
4117 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4118 OP1(SLJIT_MOV, STR_PTR, 0, has_first_line_end ? SLJIT_MEM1(SLJIT_SP) : STR_END, has_first_line_end ? common->first_line_end : 0);
4119 JUMPHERE(quit);
4120 }
4121
4122 if (has_first_line_end)
4123 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4124 return;
4125 }
4126
4127 #endif
4128
4129 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4130
4131 start = LABEL();
4132 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4133
4134 if (char1 == char2)
4135 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
4136 else
4137 {
4138 mask = char1 ^ char2;
4139 if (is_powerof2(mask))
4140 {
4141 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4142 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
4143 }
4144 else
4145 {
4146 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
4147 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4148 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
4149 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
4150 found = JUMP(SLJIT_NOT_ZERO);
4151 }
4152 }
4153
4154 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4155 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
4156
4157 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4158 if (common->utf && offset > 0)
4159 utf_quit = JUMP(SLJIT_JUMP);
4160 #endif
4161
4162 JUMPHERE(found);
4163
4164 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4165 if (common->utf && offset > 0)
4166 {
4167 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4168 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4169 #if defined COMPILE_PCRE8
4170 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4171 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4172 #elif defined COMPILE_PCRE16
4173 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4174 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4175 #else
4176 #error "Unknown code width"
4177 #endif
4178 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4179 JUMPHERE(utf_quit);
4180 }
4181 #endif
4182
4183 JUMPHERE(quit);
4184
4185 if (has_first_line_end)
4186 {
4187 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4188 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4189 if (offset > 0)
4190 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4191 JUMPHERE(quit);
4192 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4193 }
4194
4195 if (offset > 0)
4196 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4197 }
4198
4199 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
4200 {
4201 DEFINE_COMPILER;
4202 struct sljit_label *start;
4203 struct sljit_jump *quit;
4204 pcre_uint32 chars[MAX_N_CHARS * 2];
4205 pcre_uint8 bytes[MAX_N_CHARS * MAX_N_BYTES];
4206 pcre_uint8 ones[MAX_N_CHARS];
4207 int offsets[3];
4208 pcre_uint32 mask;
4209 pcre_uint8 *byte_set, *byte_set_end;
4210 int i, max, from;
4211 int range_right = -1, range_len = 3 - 1;
4212 sljit_ub *update_table = NULL;
4213 BOOL in_range;
4214 pcre_uint32 rec_count;
4215
4216 for (i = 0; i < MAX_N_CHARS; i++)
4217 {
4218 chars[i << 1] = NOTACHAR;
4219 chars[(i << 1) + 1] = 0;
4220 bytes[i * MAX_N_BYTES] = 0;
4221 }
4222
4223 rec_count = 10000;
4224 max = scan_prefix(common, common->start, chars, bytes, MAX_N_CHARS, &rec_count);
4225
4226 if (max <= 1)
4227 return FALSE;
4228
4229 for (i = 0; i < max; i++)
4230 {
4231 mask = chars[(i << 1) + 1];
4232 ones[i] = ones_in_half_byte[mask & 0xf];
4233 mask >>= 4;
4234 while (mask != 0)
4235 {
4236 ones[i] += ones_in_half_byte[mask & 0xf];
4237 mask >>= 4;
4238 }
4239 }
4240
4241 in_range = FALSE;
4242 from = 0; /* Prevent compiler "uninitialized" warning */
4243 for (i = 0; i <= max; i++)
4244 {
4245 if (in_range && (i - from) > range_len && (bytes[(i - 1) * MAX_N_BYTES] <= 4))
4246 {
4247 range_len = i - from;
4248 range_right = i - 1;
4249 }
4250
4251 if (i < max && bytes[i * MAX_N_BYTES] < 255)
4252 {
4253 if (!in_range)
4254 {
4255 in_range = TRUE;
4256 from = i;
4257 }
4258 }
4259 else if (in_range)
4260 in_range = FALSE;
4261 }
4262
4263 if (range_right >= 0)
4264 {
4265 update_table = (sljit_ub *)allocate_read_only_data(common, 256);
4266 if (update_table == NULL)
4267 return TRUE;
4268 memset(update_table, IN_UCHARS(range_len), 256);
4269
4270 for (i = 0; i < range_len; i++)
4271 {
4272 byte_set = bytes + ((range_right - i) * MAX_N_BYTES);
4273 SLJIT_ASSERT(byte_set[0] > 0 && byte_set[0] < 255);
4274 byte_set_end = byte_set + byte_set[0];
4275 byte_set++;
4276 while (byte_set <= byte_set_end)
4277 {
4278 if (update_table[*byte_set] > IN_UCHARS(i))
4279 update_table[*byte_set] = IN_UCHARS(i);
4280 byte_set++;
4281 }
4282 }
4283 }
4284
4285 offsets[0] = -1;
4286 /* Scan forward. */
4287 for (i = 0; i < max; i++)
4288 if (ones[i] <= 2) {
4289 offsets[0] = i;
4290 break;
4291 }
4292
4293 if (offsets[0] < 0 && range_right < 0)
4294 return FALSE;
4295
4296 if (offsets[0] >= 0)
4297 {
4298 /* Scan backward. */
4299 offsets[1] = -1;
4300 for (i = max - 1; i > offsets[0]; i--)
4301 if (ones[i] <= 2 && i != range_right)
4302 {
4303 offsets[1] = i;
4304 break;
4305 }
4306
4307 /* This case is handled better by fast_forward_first_char. */
4308 if (offsets[1] == -1 && offsets[0] == 0 && range_right < 0)
4309 return FALSE;
4310
4311 offsets[2] = -1;
4312 /* We only search for a middle character if there is no range check. */
4313 if (offsets[1] >= 0 && range_right == -1)
4314 {
4315 /* Scan from middle. */
4316 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
4317 if (ones[i] <= 2)
4318 {
4319 offsets[2] = i;
4320 break;
4321 }
4322
4323 if (offsets[2] == -1)
4324 {
4325 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
4326 if (ones[i] <= 2)
4327 {
4328 offsets[2] = i;
4329 break;
4330 }
4331 }
4332 }
4333
4334 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
4335 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
4336
4337 chars[0] = chars[offsets[0] << 1];
4338 chars[1] = chars[(offsets[0] << 1) + 1];
4339 if (offsets[2] >= 0)
4340 {
4341 chars[2] = chars[offsets[2] << 1];
4342 chars[3] = chars[(offsets[2] << 1) + 1];
4343 }
4344 if (offsets[1] >= 0)
4345 {
4346 chars[4] = chars[offsets[1] << 1];
4347 chars[5] = chars[(offsets[1] << 1) + 1];
4348 }
4349 }
4350
4351 max -= 1;
4352 if (firstline)
4353 {
4354 SLJIT_ASSERT(common->first_line_end != 0);
4355 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4356 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4357 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4358 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4359 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4360 JUMPHERE(quit);
4361 }
4362 else
4363 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4364
4365 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4366 if (range_right >= 0)
4367 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4368 #endif
4369
4370 start = LABEL();
4371 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4372
4373 SLJIT_ASSERT(range_right >= 0 || offsets[0] >= 0);
4374
4375 if (range_right >= 0)
4376 {
4377 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4378 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4379 #else
4380 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4381 #endif
4382
4383 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4384 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4385 #else
4386 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4387 #endif
4388 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4389 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4390 }
4391
4392 if (offsets[0] >= 0)
4393 {
4394 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
4395 if (offsets[1] >= 0)
4396 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
4397 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4398
4399 if (chars[1] != 0)
4400 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
4401 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
4402 if (offsets[2] >= 0)
4403 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
4404
4405 if (offsets[1] >= 0)
4406 {
4407 if (chars[5] != 0)
4408 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
4409 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
4410 }
4411
4412 if (offsets[2] >= 0)
4413 {
4414 if (chars[3] != 0)
4415 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
4416 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
4417 }
4418 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4419 }
4420
4421 JUMPHERE(quit);
4422
4423 if (firstline)
4424 {
4425 if (range_right >= 0)
4426 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4427 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4428 if (range_right >= 0)
4429 {
4430 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4431 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4432 JUMPHERE(quit);
4433 }
4434 }
4435 else
4436 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4437 return TRUE;
4438 }
4439
4440 #undef MAX_N_CHARS
4441 #undef MAX_N_BYTES
4442
4443 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4444 {
4445 pcre_uchar oc;
4446
4447 oc = first_char;
4448 if (caseless)
4449 {
4450 oc = TABLE_GET(first_char, common->fcc, first_char);
4451 #if defined SUPPORT_UTF && !defined COMPILE_PCRE8
4452 if (first_char > 127 && common->utf)
4453 oc = UCD_OTHERCASE(first_char);
4454 #endif
4455 }
4456
4457 fast_forward_first_char2(common, first_char, oc, 0);
4458 }
4459
/* Emits the fast-forward code that looks for a newline: STR_PTR is advanced
until a newline sequence has been passed or STR_END is reached. When
common->first_line_end is non-zero, STR_END is parked in TMP3 and replaced by
the stored first-line end for the duration of the scan, then restored. */
4460 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4461 {
4462 DEFINE_COMPILER;
4463 struct sljit_label *loop;
4464 struct sljit_jump *lastchar;
4465 struct sljit_jump *firstchar;
4466 struct sljit_jump *quit;
4467 struct sljit_jump *foundcr = NULL;
4468 struct sljit_jump *notfoundnl;
4469 jump_list *newline = NULL;
4470
4471 if (common->first_line_end != 0)
4472 {
4473 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4474 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4475 }
4476
/* Fixed two-character newline: the two characters are packed into
common->newline (high byte first) and are compared directly. */
4477 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4478 {
/* Nothing to do when already at the end, or when STR_PTR is not beyond
the 'str' position stored in the arguments. */
4479 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4480 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4481 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4482 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4483 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4484
/* TMP2 becomes 1 when at least two characters lie between 'begin' and
STR_PTR, otherwise 0. It is scaled to the character size and subtracted, so
STR_PTR steps back by one character only when that is safe. The loop below
starts by adding one character again. */
4485 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4486 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4487 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER_EQUAL);
4488 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4489 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4490 #endif
4491 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4492
/* Advance one character at a time until the two characters just before
STR_PTR are the newline pair, or STR_END is reached. */
4493 loop = LABEL();
4494 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4495 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4496 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4497 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4498 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4499 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4500
4501 JUMPHERE(quit);
4502 JUMPHERE(firstchar);
4503 JUMPHERE(lastchar);
4504
/* Put back the real end of the subject. */
4505 if (common->first_line_end != 0)
4506 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4507 return;
4508 }
4509
/* All other newline types. Skip the scan when STR_PTR is not beyond the
'str' position; otherwise step one character back before entering the loop. */
4510 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4511 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4512 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4513 skip_char_back(common);
4514
/* The loop label is also stored in common->ff_newline_shortcut so that it can
be referenced from outside this function. */
4515 loop = LABEL();
4516 common->ff_newline_shortcut = loop;
4517
/* Read the next character (restricted to the nlmin..nlmax range) and leave
the loop at the end of the subject. For the ANY and ANYCRLF types a CR is
handled separately below, because it may be followed by an NL. The jumps
collected in 'newline' are routed back to the loop label; presumably
check_newlinechar() adds them for the "not a newline" case since FALSE is
passed as its last argument - confirm against check_newlinechar(). */
4518 read_char_range(common, common->nlmin, common->nlmax, TRUE);
4519 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4520 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4521 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4522 check_newlinechar(common, common->nltype, &newline, FALSE);
4523 set_jumps(newline, loop);
4524
4525 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4526 {
/* After a CR: if the subject continues with an NL, step over it as well.
TMP1 becomes 1 when the next character is NL (0 otherwise), is scaled to the
character size, and is added to STR_PTR. */
4527 quit = JUMP(SLJIT_JUMP);
4528 JUMPHERE(foundcr);
4529 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4530 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4531 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4532 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4533 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4534 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4535 #endif
4536 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4537 JUMPHERE(notfoundnl);
4538 JUMPHERE(quit);
4539 }
4540 JUMPHERE(lastchar);
4541 JUMPHERE(firstchar);
4542
/* Put back the real end of the subject. */
4543 if (common->first_line_end != 0)
4544 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4545 }
4546
/* Forward declaration: check_class_ranges() is defined further down but is
needed by fast_forward_start_bits(). */
4547 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4548
/* Emits a loop that advances STR_PTR until it points at a character whose bit
is set in the start_bits bitmap, or until STR_END is reached. When
common->first_line_end is non-zero, STR_END is parked in RETURN_ADDR and the
scan is bounded by the stored first-line end instead; the real STR_END is
restored at the end. */
4549 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_ub *start_bits)
4550 {
4551 DEFINE_COMPILER;
4552 struct sljit_label *start;
4553 struct sljit_jump *quit;
4554 struct sljit_jump *found = NULL;
4555 jump_list *matches = NULL;
4556 #ifndef COMPILE_PCRE8
4557 struct sljit_jump *jump;
4558 #endif
4559
4560 if (common->first_line_end != 0)
4561 {
4562 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4563 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->first_line_end);
4564 }
4565
/* Load the current character into TMP1. In UTF mode a copy is kept in TMP3
because the membership test below overwrites TMP1 and the original value is
needed again to skip the rest of a multi-unit character. */
4566 start = LABEL();
4567 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4568 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4569 #ifdef SUPPORT_UTF
4570 if (common->utf)
4571 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4572 #endif
4573
/* First try the compact range test (invert is TRUE, so the jumps collected in
'matches' are taken when the character IS in the set). The top bit of the
bitmap decides how characters above 255 are treated. If the bitmap is too
irregular for that, fall back to an explicit bit lookup. */
4574 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4575 {
4576 #ifndef COMPILE_PCRE8
/* Characters of 255 and above all use the bit of character 255. */
4577 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4578 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4579 JUMPHERE(jump);
4580 #endif
/* TMP2 = bit index inside the byte, TMP1 = bitmap byte; test (1 << TMP2). */
4581 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4582 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4583 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4584 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4585 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4586 found = JUMP(SLJIT_NOT_ZERO);
4587 }
4588
/* No match at this position: restore the character (UTF mode) and move on. */
4589 #ifdef SUPPORT_UTF
4590 if (common->utf)
4591 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4592 #endif
4593 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4594 #ifdef SUPPORT_UTF
4595 #if defined COMPILE_PCRE8
4596 if (common->utf)
4597 {
/* UTF-8: for a lead byte of 0xc0 or above, add the extra length looked up in
utf8_table4 so that the whole character is skipped. */
4598 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4599 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4600 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4601 }
4602 #elif defined COMPILE_PCRE16
4603 if (common->utf)
4604 {
/* UTF-16: when (unit & 0xfc00) == 0xd800 the unit is the first half of a
surrogate pair, so one more unit (flag shifted left by 1 = 2 bytes) is skipped. */
4605 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4606 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4607 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4608 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
4609 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4610 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4611 }
4612 #endif /* COMPILE_PCRE[8|16] */
4613 #endif /* SUPPORT_UTF */
4614 JUMPTO(SLJIT_JUMP, start);
/* Exit point: reached when a starting character was found or the end of the
search area was hit. */
4615 if (found != NULL)
4616 JUMPHERE(found);
4617 if (matches != NULL)
4618 set_jumps(matches, LABEL());
4619 JUMPHERE(quit);
4620
4621 if (common->first_line_end != 0)
4622 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4623 }
4624
/* Emits a forward search for the required character req_char (or its other
case when 'caseless' is set), starting at STR_PTR, or one character later when
has_firstchar is TRUE. The position where the character was found is cached in
the local slot common->req_char_ptr; the search is skipped when STR_PTR is
still below the cached position, or when the remaining subject is longer than
REQ_BYTE_MAX.

Returns the jump that is taken when STR_END is reached without finding the
character; the caller has to bind it. TMP1 and TMP2 are overwritten. */
4625 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4626 {
4627 DEFINE_COMPILER;
4628 struct sljit_label *loop;
4629 struct sljit_jump *toolong;
4630 struct sljit_jump *alreadyfound;
4631 struct sljit_jump *found;
4632 struct sljit_jump *foundoc = NULL;
4633 struct sljit_jump *notfound;
4634 pcre_uint32 oc, bit;
4635
4636 SLJIT_ASSERT(common->req_char_ptr != 0);
/* TMP2 = cached position of the last hit.
NOTE(review): REQ_BYTE_MAX is added without IN_UCHARS(), so the limit is
applied in bytes rather than characters - confirm this is intended for the
16-bit and 32-bit builds. */
4637 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
4638 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4639 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4640 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4641
/* TMP1 is the scan pointer. */
4642 if (has_firstchar)
4643 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4644 else
4645 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4646
4647 loop = LABEL();
4648 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4649
4650 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Work out the other case of req_char at compile time. */
4651 oc = req_char;
4652 if (caseless)
4653 {
4654 oc = TABLE_GET(req_char, common->fcc, req_char);
4655 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4656 if (req_char > 127 && common->utf)
4657 oc = UCD_OTHERCASE(req_char);
4658 #endif
4659 }
4660 if (req_char == oc)
4661 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4662 else
4663 {
/* When the two cases differ in exactly one bit, force that bit on and do a
single compare; otherwise compare against both forms. */
4664 bit = req_char ^ oc;
4665 if (is_powerof2(bit))
4666 {
4667 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4668 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4669 }
4670 else
4671 {
4672 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4673 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4674 }
4675 }
4676 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4677 JUMPTO(SLJIT_JUMP, loop);
4678
/* Found: remember the position so that later calls can skip the search. */
4679 JUMPHERE(found);
4680 if (foundoc)
4681 JUMPHERE(foundoc);
4682 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4683 JUMPHERE(alreadyfound);
4684 JUMPHERE(toolong);
4685 return notfound;
4686 }
4687
/* Emits the fast-call routine that undoes saved frame data. Starting at
STACK_TOP (copied to TMP1, STACK_TOP itself is not changed here), it walks a
list of records whose first word selects the action:
  > 0  the word is an offset from the local base (TMP3); the next two stack
       words are copied to that location and the one after it, and the
       record is three words long;
  = 0  end of the list, the routine returns;
  < 0  the negated word is an offset from the local base; one stack word is
       copied there, and the record is two words long.
TMP1, TMP2 and TMP3 are overwritten. */
4688 static void do_revertframes(compiler_common *common)
4689 {
4690 DEFINE_COMPILER;
4691 struct sljit_jump *jump;
4692 struct sljit_label *mainloop;
4693
4694 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4695 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
4696 GET_LOCAL_BASE(TMP3, 0, 0);
4697
4698 /* Drop frames until we reach STACK_TOP. */
4699 mainloop = LABEL();
4700 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Compare the record header with zero (signed flags). */
4701 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
4702 jump = JUMP(SLJIT_SIG_LESS_EQUAL);
4703
/* Positive header: restore two words and continue with the next record. */
4704 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4705 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4706 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
4707 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4708 JUMPTO(SLJIT_JUMP, mainloop);
4709
/* Header <= 0: the flags of the comparison above are still valid here, so a
second conditional jump separates the negative case from the zero case. */
4710 JUMPHERE(jump);
4711 jump = JUMP(SLJIT_SIG_LESS);
4712 /* End of dropping frames. */
4713 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4714
/* Negative header: restore a single word and continue. */
4715 JUMPHERE(jump);
4716 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4717 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
4718 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
4719 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4720 JUMPTO(SLJIT_JUMP, mainloop);
4721 }
4722
/* Emits the fast-call routine that tests for a word boundary at STR_PTR.
The "is a word character" flag (0 or 1) of the previous character is stored in
LOCALS1, the flag of the current character is computed in TMP2, and the two
are XOR-ed with SLJIT_SET_E as the last step, so the result is zero exactly
when both sides have the same flag. A missing character (start or end of the
subject) counts as a non-word character. The return address is kept in
LOCALS0; TMP1 and TMP2 are overwritten. */
4723 static void check_wordboundary(compiler_common *common)
4724 {
4725 DEFINE_COMPILER;
4726 struct sljit_jump *skipread;
4727 jump_list *skipread_list = NULL;
4728 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4729 struct sljit_jump *jump;
4730 #endif
4731
/* The non-UCP paths below extract the word bit with a shift by 4. */
4732 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4733
4734 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4735 /* Get type of the previous char, and put it to LOCALS1. */
4736 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4737 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4738 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
/* At the very beginning there is no previous character: keep the 0. */
4739 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again, which leaves STR_PTR where it
was (assuming read_char() advances over the character - confirm). */
4740 skip_char_back(common);
4741 check_start_used_ptr(common);
4742 read_char(common);
4743
4744 /* Testing char type. */
4745 #ifdef SUPPORT_UCP
4746 if (common->use_ucp)
4747 {
/* Underscore is always a word character. Otherwise the getucd helper is
called (presumably leaving the character type in TMP1 - confirm), and the
flag is set when the type lies in ucp_Ll..ucp_Lu or in ucp_Nd..ucp_No. */
4748 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4749 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4750 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4751 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4752 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4753 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4754 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4755 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4756 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4757 JUMPHERE(jump);
4758 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4759 }
4760 else
4761 #endif
4762 {
/* Table lookup: characters above 255 skip the lookup and keep the flag 0. */
4763 #ifndef COMPILE_PCRE8
4764 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4765 #elif defined SUPPORT_UTF
4766 /* Here LOCALS1 has already been zeroed. */
4767 jump = NULL;
4768 if (common->utf)
4769 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4770 #endif /* COMPILE_PCRE8 */
4771 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4772 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4773 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4774 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4775 #ifndef COMPILE_PCRE8
4776 JUMPHERE(jump);
4777 #elif defined SUPPORT_UTF
4778 if (jump != NULL)
4779 JUMPHERE(jump);
4780 #endif /* COMPILE_PCRE8 */
4781 }
4782 JUMPHERE(skipread);
4783
/* Now the current character: TMP2 defaults to 0, which is the value used
when check_str_end() jumps out because the subject has ended. */
4784 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4785 check_str_end(common, &skipread_list);
4786 peek_char(common, READ_CHAR_MAX);
4787
4788 /* Testing char type. This is a code duplication. */
4789 #ifdef SUPPORT_UCP
4790 if (common->use_ucp)
4791 {
4792 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4793 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4794 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4795 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4796 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4797 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4798 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4799 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4800 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
4801 JUMPHERE(jump);
4802 }
4803 else
4804 #endif
4805 {
4806 #ifndef COMPILE_PCRE8
4807 /* TMP2 may be destroyed by peek_char. */
4808 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4809 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4810 #elif defined SUPPORT_UTF
4811 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4812 jump = NULL;
4813 if (common->utf)
4814 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4815 #endif
4816 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4817 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4818 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4819 #ifndef COMPILE_PCRE8
4820 JUMPHERE(jump);
4821 #elif defined SUPPORT_UTF
4822 if (jump != NULL)
4823 JUMPHERE(jump);
4824 #endif /* COMPILE_PCRE8 */
4825 }
4826 set_jumps(skipread_list, LABEL());
4827
/* Combine both flags: the result is non-zero only at a word boundary. */
4828 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4829 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4830 }
4831
/* Tries to emit a short compare sequence that tests the character in TMP1
against the 256-bit class bitmap 'bits'. The bitmap is first reduced to the
list of positions where membership flips; when that list needs more than four
entries (or more than MAX_RANGE_SIZE while collecting) nothing is emitted and
FALSE is returned, so the caller has to use another test.

  bits        class bitmap, one bit per character 0..255
  nclass      TRUE when characters of 256 and above count as members
  invert      FALSE: the emitted jumps are taken for non-members;
              TRUE: the emitted jumps are taken for members
  backtracks  list that receives the emitted jumps

Returns TRUE when code was emitted (or no code was necessary). The emitted
code may modify TMP1. */
4832 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4833 {
4834 DEFINE_COMPILER;
4835 int ranges[MAX_RANGE_SIZE];
4836 pcre_uint8 bit, cbit, all;
4837 int i, byte, length = 0;
4838
/* 'bit' tracks the membership of the run currently being scanned, starting
with character 0. */
4839 bit = bits[0] & 0x1;
4840 /* All bits will be zero or one (since bit is zero or one). */
4841 all = -bit;
4842
/* Collect the flip positions. A whole bitmap byte that matches the current
run (0x00 or 0xff) is skipped in one step. */
4843 for (i = 0; i < 256; )
4844 {
4845 byte = i >> 3;
4846 if ((i & 0x7) == 0 && bits[byte] == all)
4847 i += 8;
4848 else
4849 {
4850 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
4851 if (cbit != bit)
4852 {
4853 if (length >= MAX_RANGE_SIZE)
4854 return FALSE;
4855 ranges[length] = i;
4856 length++;
4857 bit = cbit;
4858 all = -cbit;
4859 }
4860 i++;
4861 }
4862 }
4863
/* If the last run disagrees with the membership of characters >= 256, one
more flip position is recorded at 256. */
4864 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
4865 {
4866 if (length >= MAX_RANGE_SIZE)
4867 return FALSE;
4868 ranges[length] = 256;
4869 length++;
4870 }
4871
/* Only up to four flip positions are handled by the cases below.
NOTE(review): length is never negative at this point, so only the upper
bound is effective. */
4872 if (length < 0 || length > 4)
4873 return FALSE;
4874
/* From here on 'bit' is the membership of character 0, flipped when the
caller asked for inverted jumps. */
4875 bit = bits[0] & 0x1;
4876 if (invert) bit ^= 0x1;
4877
4878 /* No character is accepted. */
4879 if (length == 0 && bit == 0)
4880 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4881
4882 switch(length)
4883 {
4884 case 0:
4885 /* When bit != 0, all characters are accepted. */
4886 return TRUE;
4887
4888 case 1:
/* A single flip: one compare against the flip position. */
4889 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4890 return TRUE;
4891
4892 case 2:
/* One interval [ranges[0], ranges[1]): a subtract plus an unsigned range
compare, or a plain equality test when the interval holds one character. */
4893 if (ranges[0] + 1 != ranges[1])
4894 {
4895 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4896 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4897 }
4898 else
4899 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4900 return TRUE;
4901
4902 case 3:
/* Three flips: one open-ended compare plus one interval test; which end is
open depends on 'bit'. */
4903 if (bit != 0)
4904 {
4905 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4906 if (ranges[0] + 1 != ranges[1])
4907 {
4908 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4909 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4910 }
4911 else
4912 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4913 return TRUE;
4914 }
4915
4916 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
4917 if (ranges[1] + 1 != ranges[2])
4918 {
4919 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
4920 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4921 }
4922 else
4923 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
4924 return TRUE;
4925
4926 case 4:
/* Two intervals. If they have the same length and their start positions
differ in a single bit that is clear throughout the first interval, that bit
is OR-ed into the character so both intervals collapse into one test. */
4927 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
4928 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
4929 && (ranges[1] & (ranges[2] - ranges[0])) == 0
4930 && is_powerof2(ranges[2] - ranges[0]))
4931 {
4932 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
4933 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
4934 if (ranges[2] + 1 != ranges[3])
4935 {
4936 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
4937 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4938 }
4939 else
4940 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
4941 return TRUE;
4942 }
4943
/* General two-interval case. 'i' remembers how much has already been
subtracted from TMP1, so the second test can compensate for it. */
4944 if (bit != 0)
4945 {
4946 i = 0;
4947 if (ranges[0] + 1 != ranges[1])
4948 {
4949 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4950 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4951 i = ranges[0];
4952 }
4953 else
4954 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
4955
4956 if (ranges[2] + 1 != ranges[3])
4957 {
4958 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
4959 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
4960 }
4961 else
4962 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
4963 return TRUE;
4964 }
4965
/* bit == 0: jump for everything outside [ranges[0], ranges[3]) and for the
gap [ranges[1], ranges[2]) in the middle. */
4966 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
4967 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
4968 if (ranges[1] + 1 != ranges[2])
4969 {
4970 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4971 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4972 }
4973 else
4974 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4975 return TRUE;
4976
4977 default:
4978 SLJIT_ASSERT_STOP();
4979 return FALSE;
4980 }
4981 }
4982
/* Emits the fast-call routine for the "any newline" test. The characters
recognised are 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in UTF mode and in
the 16-bit and 32-bit builds. */
4983 static void check_anynewline(compiler_common *common)
4984 {
4985 /* Check whether TMP1 contains a newline character. TMP1 and TMP2 are destroyed. The result is accumulated in TMP2; the final OR is emitted with SLJIT_SET_E. */
4986 DEFINE_COMPILER;
4987
4988 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4989
/* TMP1 is rebased to 0x0a, so all later constants are relative to 0x0a. */
4990 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4991 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4992 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
4993 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4994 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4995 #ifdef COMPILE_PCRE8
4996 if (common->utf)
4997 {
4998 #endif
/* Setting the lowest bit lets a single compare cover both 0x2028 and 0x2029. */
4999 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5000 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5001 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5002 #ifdef COMPILE_PCRE8
5003 }
5004 #endif
5005 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5006 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5007 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5008 }
5009
/* Emits the fast-call routine for the horizontal white space test. The
characters recognised are 0x09, 0x20 and 0xa0, plus 0x1680, 0x180e,
0x2000..0x200a, 0x202f, 0x205f and 0x3000 in UTF mode and in the 16-bit and
32-bit builds. */
5010 static void check_hspace(compiler_common *common)
5011 {
5012 /* Check whether TMP1 contains a horizontal white space character. TMP2 is destroyed; TMP1 is also modified when the wide character tests are emitted. The result is accumulated in TMP2; the final OR is emitted with SLJIT_SET_E. */
5013 DEFINE_COMPILER;
5014
5015 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5016
5017 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
5018 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5019 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
5020 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5021 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
5022 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5023 #ifdef COMPILE_PCRE8
5024 if (common->utf)
5025 {
5026 #endif
5027 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5028 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
5029 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5030 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
5031 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
/* TMP1 is rebased to 0x2000 here, so the remaining constants are relative
to 0x2000. */
5032 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
5033 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
5034 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5035 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
5036 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5037 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
5038 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5039 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
5040 #ifdef COMPILE_PCRE8
5041 }
5042 #endif
5043 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5044 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5045
5046 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5047 }
5048
/* Emits the fast-call routine for the vertical white space test. The
characters recognised are 0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in UTF
mode and in the 16-bit and 32-bit builds. */
5049 static void check_vspace(compiler_common *common)
5050 {
5051 /* Check whether TMP1 contains a vertical white space character. TMP1 and TMP2 are destroyed. The result is accumulated in TMP2; the final OR is emitted with SLJIT_SET_E. */
5052 DEFINE_COMPILER;
5053
5054 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5055
/* TMP1 is rebased to 0x0a, so all later constants are relative to 0x0a. */
5056 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5057 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5058 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5059 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5060 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5061 #ifdef COMPILE_PCRE8
5062 if (common->utf)
5063 {
5064 #endif
/* Setting the lowest bit lets a single compare cover both 0x2028 and 0x2029. */
5065 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5066 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5067 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5068 #ifdef COMPILE_PCRE8
5069 }
5070 #endif
5071 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
5072 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5073
5074 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5075 }
5076
/* The compare routines below borrow STR_END and STACK_TOP as character
scratch registers; their values are saved on entry and restored on exit. */
5077 #define CHAR1 STR_END
5078 #define CHAR2 STACK_TOP
5079
/* Emits the fast-call routine that compares two character sequences exactly.
As used by the emitted code: TMP1 points to the first sequence, TMP2 holds the
length in bytes, and the second sequence is the TMP2 bytes that end at
STR_PTR (STR_PTR is moved back by TMP2 first). The loop stops at the first
mismatch or when TMP2 reaches zero. TMP1, TMP2 and TMP3 are overwritten. */
5080 static void do_casefulcmp(compiler_common *common)
5081 {
5082 DEFINE_COMPILER;
5083 struct sljit_jump *jump;
5084 struct sljit_label *label;
5085
5086 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5087 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the borrowed registers: STR_END in TMP3, STACK_TOP in LOCALS0. */
5088 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
5089 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR2, 0);
/* Both pointers are moved one character back because each load in the loop
addresses the character at offset IN_UCHARS(1) (MOVU_UCHAR presumably also
updates the base register - confirm against the sljit documentation). */
5090 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5091 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5092
5093 label = LABEL();
5094 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5095 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5096 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5097 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5098 JUMPTO(SLJIT_NOT_ZERO, label);
5099
/* Common exit: undo the initial one-character step of STR_PTR and restore
the borrowed registers. */
5100 JUMPHERE(jump);
5101 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5102 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
5103 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5104 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5105 }
5106
/* do_caselesscmp() additionally borrows STACK_LIMIT to hold the address of
the case-folding table. */
5107 #define LCC_TABLE STACK_LIMIT
5108
/* Emits the fast-call routine that compares two character sequences after
mapping each character through the common->lcc table. The register usage is
the same as in do_casefulcmp(): TMP1 points to the first sequence, TMP2 holds
the length in bytes, and the second sequence ends at STR_PTR. In the 16-bit
and 32-bit builds characters above 255 are compared without table lookup.
TMP1, TMP2 and TMP3 are overwritten. */
5109 static void do_caselesscmp(compiler_common *common)
5110 {
5111 DEFINE_COMPILER;
5112 struct sljit_jump *jump;
5113 struct sljit_label *label;
5114
5115 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5116 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5117
/* Save the three borrowed registers, then load the table address. */
5118 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
5119 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, CHAR1, 0);
5120 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, CHAR2, 0);
5121 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved one character back because each load in the loop
addresses the character at offset IN_UCHARS(1). */
5122 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5123 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5124
5125 label = LABEL();
5126 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5127 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Map each character through the table unless it is above 255. */
5128 #ifndef COMPILE_PCRE8
5129 jump = CMP(SLJIT_GREATER, CHAR1, 0, SLJIT_IMM, 255);
5130 #endif
5131 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
5132 #ifndef COMPILE_PCRE8
5133 JUMPHERE(jump);
5134 jump = CMP(SLJIT_GREATER, CHAR2, 0, SLJIT_IMM, 255);
5135 #endif
5136 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
5137 #ifndef COMPILE_PCRE8
5138 JUMPHERE(jump);
5139 #endif
5140 jump = CMP(SLJIT_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
5141 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5142 JUMPTO(SLJIT_NOT_ZERO, label);
5143
/* Common exit: undo the initial one-character step of STR_PTR and restore
the borrowed registers. */
5144 JUMPHERE(jump);
5145 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5146 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
5147 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5148 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5149 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5150 }
5151
5152 #undef LCC_TABLE
5153 #undef CHAR1
5154 #undef CHAR2
5155
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of a UTF sequence against the subject, written as a
plain C helper instead of generated code.

  src1  start of the reference sequence
  args  run-time arguments; the subject side starts at args->uchar_ptr and
        must not run past args->end
  end1  end of the reference sequence

Two characters are accepted as equal when they are identical, when the
reference character is the "other case" of the subject character, or when the
reference character appears in the subject character's caseless set.

Returns the subject position after the compared text when everything matched,
NULL on a mismatch, and the marker value (pcre_uchar*)1 when the subject ended
before the reference sequence did. */
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
const pcre_uchar *subject = args->uchar_ptr;
const pcre_uchar *subject_end = args->end;
const ucd_record *record;
const pcre_uint32 *set_entry;
pcre_uint32 ref_char, subj_char;

for (; src1 < end1; )
  {
  /* The subject is exhausted while reference characters remain. */
  if (subject >= subject_end)
    return (pcre_uchar*)1;

  GETCHARINC(ref_char, src1);
  GETCHARINC(subj_char, subject);
  record = GET_UCD(subj_char);

  /* Identical, or a simple case pair: go on with the next character. */
  if (ref_char == subj_char || ref_char == subj_char + record->other_case)
    continue;

  /* Otherwise walk the caseless set of the subject character. The scan
  gives up as soon as an entry larger than the reference character is seen. */
  set_entry = PRIV(ucd_caseless_sets) + record->caseset;
  while (ref_char != *set_entry)
    {
    if (ref_char < *set_entry)
      return NULL;
    set_entry++;
    }
  }

return subject;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
5188
5189 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
5190 compare_context *context, jump_list **backtracks)
5191 {
5192 DEFINE_COMPILER;
5193 unsigned int othercasebit = 0;
5194 pcre_uchar *othercasechar = NULL;
5195 #ifdef SUPPORT_UTF
5196 int utflength;
5197 #endif
5198
5199 if (caseless && char_has_othercase(common, cc))
5200 {
5201 othercasebit = char_get_othercase_bit(common, cc);
5202 SLJIT_ASSERT(othercasebit);
5203 /* Extracting bit difference info. */
5204 #if defined COMPILE_PCRE8
5205 othercasechar = cc + (othercasebit >> 8);
5206 othercasebit &= 0xff;
5207 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5208 /* Note that this code only handles characters in the BMP. If there
5209 ever are characters outside the BMP whose othercase differs in only one
5210 bit from itself (there currently are none), this code will need to be
5211 revised for COMPILE_PCRE32. */
5212 othercasechar = cc + (othercasebit >> 9);
5213 if ((othercasebit & 0x100) != 0)
5214 othercasebit = (othercasebit & 0xff) << 8;
5215 else
5216 othercasebit &= 0xff;
5217 #endif /* COMPILE_PCRE[8|16|32] */
5218 }
5219
5220 if (context->sourcereg == -1)
5221 {
5222 #if defined COMPILE_PCRE8
5223 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5224 if (context->length >= 4)
5225 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5226 else if (context->length >= 2)
5227 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5228 else
5229 #endif
5230 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5231 #elif defined COMPILE_PCRE16
5232 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5233 if (context->length >= 4)
5234 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5235 else
5236 #endif
5237 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5238 #elif defined COMPILE_PCRE32
5239 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5240 #endif /* COMPILE_PCRE[8|16|32] */
5241 context->sourcereg = TMP2;
5242 }
5243
5244 #ifdef SUPPORT_UTF
5245 utflength = 1;
5246 if (common->utf && HAS_EXTRALEN(*cc))
5247 utflength += GET_EXTRALEN(*cc);
5248
5249 do
5250 {
5251 #endif
5252
5253 context->length -= IN_UCHARS(1);
5254 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5255
5256 /* Unaligned read is supported. */
5257 if (othercasebit != 0 && othercasechar == cc)
5258 {
5259 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
5260 context->oc.asuchars[context->ucharptr] = othercasebit;
5261 }
5262 else
5263 {
5264 context->c.asuchars[context->ucharptr] = *cc;
5265 context->oc.asuchars[context->ucharptr] = 0;
5266 }
5267 context->ucharptr++;
5268
5269 #if defined COMPILE_PCRE8
5270 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
5271 #else
5272 if (context->ucharptr >= 2 || context->length == 0)
5273 #endif
5274 {
5275 if (context->length >= 4)
5276 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5277 else if (context->length >= 2)
5278 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5279 #if defined COMPILE_PCRE8
5280 else if (context->length >= 1)
5281 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5282 #endif /* COMPILE_PCRE8 */
5283 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5284
5285 switch(context->ucharptr)
5286 {
5287 case 4 / sizeof(pcre_uchar):
5288 if (context->oc.asint != 0)
5289 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
5290 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
5291 break;
5292
5293 case 2 / sizeof(pcre_uchar):
5294 if (context->oc.asushort != 0)
5295 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
5296 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
5297 break;
5298
5299 #ifdef COMPILE_PCRE8
5300 case 1:
5301 if (context->oc.asbyte != 0)
5302 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
5303 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
5304 break;
5305 #endif
5306
5307 default:
5308 SLJIT_ASSERT_STOP();
5309 break;
5310 }
5311 context->ucharptr = 0;
5312 }
5313
5314 #else
5315
5316 /* Unaligned read is unsupported or in 32 bit mode. */
5317 if (context->length >= 1)
5318 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5319
5320 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5321
5322 if (othercasebit != 0 && othercasechar == cc)
5323 {
5324 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
5325 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
5326 }
5327 else
5328 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
5329
5330 #endif
5331
5332 cc++;
5333 #ifdef SUPPORT_UTF
5334 utflength--;
5335 }
5336 while (utflength > 0);
5337 #endif
5338
5339 return cc;
5340 }
5341
5342 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5343
/* Rebases the value held in typereg so that it is relative to (value)
instead of the current typeoffset, emitting one ADD or SUB only when the
offset actually changes, and then records the new offset. Expects the
locals typereg and typeoffset to be in scope. (value) is evaluated more
than once, so it must be free of side effects. Wrapped in do/while(0) so
that the expansion is a single statement and is safe under an unbraced
if/else. */
#define SET_TYPE_OFFSET(value) \
  do \
  { \
    if ((value) != typeoffset) \
    { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    } \
    typeoffset = (value); \
  } \
  while (0)

/* Same as SET_TYPE_OFFSET, but for the character value held in TMP1 and
the local charoffset. (value) is evaluated more than once. */
#define SET_CHAR_OFFSET(value) \
  do \
  { \
    if ((value) != charoffset) \
    { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
    } \
    charoffset = (value); \
  } \
  while (0)
5363
5364 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5365
5366 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5367 {
5368 DEFINE_COMPILER;
5369 jump_list *found = NULL;
5370 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5371 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5372 struct sljit_jump *jump = NULL;
5373 pcre_uchar *ccbegin;
5374 int compares, invertcmp, numberofcmps;
5375 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5376 BOOL utf = common->utf;
5377 #endif
5378
5379 #ifdef SUPPORT_UCP
5380 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5381 BOOL charsaved = FALSE;
5382 int typereg = TMP1;
5383 const sljit_ui *other_cases;
5384 sljit_uw typeoffset;
5385 #endif
5386
5387 /* Scanning the necessary info. */
5388 cc++;
5389 ccbegin = cc;
5390 compares = 0;
5391 if (cc[-1] & XCL_MAP)
5392 {
5393 min = 0;
5394 cc += 32 / sizeof(pcre_uchar);
5395 }
5396
5397 while (*cc != XCL_END)
5398 {
5399 compares++;
5400 if (*cc == XCL_SINGLE)
5401 {
5402 cc ++;
5403 GETCHARINCTEST(c, cc);
5404 if (c > max) max = c;
5405 if (c < min) min = c;
5406 #ifdef SUPPORT_UCP
5407 needschar = TRUE;
5408 #endif
5409 }
5410 else if (*cc == XCL_RANGE)
5411 {
5412 cc ++;
5413 GETCHARINCTEST(c, cc);
5414 if (c < min) min = c;
5415 GETCHARINCTEST(c, cc);
5416 if (c > max) max = c;
5417 #ifdef SUPPORT_UCP
5418 needschar = TRUE;
5419 #endif
5420 }
5421 #ifdef SUPPORT_UCP
5422 else
5423 {
5424 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5425 cc++;
5426 if (*cc == PT_CLIST)
5427 {
5428 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5429 while (*other_cases != NOTACHAR)
5430 {
5431 if (*other_cases > max) max = *other_cases;
5432 if (*other_cases < min) min = *other_cases;
5433 other_cases++;
5434 }
5435 }
5436 else
5437 {
5438 max = READ_CHAR_MAX;
5439 min = 0;
5440 }
5441
5442 switch(*cc)
5443 {
5444 case PT_ANY:
5445 /* Any either accepts everything or ignored. */
5446 if (cc[-1] == XCL_PROP)
5447 {
5448 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5449 if (list == backtracks)
5450 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5451 return;
5452 }
5453 break;
5454
5455 case PT_LAMP:
5456 case PT_GC:
5457 case PT_PC:
5458 case PT_ALNUM:
5459 needstype = TRUE;
5460 break;
5461
5462 case PT_SC:
5463 needsscript = TRUE;
5464 break;
5465
5466 case PT_SPACE:
5467 case PT_PXSPACE:
5468 case PT_WORD:
5469 case PT_PXGRAPH:
5470 case PT_PXPRINT:
5471 case PT_PXPUNCT:
5472 needstype = TRUE;
5473 needschar = TRUE;
5474 break;
5475
5476 case PT_CLIST:
5477 case PT_UCNC:
5478 needschar = TRUE;
5479 break;
5480
5481 default:
5482 SLJIT_ASSERT_STOP();
5483 break;
5484 }
5485 cc += 2;
5486 }
5487 #endif
5488 }
5489 SLJIT_ASSERT(compares > 0);
5490
5491 /* We are not necessary in utf mode even in 8 bit mode. */
5492 cc = ccbegin;
5493 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5494
5495 if ((cc[-1] & XCL_HASPROP) == 0)
5496 {
5497 if ((cc[-1] & XCL_MAP) != 0)
5498 {
5499 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5500 if (!check_class_ranges(common, (const sljit_ub *)cc, (((const sljit_ub *)cc)[31] & 0x80) != 0, TRUE, &found))
5501 {
5502 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5503 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5504 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5505 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5506 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5507 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5508 }
5509
5510 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5511 JUMPHERE(jump);
5512
5513 cc += 32 / sizeof(pcre_uchar);
5514 }
5515 else
5516 {
5517 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5518 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5519 }
5520 }
5521 else if ((cc[-1] & XCL_MAP) != 0)
5522 {
5523 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5524 #ifdef SUPPORT_UCP
5525 charsaved = TRUE;
5526 #endif
5527 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
5528 {
5529 #ifdef COMPILE_PCRE8
5530 jump = NULL;
5531 if (common->utf)
5532 #endif
5533 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5534
5535 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5536 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5537 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5538 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5539 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5540 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5541
5542 #ifdef COMPILE_PCRE8
5543 if (common->utf)
5544 #endif
5545 JUMPHERE(jump);
5546 }
5547
5548 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5549 cc += 32 / sizeof(pcre_uchar);
5550 }
5551
5552 #ifdef SUPPORT_UCP
5553 if (needstype || needsscript)
5554 {
5555 if (needschar && !charsaved)
5556 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5557
5558 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5559 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5560 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5561 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5562 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5563 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5564 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5565
5566 /* Before anything else, we deal with scripts. */
5567 if (needsscript)
5568 {
5569 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5570 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5571
5572 ccbegin = cc;
5573
5574 while (*cc != XCL_END)
5575 {
5576 if (*cc == XCL_SINGLE)
5577 {
5578 cc ++;
5579 GETCHARINCTEST(c, cc);
5580 }
5581 else if (*cc == XCL_RANGE)
5582 {
5583 cc ++;
5584 GETCHARINCTEST(c, cc);
5585 GETCHARINCTEST(c, cc);
5586 }
5587 else
5588 {
5589 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5590 cc++;
5591 if (*cc == PT_SC)
5592 {
5593 compares--;
5594 invertcmp = (compares == 0 && list != backtracks);
5595 if (cc[-1] == XCL_NOTPROP)
5596 invertcmp ^= 0x1;
5597 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5598 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5599 }
5600 cc += 2;
5601 }
5602 }
5603
5604 cc = ccbegin;
5605 }
5606
5607 if (needschar)
5608 {
5609 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5610 }
5611
5612 if (needstype)
5613 {
5614 if (!needschar)
5615 {
5616 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5617 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5618 }
5619 else
5620 {
5621 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5622 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5623 typereg = RETURN_ADDR;
5624 }
5625 }
5626 }
5627 #endif
5628
5629 /* Generating code. */
5630 charoffset = 0;
5631 numberofcmps = 0;
5632 #ifdef SUPPORT_UCP
5633 typeoffset = 0;
5634 #endif
5635
5636 while (*cc != XCL_END)
5637 {
5638 compares--;
5639 invertcmp = (compares == 0 && list != backtracks);
5640 jump = NULL;
5641
5642 if (*cc == XCL_SINGLE)
5643 {
5644 cc ++;
5645 GETCHARINCTEST(c, cc);
5646
5647 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5648 {
5649 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5650 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_EQUAL);
5651 numberofcmps++;
5652 }
5653 else if (numberofcmps > 0)
5654 {
5655 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5656 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5657 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5658 numberofcmps = 0;
5659 }
5660 else
5661 {
5662 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5663 numberofcmps = 0;
5664 }
5665 }
5666 else if (*cc == XCL_RANGE)
5667 {
5668 cc ++;
5669 GETCHARINCTEST(c, cc);
5670 SET_CHAR_OFFSET(c);
5671 GETCHARINCTEST(c, cc);
5672
5673 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5674 {
5675 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5676 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5677 numberofcmps++;
5678 }
5679 else if (numberofcmps > 0)
5680 {
5681 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5682 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5683 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5684 numberofcmps = 0;
5685 }
5686 else
5687 {
5688 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5689 numberofcmps = 0;
5690 }
5691 }
5692 #ifdef SUPPORT_UCP
5693 else
5694 {
5695 if (*cc == XCL_NOTPROP)
5696 invertcmp ^= 0x1;
5697 cc++;
5698 switch(*cc)
5699 {
5700 case PT_ANY:
5701 if (!invertcmp)
5702 jump = JUMP(SLJIT_JUMP);
5703 break;
5704
5705 case PT_LAMP:
5706 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5707 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5708 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5709 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5710 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5711 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5712 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5713 break;
5714
5715 case PT_GC:
5716 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5717 SET_TYPE_OFFSET(c);
5718 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5719 break;
5720
5721 case PT_PC:
5722 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5723 break;
5724
5725 case PT_SC:
5726 compares++;
5727 /* Do nothing. */
5728 break;
5729
5730 case PT_SPACE:
5731 case PT_PXSPACE:
5732 SET_CHAR_OFFSET(9);
5733 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
5734 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5735
5736 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
5737 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5738
5739 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
5740 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5741
5742 SET_TYPE_OFFSET(ucp_Zl);
5743 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
5744 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5745 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5746 break;
5747
5748 case PT_WORD:
5749 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
5750 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5751 /* Fall through. */
5752
5753 case PT_ALNUM:
5754 SET_TYPE_OFFSET(ucp_Ll);
5755 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
5756 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_LESS_EQUAL);
5757 SET_TYPE_OFFSET(ucp_Nd);
5758 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
5759 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5760 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5761 break;
5762
5763 case PT_CLIST:
5764 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5765
5766 /* At least three characters are required.
5767 Otherwise this case would be handled by the normal code path. */
5768 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
5769 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
5770
5771 /* Optimizing character pairs, if their difference is power of 2. */
5772 if (is_powerof2(other_cases[1] ^ other_cases[0]))
5773 {
5774 if (charoffset == 0)
5775 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5776 else
5777 {
5778 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5779 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5780 }
5781 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
5782 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5783 other_cases += 2;
5784 }
5785 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
5786 {
5787 if (charoffset == 0)
5788 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
5789 else
5790 {
5791 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
5792 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
5793 }
5794 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
5795 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5796
5797 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
5798 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5799
5800 other_cases += 3;
5801 }
5802 else
5803 {
5804 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5805 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5806 }
5807
5808 while (*other_cases != NOTACHAR)
5809 {
5810 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
5811 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5812 }
5813 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5814 break;
5815
5816 case PT_UCNC:
5817 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
5818 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
5819 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
5820 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5821 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
5822 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5823
5824 SET_CHAR_OFFSET(0xa0);
5825 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
5826 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5827 SET_CHAR_OFFSET(0);
5828 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
5829 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_GREATER_EQUAL);
5830 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5831 break;
5832
5833 case PT_PXGRAPH:
5834 /* C and Z groups are the farthest two groups. */
5835 SET_TYPE_OFFSET(ucp_Ll);
5836 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5837 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5838
5839 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5840
5841 /* In case of ucp_Cf, we overwrite the result. */
5842 SET_CHAR_OFFSET(0x2066);
5843 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5844 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5845
5846 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5847 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5848
5849 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
5850 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5851
5852 JUMPHERE(jump);
5853 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5854 break;
5855
5856 case PT_PXPRINT:
5857 /* C and Z groups are the farthest two groups. */
5858 SET_TYPE_OFFSET(ucp_Ll);
5859 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
5860 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_GREATER);
5861
5862 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
5863 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5864
5865 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
5866
5867 /* In case of ucp_Cf, we overwrite the result. */
5868 SET_CHAR_OFFSET(0x2066);
5869 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
5870 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5871
5872 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
5873 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_EQUAL);
5874
5875 JUMPHERE(jump);
5876 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
5877 break;
5878
5879 case PT_PXPUNCT:
5880 SET_TYPE_OFFSET(ucp_Sc);
5881 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
5882 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS_EQUAL);
5883
5884 SET_CHAR_OFFSET(0);
5885 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
5886 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5887
5888 SET_TYPE_OFFSET(ucp_Pc);
5889 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
5890 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_LESS_EQUAL);
5891 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5892 break;
5893
5894 default:
5895 SLJIT_ASSERT_STOP();
5896 break;
5897 }
5898 cc += 2;
5899 }
5900 #endif
5901
5902 if (jump != NULL)
5903 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5904 }
5905
5906 if (found != NULL)
5907 set_jumps(found, LABEL());
5908 }
5909
5910 #undef SET_TYPE_OFFSET
5911 #undef SET_CHAR_OFFSET
5912
5913 #endif
5914
5915 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
5916 {
5917 DEFINE_COMPILER;
5918 int length;
5919 struct sljit_jump *jump[4];
5920 #ifdef SUPPORT_UTF
5921 struct sljit_label *label;
5922 #endif /* SUPPORT_UTF */
5923
5924 switch(type)
5925 {
5926 case OP_SOD:
5927 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5928 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5929 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5930 return cc;
5931
5932 case OP_SOM:
5933 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5934 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
5935 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
5936 return cc;
5937
5938 case OP_NOT_WORD_BOUNDARY:
5939 case OP_WORD_BOUNDARY:
5940 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
5941 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
5942 return cc;
5943
5944 case OP_EODN:
5945 /* Requires rather complex checks. */
5946 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5947 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5948 {
5949 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5950 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5951 if (common->mode == JIT_COMPILE)
5952 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5953 else
5954 {
5955 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
5956 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5957 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_LESS);
5958 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5959 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_NOT_EQUAL);
5960 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
5961 check_partial(common, TRUE);
5962 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5963 JUMPHERE(jump[1]);
5964 }
5965 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5966 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5967 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5968 }
5969 else if (common->nltype == NLTYPE_FIXED)
5970 {
5971 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5972 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5973 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
5974 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5975 }
5976 else
5977 {
5978 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5979 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5980 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5981 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5982 jump[2] = JUMP(SLJIT_GREATER);
5983 add_jump(compiler, backtracks, JUMP(SLJIT_LESS));
5984 /* Equal. */
5985 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5986 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5987 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5988
5989 JUMPHERE(jump[1]);
5990 if (common->nltype == NLTYPE_ANYCRLF)
5991 {
5992 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5993 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
5994 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5995 }
5996 else
5997 {
5998 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
5999 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6000 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
6001 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
6002 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6003 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
6004 }
6005 JUMPHERE(jump[2]);
6006 JUMPHERE(jump[3]);
6007 }
6008 JUMPHERE(jump[0]);
6009 check_partial(common, FALSE);
6010 return cc;
6011
6012 case OP_EOD:
6013 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6014 check_partial(common, FALSE);
6015 return cc;
6016
6017 case OP_DOLL:
6018 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6019 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6020 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6021
6022 if (!common->endonly)
6023 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
6024 else
6025 {
6026 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6027 check_partial(common, FALSE);
6028 }
6029 return cc;
6030
6031 case OP_DOLLM:
6032 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6033 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6034 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6035 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6036 check_partial(common, FALSE);
6037 jump[0] = JUMP(SLJIT_JUMP);
6038 JUMPHERE(jump[1]);
6039
6040 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6041 {
6042 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6043 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6044 if (common->mode == JIT_COMPILE)
6045 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
6046 else
6047 {
6048 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
6049 /* STR_PTR = STR_END - IN_UCHARS(1) */
6050 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6051 check_partial(common, TRUE);
6052 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6053 JUMPHERE(jump[1]);
6054 }
6055
6056 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6057 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6058 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6059 }
6060 else
6061 {
6062 peek_char(common, common->nlmax);
6063 check_newlinechar(common, common->nltype, backtracks, FALSE);
6064 }
6065 JUMPHERE(jump[0]);
6066 return cc;
6067
6068 case OP_CIRC:
6069 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6070 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6071 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
6072 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6073 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6074 return cc;
6075
6076 case OP_CIRCM:
6077 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6078 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6079 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
6080 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6081 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6082 jump[0] = JUMP(SLJIT_JUMP);
6083 JUMPHERE(jump[1]);
6084
6085 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6086 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6087 {
6088 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6089 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
6090 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6091 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6092 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6093 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6094 }
6095 else
6096 {
6097 skip_char_back(common);
6098 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6099 check_newlinechar(common, common->nltype, backtracks, FALSE);
6100 }
6101 JUMPHERE(jump[0]);
6102 return cc;
6103
6104 case OP_REVERSE:
6105 length = GET(cc, 0);
6106 if (length == 0)
6107 return cc + LINK_SIZE;
6108 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6109 #ifdef SUPPORT_UTF
6110 if (common->utf)
6111 {
6112 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6113 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
6114 label = LABEL();
6115 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
6116 skip_char_back(common);
6117 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6118 JUMPTO(SLJIT_NOT_ZERO, label);
6119 }
6120 else
6121 #endif
6122 {
6123 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6124 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6125 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
6126 }
6127 check_start_used_ptr(common);
6128 return cc + LINK_SIZE;
6129 }
6130 SLJIT_ASSERT_STOP();
6131 return cc;
6132 }
6133
/* Emits the matching-path code for one character-type opcode.

   type          the opcode to compile; the switch below handles the ctype
                 opcodes, OP_ANY/OP_ALLANY/OP_ANYBYTE, OP_PROP/OP_NOTPROP,
                 OP_ANYNL, the horizontal/vertical space opcodes, OP_EXTUNI,
                 OP_CHAR(I), OP_NOT(I), OP_CLASS/OP_NCLASS and OP_XCLASS.
   cc            points at the operand(s) that follow the opcode.
   backtracks    jump list that collects every jump emitted for the
                 "does not match" outcome.
   check_str_ptr when TRUE, detect_partial_match() is called before the
                 character is read. The OP_CHAR/OP_CHARI fast path ignores it
                 and emits its own STR_PTR > STR_END comparison instead.

   Returns the address just past the operands consumed by this opcode (cc
   itself for operand-less opcodes). An opcode that is not handled reaches
   SLJIT_ASSERT_STOP() and returns cc unchanged. */
6134 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
6135 {
6136 DEFINE_COMPILER;
6137 int length;
6138 unsigned int c, oc, bit;
6139 compare_context context;
6140 struct sljit_jump *jump[3];
6141 jump_list *end_list;
6142 #ifdef SUPPORT_UTF
6143 struct sljit_label *label;
6144 #ifdef SUPPORT_UCP
/* Scratch buffer for the synthetic extended class built by OP_PROP/OP_NOTPROP. */
6145 pcre_uchar propdata[5];
6146 #endif
6147 #endif /* SUPPORT_UTF */
6148
6149 switch(type)
6150 {
/* The three ctype cases below share one shape: a read_char7_type() or
   read_char8_type() call, then TMP1 is tested against a ctype_* mask. The
   positive opcode backtracks when the masked value is zero, the negated
   opcode when it is non-zero. */
6151 case OP_NOT_DIGIT:
6152 case OP_DIGIT:
6153 /* Digits are usually 0-9, so it is worth to optimize them. */
6154 if (check_str_ptr)
6155 detect_partial_match(common, backtracks);
6156 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6157 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
6158 read_char7_type(common, type == OP_NOT_DIGIT);
6159 else
6160 #endif
6161 read_char8_type(common, type == OP_NOT_DIGIT);
6162 /* Flip the starting bit in the negative case. */
6163 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
6164 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6165 return cc;
6166
6167 case OP_NOT_WHITESPACE:
6168 case OP_WHITESPACE:
6169 if (check_str_ptr)
6170 detect_partial_match(common, backtracks);
6171 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6172 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
6173 read_char7_type(common, type == OP_NOT_WHITESPACE);
6174 else
6175 #endif
6176 read_char8_type(common, type == OP_NOT_WHITESPACE);
6177 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
6178 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6179 return cc;
6180
6181 case OP_NOT_WORDCHAR:
6182 case OP_WORDCHAR:
6183 if (check_str_ptr)
6184 detect_partial_match(common, backtracks);
6185 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6186 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
6187 read_char7_type(common, type == OP_NOT_WORDCHAR);
6188 else
6189 #endif
6190 read_char8_type(common, type == OP_NOT_WORDCHAR);
6191 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
6192 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6193 return cc;
6194
6195 case OP_ANY:
6196 if (check_str_ptr)
6197 detect_partial_match(common, backtracks);
6198 read_char_range(common, common->nlmin, common->nlmax, TRUE);
/* Fixed newline whose value needs more than 8 bits (common->newline > 255):
   TMP1 is first compared with the high byte. Only when that matches is the
   next unit loaded and compared with the low byte, and only a match of both
   backtracks. When STR_PTR has reached STR_END the second comparison is
   skipped (via end_list). */
6199 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6200 {
6201 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6202 end_list = NULL;
6203 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6204 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6205 else
6206 check_str_end(common, &end_list);
6207
6208 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6209 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
6210 set_jumps(end_list, LABEL());
6211 JUMPHERE(jump[0]);
6212 }
6213 else
6214 check_newlinechar(common, common->nltype, backtracks, TRUE);
6215 return cc;
6216
6217 case OP_ALLANY:
6218 if (check_str_ptr)
6219 detect_partial_match(common, backtracks);
6220 #ifdef SUPPORT_UTF
6221 if (common->utf)
6222 {
/* Load the first code unit and step over it; the code below then adds the
   size of any trailing units without decoding the character. */
6223 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6224 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6225 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6226 #if defined COMPILE_PCRE8
/* Units below 0xc0 need no further skip. Otherwise the byte found at
   PRIV(utf8_table4)[TMP1 - 0xc0] is added to STR_PTR. */
6227 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6228 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6229 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6230 #elif defined COMPILE_PCRE16
/* Units below 0xd800 need no further skip. Otherwise the result of the
   test (TMP1 & 0xfc00) == 0xd800 is stored in TMP1, shifted left by one
   and added to STR_PTR. */
6231 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
6232 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
6233 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
6234 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_EQUAL);
6235 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6236 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6237 #endif
6238 JUMPHERE(jump[0]);
6239 #endif /* COMPILE_PCRE[8|16] */
6240 return cc;
6241 }
6242 #endif
6243 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6244 return cc;
6245
6246 case OP_ANYBYTE:
6247 if (check_str_ptr)
6248 detect_partial_match(common, backtracks);
/* STR_PTR is advanced by one code unit; the unit itself is never loaded. */
6249 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6250 return cc;
6251
6252 #ifdef SUPPORT_UTF
6253 #ifdef SUPPORT_UCP
6254 case OP_NOTPROP:
6255 case OP_PROP:
/* Wrap the two operand units in a five-unit extended class
   (XCL_HASPROP, XCL_PROP or XCL_NOTPROP, cc[0], cc[1], XCL_END) and let
   compile_xclass_matchingpath() generate the test. */
6256 propdata[0] = XCL_HASPROP;
6257 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
6258 propdata[2] = cc[0];
6259 propdata[3] = cc[1];
6260 propdata[4] = XCL_END;
6261 if (check_str_ptr)
6262 detect_partial_match(common, backtracks);
6263 compile_xclass_matchingpath(common, propdata, backtracks);
6264 return cc + 2;
6265 #endif
6266 #endif
6267
6268 case OP_ANYNL:
6269 if (check_str_ptr)
6270 detect_partial_match(common, backtracks);
6271 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
/* jump[0]: the character is not CR, so it goes through check_newlinechar().
   After a CR: at the end of the subject (end_list) or when the next unit is
   not NL (jump[1]) the CR alone is accepted; otherwise the NL is consumed
   as well and jump[2] skips the generic newline check. */
6272 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6273 /* We don't need to handle soft partial matching case. */
6274 end_list = NULL;
6275 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6276 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6277 else
6278 check_str_end(common, &end_list);
6279 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6280 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6281 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6282 jump[2] = JUMP(SLJIT_JUMP);
6283 JUMPHERE(jump[0]);
6284 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
6285 set_jumps(end_list, LABEL());
6286 JUMPHERE(jump[1]);
6287 JUMPHERE(jump[2]);
6288 return cc;
6289
6290 case OP_NOT_HSPACE:
6291 case OP_HSPACE:
6292 if (check_str_ptr)
6293 detect_partial_match(common, backtracks);
6294 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
/* The test itself is a fast call to the shared routine collected in
   common->hspace; the flag it leaves decides the backtrack: non-zero for
   OP_NOT_HSPACE, zero for OP_HSPACE. */
6295 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
6296 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6297 return cc;
6298
6299 case OP_NOT_VSPACE:
6300 case OP_VSPACE:
6301 if (check_str_ptr)
6302 detect_partial_match(common, backtracks);
6303 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
/* Same scheme as the horizontal-space case, using common->vspace. */
6304 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
6305 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6306 return cc;
6307
6308 #ifdef SUPPORT_UCP
6309 case OP_EXTUNI:
6310 if (check_str_ptr)
6311 detect_partial_match(common, backtracks);
6312 read_char(common);
6313 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6314 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6315 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and then used as a scratch register that
   holds the gbprop byte of the previously read character. */
6316 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6317 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6318
/* Loop: remember STR_PTR in TMP3, read the next character and fetch its
   gbprop into TMP2. The 32-bit entry PRIV(ucp_gbtable)[previous gbprop] is
   tested against the bit (1 << current gbprop); while that bit is set the
   character is kept and the loop repeats. Otherwise STR_PTR is restored
   from TMP3, which un-reads the last character. */
6319 label = LABEL();
6320 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6321 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6322 read_char(common);
6323 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6324 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6325 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6326
6327 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
6328 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
6329 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
6330 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6331 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6332 JUMPTO(SLJIT_NOT_ZERO, label);
6333
6334 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6335 JUMPHERE(jump[0]);
/* Give STACK_TOP back its saved value. */
6336 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6337
6338 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
6339 {
6340 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6341 /* Since we successfully read a char above, partial matching must occur. */
6342 check_partial(common, TRUE);
6343 JUMPHERE(jump[0]);
6344 }
6345 return cc;
6346 #endif
6347
6348 case OP_CHAR:
6349 case OP_CHARI:
6350 length = 1;
6351 #ifdef SUPPORT_UTF
6352 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
6353 #endif
/* Fast path, JIT_COMPILE mode only: a caseful character, a caseless one
   without another case, or one for which char_get_othercase_bit() is
   non-zero is compared by byte_sequence_compare() after a single
   STR_PTR > STR_END check. check_str_ptr is not consulted here. */
6354 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
6355 {
6356 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6357 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6358
6359 context.length = IN_UCHARS(length);
6360 context.sourcereg = -1;
6361 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6362 context.ucharptr = 0;
6363 #endif
6364 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
6365 }
6366
/* General path: fetch the pattern character into c, read one subject
   character into TMP1 and compare the two values. */
6367 if (check_str_ptr)
6368 detect_partial_match(common, backtracks);
6369 #ifdef SUPPORT_UTF
6370 if (common->utf)
6371 {
6372 GETCHAR(c, cc);
6373 }
6374 else
6375 #endif
6376 c = *cc;
6377
6378 if (type == OP_CHAR || !char_has_othercase(common, cc))
6379 {
6380 read_char_range(common, c, c, FALSE);
6381 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6382 return cc + length;
6383 }
6384 oc = char_othercase(common, c);
6385 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
6386 bit = c ^ oc;
/* When is_powerof2() accepts c ^ oc, OR-ing that value into TMP1 lets a
   single comparison against c | bit cover both cases. */
6387 if (is_powerof2(bit))
6388 {
6389 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6390 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6391 return cc + length;
6392 }
/* Otherwise two comparisons: equal to c skips ahead, anything that is
   not oc either backtracks. */
6393 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
6394 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6395 JUMPHERE(jump[0]);
6396 return cc + length;
6397
6398 case OP_NOT:
6399 case OP_NOTI:
6400 if (check_str_ptr)
6401 detect_partial_match(common, backtracks);
6402 length = 1;
6403 #ifdef SUPPORT_UTF
6404 if (common->utf)
6405 {
6406 #ifdef COMPILE_PCRE8
6407 c = *cc;
/* Pattern character below 128: only the first subject byte has to be
   compared, after which the whole subject character is skipped. */
6408 if (c < 128)
6409 {
6410 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6411 if (type == OP_NOT || !char_has_othercase(common, cc))
6412 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6413 else
6414 {
6415 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
6416 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
6417 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
6418 }
6419 /* Skip the variable-length character. */
6420 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6421 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6422 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6423 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6424 JUMPHERE(jump[0]);
6425 return cc + 1;
6426 }
6427 else
6428 #endif /* COMPILE_PCRE8 */
6429 {
6430 GETCHARLEN(c, cc, length);
6431 }
6432 }
6433 else
6434 #endif /* SUPPORT_UTF */
6435 c = *cc;
6436
/* Mirror image of the OP_CHAR general path: here a subject character that
   equals c (or its other case) is what backtracks. */
6437 if (type == OP_NOT || !char_has_othercase(common, cc))
6438 {
6439 read_char_range(common, c, c, TRUE);
6440 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6441 }
6442 else
6443 {
6444 oc = char_othercase(common, c);
6445 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
6446 bit = c ^ oc;
6447 if (is_powerof2(bit))
6448 {
6449 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6450 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6451 }
6452 else
6453 {
6454 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6455 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6456 }
6457 }
6458 return cc + length;
6459
6460 case OP_CLASS:
6461 case OP_NCLASS:
6462 if (check_str_ptr)
6463 detect_partial_match(common, backtracks);
6464
6465 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
/* bit is reused as the upper bound handed to read_char_range(): 127 in
   UTF mode when is_char7_bitset() accepts the class bitmap, 255 otherwise. */
6466 bit = (common->utf && is_char7_bitset((const sljit_ub *)cc, type == OP_NCLASS)) ? 127 : 255;
6467 read_char_range(common, 0, bit, type == OP_NCLASS);
6468 #else
6469 read_char_range(common, 0, 255, type == OP_NCLASS);
6470 #endif
6471
/* check_class_ranges() returning TRUE means no bitmap lookup is emitted
   here. */
6472 if (check_class_ranges(common, (const sljit_ub *)cc, type == OP_NCLASS, FALSE, backtracks))
6473 return cc + 32 / sizeof(pcre_uchar);
6474
/* Characters above the bound are not looked up in the bitmap: OP_CLASS
   sends them to backtracks, while for OP_NCLASS jump[0] lands after the
   bitmap test below. */
6475 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6476 jump[0] = NULL;
6477 if (common->utf)
6478 {
6479 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
6480 if (type == OP_CLASS)
6481 {
6482 add_jump(compiler, backtracks, jump[0]);
6483 jump[0] = NULL;
6484 }
6485 }
6486 #elif !defined COMPILE_PCRE8
6487 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6488 if (type == OP_CLASS)
6489 {
6490 add_jump(compiler, backtracks, jump[0]);
6491 jump[0] = NULL;
6492 }
6493 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
6494
/* Bitmap test: load the byte at cc[TMP1 >> 3] and backtrack when its bit
   number (TMP1 & 7) is clear. */
6495 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6496 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6497 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6498 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6499 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6500 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6501
6502 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6503 if (jump[0] != NULL)
6504 JUMPHERE(jump[0]);
6505 #endif
6506 return cc + 32 / sizeof(pcre_uchar);
6507
6508 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6509 case OP_XCLASS:
6510 if (check_str_ptr)
6511 detect_partial_match(common, backtracks);
/* The class data starts LINK_SIZE units after cc; GET(cc, 0) is used as
   the length when computing the return address. */
6512 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
6513 return cc + GET(cc, 0) - 1;
6514 #endif
6515 }
/* No case matched: the opcode is not one this function handles. */
6516 SLJIT_ASSERT_STOP();
6517 return cc;
6518 }
6519
/* Compiles the character opcode at cc, merging it with the OP_CHAR and
   eligible OP_CHARI opcodes that directly follow it (never scanning to or
   past ccend) so that the whole run needs only one length check.

   common      compiler state.
   cc          first opcode to compile.
   ccend       upper bound of the scan.
   backtracks  jump list that collects the mismatch jumps.

   Returns the address after the last opcode that was compiled. When the
   first opcode cannot start a fixed-length run, the work is handed to
   compile_char1_matchingpath(). */
6520 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6521 {
6522 /* This function consumes at least one input character. */
6523 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6524 DEFINE_COMPILER;
6525 pcre_uchar *ccbegin = cc;
6526 compare_context context;
6527 int size;
6528
/* First pass: only measure. size is the number of code units of the
   current pattern character, or 0 when the opcode cannot join the run;
   context.length accumulates the total in IN_UCHARS() units. */
6529 context.length = 0;
6530 do
6531 {
6532 if (cc >= ccend)
6533 break;
6534
6535 if (*cc == OP_CHAR)
6536 {
6537 size = 1;
6538 #ifdef SUPPORT_UTF
6539 if (common->utf && HAS_EXTRALEN(cc[1]))
6540 size += GET_EXTRALEN(cc[1]);
6541 #endif
6542 }
6543 else if (*cc == OP_CHARI)
6544 {
6545 size = 1;
/* A caseless character that has another case and for which
   char_get_othercase_bit() returns 0 ends the run (size = 0). */
6546 #ifdef SUPPORT_UTF
6547 if (common->utf)
6548 {
6549 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6550 size = 0;
6551 else if (HAS_EXTRALEN(cc[1]))
6552 size += GET_EXTRALEN(cc[1]);
6553 }
6554 else
6555 #endif
6556 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6557 size = 0;
6558 }
6559 else
6560 size = 0;
6561
6562 cc += 1 + size;
6563 context.length += IN_UCHARS(size);
6564 }
/* Stop at the first opcode that cannot join the run, or once the
   accumulated length exceeds 128. */
6565 while (size > 0 && context.length <= 128);
6566
/* Second pass: rewind and emit code for the measured run. */
6567 cc = ccbegin;
6568 if (context.length > 0)
6569 {
6570 /* We have a fixed-length byte sequence. */
6571 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
6572 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6573
6574 context.sourcereg = -1;
6575 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6576 context.ucharptr = 0;
6577 #endif
/* One byte_sequence_compare() call per pattern character; the loop relies
   on that call lowering context.length until it reaches 0. */
6578 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
6579 return cc;
6580 }
6581
6582 /* A non-fixed length character will be checked if length == 0. */
6583 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
6584 }
6585
6586 /* Forward declarations. */
6587 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
6588 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
6589
/* Allocates a zero-filled backtrack record of `size` bytes with
   sljit_alloc_memory() and pushes it onto the parent's list: the record's
   prev is set to parent->top, its cc to `ccstart`, and parent->top then
   points at the new record. If the compiler reports an error after the
   allocation, the enclosing function returns `error` immediately.
   The expansion uses the identifiers `compiler`, `backtrack` and `parent`
   from the enclosing function. */
6590 #define PUSH_BACKTRACK(size, ccstart, error) \
6591 do \
6592 { \
6593 backtrack = sljit_alloc_memory(compiler, (size)); \
6594 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6595 return error; \
6596 memset(backtrack, 0, size); \
6597 backtrack->prev = parent->top; \
6598 backtrack->cc = (ccstart); \
6599 parent->top = backtrack; \
6600 } \
6601 while (0)
6602
/* Variant of PUSH_BACKTRACK for functions that return void: allocates a
   zero-filled backtrack record of `size` bytes, links it in front of
   parent->top and stores `ccstart` in its cc field. If the compiler reports
   an error after the allocation, the enclosing function returns without a
   value. Uses `compiler`, `backtrack` and `parent` from the enclosing
   function. */
6603 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
6604 do \
6605 { \
6606 backtrack = sljit_alloc_memory(compiler, (size)); \
6607 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6608 return; \
6609 memset(backtrack, 0, size); \
6610 backtrack->prev = parent->top; \
6611 backtrack->cc = (ccstart); \
6612 parent->top = backtrack; \
6613 } \
6614 while (0)
6615
/* Views the enclosing function's `backtrack` pointer as a pointer to the
   given record type. */
6616 #define BACKTRACK_AS(type) ((type *)backtrack)
6617
/* Emits the code that picks one capture group for an OP_DNREF/OP_DNREFI
   back reference.

   common      compiler state; supplies name_table, name_entry_size and
               jscript_compat.
   cc          points at the OP_DNREF/OP_DNREFI opcode (asserted below).
   backtracks  jump list for the "no candidate is set" outcome; may be NULL,
               in which case no such jump is emitted.

   For each candidate GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset)) is emitted
   before its test, so the generated code reaches the end with TMP2 set for
   the candidate that ended the search. */
6618 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
6619 {
6620 /* The OVECTOR offset goes to TMP2. */
6621 DEFINE_COMPILER;
/* count: number of name-table entries to try; slot: the first of them. */
6622 int count = GET2(cc, 1 + IMM2_SIZE);
6623 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
6624 unsigned int offset;
6625 jump_list *found = NULL;
6626
6627 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
6628
/* TMP1 holds OVECTOR(1), the value every candidate is compared against. */
6629 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
6630
/* All entries except the last: leave through `found` as soon as one
   differs from TMP1. */
6631 count--;
6632 while (count-- > 0)
6633 {
6634 offset = GET2(slot, 0) << 1;
6635 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6636 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6637 slot += common->name_entry_size;
6638 }
6639
/* Last entry: it needs no jump to `found`. When a backtrack list was
   supplied and jscript_compat is off, an entry equal to TMP1 backtracks. */
6640 offset = GET2(slot, 0) << 1;
6641 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
6642 if (backtracks != NULL && !common->jscript_compat)
6643 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
6644
6645 set_jumps(found, LABEL());
6646 }
6647
/* Emits the matching path of a single back reference.

   common      compiler state.
   cc          points at the opcode. OP_REF/OP_REFI carry the group number as
               an operand. For any other opcode (the OP_DNREF/OP_DNREFI case)
               TMP2 must already address the selected OVECTOR pair, and the
               start and end values are read through it.
   backtracks  jump list that collects the mismatch jumps.
   withchecks  when TRUE, emit the "group is unset" test (numbered references
               only, and not in jscript_compat mode) and the zero-length
               test.
   emptyfail   decides what the zero-length test does: TRUE sends it to
               backtracks, FALSE makes it land at the end of the emitted
               code.

   Case sensitivity follows the opcode: OP_REFI and OP_DNREFI compare
   caselessly, OP_REF and OP_DNREF casefully. Both duplicate-name opcodes are
   named explicitly in the conditions below; testing only OP_REF/OP_REFI
   would send the caseful OP_DNREF to the caseless routine and keep OP_DNREFI
   out of the UTF-aware comparison. */
6648 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
6649 {
6650 DEFINE_COMPILER;
6651 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6652 int offset = 0;
/* jump: the zero-length test, resolved at the very end of the function. */
6653 struct sljit_jump *jump = NULL;
6654 struct sljit_jump *partial;
6655 struct sljit_jump *nopartial;
6656
/* TMP1 = start of the referenced substring. */
6657 if (ref)
6658 {
6659 offset = GET2(cc, 1) << 1;
6660 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6661 /* OVECTOR(1) contains the "string begin - 1" constant. */
6662 if (withchecks && !common->jscript_compat)
6663 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
6664 }
6665 else
6666 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6667
6668 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* Caseless comparison in UTF mode is delegated to do_utf_caselesscmp().
   The duplicate-name form is accepted too; the non-ref load below exists
   for exactly that case. */
6669 if (common->utf && (*cc == OP_REFI || *cc == OP_DNREFI))
6670 {
6671 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1 && TMP2 == SLJIT_R2);
/* TMP2 = end of the referenced substring. */
6672 if (ref)
6673 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6674 else
6675 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
6676
/* start == end: the reference is empty. */
6677 if (withchecks)
6678 jump = CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0);
6679
6680 /* Needed to save important temporary registers. */
6681 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6682 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
6683 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
6684 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
6685 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Return value handling: 0 and 1 are failure codes, anything else becomes
   the new STR_PTR. In the partial modes the value 1 additionally runs
   check_partial() before backtracking. */
6686 if (common->mode == JIT_COMPILE)
6687 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
6688 else
6689 {
6690 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
6691 nopartial = CMP(SLJIT_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
6692 check_partial(common, FALSE);
6693 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6694 JUMPHERE(nopartial);
6695 }
6696 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
6697 }
6698 else
6699 #endif /* SUPPORT_UTF && SUPPORT_UCP */
6700 {
/* TMP2 = end - start, i.e. the length of the referenced substring; the
   subtraction also sets the zero flag used by the empty test. */
6701 if (ref)
6702 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
6703 else
6704 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
6705
6706 if (withchecks)
6707 jump = JUMP(SLJIT_ZERO);
6708
/* Advance STR_PTR by the whole length first. Running past STR_END is a
   plain mismatch in JIT_COMPILE mode; the partial modes handle it below. */
6709 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6710 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
6711 if (common->mode == JIT_COMPILE)
6712 add_jump(compiler, backtracks, partial);
6713
/* Caseful routine for OP_REF and OP_DNREF, caseless routine otherwise. A
   non-zero TMP2 after the call is treated as a mismatch. */
6714 add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6715 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6716
6717 if (common->mode != JIT_COMPILE)
6718 {
6719 nopartial = JUMP(SLJIT_JUMP);
6720 JUMPHERE(partial);
/* The reference is longer than the remaining subject: compare only the
   part that is available, then report a partial match. */
6721 /* TMP2 -= STR_PTR - STR_END */
6722 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
6723 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
6724 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6725 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
6726 add_jump(compiler, (*cc == OP_REF || *cc == OP_DNREF) ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
6727 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6728 JUMPHERE(partial);
6729 check_partial(common, FALSE);
6730 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6731 JUMPHERE(nopartial);
6732 }
6733 }
6734
/* Resolve the zero-length test according to emptyfail. */
6735 if (jump != NULL)
6736 {
6737 if (emptyfail)
6738 add_jump(compiler, backtracks, jump);
6739 else
6740 JUMPHERE(jump);
6741 }
6742 }
6743
6744 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6745 {
6746 DEFINE_COMPILER;
6747 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
6748 backtrack_common *backtrack;
6749 pcre_uchar type;
6750 int offset = 0;
6751 struct sljit_label *label;
6752 struct sljit_jump *zerolength;
6753 struct sljit_jump *jump = NULL;
6754 pcre_uchar *ccbegin = cc;
6755 int min = 0, max = 0;
6756 BOOL minimize;
6757
6758 PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
6759
6760 if (ref)
6761 offset = GET2(cc, 1) << 1;
6762 else
6763 cc += IMM2_SIZE;
6764 type = cc[1 + IMM2_SIZE];
6765
6766 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
6767 minimize = (type & 0x1) != 0;
6768 switch(type)
6769 {
6770 case OP_CRSTAR:
6771 case OP_CRMINSTAR:
6772 min = 0;
6773 max = 0;
6774 cc += 1 + IMM2_SIZE + 1;
6775 break;
6776 case OP_CRPLUS:
6777 case OP_CRMINPLUS:
6778 min = 1;
6779 max = 0;
6780 cc += 1 + IMM2_SIZE + 1;
6781 break;
6782 case OP_CRQUERY:
6783 case OP_CRMINQUERY:
6784 min = 0;
6785 max = 1;
6786 cc += 1 + IMM2_SIZE + 1;
6787 break;
6788 case OP_CRRANGE:
6789 case OP_CRMINRANGE:
6790 min = GET2(cc, 1 + IMM2_SIZE + 1);
6791 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
6792 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
6793 break;
6794 default:
6795 SLJIT_ASSERT_STOP();
6796 break;
6797 }
6798
6799 if (!minimize)
6800 {
6801 if (min == 0)
6802 {
6803 allocate_stack(common, 2);
6804 if (ref)
6805 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
6806 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6807 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6808 /* Temporary release of STR_PTR. */
6809 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6810 /* Handles both invalid and empty cases. Since the minimum repeat,
6811 is zero the invalid case is basically the same as an empty case. */
6812 if (ref)
6813 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
6814 else
6815 {
6816 compile_dnref_search(common, ccbegin, NULL);
6817 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
6818 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
6819