/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1740 - (show annotations)
Fri Sep 21 07:34:10 2018 UTC (11 months, 4 weeks ago) by zherczeg
File MIME type: text/plain
File size: 364424 byte(s)
Fix subject buffer overread in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
/* Size of the regex stack that is allocated on the real machine stack.
   Fast, but limited in size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
   of the page size. */
#define STACK_GROWTH_RATE 8192

/* Defined only when SLJIT_DEBUG is enabled: lets the code check whether
   an allocation could destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
90 /*
91 Short summary of the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *mark_ptr;
168 void *callout_data;
169 /* Everything else after. */
170 sljit_u32 limit_match;
171 int real_offset_count;
172 int offset_count;
173 sljit_u8 notbol;
174 sljit_u8 noteol;
175 sljit_u8 notempty;
176 sljit_u8 notempty_atstart;
177 } jit_arguments;
178
179 typedef struct executable_functions {
180 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
181 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
182 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
183 PUBL(jit_callback) callback;
184 void *userdata;
185 sljit_u32 top_bracket;
186 sljit_u32 limit_match;
187 } executable_functions;
188
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
193
/* Node of a singly linked list of stubs; each node pairs a start jump
   with a quit label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
199
200 typedef struct label_addr_list {
201 struct sljit_label *label;
202 sljit_uw *update_addr;
203 struct label_addr_list *next;
204 } label_addr_list;
205
/* Negative marker values.
   NOTE(review): presumably stored in the framesize members, which are
   documented as "less than 0 if a frame is not needed" - confirm. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
210
/* Kinds of control entries: a mark, or a then-trap. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
215
216 typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
217
218 /* The following structure is the key data type for the recursive
219 code generator. It is allocated by compile_matchingpath, and contains
220 the arguments for compile_backtrackingpath. Must be the first member
221 of its descendants. */
222 typedef struct backtrack_common {
223 /* Concatenation stack. */
224 struct backtrack_common *prev;
225 jump_list *nextbacktracks;
226 /* Internal stack (for component operators). */
227 struct backtrack_common *top;
228 jump_list *topbacktracks;
229 /* Opcode pointer. */
230 pcre_uchar *cc;
231 } backtrack_common;
232
233 typedef struct assert_backtrack {
234 backtrack_common common;
235 jump_list *condfailed;
236 /* Less than 0 if a frame is not needed. */
237 int framesize;
238 /* Points to our private memory word on the stack. */
239 int private_data_ptr;
240 /* For iterators. */
241 struct sljit_label *matchingpath;
242 } assert_backtrack;
243
244 typedef struct bracket_backtrack {
245 backtrack_common common;
246 /* Where to coninue if an alternative is successfully matched. */
247 struct sljit_label *alternative_matchingpath;
248 /* For rmin and rmax iterators. */
249 struct sljit_label *recursive_matchingpath;
250 /* For greedy ? operator. */
251 struct sljit_label *zero_matchingpath;
252 /* Contains the branches of a failed condition. */
253 union {
254 /* Both for OP_COND, OP_SCOND. */
255 jump_list *condfailed;
256 assert_backtrack *assert;
257 /* For OP_ONCE. Less than 0 if not needed. */
258 int framesize;
259 } u;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 } bracket_backtrack;
263
264 typedef struct bracketpos_backtrack {
265 backtrack_common common;
266 /* Points to our private memory word on the stack. */
267 int private_data_ptr;
268 /* Reverting stack is needed. */
269 int framesize;
270 /* Allocated stack size. */
271 int stacksize;
272 } bracketpos_backtrack;
273
274 typedef struct braminzero_backtrack {
275 backtrack_common common;
276 struct sljit_label *matchingpath;
277 } braminzero_backtrack;
278
279 typedef struct char_iterator_backtrack {
280 backtrack_common common;
281 /* Next iteration. */
282 struct sljit_label *matchingpath;
283 union {
284 jump_list *backtracks;
285 struct {
286 unsigned int othercasebit;
287 pcre_uchar chr;
288 BOOL enabled;
289 } charpos;
290 } u;
291 } char_iterator_backtrack;
292
293 typedef struct ref_iterator_backtrack {
294 backtrack_common common;
295 /* Next iteration. */
296 struct sljit_label *matchingpath;
297 } ref_iterator_backtrack;
298
299 typedef struct recurse_entry {
300 struct recurse_entry *next;
301 /* Contains the function entry. */
302 struct sljit_label *entry;
303 /* Collects the calls until the function is not created. */
304 jump_list *calls;
305 /* Points to the starting opcode. */
306 sljit_sw start;
307 } recurse_entry;
308
309 typedef struct recurse_backtrack {
310 backtrack_common common;
311 BOOL inlined_pattern;
312 } recurse_backtrack;
313
314 #define OP_THEN_TRAP OP_TABLE_LENGTH
315
316 typedef struct then_trap_backtrack {
317 backtrack_common common;
318 /* If then_trap is not NULL, this structure contains the real
319 then_trap for the backtracking path. */
320 struct then_trap_backtrack *then_trap;
321 /* Points to the starting opcode. */
322 sljit_sw start;
323 /* Exit point for the then opcodes of this alternative. */
324 jump_list *quit;
325 /* Frame size of the current alternative. */
326 int framesize;
327 } then_trap_backtrack;
328
329 #define MAX_RANGE_SIZE 4
330
331 typedef struct compiler_common {
332 /* The sljit ceneric compiler. */
333 struct sljit_compiler *compiler;
334 /* First byte code. */
335 pcre_uchar *start;
336 /* Maps private data offset to each opcode. */
337 sljit_s32 *private_data_ptrs;
338 /* Chain list of read-only data ptrs. */
339 void *read_only_data_head;
340 /* Tells whether the capturing bracket is optimized. */
341 sljit_u8 *optimized_cbracket;
342 /* Tells whether the starting offset is a target of then. */
343 sljit_u8 *then_offsets;
344 /* Current position where a THEN must jump. */
345 then_trap_backtrack *then_trap;
346 /* Starting offset of private data for capturing brackets. */
347 sljit_s32 cbra_ptr;
348 /* Output vector starting point. Must be divisible by 2. */
349 sljit_s32 ovector_start;
350 /* Points to the starting character of the current match. */
351 sljit_s32 start_ptr;
352 /* Last known position of the requested byte. */
353 sljit_s32 req_char_ptr;
354 /* Head of the last recursion. */
355 sljit_s32 recursive_head_ptr;
356 /* First inspected character for partial matching.
357 (Needed for avoiding zero length partial matches.) */
358 sljit_s32 start_used_ptr;
359 /* Starting pointer for partial soft matches. */
360 sljit_s32 hit_start;
361 /* Pointer of the match end position. */
362 sljit_s32 match_end_ptr;
363 /* Points to the marked string. */
364 sljit_s32 mark_ptr;
365 /* Recursive control verb management chain. */
366 sljit_s32 control_head_ptr;
367 /* Points to the last matched capture block index. */
368 sljit_s32 capture_last_ptr;
369 /* Fast forward skipping byte code pointer. */
370 pcre_uchar *fast_forward_bc_ptr;
371 /* Locals used by fast fail optimization. */
372 sljit_s32 fast_fail_start_ptr;
373 sljit_s32 fast_fail_end_ptr;
374
375 /* Flipped and lower case tables. */
376 const sljit_u8 *fcc;
377 sljit_sw lcc;
378 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
379 int mode;
380 /* TRUE, when minlength is greater than 0. */
381 BOOL might_be_empty;
382 /* \K is found in the pattern. */
383 BOOL has_set_som;
384 /* (*SKIP:arg) is found in the pattern. */
385 BOOL has_skip_arg;
386 /* (*THEN) is found in the pattern. */
387 BOOL has_then;
388 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
389 BOOL has_skip_in_assert_back;
390 /* Currently in recurse or negative assert. */
391 BOOL local_exit;
392 /* Currently in a positive assert. */
393 BOOL positive_assert;
394 /* Newline control. */
395 int nltype;
396 sljit_u32 nlmax;
397 sljit_u32 nlmin;
398 int newline;
399 int bsr_nltype;
400 sljit_u32 bsr_nlmax;
401 sljit_u32 bsr_nlmin;
402 /* Dollar endonly. */
403 int endonly;
404 /* Tables. */
405 sljit_sw ctypes;
406 /* Named capturing brackets. */
407 pcre_uchar *name_table;
408 sljit_sw name_count;
409 sljit_sw name_entry_size;
410
411 /* Labels and jump lists. */
412 struct sljit_label *partialmatchlabel;
413 struct sljit_label *quit_label;
414 struct sljit_label *forced_quit_label;
415 struct sljit_label *accept_label;
416 struct sljit_label *ff_newline_shortcut;
417 stub_list *stubs;
418 label_addr_list *label_addrs;
419 recurse_entry *entries;
420 recurse_entry *currententry;
421 jump_list *partialmatch;
422 jump_list *quit;
423 jump_list *positive_assert_quit;
424 jump_list *forced_quit;
425 jump_list *accept;
426 jump_list *calllimit;
427 jump_list *stackalloc;
428 jump_list *revertframes;
429 jump_list *wordboundary;
430 jump_list *anynewline;
431 jump_list *hspace;
432 jump_list *vspace;
433 jump_list *casefulcmp;
434 jump_list *caselesscmp;
435 jump_list *reset_match;
436 BOOL jscript_compat;
437 #ifdef SUPPORT_UTF
438 BOOL utf;
439 #ifdef SUPPORT_UCP
440 BOOL use_ucp;
441 jump_list *getucd;
442 #endif
443 #ifdef COMPILE_PCRE8
444 jump_list *utfreadchar;
445 jump_list *utfreadchar16;
446 jump_list *utfreadtype8;
447 #endif
448 #endif /* SUPPORT_UTF */
449 } compiler_common;
450
/* Context used by byte_sequence_compare. The c and oc unions (and
   ucharptr) exist only when SLJIT_UNALIGNED is enabled; both unions
   overlay the same storage as an int, a 16 bit value, or an array of
   pcre_uchar sized for the current character width. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if defined COMPILE_PCRE8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_u16 asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
484
/* Drop the sljit definition of CMP; this file defines its own CMP below. */
#undef CMP

/* Byte offset of the i-th machine word; used to access stack elements. */
#define STACK(i) ((i) * (int)sizeof(sljit_sw))

/* Only R2 and R3 are accepted as the preferred shift register. When it is
   R3, SHIFT_REG_IS_R3 is defined so that TMP2 / TMP3 are swapped below. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing to do. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

/* Symbolic names for the sljit registers used by the generated code. */
#define TMP1 SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2 SLJIT_R3
#define TMP3 SLJIT_R2
#else
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#endif
#define STR_PTR SLJIT_S0
#define STR_END SLJIT_S1
#define STACK_TOP SLJIT_R1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4
516
517 /* Local space layout. */
518 /* These two locals can be used by the current opcode. */
519 #define LOCALS0 (0 * sizeof(sljit_sw))
520 #define LOCALS1 (1 * sizeof(sljit_sw))
521 /* Two local variables for possessive quantifiers (char1 cannot use them). */
522 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
523 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
524 /* Max limit of recursions. */
525 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
526 /* The output vector is stored on the stack, and contains pointers
527 to characters. The vector data is divided into two groups: the first
528 group contains the start / end character pointers, and the second is
529 the start pointers when the end of the capturing group has not yet reached. */
530 #define OVECTOR_START (common->ovector_start)
531 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
532 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
533 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
534
535 #if defined COMPILE_PCRE8
536 #define MOV_UCHAR SLJIT_MOV_U8
537 #elif defined COMPILE_PCRE16
538 #define MOV_UCHAR SLJIT_MOV_U16
539 #elif defined COMPILE_PCRE32
540 #define MOV_UCHAR SLJIT_MOV_U32
541 #else
542 #error Unsupported compiling mode
543 #endif
544
/* Shortcuts for the sljit emitter calls. All of them except
   DEFINE_COMPILER and SET_LABEL expect a local variable named 'compiler',
   which DEFINE_COMPILER declares from common->compiler. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emit a jump and bind it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an earlier jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emit a compare-and-jump and bind it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

#define READ_CHAR_MAX 0x7fffffff

#define INVALID_UTF_CHAR 888
574
575 static pcre_uchar *bracketend(pcre_uchar *cc)
576 {
577 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
578 do cc += GET(cc, 1); while (*cc == OP_ALT);
579 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
580 cc += 1 + LINK_SIZE;
581 return cc;
582 }
583
584 static int no_alternatives(pcre_uchar *cc)
585 {
586 int count = 0;
587 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
588 do
589 {
590 cc += GET(cc, 1);
591 count++;
592 }
593 while (*cc == OP_ALT);
594 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
595 return count;
596 }
597
598 /* Functions which might need modification for every newly supported opcode:
599 next_opcode
600 check_opcode_types
601 set_private_data_ptrs
602 get_framesize
603 init_frame
604 get_private_data_copy_length
605 copy_private_data
606 compile_matchingpath
607 compile_backtrackingpath
608 */
609
610 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
611 {
612 SLJIT_UNUSED_ARG(common);
613 switch(*cc)
614 {
615 case OP_SOD:
616 case OP_SOM:
617 case OP_SET_SOM:
618 case OP_NOT_WORD_BOUNDARY:
619 case OP_WORD_BOUNDARY:
620 case OP_NOT_DIGIT:
621 case OP_DIGIT:
622 case OP_NOT_WHITESPACE:
623 case OP_WHITESPACE:
624 case OP_NOT_WORDCHAR:
625 case OP_WORDCHAR:
626 case OP_ANY:
627 case OP_ALLANY:
628 case OP_NOTPROP:
629 case OP_PROP:
630 case OP_ANYNL:
631 case OP_NOT_HSPACE:
632 case OP_HSPACE:
633 case OP_NOT_VSPACE:
634 case OP_VSPACE:
635 case OP_EXTUNI:
636 case OP_EODN:
637 case OP_EOD:
638 case OP_CIRC:
639 case OP_CIRCM:
640 case OP_DOLL:
641 case OP_DOLLM:
642 case OP_CRSTAR:
643 case OP_CRMINSTAR:
644 case OP_CRPLUS:
645 case OP_CRMINPLUS:
646 case OP_CRQUERY:
647 case OP_CRMINQUERY:
648 case OP_CRRANGE:
649 case OP_CRMINRANGE:
650 case OP_CRPOSSTAR:
651 case OP_CRPOSPLUS:
652 case OP_CRPOSQUERY:
653 case OP_CRPOSRANGE:
654 case OP_CLASS:
655 case OP_NCLASS:
656 case OP_REF:
657 case OP_REFI:
658 case OP_DNREF:
659 case OP_DNREFI:
660 case OP_RECURSE:
661 case OP_CALLOUT:
662 case OP_ALT:
663 case OP_KET:
664 case OP_KETRMAX:
665 case OP_KETRMIN:
666 case OP_KETRPOS:
667 case OP_REVERSE:
668 case OP_ASSERT:
669 case OP_ASSERT_NOT:
670 case OP_ASSERTBACK:
671 case OP_ASSERTBACK_NOT:
672 case OP_ONCE:
673 case OP_ONCE_NC:
674 case OP_BRA:
675 case OP_BRAPOS:
676 case OP_CBRA:
677 case OP_CBRAPOS:
678 case OP_COND:
679 case OP_SBRA:
680 case OP_SBRAPOS:
681 case OP_SCBRA:
682 case OP_SCBRAPOS:
683 case OP_SCOND:
684 case OP_CREF:
685 case OP_DNCREF:
686 case OP_RREF:
687 case OP_DNRREF:
688 case OP_DEF:
689 case OP_BRAZERO:
690 case OP_BRAMINZERO:
691 case OP_BRAPOSZERO:
692 case OP_PRUNE:
693 case OP_SKIP:
694 case OP_THEN:
695 case OP_COMMIT:
696 case OP_FAIL:
697 case OP_ACCEPT:
698 case OP_ASSERT_ACCEPT:
699 case OP_CLOSE:
700 case OP_SKIPZERO:
701 return cc + PRIV(OP_lengths)[*cc];
702
703 case OP_CHAR:
704 case OP_CHARI:
705 case OP_NOT:
706 case OP_NOTI:
707 case OP_STAR:
708 case OP_MINSTAR:
709 case OP_PLUS:
710 case OP_MINPLUS:
711 case OP_QUERY:
712 case OP_MINQUERY:
713 case OP_UPTO:
714 case OP_MINUPTO:
715 case OP_EXACT:
716 case OP_POSSTAR:
717 case OP_POSPLUS:
718 case OP_POSQUERY:
719 case OP_POSUPTO:
720 case OP_STARI:
721 case OP_MINSTARI:
722 case OP_PLUSI:
723 case OP_MINPLUSI:
724 case OP_QUERYI:
725 case OP_MINQUERYI:
726 case OP_UPTOI:
727 case OP_MINUPTOI:
728 case OP_EXACTI:
729 case OP_POSSTARI:
730 case OP_POSPLUSI:
731 case OP_POSQUERYI:
732 case OP_POSUPTOI:
733 case OP_NOTSTAR:
734 case OP_NOTMINSTAR:
735 case OP_NOTPLUS:
736 case OP_NOTMINPLUS:
737 case OP_NOTQUERY:
738 case OP_NOTMINQUERY:
739 case OP_NOTUPTO:
740 case OP_NOTMINUPTO:
741 case OP_NOTEXACT:
742 case OP_NOTPOSSTAR:
743 case OP_NOTPOSPLUS:
744 case OP_NOTPOSQUERY:
745 case OP_NOTPOSUPTO:
746 case OP_NOTSTARI:
747 case OP_NOTMINSTARI:
748 case OP_NOTPLUSI:
749 case OP_NOTMINPLUSI:
750 case OP_NOTQUERYI:
751 case OP_NOTMINQUERYI:
752 case OP_NOTUPTOI:
753 case OP_NOTMINUPTOI:
754 case OP_NOTEXACTI:
755 case OP_NOTPOSSTARI:
756 case OP_NOTPOSPLUSI:
757 case OP_NOTPOSQUERYI:
758 case OP_NOTPOSUPTOI:
759 cc += PRIV(OP_lengths)[*cc];
760 #ifdef SUPPORT_UTF
761 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
762 #endif
763 return cc;
764
765 /* Special cases. */
766 case OP_TYPESTAR:
767 case OP_TYPEMINSTAR:
768 case OP_TYPEPLUS:
769 case OP_TYPEMINPLUS:
770 case OP_TYPEQUERY:
771 case OP_TYPEMINQUERY:
772 case OP_TYPEUPTO:
773 case OP_TYPEMINUPTO:
774 case OP_TYPEEXACT:
775 case OP_TYPEPOSSTAR:
776 case OP_TYPEPOSPLUS:
777 case OP_TYPEPOSQUERY:
778 case OP_TYPEPOSUPTO:
779 return cc + PRIV(OP_lengths)[*cc] - 1;
780
781 case OP_ANYBYTE:
782 #ifdef SUPPORT_UTF
783 if (common->utf) return NULL;
784 #endif
785 return cc + 1;
786
787 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
788 case OP_XCLASS:
789 return cc + GET(cc, 1);
790 #endif
791
792 case OP_MARK:
793 case OP_PRUNE_ARG:
794 case OP_SKIP_ARG:
795 case OP_THEN_ARG:
796 return cc + 1 + 2 + cc[1];
797
798 default:
799 /* All opcodes are supported now! */
800 SLJIT_UNREACHABLE();
801 return NULL;
802 }
803 }
804
805 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
806 {
807 int count;
808 pcre_uchar *slot;
809 pcre_uchar *assert_back_end = cc - 1;
810
811 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
812 while (cc < ccend)
813 {
814 switch(*cc)
815 {
816 case OP_SET_SOM:
817 common->has_set_som = TRUE;
818 common->might_be_empty = TRUE;
819 cc += 1;
820 break;
821
822 case OP_REF:
823 case OP_REFI:
824 common->optimized_cbracket[GET2(cc, 1)] = 0;
825 cc += 1 + IMM2_SIZE;
826 break;
827
828 case OP_CBRAPOS:
829 case OP_SCBRAPOS:
830 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
831 cc += 1 + LINK_SIZE + IMM2_SIZE;
832 break;
833
834 case OP_COND:
835 case OP_SCOND:
836 /* Only AUTO_CALLOUT can insert this opcode. We do
837 not intend to support this case. */
838 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
839 return FALSE;
840 cc += 1 + LINK_SIZE;
841 break;
842
843 case OP_CREF:
844 common->optimized_cbracket[GET2(cc, 1)] = 0;
845 cc += 1 + IMM2_SIZE;
846 break;
847
848 case OP_DNREF:
849 case OP_DNREFI:
850 case OP_DNCREF:
851 count = GET2(cc, 1 + IMM2_SIZE);
852 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
853 while (count-- > 0)
854 {
855 common->optimized_cbracket[GET2(slot, 0)] = 0;
856 slot += common->name_entry_size;
857 }
858 cc += 1 + 2 * IMM2_SIZE;
859 break;
860
861 case OP_RECURSE:
862 /* Set its value only once. */
863 if (common->recursive_head_ptr == 0)
864 {
865 common->recursive_head_ptr = common->ovector_start;
866 common->ovector_start += sizeof(sljit_sw);
867 }
868 cc += 1 + LINK_SIZE;
869 break;
870
871 case OP_CALLOUT:
872 if (common->capture_last_ptr == 0)
873 {
874 common->capture_last_ptr = common->ovector_start;
875 common->ovector_start += sizeof(sljit_sw);
876 }
877 cc += 2 + 2 * LINK_SIZE;
878 break;
879
880 case OP_ASSERTBACK:
881 slot = bracketend(cc);
882 if (slot > assert_back_end)
883 assert_back_end = slot;
884 cc += 1 + LINK_SIZE;
885 break;
886
887 case OP_THEN_ARG:
888 common->has_then = TRUE;
889 common->control_head_ptr = 1;
890 /* Fall through. */
891
892 case OP_PRUNE_ARG:
893 case OP_MARK:
894 if (common->mark_ptr == 0)
895 {
896 common->mark_ptr = common->ovector_start;
897 common->ovector_start += sizeof(sljit_sw);
898 }
899 cc += 1 + 2 + cc[1];
900 break;
901
902 case OP_THEN:
903 common->has_then = TRUE;
904 common->control_head_ptr = 1;
905 cc += 1;
906 break;
907
908 case OP_SKIP:
909 if (cc < assert_back_end)
910 common->has_skip_in_assert_back = TRUE;
911 cc += 1;
912 break;
913
914 case OP_SKIP_ARG:
915 common->control_head_ptr = 1;
916 common->has_skip_arg = TRUE;
917 if (cc < assert_back_end)
918 common->has_skip_in_assert_back = TRUE;
919 cc += 1 + 2 + cc[1];
920 break;
921
922 default:
923 cc = next_opcode(common, cc);
924 if (cc == NULL)
925 return FALSE;
926 break;
927 }
928 }
929 return TRUE;
930 }
931
932 static BOOL is_accelerated_repeat(pcre_uchar *cc)
933 {
934 switch(*cc)
935 {
936 case OP_TYPESTAR:
937 case OP_TYPEMINSTAR:
938 case OP_TYPEPLUS:
939 case OP_TYPEMINPLUS:
940 case OP_TYPEPOSSTAR:
941 case OP_TYPEPOSPLUS:
942 return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI);
943
944 case OP_STAR:
945 case OP_MINSTAR:
946 case OP_PLUS:
947 case OP_MINPLUS:
948 case OP_POSSTAR:
949 case OP_POSPLUS:
950
951 case OP_STARI:
952 case OP_MINSTARI:
953 case OP_PLUSI:
954 case OP_MINPLUSI:
955 case OP_POSSTARI:
956 case OP_POSPLUSI:
957
958 case OP_NOTSTAR:
959 case OP_NOTMINSTAR:
960 case OP_NOTPLUS:
961 case OP_NOTMINPLUS:
962 case OP_NOTPOSSTAR:
963 case OP_NOTPOSPLUS:
964
965 case OP_NOTSTARI:
966 case OP_NOTMINSTARI:
967 case OP_NOTPLUSI:
968 case OP_NOTMINPLUSI:
969 case OP_NOTPOSSTARI:
970 case OP_NOTPOSPLUSI:
971 return TRUE;
972
973 case OP_CLASS:
974 case OP_NCLASS:
975 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
976 case OP_XCLASS:
977 cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(pcre_uchar)));
978 #else
979 cc += (1 + (32 / sizeof(pcre_uchar)));
980 #endif
981
982 switch(*cc)
983 {
984 case OP_CRSTAR:
985 case OP_CRMINSTAR:
986 case OP_CRPLUS:
987 case OP_CRMINPLUS:
988 case OP_CRPOSSTAR:
989 case OP_CRPOSPLUS:
990 return TRUE;
991 }
992 break;
993 }
994 return FALSE;
995 }
996
997 static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
998 {
999 pcre_uchar *cc = common->start;
1000 pcre_uchar *end;
1001
1002 /* Skip not repeated brackets. */
1003 while (TRUE)
1004 {
1005 switch(*cc)
1006 {
1007 case OP_SOD:
1008 case OP_SOM:
1009 case OP_SET_SOM:
1010 case OP_NOT_WORD_BOUNDARY:
1011 case OP_WORD_BOUNDARY:
1012 case OP_EODN:
1013 case OP_EOD:
1014 case OP_CIRC:
1015 case OP_CIRCM:
1016 case OP_DOLL:
1017 case OP_DOLLM:
1018 /* Zero width assertions. */
1019 cc++;
1020 continue;
1021 }
1022
1023 if (*cc != OP_BRA && *cc != OP_CBRA)
1024 break;
1025
1026 end = cc + GET(cc, 1);
1027 if (*end != OP_KET || PRIVATE_DATA(end) != 0)
1028 return FALSE;
1029 if (*cc == OP_CBRA)
1030 {
1031 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1032 return FALSE;
1033 cc += IMM2_SIZE;
1034 }
1035 cc += 1 + LINK_SIZE;
1036 }
1037
1038 if (is_accelerated_repeat(cc))
1039 {
1040 common->fast_forward_bc_ptr = cc;
1041 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1042 *private_data_start += sizeof(sljit_sw);
1043 return TRUE;
1044 }
1045 return FALSE;
1046 }
1047
1048 static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
1049 {
1050 pcre_uchar *next_alt;
1051
1052 SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);
1053
1054 if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1055 return;
1056
1057 next_alt = bracketend(cc) - (1 + LINK_SIZE);
1058 if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0)
1059 return;
1060
1061 do
1062 {
1063 next_alt = cc + GET(cc, 1);
1064
1065 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1066
1067 while (TRUE)
1068 {
1069 switch(*cc)
1070 {
1071 case OP_SOD:
1072 case OP_SOM:
1073 case OP_SET_SOM:
1074 case OP_NOT_WORD_BOUNDARY:
1075 case OP_WORD_BOUNDARY:
1076 case OP_EODN:
1077 case OP_EOD:
1078 case OP_CIRC:
1079 case OP_CIRCM:
1080 case OP_DOLL:
1081 case OP_DOLLM:
1082 /* Zero width assertions. */
1083 cc++;
1084 continue;
1085 }
1086 break;
1087 }
1088
1089 if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
1090 detect_fast_fail(common, cc, private_data_start, depth - 1);
1091
1092 if (is_accelerated_repeat(cc))
1093 {
1094 common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
1095
1096 if (common->fast_fail_start_ptr == 0)
1097 common->fast_fail_start_ptr = *private_data_start;
1098
1099 *private_data_start += sizeof(sljit_sw);
1100 common->fast_fail_end_ptr = *private_data_start;
1101
1102 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1103 return;
1104 }
1105
1106 cc = next_alt;
1107 }
1108 while (*cc == OP_ALT);
1109 }
1110
1111 static int get_class_iterator_size(pcre_uchar *cc)
1112 {
1113 sljit_u32 min;
1114 sljit_u32 max;
1115 switch(*cc)
1116 {
1117 case OP_CRSTAR:
1118 case OP_CRPLUS:
1119 return 2;
1120
1121 case OP_CRMINSTAR:
1122 case OP_CRMINPLUS:
1123 case OP_CRQUERY:
1124 case OP_CRMINQUERY:
1125 return 1;
1126
1127 case OP_CRRANGE:
1128 case OP_CRMINRANGE:
1129 min = GET2(cc, 1);
1130 max = GET2(cc, 1 + IMM2_SIZE);
1131 if (max == 0)
1132 return (*cc == OP_CRRANGE) ? 2 : 1;
1133 max -= min;
1134 if (max > 2)
1135 max = 2;
1136 return max;
1137
1138 default:
1139 return 0;
1140 }
1141 }
1142
1143 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
1144 {
1145 pcre_uchar *end = bracketend(begin);
1146 pcre_uchar *next;
1147 pcre_uchar *next_end;
1148 pcre_uchar *max_end;
1149 pcre_uchar type;
1150 sljit_sw length = end - begin;
1151 int min, max, i;
1152
1153 /* Detect fixed iterations first. */
1154 if (end[-(1 + LINK_SIZE)] != OP_KET)
1155 return FALSE;
1156
1157 /* Already detected repeat. */
1158 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
1159 return TRUE;
1160
1161 next = end;
1162 min = 1;
1163 while (1)
1164 {
1165 if (*next != *begin)
1166 break;
1167 next_end = bracketend(next);
1168 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1169 break;
1170 next = next_end;
1171 min++;
1172 }
1173
1174 if (min == 2)
1175 return FALSE;
1176
1177 max = 0;
1178 max_end = next;
1179 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1180 {
1181 type = *next;
1182 while (1)
1183 {
1184 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1185 break;
1186 next_end = bracketend(next + 2 + LINK_SIZE);
1187 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1188 break;
1189 next = next_end;
1190 max++;
1191 }
1192
1193 if (next[0] == type && next[1] == *begin && max >= 1)
1194 {
1195 next_end = bracketend(next + 1);
1196 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1197 {
1198 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1199 if (*next_end != OP_KET)
1200 break;
1201
1202 if (i == max)
1203 {
1204 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1205 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1206 /* +2 the original and the last. */
1207 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1208 if (min == 1)
1209 return TRUE;
1210 min--;
1211 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1212 }
1213 }
1214 }
1215 }
1216
1217 if (min >= 3)
1218 {
1219 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1220 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1221 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1222 return TRUE;
1223 }
1224
1225 return FALSE;
1226 }
1227
/* Shared case label lists for the switch statements of the private data
   scanners (set_private_data_ptrs, get_private_data_copy_length and
   copy_private_data). The macros are #undef'd after set_then_offsets. */

/* Single character repeats for which set_private_data_ptrs reserves one
   private word (space = 1); the item is the opcode plus one character. */
1228 #define CASE_ITERATOR_PRIVATE_DATA_1 \
1229 case OP_MINSTAR: \
1230 case OP_MINPLUS: \
1231 case OP_QUERY: \
1232 case OP_MINQUERY: \
1233 case OP_MINSTARI: \
1234 case OP_MINPLUSI: \
1235 case OP_QUERYI: \
1236 case OP_MINQUERYI: \
1237 case OP_NOTMINSTAR: \
1238 case OP_NOTMINPLUS: \
1239 case OP_NOTQUERY: \
1240 case OP_NOTMINQUERY: \
1241 case OP_NOTMINSTARI: \
1242 case OP_NOTMINPLUSI: \
1243 case OP_NOTQUERYI: \
1244 case OP_NOTMINQUERYI:
1245
/* Single character repeats for which two private words are reserved
   (space = 2); the item is the opcode plus one character. */
1246 #define CASE_ITERATOR_PRIVATE_DATA_2A \
1247 case OP_STAR: \
1248 case OP_PLUS: \
1249 case OP_STARI: \
1250 case OP_PLUSI: \
1251 case OP_NOTSTAR: \
1252 case OP_NOTPLUS: \
1253 case OP_NOTSTARI: \
1254 case OP_NOTPLUSI:
1255
/* Single character repeats with two private words whose item additionally
   contains an IMM2_SIZE operand (scanners advance by 2 + IMM2_SIZE). */
1256 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1257 case OP_UPTO: \
1258 case OP_MINUPTO: \
1259 case OP_UPTOI: \
1260 case OP_MINUPTOI: \
1261 case OP_NOTUPTO: \
1262 case OP_NOTMINUPTO: \
1263 case OP_NOTUPTOI: \
1264 case OP_NOTMINUPTOI:
1265
/* Character type repeats with one private word; scanners step over the
   repeat opcode only (one code unit). */
1266 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1267 case OP_TYPEMINSTAR: \
1268 case OP_TYPEMINPLUS: \
1269 case OP_TYPEQUERY: \
1270 case OP_TYPEMINQUERY:
1271
/* Character type repeats with two private words. set_private_data_ptrs
   reserves no slot when the repeated type is OP_ANYNL or OP_EXTUNI. */
1272 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1273 case OP_TYPESTAR: \
1274 case OP_TYPEPLUS:
1275
/* Character type repeats with two private words and an IMM2_SIZE operand
   (scanners advance by 1 + IMM2_SIZE). */
1276 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1277 case OP_TYPEUPTO: \
1278 case OP_TYPEMINUPTO:
1279
1280 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1281 {
1282 pcre_uchar *cc = common->start;
1283 pcre_uchar *alternative;
1284 pcre_uchar *end = NULL;
1285 int private_data_ptr = *private_data_start;
1286 int space, size, bracketlen;
1287 BOOL repeat_check = TRUE;
1288
1289 while (cc < ccend)
1290 {
1291 space = 0;
1292 size = 0;
1293 bracketlen = 0;
1294 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1295 break;
1296
1297 if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1298 {
1299 if (detect_repeat(common, cc))
1300 {
1301 /* These brackets are converted to repeats, so no global
1302 based single character repeat is allowed. */
1303 if (cc >= end)
1304 end = bracketend(cc);
1305 }
1306 }
1307 repeat_check = TRUE;
1308
1309 switch(*cc)
1310 {
1311 case OP_KET:
1312 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1313 {
1314 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1315 private_data_ptr += sizeof(sljit_sw);
1316 cc += common->private_data_ptrs[cc + 1 - common->start];
1317 }
1318 cc += 1 + LINK_SIZE;
1319 break;
1320
1321 case OP_ASSERT:
1322 case OP_ASSERT_NOT:
1323 case OP_ASSERTBACK:
1324 case OP_ASSERTBACK_NOT:
1325 case OP_ONCE:
1326 case OP_ONCE_NC:
1327 case OP_BRAPOS:
1328 case OP_SBRA:
1329 case OP_SBRAPOS:
1330 case OP_SCOND:
1331 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1332 private_data_ptr += sizeof(sljit_sw);
1333 bracketlen = 1 + LINK_SIZE;
1334 break;
1335
1336 case OP_CBRAPOS:
1337 case OP_SCBRAPOS:
1338 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1339 private_data_ptr += sizeof(sljit_sw);
1340 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1341 break;
1342
1343 case OP_COND:
1344 /* Might be a hidden SCOND. */
1345 alternative = cc + GET(cc, 1);
1346 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1347 {
1348 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1349 private_data_ptr += sizeof(sljit_sw);
1350 }
1351 bracketlen = 1 + LINK_SIZE;
1352 break;
1353
1354 case OP_BRA:
1355 bracketlen = 1 + LINK_SIZE;
1356 break;
1357
1358 case OP_CBRA:
1359 case OP_SCBRA:
1360 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1361 break;
1362
1363 case OP_BRAZERO:
1364 case OP_BRAMINZERO:
1365 case OP_BRAPOSZERO:
1366 repeat_check = FALSE;
1367 size = 1;
1368 break;
1369
1370 CASE_ITERATOR_PRIVATE_DATA_1
1371 space = 1;
1372 size = -2;
1373 break;
1374
1375 CASE_ITERATOR_PRIVATE_DATA_2A
1376 space = 2;
1377 size = -2;
1378 break;
1379
1380 CASE_ITERATOR_PRIVATE_DATA_2B
1381 space = 2;
1382 size = -(2 + IMM2_SIZE);
1383 break;
1384
1385 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1386 space = 1;
1387 size = 1;
1388 break;
1389
1390 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1391 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1392 space = 2;
1393 size = 1;
1394 break;
1395
1396 case OP_TYPEUPTO:
1397 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1398 space = 2;
1399 size = 1 + IMM2_SIZE;
1400 break;
1401
1402 case OP_TYPEMINUPTO:
1403 space = 2;
1404 size = 1 + IMM2_SIZE;
1405 break;
1406
1407 case OP_CLASS:
1408 case OP_NCLASS:
1409 space = get_class_iterator_size(cc + size);
1410 size = 1 + 32 / sizeof(pcre_uchar);
1411 break;
1412
1413 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1414 case OP_XCLASS:
1415 space = get_class_iterator_size(cc + size);
1416 size = GET(cc, 1);
1417 break;
1418 #endif
1419
1420 default:
1421 cc = next_opcode(common, cc);
1422 SLJIT_ASSERT(cc != NULL);
1423 break;
1424 }
1425
1426 /* Character iterators, which are not inside a repeated bracket,
1427 gets a private slot instead of allocating it on the stack. */
1428 if (space > 0 && cc >= end)
1429 {
1430 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1431 private_data_ptr += sizeof(sljit_sw) * space;
1432 }
1433
1434 if (size != 0)
1435 {
1436 if (size < 0)
1437 {
1438 cc += -size;
1439 #ifdef SUPPORT_UTF
1440 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1441 #endif
1442 }
1443 else
1444 cc += size;
1445 }
1446
1447 if (bracketlen > 0)
1448 {
1449 if (cc >= end)
1450 {
1451 end = bracketend(cc);
1452 if (end[-1 - LINK_SIZE] == OP_KET)
1453 end = NULL;
1454 }
1455 cc += bracketlen;
1456 }
1457 }
1458 *private_data_start = private_data_ptr;
1459 }
1460
1461 /* Returns with a frame_types (always < 0) if no need for frame. */
1462 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head)
1463 {
1464 int length = 0;
1465 int possessive = 0;
1466 BOOL stack_restore = FALSE;
1467 BOOL setsom_found = recursive;
1468 BOOL setmark_found = recursive;
1469 /* The last capture is a local variable even for recursions. */
1470 BOOL capture_last_found = FALSE;
1471
1472 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1473 SLJIT_ASSERT(common->control_head_ptr != 0);
1474 *needs_control_head = TRUE;
1475 #else
1476 *needs_control_head = FALSE;
1477 #endif
1478
1479 if (ccend == NULL)
1480 {
1481 ccend = bracketend(cc) - (1 + LINK_SIZE);
1482 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1483 {
1484 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1485 /* This is correct regardless of common->capture_last_ptr. */
1486 capture_last_found = TRUE;
1487 }
1488 cc = next_opcode(common, cc);
1489 }
1490
1491 SLJIT_ASSERT(cc != NULL);
1492 while (cc < ccend)
1493 switch(*cc)
1494 {
1495 case OP_SET_SOM:
1496 SLJIT_ASSERT(common->has_set_som);
1497 stack_restore = TRUE;
1498 if (!setsom_found)
1499 {
1500 length += 2;
1501 setsom_found = TRUE;
1502 }
1503 cc += 1;
1504 break;
1505
1506 case OP_MARK:
1507 case OP_PRUNE_ARG:
1508 case OP_THEN_ARG:
1509 SLJIT_ASSERT(common->mark_ptr != 0);
1510 stack_restore = TRUE;
1511 if (!setmark_found)
1512 {
1513 length += 2;
1514 setmark_found = TRUE;
1515 }
1516 if (common->control_head_ptr != 0)
1517 *needs_control_head = TRUE;
1518 cc += 1 + 2 + cc[1];
1519 break;
1520
1521 case OP_RECURSE:
1522 stack_restore = TRUE;
1523 if (common->has_set_som && !setsom_found)
1524 {
1525 length += 2;
1526 setsom_found = TRUE;
1527 }
1528 if (common->mark_ptr != 0 && !setmark_found)
1529 {
1530 length += 2;
1531 setmark_found = TRUE;
1532 }
1533 if (common->capture_last_ptr != 0 && !capture_last_found)
1534 {
1535 length += 2;
1536 capture_last_found = TRUE;
1537 }
1538 cc += 1 + LINK_SIZE;
1539 break;
1540
1541 case OP_CBRA:
1542 case OP_CBRAPOS:
1543 case OP_SCBRA:
1544 case OP_SCBRAPOS:
1545 stack_restore = TRUE;
1546 if (common->capture_last_ptr != 0 && !capture_last_found)
1547 {
1548 length += 2;
1549 capture_last_found = TRUE;
1550 }
1551 length += 3;
1552 cc += 1 + LINK_SIZE + IMM2_SIZE;
1553 break;
1554
1555 case OP_THEN:
1556 stack_restore = TRUE;
1557 if (common->control_head_ptr != 0)
1558 *needs_control_head = TRUE;
1559 cc ++;
1560 break;
1561
1562 default:
1563 stack_restore = TRUE;
1564 /* Fall through. */
1565
1566 case OP_NOT_WORD_BOUNDARY:
1567 case OP_WORD_BOUNDARY:
1568 case OP_NOT_DIGIT:
1569 case OP_DIGIT:
1570 case OP_NOT_WHITESPACE:
1571 case OP_WHITESPACE:
1572 case OP_NOT_WORDCHAR:
1573 case OP_WORDCHAR:
1574 case OP_ANY:
1575 case OP_ALLANY:
1576 case OP_ANYBYTE:
1577 case OP_NOTPROP:
1578 case OP_PROP:
1579 case OP_ANYNL:
1580 case OP_NOT_HSPACE:
1581 case OP_HSPACE:
1582 case OP_NOT_VSPACE:
1583 case OP_VSPACE:
1584 case OP_EXTUNI:
1585 case OP_EODN:
1586 case OP_EOD:
1587 case OP_CIRC:
1588 case OP_CIRCM:
1589 case OP_DOLL:
1590 case OP_DOLLM:
1591 case OP_CHAR:
1592 case OP_CHARI:
1593 case OP_NOT:
1594 case OP_NOTI:
1595
1596 case OP_EXACT:
1597 case OP_POSSTAR:
1598 case OP_POSPLUS:
1599 case OP_POSQUERY:
1600 case OP_POSUPTO:
1601
1602 case OP_EXACTI:
1603 case OP_POSSTARI:
1604 case OP_POSPLUSI:
1605 case OP_POSQUERYI:
1606 case OP_POSUPTOI:
1607
1608 case OP_NOTEXACT:
1609 case OP_NOTPOSSTAR:
1610 case OP_NOTPOSPLUS:
1611 case OP_NOTPOSQUERY:
1612 case OP_NOTPOSUPTO:
1613
1614 case OP_NOTEXACTI:
1615 case OP_NOTPOSSTARI:
1616 case OP_NOTPOSPLUSI:
1617 case OP_NOTPOSQUERYI:
1618 case OP_NOTPOSUPTOI:
1619
1620 case OP_TYPEEXACT:
1621 case OP_TYPEPOSSTAR:
1622 case OP_TYPEPOSPLUS:
1623 case OP_TYPEPOSQUERY:
1624 case OP_TYPEPOSUPTO:
1625
1626 case OP_CLASS:
1627 case OP_NCLASS:
1628 case OP_XCLASS:
1629 case OP_CALLOUT:
1630
1631 cc = next_opcode(common, cc);
1632 SLJIT_ASSERT(cc != NULL);
1633 break;
1634 }
1635
1636 /* Possessive quantifiers can use a special case. */
1637 if (SLJIT_UNLIKELY(possessive == length))
1638 return stack_restore ? no_frame : no_stack;
1639
1640 if (length > 0)
1641 return length + 1;
1642 return stack_restore ? no_frame : no_stack;
1643 }
1644
/* Fills the frame described by get_framesize() using OP1(SLJIT_MOV, ...)
   (presumably each OP1 emits one sljit move into the code being generated).
   The same opcodes are visited as in get_framesize(), and the number of
   STACK_TOP relative words written here matches the lengths counted there:
   two words for start-of-match, mark and last-capture, three words for
   every capturing bracket, and a final zero word.

   cc / ccend   byte code range; when ccend is NULL the body of the bracket
                at cc is used
   stackpos     index of the first frame word; converted with STACK()
   stacktop     only used by the assertions
   recursive    when TRUE, start-of-match and mark are treated as already
                saved */
1645 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1646 {
1647 DEFINE_COMPILER;
1648 BOOL setsom_found = recursive;
1649 BOOL setmark_found = recursive;
1650 /* The last capture is a local variable even for recursions. */
1651 BOOL capture_last_found = FALSE;
1652 int offset;
1653
1654 /* >= 1 + shortest item size (2) */
1655 SLJIT_UNUSED_ARG(stacktop);
1656 SLJIT_ASSERT(stackpos >= stacktop + 2);
1657
1658 stackpos = STACK(stackpos);
1659 if (ccend == NULL)
1660 {
1661 ccend = bracketend(cc) - (1 + LINK_SIZE);
/* A non-recursive OP_CBRAPOS / OP_SCBRAPOS is not skipped, so the loop
   below saves its own capture as well. */
1662 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1663 cc = next_opcode(common, cc);
1664 }
1665
1666 SLJIT_ASSERT(cc != NULL);
1667 while (cc < ccend)
1668 switch(*cc)
1669 {
1670 case OP_SET_SOM:
1671 SLJIT_ASSERT(common->has_set_som);
1672 if (!setsom_found)
1673 {
/* Two words: the negated offset of the local, then its current value. */
1674 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1675 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1676 stackpos -= (int)sizeof(sljit_sw);
1677 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1678 stackpos -= (int)sizeof(sljit_sw);
1679 setsom_found = TRUE;
1680 }
1681 cc += 1;
1682 break;
1683
1684 case OP_MARK:
1685 case OP_PRUNE_ARG:
1686 case OP_THEN_ARG:
1687 SLJIT_ASSERT(common->mark_ptr != 0);
1688 if (!setmark_found)
1689 {
1690 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1691 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1692 stackpos -= (int)sizeof(sljit_sw);
1693 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1694 stackpos -= (int)sizeof(sljit_sw);
1695 setmark_found = TRUE;
1696 }
1697 cc += 1 + 2 + cc[1];
1698 break;
1699
/* A recursion saves every local which is in use and not saved yet. */
1700 case OP_RECURSE:
1701 if (common->has_set_som && !setsom_found)
1702 {
1703 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
1704 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1705 stackpos -= (int)sizeof(sljit_sw);
1706 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1707 stackpos -= (int)sizeof(sljit_sw);
1708 setsom_found = TRUE;
1709 }
1710 if (common->mark_ptr != 0 && !setmark_found)
1711 {
1712 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
1713 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1714 stackpos -= (int)sizeof(sljit_sw);
1715 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1716 stackpos -= (int)sizeof(sljit_sw);
1717 setmark_found = TRUE;
1718 }
1719 if (common->capture_last_ptr != 0 && !capture_last_found)
1720 {
1721 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1722 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1723 stackpos -= (int)sizeof(sljit_sw);
1724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1725 stackpos -= (int)sizeof(sljit_sw);
1726 capture_last_found = TRUE;
1727 }
1728 cc += 1 + LINK_SIZE;
1729 break;
1730
1731 case OP_CBRA:
1732 case OP_CBRAPOS:
1733 case OP_SCBRA:
1734 case OP_SCBRAPOS:
1735 if (common->capture_last_ptr != 0 && !capture_last_found)
1736 {
1737 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
1738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1739 stackpos -= (int)sizeof(sljit_sw);
1740 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1741 stackpos -= (int)sizeof(sljit_sw);
1742 capture_last_found = TRUE;
1743 }
/* Three words: the (positive) ovector offset of the capture, followed by
   both ovector values of the pair. */
1744 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1745 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1746 stackpos -= (int)sizeof(sljit_sw);
1747 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
1748 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
1749 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1750 stackpos -= (int)sizeof(sljit_sw);
1751 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1752 stackpos -= (int)sizeof(sljit_sw);
1753
1754 cc += 1 + LINK_SIZE + IMM2_SIZE;
1755 break;
1756
1757 default:
1758 cc = next_opcode(common, cc);
1759 SLJIT_ASSERT(cc != NULL);
1760 break;
1761 }
1762
/* The frame is closed by a zero word, which must land on stacktop. */
1763 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1764 SLJIT_ASSERT(stackpos == STACK(stacktop));
1765 }
1766
1767 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1768 {
1769 int private_data_length = needs_control_head ? 3 : 2;
1770 int size;
1771 pcre_uchar *alternative;
1772 /* Calculate the sum of the private machine words. */
1773 while (cc < ccend)
1774 {
1775 size = 0;
1776 switch(*cc)
1777 {
1778 case OP_KET:
1779 if (PRIVATE_DATA(cc) != 0)
1780 {
1781 private_data_length++;
1782 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1783 cc += PRIVATE_DATA(cc + 1);
1784 }
1785 cc += 1 + LINK_SIZE;
1786 break;
1787
1788 case OP_ASSERT:
1789 case OP_ASSERT_NOT:
1790 case OP_ASSERTBACK:
1791 case OP_ASSERTBACK_NOT:
1792 case OP_ONCE:
1793 case OP_ONCE_NC:
1794 case OP_BRAPOS:
1795 case OP_SBRA:
1796 case OP_SBRAPOS:
1797 case OP_SCOND:
1798 private_data_length++;
1799 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
1800 cc += 1 + LINK_SIZE;
1801 break;
1802
1803 case OP_CBRA:
1804 case OP_SCBRA:
1805 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1806 private_data_length++;
1807 cc += 1 + LINK_SIZE + IMM2_SIZE;
1808 break;
1809
1810 case OP_CBRAPOS:
1811 case OP_SCBRAPOS:
1812 private_data_length += 2;
1813 cc += 1 + LINK_SIZE + IMM2_SIZE;
1814 break;
1815
1816 case OP_COND:
1817 /* Might be a hidden SCOND. */
1818 alternative = cc + GET(cc, 1);
1819 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1820 private_data_length++;
1821 cc += 1 + LINK_SIZE;
1822 break;
1823
1824 CASE_ITERATOR_PRIVATE_DATA_1
1825 if (PRIVATE_DATA(cc))
1826 private_data_length++;
1827 cc += 2;
1828 #ifdef SUPPORT_UTF
1829 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1830 #endif
1831 break;
1832
1833 CASE_ITERATOR_PRIVATE_DATA_2A
1834 if (PRIVATE_DATA(cc))
1835 private_data_length += 2;
1836 cc += 2;
1837 #ifdef SUPPORT_UTF
1838 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1839 #endif
1840 break;
1841
1842 CASE_ITERATOR_PRIVATE_DATA_2B
1843 if (PRIVATE_DATA(cc))
1844 private_data_length += 2;
1845 cc += 2 + IMM2_SIZE;
1846 #ifdef SUPPORT_UTF
1847 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1848 #endif
1849 break;
1850
1851 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1852 if (PRIVATE_DATA(cc))
1853 private_data_length++;
1854 cc += 1;
1855 break;
1856
1857 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1858 if (PRIVATE_DATA(cc))
1859 private_data_length += 2;
1860 cc += 1;
1861 break;
1862
1863 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1864 if (PRIVATE_DATA(cc))
1865 private_data_length += 2;
1866 cc += 1 + IMM2_SIZE;
1867 break;
1868
1869 case OP_CLASS:
1870 case OP_NCLASS:
1871 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1872 case OP_XCLASS:
1873 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1874 #else
1875 size = 1 + 32 / (int)sizeof(pcre_uchar);
1876 #endif
1877 if (PRIVATE_DATA(cc))
1878 private_data_length += get_class_iterator_size(cc + size);
1879 cc += size;
1880 break;
1881
1882 default:
1883 cc = next_opcode(common, cc);
1884 SLJIT_ASSERT(cc != NULL);
1885 break;
1886 }
1887 }
1888 SLJIT_ASSERT(cc == ccend);
1889 return private_data_length;
1890 }
1891
/* Transfers the private data of the byte code between cc and ccend between
   SLJIT_SP relative locals and STACK_TOP relative slots, using
   OP1(SLJIT_MOV, ...) (presumably each OP1 emits one sljit move into the
   code being generated). The opcodes are visited exactly as in
   get_private_data_copy_length().

   save               TRUE: locals are copied to the stack slots,
                      FALSE: stack slots are copied back to the locals
   stackptr/stacktop  first slot and slot limit, converted with STACK()
   needs_control_head when saving, common->control_head_ptr is stored
                      together with common->recursive_head_ptr at the end

   TMP1 and TMP2 are used alternately as intermediate registers (tmp1next
   selects the next one), so every value is loaded one step before it is
   stored. */
1892 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1893 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1894 {
1895 DEFINE_COMPILER;
1896 int srcw[2];
1897 int count, size;
1898 BOOL tmp1next = TRUE;
1899 BOOL tmp1empty = TRUE;
1900 BOOL tmp2empty = TRUE;
1901 pcre_uchar *alternative;
1902 enum {
1903 loop,
1904 end
1905 } status;
1906
1907 status = loop;
1908 stackptr = STACK(stackptr);
1909 stacktop = STACK(stacktop - 1);
1910
1911 if (!save)
1912 {
/* The head pointer slots (one or two words) are not restored here, so
   they are excluded from the range. Up to two values are preloaded. */
1913 stacktop -= (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1914 if (stackptr < stacktop)
1915 {
1916 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1917 stackptr += sizeof(sljit_sw);
1918 tmp1empty = FALSE;
1919 }
1920 if (stackptr < stacktop)
1921 {
1922 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1923 stackptr += sizeof(sljit_sw);
1924 tmp2empty = FALSE;
1925 }
1926 /* The tmp1next must be TRUE in either way. */
1927 }
1928
1929 SLJIT_ASSERT(common->recursive_head_ptr != 0);
1930
/* Each iteration collects up to two local offsets in srcw[] (count tells
   how many); the transfer itself is done by the inner loop below. */
1931 do
1932 {
1933 count = 0;
1934 if (cc >= ccend)
1935 {
/* End of the byte code: only a save stores the head pointers. */
1936 if (!save)
1937 break;
1938
1939 count = 1;
1940 srcw[0] = common->recursive_head_ptr;
1941 if (needs_control_head)
1942 {
1943 SLJIT_ASSERT(common->control_head_ptr != 0);
1944 count = 2;
1945 srcw[0] = common->control_head_ptr;
1946 srcw[1] = common->recursive_head_ptr;
1947 }
1948 status = end;
1949 }
1950 else switch(*cc)
1951 {
1952 case OP_KET:
1953 if (PRIVATE_DATA(cc) != 0)
1954 {
1955 count = 1;
1956 srcw[0] = PRIVATE_DATA(cc);
1957 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
1958 cc += PRIVATE_DATA(cc + 1);
1959 }
1960 cc += 1 + LINK_SIZE;
1961 break;
1962
1963 case OP_ASSERT:
1964 case OP_ASSERT_NOT:
1965 case OP_ASSERTBACK:
1966 case OP_ASSERTBACK_NOT:
1967 case OP_ONCE:
1968 case OP_ONCE_NC:
1969 case OP_BRAPOS:
1970 case OP_SBRA:
1971 case OP_SBRAPOS:
1972 case OP_SCOND:
1973 count = 1;
1974 srcw[0] = PRIVATE_DATA(cc);
1975 SLJIT_ASSERT(srcw[0] != 0);
1976 cc += 1 + LINK_SIZE;
1977 break;
1978
1979 case OP_CBRA:
1980 case OP_SCBRA:
1981 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1982 {
1983 count = 1;
1984 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1985 }
1986 cc += 1 + LINK_SIZE + IMM2_SIZE;
1987 break;
1988
1989 case OP_CBRAPOS:
1990 case OP_SCBRAPOS:
1991 count = 2;
1992 srcw[0] = PRIVATE_DATA(cc);
1993 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1994 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1995 cc += 1 + LINK_SIZE + IMM2_SIZE;
1996 break;
1997
1998 case OP_COND:
1999 /* Might be a hidden SCOND. */
2000 alternative = cc + GET(cc, 1);
2001 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2002 {
2003 count = 1;
2004 srcw[0] = PRIVATE_DATA(cc);
2005 SLJIT_ASSERT(srcw[0] != 0);
2006 }
2007 cc += 1 + LINK_SIZE;
2008 break;
2009
2010 CASE_ITERATOR_PRIVATE_DATA_1
2011 if (PRIVATE_DATA(cc))
2012 {
2013 count = 1;
2014 srcw[0] = PRIVATE_DATA(cc);
2015 }
2016 cc += 2;
2017 #ifdef SUPPORT_UTF
2018 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2019 #endif
2020 break;
2021
2022 CASE_ITERATOR_PRIVATE_DATA_2A
2023 if (PRIVATE_DATA(cc))
2024 {
2025 count = 2;
2026 srcw[0] = PRIVATE_DATA(cc);
2027 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2028 }
2029 cc += 2;
2030 #ifdef SUPPORT_UTF
2031 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2032 #endif
2033 break;
2034
2035 CASE_ITERATOR_PRIVATE_DATA_2B
2036 if (PRIVATE_DATA(cc))
2037 {
2038 count = 2;
2039 srcw[0] = PRIVATE_DATA(cc);
2040 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2041 }
2042 cc += 2 + IMM2_SIZE;
2043 #ifdef SUPPORT_UTF
2044 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2045 #endif
2046 break;
2047
2048 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2049 if (PRIVATE_DATA(cc))
2050 {
2051 count = 1;
2052 srcw[0] = PRIVATE_DATA(cc);
2053 }
2054 cc += 1;
2055 break;
2056
2057 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2058 if (PRIVATE_DATA(cc))
2059 {
2060 count = 2;
2061 srcw[0] = PRIVATE_DATA(cc);
2062 srcw[1] = srcw[0] + sizeof(sljit_sw);
2063 }
2064 cc += 1;
2065 break;
2066
2067 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2068 if (PRIVATE_DATA(cc))
2069 {
2070 count = 2;
2071 srcw[0] = PRIVATE_DATA(cc);
2072 srcw[1] = srcw[0] + sizeof(sljit_sw);
2073 }
2074 cc += 1 + IMM2_SIZE;
2075 break;
2076
2077 case OP_CLASS:
2078 case OP_NCLASS:
2079 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2080 case OP_XCLASS:
2081 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
2082 #else
2083 size = 1 + 32 / (int)sizeof(pcre_uchar);
2084 #endif
/* The repeat opcode which follows the class data tells the word count. */
2085 if (PRIVATE_DATA(cc))
2086 switch(get_class_iterator_size(cc + size))
2087 {
2088 case 1:
2089 count = 1;
2090 srcw[0] = PRIVATE_DATA(cc);
2091 break;
2092
2093 case 2:
2094 count = 2;
2095 srcw[0] = PRIVATE_DATA(cc);
2096 srcw[1] = srcw[0] + sizeof(sljit_sw);
2097 break;
2098
2099 default:
2100 SLJIT_UNREACHABLE();
2101 break;
2102 }
2103 cc += size;
2104 break;
2105
2106 default:
2107 cc = next_opcode(common, cc);
2108 SLJIT_ASSERT(cc != NULL);
2109 break;
2110 }
2111
/* Transfer the collected locals, the highest srcw[] index first. */
2112 while (count > 0)
2113 {
2114 count--;
2115 if (save)
2116 {
/* Save: flush the register which is about to be reused to the next
   stack slot, then load the local into it. */
2117 if (tmp1next)
2118 {
2119 if (!tmp1empty)
2120 {
2121 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2122 stackptr += sizeof(sljit_sw);
2123 }
2124 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2125 tmp1empty = FALSE;
2126 tmp1next = FALSE;
2127 }
2128 else
2129 {
2130 if (!tmp2empty)
2131 {
2132 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2133 stackptr += sizeof(sljit_sw);
2134 }
2135 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), srcw[count]);
2136 tmp2empty = FALSE;
2137 tmp1next = TRUE;
2138 }
2139 }
2140 else
2141 {
/* Restore: store the preloaded register into the local, then refill it
   from the next stack slot while any slot remains. */
2142 if (tmp1next)
2143 {
2144 SLJIT_ASSERT(!tmp1empty);
2145 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP1, 0);
2146 tmp1empty = stackptr >= stacktop;
2147 if (!tmp1empty)
2148 {
2149 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2150 stackptr += sizeof(sljit_sw);
2151 }
2152 tmp1next = FALSE;
2153 }
2154 else
2155 {
2156 SLJIT_ASSERT(!tmp2empty);
2157 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), srcw[count], TMP2, 0);
2158 tmp2empty = stackptr >= stacktop;
2159 if (!tmp2empty)
2160 {
2161 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
2162 stackptr += sizeof(sljit_sw);
2163 }
2164 tmp1next = TRUE;
2165 }
2166 }
2167 }
2168 }
2169 while (status != end);
2170
/* After a save, the values still held in TMP1 / TMP2 are written out;
   the register selected by tmp1next holds the older value and goes first. */
2171 if (save)
2172 {
2173 if (tmp1next)
2174 {
2175 if (!tmp1empty)
2176 {
2177 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2178 stackptr += sizeof(sljit_sw);
2179 }
2180 if (!tmp2empty)
2181 {
2182 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2183 stackptr += sizeof(sljit_sw);
2184 }
2185 }
2186 else
2187 {
2188 if (!tmp2empty)
2189 {
2190 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
2191 stackptr += sizeof(sljit_sw);
2192 }
2193 if (!tmp1empty)
2194 {
2195 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
2196 stackptr += sizeof(sljit_sw);
2197 }
2198 }
2199 }
2200 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
2201 }
2202
2203 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
2204 {
2205 pcre_uchar *end = bracketend(cc);
2206 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2207
2208 /* Assert captures then. */
2209 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
2210 current_offset = NULL;
2211 /* Conditional block does not. */
2212 if (*cc == OP_COND || *cc == OP_SCOND)
2213 has_alternatives = FALSE;
2214
2215 cc = next_opcode(common, cc);
2216 if (has_alternatives)
2217 current_offset = common->then_offsets + (cc - common->start);
2218
2219 while (cc < end)
2220 {
2221 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2222 cc = set_then_offsets(common, cc, current_offset);
2223 else
2224 {
2225 if (*cc == OP_ALT && has_alternatives)
2226 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2227 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2228 *current_offset = 1;
2229 cc = next_opcode(common, cc);
2230 }
2231 }
2232
2233 return end;
2234 }
2235
2236 #undef CASE_ITERATOR_PRIVATE_DATA_1
2237 #undef CASE_ITERATOR_PRIVATE_DATA_2A
2238 #undef CASE_ITERATOR_PRIVATE_DATA_2B
2239 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2240 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2241 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2242
2243 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
2244 {
2245 return (value & (value - 1)) == 0;
2246 }
2247
2248 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
2249 {
2250 while (list)
2251 {
2252 /* sljit_set_label is clever enough to do nothing
2253 if either the jump or the label is NULL. */
2254 SET_LABEL(list->jump, label);
2255 list = list->next;
2256 }
2257 }
2258
2259 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
2260 {
2261 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
2262 if (list_item)
2263 {
2264 list_item->next = *list;
2265 list_item->jump = jump;
2266 *list = list_item;
2267 }
2268 }
2269
2270 static void add_stub(compiler_common *common, struct sljit_jump *start)
2271 {
2272 DEFINE_COMPILER;
2273 stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2274
2275 if (list_item)
2276 {
2277 list_item->start = start;
2278 list_item->quit = LABEL();
2279 list_item->next = common->stubs;
2280 common->stubs = list_item;
2281 }
2282 }
2283
2284 static void flush_stubs(compiler_common *common)
2285 {
2286 DEFINE_COMPILER;
2287 stub_list *list_item = common->stubs;
2288
2289 while (list_item)
2290 {
2291 JUMPHERE(list_item->start);
2292 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2293 JUMPTO(SLJIT_JUMP, list_item->quit);
2294 list_item = list_item->next;
2295 }
2296 common->stubs = NULL;
2297 }
2298
2299 static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
2300 {
2301 DEFINE_COMPILER;
2302 label_addr_list *label_addr;
2303
2304 label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list));
2305 if (label_addr == NULL)
2306 return;
2307 label_addr->label = LABEL();
2308 label_addr->update_addr = update_addr;
2309 label_addr->next = common->label_addrs;
2310 common->label_addrs = label_addr;
2311 }
2312
2313 static SLJIT_INLINE void count_match(compiler_common *common)
2314 {
2315 DEFINE_COMPILER;
2316
2317 OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2318 add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
2319 }
2320
2321 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2322 {
2323 /* May destroy all locals and registers except TMP2. */
2324 DEFINE_COMPILER;
2325
2326 SLJIT_ASSERT(size > 0);
2327 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2328 #ifdef DESTROY_REGISTERS
2329 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2330 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2331 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2332 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
2333 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
2334 #endif
2335 add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
2336 }
2337
2338 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2339 {
2340 DEFINE_COMPILER;
2341
2342 SLJIT_ASSERT(size > 0);
2343 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2344 }
2345
2346 static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
2347 {
2348 DEFINE_COMPILER;
2349 sljit_uw *result;
2350
2351 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
2352 return NULL;
2353
2354 result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
2355 if (SLJIT_UNLIKELY(result == NULL))
2356 {
2357 sljit_set_compiler_memory_error(compiler);
2358 return NULL;
2359 }
2360
2361 *(void**)result = common->read_only_data_head;
2362 common->read_only_data_head = (void *)result;
2363 return result + 1;
2364 }
2365
2366 static void free_read_only_data(void *current, void *allocator_data)
2367 {
2368 void *next;
2369
2370 SLJIT_UNUSED_ARG(allocator_data);
2371
2372 while (current != NULL)
2373 {
2374 next = *(void**)current;
2375 SLJIT_FREE(current, allocator_data);
2376 current = next;
2377 }
2378 }
2379
2380 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2381 {
2382 DEFINE_COMPILER;
2383 struct sljit_label *loop;
2384 int i;
2385
2386 /* At this point we can freely use all temporary registers. */
2387 SLJIT_ASSERT(length > 1);
2388 /* TMP1 returns with begin - 1. */
2389 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2390 if (length < 8)
2391 {
2392 for (i = 1; i < length; i++)
2393 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
2394 }
2395 else
2396 {
2397 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2398 {
2399 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
2400 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2401 loop = LABEL();
2402 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
2403 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2404 JUMPTO(SLJIT_NOT_ZERO, loop);
2405 }
2406 else
2407 {
2408 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
2409 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
2410 loop = LABEL();
2411 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
2412 OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
2413 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
2414 JUMPTO(SLJIT_NOT_ZERO, loop);
2415 }
2416 }
2417 }
2418
2419 static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
2420 {
2421 DEFINE_COMPILER;
2422 sljit_s32 i;
2423
2424 SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);
2425
2426 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2427 for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw))
2428 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0);
2429 }
2430
2431 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2432 {
2433 DEFINE_COMPILER;
2434 struct sljit_label *loop;
2435 int i;
2436
2437 SLJIT_ASSERT(length > 1);
2438 /* OVECTOR(1) contains the "string begin - 1" constant. */
2439 if (length > 2)
2440 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2441 if (length < 8)
2442 {
2443 for (i = 2; i < length; i++)
2444 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
2445 }
2446 else
2447 {
2448 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
2449 {
2450 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2451 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2452 loop = LABEL();
2453 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
2454 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2455 JUMPTO(SLJIT_NOT_ZERO, loop);
2456 }
2457 else
2458 {
2459 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
2460 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2461 loop = LABEL();
2462 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
2463 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
2464 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2465 JUMPTO(SLJIT_NOT_ZERO, loop);
2466 }
2467 }
2468
2469 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2470 if (common->mark_ptr != 0)
2471 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
2472 if (common->control_head_ptr != 0)
2473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
2474 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2475 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
2476 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
2477 }
2478
2479 static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2480 {
2481 while (current != NULL)
2482 {
2483 switch (current[1])
2484 {
2485 case type_then_trap:
2486 break;
2487
2488 case type_mark:
2489 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[2]) == 0)
2490 return current[3];
2491 break;
2492
2493 default:
2494 SLJIT_UNREACHABLE();
2495 break;
2496 }
2497 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
2498 current = (sljit_sw*)current[0];
2499 }
2500 return 0;
2501 }
2502
2503 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2504 {
2505 DEFINE_COMPILER;
2506 struct sljit_label *loop;
2507 struct sljit_jump *early_quit;
2508 BOOL has_pre;
2509
2510 /* At this point we can freely use all registers. */
2511 OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
2512 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
2513
2514 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
2515 if (common->mark_ptr != 0)
2516 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2517 OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
2518 if (common->mark_ptr != 0)
2519 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
2520 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2521 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));
2522
2523 has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
2524 GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
2525
2526 /* Unlikely, but possible */
2527 early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
2528 loop = LABEL();
2529
2530 if (has_pre)
2531 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
2532 else
2533 {
2534 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
2535 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
2536 }
2537
2538 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(int));
2539 OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
2540 /* Copy the integer value to the output buffer */
2541 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2542 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2543 #endif
2544
2545 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
2546 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2547 JUMPTO(SLJIT_NOT_ZERO, loop);
2548 JUMPHERE(early_quit);
2549
2550 /* Calculate the return value, which is the maximum ovector value. */
2551 if (topbracket > 1)
2552 {
2553 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
2554 {
2555 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2556 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2557
2558 /* OVECTOR(0) is never equal to SLJIT_S2. */
2559 loop = LABEL();
2560 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
2561 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2562 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2563 }
2564 else
2565 {
2566 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
2567 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
2568
2569 /* OVECTOR(0) is never equal to SLJIT_S2. */
2570 loop = LABEL();
2571 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
2572 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
2573 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
2574 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
2575 }
2576 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
2577 }
2578 else
2579 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2580 }
2581
2582 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2583 {
2584 DEFINE_COMPILER;
2585 struct sljit_jump *jump;
2586
2587 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
2588 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2589 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2590
2591 OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
2592 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2593 OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2594 CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);
2595
2596 /* Store match begin and end. */
2597 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
2598 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));
2599
2600 jump = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
2601 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
2602 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2603 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2604 #endif
2605 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
2606 JUMPHERE(jump);
2607
2608 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2609 OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
2610 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2611 OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
2612 #endif
2613 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);
2614
2615 OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
2616 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2617 OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
2618 #endif
2619 OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);
2620
2621 JUMPTO(SLJIT_JUMP, quit);
2622 }
2623
2624 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2625 {
2626 /* May destroy TMP1. */
2627 DEFINE_COMPILER;
2628 struct sljit_jump *jump;
2629
2630 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2631 {
2632 /* The value of -1 must be kept for start_used_ptr! */
2633 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
2634 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2635 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2636 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2637 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2638 JUMPHERE(jump);
2639 }
2640 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2641 {
2642 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2644 JUMPHERE(jump);
2645 }
2646 }
2647
2648 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
2649 {
2650 /* Detects if the character has an othercase. */
2651 unsigned int c;
2652
2653 #ifdef SUPPORT_UTF
2654 if (common->utf)
2655 {
2656 GETCHAR(c, cc);
2657 if (c > 127)
2658 {
2659 #ifdef SUPPORT_UCP
2660 return c != UCD_OTHERCASE(c);
2661 #else
2662 return FALSE;
2663 #endif
2664 }
2665 #ifndef COMPILE_PCRE8
2666 return common->fcc[c] != c;
2667 #endif
2668 }
2669 else
2670 #endif
2671 c = *cc;
2672 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2673 }
2674
2675 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2676 {
2677 /* Returns with the othercase. */
2678 #ifdef SUPPORT_UTF
2679 if (common->utf && c > 127)
2680 {
2681 #ifdef SUPPORT_UCP
2682 return UCD_OTHERCASE(c);
2683 #else
2684 return c;
2685 #endif
2686 }
2687 #endif
2688 return TABLE_GET(c, common->fcc, c);
2689 }
2690
2691 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
2692 {
2693 /* Detects if the character and its othercase has only 1 bit difference. */
2694 unsigned int c, oc, bit;
2695 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2696 int n;
2697 #endif
2698
2699 #ifdef SUPPORT_UTF
2700 if (common->utf)
2701 {
2702 GETCHAR(c, cc);
2703 if (c <= 127)
2704 oc = common->fcc[c];
2705 else
2706 {
2707 #ifdef SUPPORT_UCP
2708 oc = UCD_OTHERCASE(c);
2709 #else
2710 oc = c;
2711 #endif
2712 }
2713 }
2714 else
2715 {
2716 c = *cc;
2717 oc = TABLE_GET(c, common->fcc, c);
2718 }
2719 #else
2720 c = *cc;
2721 oc = TABLE_GET(c, common->fcc, c);
2722 #endif
2723
2724 SLJIT_ASSERT(c != oc);
2725
2726 bit = c ^ oc;
2727 /* Optimized for English alphabet. */
2728 if (c <= 127 && bit == 0x20)
2729 return (0 << 8) | 0x20;
2730
2731 /* Since c != oc, they must have at least 1 bit difference. */
2732 if (!is_powerof2(bit))
2733 return 0;
2734
2735 #if defined COMPILE_PCRE8
2736
2737 #ifdef SUPPORT_UTF
2738 if (common->utf && c > 127)
2739 {
2740 n = GET_EXTRALEN(*cc);
2741 while ((bit & 0x3f) == 0)
2742 {
2743 n--;
2744 bit >>= 6;
2745 }
2746 return (n << 8) | bit;
2747 }
2748 #endif /* SUPPORT_UTF */
2749 return (0 << 8) | bit;
2750
2751 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2752
2753 #ifdef SUPPORT_UTF
2754 if (common->utf && c > 65535)
2755 {
2756 if (bit >= (1 << 10))
2757 bit >>= 10;
2758 else
2759 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2760 }
2761 #endif /* SUPPORT_UTF */
2762 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2763
2764 #endif /* COMPILE_PCRE[8|16|32] */
2765 }
2766
2767 static void check_partial(compiler_common *common, BOOL force)
2768 {
2769 /* Checks whether a partial matching is occurred. Does not modify registers. */
2770 DEFINE_COMPILER;
2771 struct sljit_jump *jump = NULL;
2772
2773 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2774
2775 if (common->mode == JIT_COMPILE)
2776 return;
2777
2778 if (!force)
2779 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
2780 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2781 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
2782
2783 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2784 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2785 else
2786 {
2787 if (common->partialmatchlabel != NULL)
2788 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2789 else
2790 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2791 }
2792
2793 if (jump != NULL)
2794 JUMPHERE(jump);
2795 }
2796
2797 static void check_str_end(compiler_common *common, jump_list **end_reached)
2798 {
2799 /* Does not affect registers. Usually used in a tight spot. */
2800 DEFINE_COMPILER;
2801 struct sljit_jump *jump;
2802
2803 if (common->mode == JIT_COMPILE)
2804 {
2805 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2806 return;
2807 }
2808
2809 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2810 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2811 {
2812 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2813 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2814 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2815 }
2816 else
2817 {
2818 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2819 if (common->partialmatchlabel != NULL)
2820 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2821 else
2822 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2823 }
2824 JUMPHERE(jump);
2825 }
2826
2827 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2828 {
2829 DEFINE_COMPILER;
2830 struct sljit_jump *jump;
2831
2832 if (common->mode == JIT_COMPILE)
2833 {
2834 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2835 return;
2836 }
2837
2838 /* Partial matching mode. */
2839 jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
2840 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
2841 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2842 {
2843 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
2844 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2845 }
2846 else
2847 {
2848 if (common->partialmatchlabel != NULL)
2849 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2850 else
2851 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2852 }
2853 JUMPHERE(jump);
2854 }
2855
2856 static void peek_char(compiler_common *common, sljit_u32 max)
2857 {
2858 /* Reads the character into TMP1, keeps STR_PTR.
2859 Does not check STR_END. TMP2 Destroyed. */
2860 DEFINE_COMPILER;
2861 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2862 struct sljit_jump *jump;
2863 #endif
2864
2865 SLJIT_UNUSED_ARG(max);
2866
2867 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2868 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2869 if (common->utf)
2870 {
2871 if (max < 128) return;
2872
2873 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2874 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2875 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2876 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2877 JUMPHERE(jump);
2878 }
2879 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2880
2881 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2882 if (common->utf)
2883 {
2884 if (max < 0xd800) return;
2885
2886 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2887 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2888 /* TMP2 contains the high surrogate. */
2889 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2890 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2891 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2892 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2893 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2894 JUMPHERE(jump);
2895 }
2896 #endif
2897 }
2898
2899 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2900
2901 static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
2902 {
2903 /* Tells whether the character codes below 128 are enough
2904 to determine a match. */
2905 const sljit_u8 value = nclass ? 0xff : 0;
2906 const sljit_u8 *end = bitset + 32;
2907
2908 bitset += 16;
2909 do
2910 {
2911 if (*bitset++ != value)
2912 return FALSE;
2913 }
2914 while (bitset < end);
2915 return TRUE;
2916 }
2917
2918 static void read_char7_type(compiler_common *common, BOOL full_read)
2919 {
2920 /* Reads the precise character type of a character into TMP1, if the character
2921 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2922 full_read argument tells whether characters above max are accepted or not. */
2923 DEFINE_COMPILER;
2924 struct sljit_jump *jump;
2925
2926 SLJIT_ASSERT(common->utf);
2927
2928 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2929 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2930
2931 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2932
2933 if (full_read)
2934 {
2935 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2936 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2937 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2938 JUMPHERE(jump);
2939 }
2940 }
2941
2942 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2943
2944 static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
2945 {
2946 /* Reads the precise value of a character into TMP1, if the character is
2947 between min and max (c >= min && c <= max). Otherwise it returns with a value
2948 outside the range. Does not check STR_END. */
2949 DEFINE_COMPILER;
2950 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2951 struct sljit_jump *jump;
2952 #endif
2953 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2954 struct sljit_jump *jump2;
2955 #endif
2956
2957 SLJIT_UNUSED_ARG(update_str_ptr);
2958 SLJIT_UNUSED_ARG(min);
2959 SLJIT_UNUSED_ARG(max);
2960 SLJIT_ASSERT(min <= max);
2961
2962 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2963 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2964
2965 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2966 if (common->utf)
2967 {
2968 if (max < 128 && !update_str_ptr) return;
2969
2970 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2971 if (min >= 0x10000)
2972 {
2973 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
2974 if (update_str_ptr)
2975 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2976 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2977 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
2978 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2979 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2980 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2981 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2982 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2983 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2984 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2985 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2986 if (!update_str_ptr)
2987 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2988 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2989 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2990 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2991 JUMPHERE(jump2);
2992 if (update_str_ptr)
2993 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2994 }
2995 else if (min >= 0x800 && max <= 0xffff)
2996 {
2997 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
2998 if (update_str_ptr)
2999 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3000 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3001 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
3002 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3003 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3004 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3005 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3006 if (!update_str_ptr)
3007 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3008 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3009 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3010 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3011 JUMPHERE(jump2);
3012 if (update_str_ptr)
3013 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3014 }
3015 else if (max >= 0x800)
3016 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3017 else if (max < 128)
3018 {
3019 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3020 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3021 }
3022 else
3023 {
3024 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3025 if (!update_str_ptr)
3026 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3027 else
3028 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3029 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3030 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3031 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3032 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3033 if (update_str_ptr)
3034 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3035 }
3036 JUMPHERE(jump);
3037 }
3038 #endif
3039
3040 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3041 if (common->utf)
3042 {
3043 if (max >= 0x10000)
3044 {
3045 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3046 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3047 /* TMP2 contains the high surrogate. */
3048 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3049 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
3050 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3051 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3052 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
3053 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3054 JUMPHERE(jump);
3055 return;
3056 }
3057
3058 if (max < 0xd800 && !update_str_ptr) return;
3059
3060 /* Skip low surrogate if necessary. */
3061 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3062 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3063 if (update_str_ptr)
3064 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3065 if (max >= 0xd800)
3066 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
3067 JUMPHERE(jump);
3068 }
3069 #endif
3070 }
3071
3072 static SLJIT_INLINE void read_char(compiler_common *common)
3073 {
3074 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
3075 }
3076
3077 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
3078 {
3079 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
3080 DEFINE_COMPILER;
3081 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3082 struct sljit_jump *jump;
3083 #endif
3084 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3085 struct sljit_jump *jump2;
3086 #endif
3087
3088 SLJIT_UNUSED_ARG(update_str_ptr);
3089
3090 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
3091 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3092
3093 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3094 if (common->utf)
3095 {
3096 /* This can be an extra read in some situations, but hopefully
3097 it is needed in most cases. */
3098 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3099 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
3100 if (!update_str_ptr)
3101 {
3102 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3103 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3104 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3105 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3106 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3107 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3108 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3109 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3110 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3111 JUMPHERE(jump2);
3112 }
3113 else
3114 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
3115 JUMPHERE(jump);
3116 return;
3117 }
3118 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
3119
3120 #if !defined COMPILE_PCRE8
3121 /* The ctypes array contains only 256 values. */
3122 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3123 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
3124 #endif
3125 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3126 #if !defined COMPILE_PCRE8
3127 JUMPHERE(jump);
3128 #endif
3129
3130 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
3131 if (common->utf && update_str_ptr)
3132 {
3133 /* Skip low surrogate if necessary. */
3134 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
3135 jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
3136 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3137 JUMPHERE(jump);
3138 }
3139 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
3140 }
3141
3142 static void skip_char_back(compiler_common *common)
3143 {
3144 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
3145 DEFINE_COMPILER;
3146 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3147 #if defined COMPILE_PCRE8
3148 struct sljit_label *label;
3149
3150 if (common->utf)
3151 {
3152 label = LABEL();
3153 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3154 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3155 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
3156 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
3157 return;
3158 }
3159 #elif defined COMPILE_PCRE16
3160 if (common->utf)
3161 {
3162 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
3163 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3164 /* Skip low surrogate if necessary. */
3165 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3166 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3167 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3168 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3169 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3170 return;
3171 }
3172 #endif /* COMPILE_PCRE[8|16] */
3173 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3174 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3175 }
3176
3177 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
3178 {
3179 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
3180 DEFINE_COMPILER;
3181 struct sljit_jump *jump;
3182
3183 if (nltype == NLTYPE_ANY)
3184 {
3185 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
3186 sljit_set_current_flags(compiler, SLJIT_SET_Z);
3187 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
3188 }
3189 else if (nltype == NLTYPE_ANYCRLF)
3190 {
3191 if (jumpifmatch)
3192 {
3193 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
3194 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3195 }
3196 else
3197 {
3198 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3199 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
3200 JUMPHERE(jump);
3201 }
3202 }
3203 else
3204 {
3205 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
3206 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
3207 }
3208 }
3209
3210 #ifdef SUPPORT_UTF
3211
3212 #if defined COMPILE_PCRE8
3213 static void do_utfreadchar(compiler_common *common)
3214 {
3215 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3216 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
3217 DEFINE_COMPILER;
3218 struct sljit_jump *jump;
3219
3220 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3221 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3222 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3223 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3224 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3225 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3226
3227 /* Searching for the first zero. */
3228 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3229 jump = JUMP(SLJIT_NOT_ZERO);
3230 /* Two byte sequence. */
3231 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3232 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
3233 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3234
3235 JUMPHERE(jump);
3236 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3237 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3238 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3239 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3240 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3241
3242 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3243 jump = JUMP(SLJIT_NOT_ZERO);
3244 /* Three byte sequence. */
3245 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3246 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
3247 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3248
3249 /* Four byte sequence. */
3250 JUMPHERE(jump);
3251 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3252 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
3253 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3254 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3255 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3256 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3257 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
3258 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3259 }
3260
3261 static void do_utfreadchar16(compiler_common *common)
3262 {
3263 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
3264 of the character (>= 0xc0). Return value in TMP1. */
3265 DEFINE_COMPILER;
3266 struct sljit_jump *jump;
3267
3268 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3269 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3270 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3271 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3272 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3273 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3274
3275 /* Searching for the first zero. */
3276 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
3277 jump = JUMP(SLJIT_NOT_ZERO);
3278 /* Two byte sequence. */
3279 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3280 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3281
3282 JUMPHERE(jump);
3283 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
3284 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
3285 /* This code runs only in 8 bit mode. No need to shift the value. */
3286 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3287 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3288 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
3289 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3290 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3291 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3292 /* Three byte sequence. */
3293 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3294 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3295 }
3296
3297 static void do_utfreadtype8(compiler_common *common)
3298 {
3299 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
3300 of the character (>= 0xc0). Return value in TMP1. */
3301 DEFINE_COMPILER;
3302 struct sljit_jump *jump;
3303 struct sljit_jump *compare;
3304
3305 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3306
3307 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
3308 jump = JUMP(SLJIT_NOT_ZERO);
3309 /* Two byte sequence. */
3310 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3311 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3312 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
3313 /* The upper 5 bits are known at this point. */
3314 compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
3315 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3316 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3317 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
3318 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
3319 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3320
3321 JUMPHERE(compare);
3322 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3323 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3324
3325 /* We only have types for characters less than 256. */
3326 JUMPHERE(jump);
3327 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3328 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
3329 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3330 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3331 }
3332
3333 #endif /* COMPILE_PCRE8 */
3334
3335 #endif /* SUPPORT_UTF */
3336
3337 #ifdef SUPPORT_UCP
3338
3339 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
3340 #define UCD_BLOCK_MASK 127
3341 #define UCD_BLOCK_SHIFT 7
3342
3343 static void do_getucd(compiler_common *common)
3344 {
3345 /* Search the UCD record for the character comes in TMP1.
3346 Returns chartype in TMP1 and UCD offset in TMP2. */
3347 DEFINE_COMPILER;
3348 #ifdef COMPILE_PCRE32
3349 struct sljit_jump *jump;
3350 #endif
3351
3352 #if defined SLJIT_DEBUG && SLJIT_DEBUG
3353 /* dummy_ucd_record */
3354 const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
3355 SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
3356 SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
3357 #endif
3358
3359 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
3360
3361 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3362
3363 #ifdef COMPILE_PCRE32
3364 if (!common->utf)
3365 {
3366 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
3367 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
3368 JUMPHERE(jump);
3369 }
3370 #endif
3371
3372 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3373 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
3374 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
3375 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
3376 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3377 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
3378 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
3379 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
3380 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
3381 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3382 }
3383 #endif
3384
3385 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
3386 {
3387 DEFINE_COMPILER;
3388 struct sljit_label *mainloop;
3389 struct sljit_label *newlinelabel = NULL;
3390 struct sljit_jump *start;
3391 struct sljit_jump *end = NULL;
3392 struct sljit_jump *end2 = NULL;
3393 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3394 struct sljit_jump *singlechar;
3395 #endif
3396 jump_list *newline = NULL;
3397 BOOL newlinecheck = FALSE;
3398 BOOL readuchar = FALSE;
3399
3400 if (!(hascrorlf || (common->match_end_ptr != 0)) &&
3401 (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
3402 newlinecheck = TRUE;
3403
3404 if (common->match_end_ptr != 0)
3405 {
3406 /* Search for the end of the first line. */
3407 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
3408
3409 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3410 {
3411 mainloop = LABEL();
3412 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3413 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3414 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3415 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3416 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
3417 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
3418 JUMPHERE(end);
3419 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3420 }
3421 else
3422 {
3423 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3424 mainloop = LABEL();
3425 /* Continual stores does not cause data dependency. */
3426 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3427 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3428 check_newlinechar(common, common->nltype, &newline, TRUE);
3429 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
3430 JUMPHERE(end);
3431 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
3432 set_jumps(newline, LABEL());
3433 }
3434
3435 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
3436 }
3437
3438 start = JUMP(SLJIT_JUMP);
3439
3440 if (newlinecheck)
3441 {
3442 newlinelabel = LABEL();
3443 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3444 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3445 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3446 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3447 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3448 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3449 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3450 #endif
3451 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3452 end2 = JUMP(SLJIT_JUMP);
3453 }
3454
3455 mainloop = LABEL();
3456
3457 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3458 #ifdef SUPPORT_UTF
3459 if (common->utf) readuchar = TRUE;
3460 #endif
3461 if (newlinecheck) readuchar = TRUE;
3462
3463 if (readuchar)
3464 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3465
3466 if (newlinecheck)
3467 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3468
3469 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3470 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3471 #if defined COMPILE_PCRE8
3472 if (common->utf)
3473 {
3474 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3475 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3476 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3477 JUMPHERE(singlechar);
3478 }
3479 #elif defined COMPILE_PCRE16
3480 if (common->utf)
3481 {
3482 singlechar = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3483 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3484 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3485 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
3486 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3487 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3488 JUMPHERE(singlechar);
3489 }
3490 #endif /* COMPILE_PCRE[8|16] */
3491 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3492 JUMPHERE(start);
3493
3494 if (newlinecheck)
3495 {
3496 JUMPHERE(end);
3497 JUMPHERE(end2);
3498 }
3499
3500 return mainloop;
3501 }
3502
3503 #define MAX_N_CHARS 16
3504 #define MAX_DIFF_CHARS 6
3505
3506 static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars)
3507 {
3508 pcre_uchar i, len;
3509
3510 len = chars[0];
3511 if (len == 255)
3512 return;
3513
3514 if (len == 0)
3515 {
3516 chars[0] = 1;
3517 chars[1] = chr;
3518 return;
3519 }
3520
3521 for (i = len; i > 0; i--)
3522 if (chars[i] == chr)
3523 return;
3524
3525 if (len >= MAX_DIFF_CHARS - 1)
3526 {
3527 chars[0] = 255;
3528 return;
3529 }
3530
3531 len++;
3532 chars[len] = chr;
3533 chars[0] = len;
3534 }
3535
3536 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
3537 {
3538 /* Recursive function, which scans prefix literals. */
3539 BOOL last, any, class, caseless;
3540 int len, repeat, len_save, consumed = 0;
3541 sljit_u32 chr; /* Any unicode character. */
3542 sljit_u8 *bytes, *bytes_end, byte;
3543 pcre_uchar *alternative, *cc_save, *oc;
3544 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3545 pcre_uchar othercase[8];
3546 #elif defined SUPPORT_UTF && defined COMPILE_PCRE16
3547 pcre_uchar othercase[2];
3548 #else
3549 pcre_uchar othercase[1];
3550 #endif
3551
3552 repeat = 1;
3553 while (TRUE)
3554 {
3555 if (*rec_count == 0)
3556 return 0;
3557 (*rec_count)--;
3558
3559 last = TRUE;
3560 any = FALSE;
3561 class = FALSE;
3562 caseless = FALSE;
3563
3564 switch (*cc)
3565 {
3566 case OP_CHARI:
3567 caseless = TRUE;
3568 case OP_CHAR:
3569 last = FALSE;
3570 cc++;
3571 break;
3572
3573 case OP_SOD:
3574 case OP_SOM:
3575 case OP_SET_SOM:
3576 case OP_NOT_WORD_BOUNDARY:
3577 case OP_WORD_BOUNDARY:
3578 case OP_EODN:
3579 case OP_EOD:
3580 case OP_CIRC:
3581 case OP_CIRCM:
3582 case OP_DOLL:
3583 case OP_DOLLM:
3584 /* Zero width assertions. */
3585 cc++;
3586 continue;
3587
3588 case OP_ASSERT:
3589 case OP_ASSERT_NOT:
3590 case OP_ASSERTBACK:
3591 case OP_ASSERTBACK_NOT:
3592 cc = bracketend(cc);
3593 continue;
3594
3595 case OP_PLUSI:
3596 case OP_MINPLUSI:
3597 case OP_POSPLUSI:
3598 caseless = TRUE;
3599 case OP_PLUS:
3600 case OP_MINPLUS:
3601 case OP_POSPLUS:
3602 cc++;
3603 break;
3604
3605 case OP_EXACTI:
3606 caseless = TRUE;
3607 case OP_EXACT:
3608 repeat = GET2(cc, 1);
3609 last = FALSE;
3610 cc += 1 + IMM2_SIZE;
3611 break;
3612
3613 case OP_QUERYI:
3614 case OP_MINQUERYI:
3615 case OP_POSQUERYI:
3616 caseless = TRUE;
3617 case OP_QUERY:
3618 case OP_MINQUERY:
3619 case OP_POSQUERY:
3620 len = 1;
3621 cc++;
3622 #ifdef SUPPORT_UTF
3623 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3624 #endif
3625 max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
3626 if (max_chars == 0)
3627 return consumed;
3628 last = FALSE;
3629 break;
3630
3631 case OP_KET:
3632 cc += 1 + LINK_SIZE;
3633 continue;
3634
3635 case OP_ALT:
3636 cc += GET(cc, 1);
3637 continue;
3638
3639 case OP_ONCE:
3640 case OP_ONCE_NC:
3641 case OP_BRA:
3642 case OP_BRAPOS:
3643 case OP_CBRA:
3644 case OP_CBRAPOS:
3645 alternative = cc + GET(cc, 1);
3646 while (*alternative == OP_ALT)
3647 {
3648 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
3649 if (max_chars == 0)
3650 return consumed;
3651 alternative += GET(alternative, 1);
3652 }
3653
3654 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3655 cc += IMM2_SIZE;
3656 cc += 1 + LINK_SIZE;
3657 continue;
3658
3659 case OP_CLASS:
3660 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3661 if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
3662 return consumed;
3663 #endif
3664 class = TRUE;
3665 break;
3666
3667 case OP_NCLASS:
3668 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3669 if (common->utf) return consumed;
3670 #endif
3671 class = TRUE;
3672 break;
3673
3674 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3675 case OP_XCLASS:
3676 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3677 if (common->utf) return consumed;
3678 #endif
3679 any = TRUE;
3680 cc += GET(cc, 1);
3681 break;
3682 #endif
3683
3684 case OP_DIGIT:
3685 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3686 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
3687 return consumed;
3688 #endif
3689 any = TRUE;
3690 cc++;
3691 break;
3692
3693 case OP_WHITESPACE:
3694 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3695 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
3696 return consumed;
3697 #endif
3698 any = TRUE;
3699 cc++;
3700 break;
3701
3702 case OP_WORDCHAR:
3703 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3704 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
3705 return consumed;
3706 #endif
3707 any = TRUE;
3708 cc++;
3709 break;
3710
3711 case OP_NOT:
3712 case OP_NOTI:
3713 cc++;
3714 /* Fall through. */
3715 case OP_NOT_DIGIT:
3716 case OP_NOT_WHITESPACE:
3717 case OP_NOT_WORDCHAR:
3718 case OP_ANY:
3719 case OP_ALLANY:
3720 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3721 if (common->utf) return consumed;
3722 #endif
3723 any = TRUE;
3724 cc++;
3725 break;
3726
3727 #ifdef SUPPORT_UTF
3728 case OP_NOTPROP:
3729 case OP_PROP:
3730 #ifndef COMPILE_PCRE32
3731 if (common->utf) return consumed;
3732 #endif
3733 any = TRUE;
3734 cc += 1 + 2;
3735 break;
3736 #endif
3737
3738 case OP_TYPEEXACT:
3739 repeat = GET2(cc, 1);
3740 cc += 1 + IMM2_SIZE;
3741 continue;
3742
3743 case OP_NOTEXACT:
3744 case OP_NOTEXACTI:
3745 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3746 if (common->utf) return consumed;
3747 #endif
3748 any = TRUE;
3749 repeat = GET2(cc, 1);
3750 cc += 1 + IMM2_SIZE + 1;
3751 break;
3752
3753 default:
3754 return consumed;
3755 }
3756
3757 if (any)
3758 {
3759 do
3760 {
3761 chars[0] = 255;
3762
3763 consumed++;
3764 if (--max_chars == 0)
3765 return consumed;
3766 chars += MAX_DIFF_CHARS;
3767 }
3768 while (--repeat > 0);
3769
3770 repeat = 1;
3771 continue;
3772 }
3773
3774 if (class)
3775 {
3776 bytes = (sljit_u8*) (cc + 1);
3777 cc += 1 + 32 / sizeof(pcre_uchar);
3778
3779 switch (*cc)
3780 {
3781 case OP_CRSTAR:
3782 case OP_CRMINSTAR:
3783 case OP_CRPOSSTAR:
3784 case OP_CRQUERY:
3785 case OP_CRMINQUERY:
3786 case OP_CRPOSQUERY:
3787 max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
3788 if (max_chars == 0)
3789 return consumed;
3790 break;
3791
3792 default:
3793 case OP_CRPLUS:
3794 case OP_CRMINPLUS:
3795 case OP_CRPOSPLUS:
3796 break;
3797
3798 case OP_CRRANGE:
3799 case OP_CRMINRANGE:
3800 case OP_CRPOSRANGE:
3801 repeat = GET2(cc, 1);
3802 if (repeat <= 0)
3803 return consumed;
3804 break;
3805 }
3806
3807 do
3808 {
3809 if (bytes[31] & 0x80)
3810 chars[0] = 255;
3811 else if (chars[0] != 255)
3812 {
3813 bytes_end = bytes + 32;
3814 chr = 0;
3815 do
3816 {
3817 byte = *bytes++;
3818 SLJIT_ASSERT((chr & 0x7) == 0);
3819 if (byte == 0)
3820 chr += 8;
3821 else
3822 {
3823 do
3824 {
3825 if ((byte & 0x1) != 0)
3826 add_prefix_char(chr, chars);
3827 byte >>= 1;
3828 chr++;
3829 }
3830 while (byte != 0);
3831 chr = (chr + 7) & ~7;
3832 }
3833 }
3834 while (chars[0] != 255 && bytes < bytes_end);
3835 bytes = bytes_end - 32;
3836 }
3837
3838 consumed++;
3839 if (--max_chars == 0)
3840 return consumed;
3841 chars += MAX_DIFF_CHARS;
3842 }
3843 while (--repeat > 0);
3844
3845 switch (*cc)
3846 {
3847 case OP_CRSTAR:
3848 case OP_CRMINSTAR:
3849 case OP_CRPOSSTAR:
3850 return consumed;
3851
3852 case OP_CRQUERY:
3853 case OP_CRMINQUERY:
3854 case OP_CRPOSQUERY:
3855 cc++;
3856 break;
3857
3858 case OP_CRRANGE:
3859 case OP_CRMINRANGE:
3860 case OP_CRPOSRANGE:
3861 if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
3862 return consumed;
3863 cc += 1 + 2 * IMM2_SIZE;
3864 break;
3865 }
3866
3867 repeat = 1;
3868 continue;
3869 }
3870
3871 len = 1;
3872 #ifdef SUPPORT_UTF
3873 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3874 #endif
3875
3876 if (caseless && char_has_othercase(common, cc))
3877 {
3878 #ifdef SUPPORT_UTF
3879 if (common->utf)
3880 {
3881 GETCHAR(chr, cc);
3882 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
3883 return consumed;
3884 }
3885 else
3886 #endif
3887 {
3888 chr = *cc;
3889 othercase[0] = TABLE_GET(chr, common->fcc, chr);
3890 }
3891 }
3892 else
3893 {
3894 caseless = FALSE;
3895 othercase[0] = 0; /* Stops compiler warning - PH */
3896 }
3897
3898 len_save = len;
3899 cc_save = cc;
3900 while (TRUE)
3901 {
3902 oc = othercase;
3903 do
3904 {
3905 chr = *cc;
3906 add_prefix_char(*cc, chars);
3907
3908 if (caseless)
3909 add_prefix_char(*oc, chars);
3910
3911 len--;
3912 consumed++;
3913 if (--max_chars == 0)
3914 return consumed;
3915 chars += MAX_DIFF_CHARS;
3916 cc++;
3917 oc++;
3918 }
3919 while (len > 0);
3920
3921 if (--repeat == 0)
3922 break;
3923
3924 len = len_save;
3925 cc = cc_save;
3926 }
3927
3928 repeat = 1;
3929 if (last)
3930 return consumed;
3931 }
3932 }
3933
3934 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
3935
3936 static sljit_s32 character_to_int32(pcre_uchar chr)
3937 {
3938 sljit_s32 value = (sljit_s32)chr;
3939 #if defined COMPILE_PCRE8
3940 #define SSE2_COMPARE_TYPE_INDEX 0
3941 return (value << 24) | (value << 16) | (value << 8) | value;
3942 #elif defined COMPILE_PCRE16
3943 #define SSE2_COMPARE_TYPE_INDEX 1
3944 return (value << 16) | value;
3945 #elif defined COMPILE_PCRE32
3946 #define SSE2_COMPARE_TYPE_INDEX 2
3947 return value;
3948 #else
3949 #error "Unsupported unit width"
3950 #endif
3951 }
3952
3953 static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2)
3954 {
3955 DEFINE_COMPILER;
3956 struct sljit_label *start;
3957 struct sljit_jump *quit[3];
3958 struct sljit_jump *nomatch;
3959 sljit_u8 instruction[8];
3960 sljit_s32 tmp1_ind = sljit_get_register_index(TMP1);
3961 sljit_s32 tmp2_ind = sljit_get_register_index(TMP2);
3962 sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR);
3963 BOOL load_twice = FALSE;
3964 pcre_uchar bit;
3965
3966 bit = char1 ^ char2;
3967 if (!is_powerof2(bit))
3968 bit = 0;
3969
3970 if ((char1 != char2) && bit == 0)
3971 load_twice = TRUE;
3972
3973 quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3974
3975 /* First part (unaligned start) */
3976
3977 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit));
3978
3979 SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1);
3980
3981 /* MOVD xmm, r/m32 */
3982 instruction[0] = 0x66;
3983 instruction[1] = 0x0f;
3984 instruction[2] = 0x6e;
3985 instruction[3] = 0xc0 | (2 << 3) | tmp1_ind;
3986 sljit_emit_op_custom(compiler, instruction, 4);
3987
3988 if (char1 != char2)
3989 {
3990 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2));
3991
3992 /* MOVD xmm, r/m32 */
3993 instruction[3] = 0xc0 | (3 << 3) | tmp1_ind;
3994 sljit_emit_op_custom(compiler, instruction, 4);
3995 }
3996
3997 /* PSHUFD xmm1, xmm2/m128, imm8 */
3998 instruction[2] = 0x70;
3999 instruction[3] = 0xc0 | (2 << 3) | 2;
4000 instruction[4] = 0;
4001 sljit_emit_op_custom(compiler, instruction, 5);
4002
4003 if (char1 != char2)
4004 {
4005 /* PSHUFD xmm1, xmm2/m128, imm8 */
4006 instruction[3] = 0xc0 | (3 << 3) | 3;
4007 instruction[4] = 0;
4008 sljit_emit_op_custom(compiler, instruction, 5);
4009 }
4010
4011 OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf);
4012 OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf);
4013
4014 /* MOVDQA xmm1, xmm2/m128 */
4015 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4016
4017 if (str_ptr_ind < 8)
4018 {
4019 instruction[2] = 0x6f;
4020 instruction[3] = (0 << 3) | str_ptr_ind;
4021 sljit_emit_op_custom(compiler, instruction, 4);
4022
4023 if (load_twice)
4024 {
4025 instruction[3] = (1 << 3) | str_ptr_ind;
4026 sljit_emit_op_custom(compiler, instruction, 4);
4027 }
4028 }
4029 else
4030 {
4031 instruction[1] = 0x41;
4032 instruction[2] = 0x0f;
4033 instruction[3] = 0x6f;
4034 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
4035 sljit_emit_op_custom(compiler, instruction, 5);
4036
4037 if (load_twice)
4038 {
4039 instruction[4] = (1 << 3) | str_ptr_ind;
4040 sljit_emit_op_custom(compiler, instruction, 5);
4041 }
4042 instruction[1] = 0x0f;
4043 }
4044
4045 #else
4046
4047 instruction[2] = 0x6f;
4048 instruction[3] = (0 << 3) | str_ptr_ind;
4049 sljit_emit_op_custom(compiler, instruction, 4);
4050
4051 if (load_twice)
4052 {
4053 instruction[3] = (1 << 3) | str_ptr_ind;
4054 sljit_emit_op_custom(compiler, instruction, 4);
4055 }
4056
4057 #endif
4058
4059 if (bit != 0)
4060 {
4061 /* POR xmm1, xmm2/m128 */
4062 instruction[2] = 0xeb;
4063 instruction[3] = 0xc0 | (0 << 3) | 3;
4064 sljit_emit_op_custom(compiler, instruction, 4);
4065 }
4066
4067 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4068 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4069 instruction[3] = 0xc0 | (0 << 3) | 2;
4070 sljit_emit_op_custom(compiler, instruction, 4);
4071
4072 if (load_twice)
4073 {
4074 instruction[3] = 0xc0 | (1 << 3) | 3;
4075 sljit_emit_op_custom(compiler, instruction, 4);
4076 }
4077
4078 /* PMOVMSKB reg, xmm */
4079 instruction[2] = 0xd7;
4080 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4081 sljit_emit_op_custom(compiler, instruction, 4);
4082
4083 if (load_twice)
4084 {
4085 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
4086 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4087 sljit_emit_op_custom(compiler, instruction, 4);
4088
4089 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4090 OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
4091 }
4092
4093 OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0);
4094
4095 /* BSF r32, r/m32 */
4096 instruction[0] = 0x0f;
4097 instruction[1] = 0xbc;
4098 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4099 sljit_emit_op_custom(compiler, instruction, 3);
4100 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4101
4102 nomatch = JUMP(SLJIT_ZERO);
4103
4104 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4105 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4106 quit[1] = JUMP(SLJIT_JUMP);
4107
4108 JUMPHERE(nomatch);
4109
4110 start = LABEL();
4111 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16);
4112 quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4113
4114 /* Second part (aligned) */
4115
4116 instruction[0] = 0x66;
4117 instruction[1] = 0x0f;
4118
4119 /* MOVDQA xmm1, xmm2/m128 */
4120 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4121
4122 if (str_ptr_ind < 8)
4123 {
4124 instruction[2] = 0x6f;
4125 instruction[3] = (0 << 3) | str_ptr_ind;
4126 sljit_emit_op_custom(compiler, instruction, 4);
4127
4128 if (load_twice)
4129 {
4130 instruction[3] = (1 << 3) | str_ptr_ind;
4131 sljit_emit_op_custom(compiler, instruction, 4);
4132 }
4133 }
4134 else
4135 {
4136 instruction[1] = 0x41;
4137 instruction[2] = 0x0f;
4138 instruction[3] = 0x6f;
4139 instruction[4] = (0 << 3) | (str_ptr_ind & 0x7);
4140 sljit_emit_op_custom(compiler, instruction, 5);
4141
4142 if (load_twice)
4143 {
4144 instruction[4] = (1 << 3) | str_ptr_ind;
4145 sljit_emit_op_custom(compiler, instruction, 5);
4146 }
4147 instruction[1] = 0x0f;
4148 }
4149
4150 #else
4151
4152 instruction[2] = 0x6f;
4153 instruction[3] = (0 << 3) | str_ptr_ind;
4154 sljit_emit_op_custom(compiler, instruction, 4);
4155
4156 if (load_twice)
4157 {
4158 instruction[3] = (1 << 3) | str_ptr_ind;
4159 sljit_emit_op_custom(compiler, instruction, 4);
4160 }
4161
4162 #endif
4163
4164 if (bit != 0)
4165 {
4166 /* POR xmm1, xmm2/m128 */
4167 instruction[2] = 0xeb;
4168 instruction[3] = 0xc0 | (0 << 3) | 3;
4169 sljit_emit_op_custom(compiler, instruction, 4);
4170 }
4171
4172 /* PCMPEQB/W/D xmm1, xmm2/m128 */
4173 instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX;
4174 instruction[3] = 0xc0 | (0 << 3) | 2;
4175 sljit_emit_op_custom(compiler, instruction, 4);
4176
4177 if (load_twice)
4178 {
4179 instruction[3] = 0xc0 | (1 << 3) | 3;
4180 sljit_emit_op_custom(compiler, instruction, 4);
4181 }
4182
4183 /* PMOVMSKB reg, xmm */
4184 instruction[2] = 0xd7;
4185 instruction[3] = 0xc0 | (tmp1_ind << 3) | 0;
4186 sljit_emit_op_custom(compiler, instruction, 4);
4187
4188 if (load_twice)
4189 {
4190 instruction[3] = 0xc0 | (tmp2_ind << 3) | 1;
4191 sljit_emit_op_custom(compiler, instruction, 4);
4192
4193 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4194 }
4195
4196 /* BSF r32, r/m32 */
4197 instruction[0] = 0x0f;
4198 instruction[1] = 0xbc;
4199 instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind;
4200 sljit_emit_op_custom(compiler, instruction, 3);
4201 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4202
4203 JUMPTO(SLJIT_ZERO, start);
4204
4205 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4206
4207 start = LABEL();
4208 SET_LABEL(quit[0], start);
4209 SET_LABEL(quit[1], start);
4210 SET_LABEL(quit[2], start);
4211 }
4212
4213 #undef SSE2_COMPARE_TYPE_INDEX
4214
4215 #endif
4216
4217 static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
4218 {
4219 DEFINE_COMPILER;
4220 struct sljit_label *start;
4221 struct sljit_jump *quit;
4222 struct sljit_jump *found;
4223 pcre_uchar mask;
4224 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4225 struct sljit_label *utf_start = NULL;
4226 struct sljit_jump *utf_quit = NULL;
4227 #endif
4228 BOOL has_match_end = (common->match_end_ptr != 0);
4229
4230 if (offset > 0)
4231 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4232
4233 if (has_match_end)
4234 {
4235 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4236
4237 OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
4238 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
4239 sljit_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
4240 }
4241
4242 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4243 if (common->utf && offset > 0)
4244 utf_start = LABEL();
4245 #endif
4246
4247 #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)
4248
4249 /* SSE2 accelerated first character search. */
4250
4251 if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
4252 {
4253 fast_forward_first_char2_sse2(common, char1, char2);
4254
4255 SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
4256 if (common->mode == JIT_COMPILE)
4257 {
4258 /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
4259 SLJIT_ASSERT(common->forced_quit_label == NULL);
4260 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
4261 add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4262
4263 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4264 if (common->utf && offset > 0)
4265 {
4266 SLJIT_ASSERT(common->mode == JIT_COMPILE);
4267
4268 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4269 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4270 #if defined COMPILE_PCRE8
4271 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4272 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4273 #elif defined COMPILE_PCRE16
4274 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4275 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4276 #else
4277 #error "Unknown code width"
4278 #endif
4279 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4280 }
4281 #endif
4282
4283 if (offset > 0)
4284 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4285 }
4286 else
4287 {
4288 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
4289 if (has_match_end)
4290 {
4291 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4292 sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, TMP1, 0);
4293 }
4294 else
4295 sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, STR_END, 0);
4296 }
4297
4298 if (has_match_end)
4299 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4300 return;
4301 }
4302
4303 #endif
4304
4305 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4306
4307 start = LABEL();
4308 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4309
4310 if (char1 == char2)
4311 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
4312 else
4313 {
4314 mask = char1 ^ char2;
4315 if (is_powerof2(mask))
4316 {
4317 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4318 found = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask);
4319 }
4320 else
4321 {
4322 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
4323 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
4324 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
4325 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
4326 found = JUMP(SLJIT_NOT_ZERO);
4327 }
4328 }
4329
4330 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4331 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, start);
4332
4333 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4334 if (common->utf && offset > 0)
4335 utf_quit = JUMP(SLJIT_JUMP);
4336 #endif
4337
4338 JUMPHERE(found);
4339
4340 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4341 if (common->utf && offset > 0)
4342 {
4343 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
4344 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4345 #if defined COMPILE_PCRE8
4346 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4347 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
4348 #elif defined COMPILE_PCRE16
4349 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4350 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
4351 #else
4352 #error "Unknown code width"
4353 #endif
4354 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4355 JUMPHERE(utf_quit);
4356 }
4357 #endif
4358
4359 JUMPHERE(quit);
4360
4361 if (has_match_end)
4362 {
4363 quit = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
4364 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4365 if (offset > 0)
4366 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4367 JUMPHERE(quit);
4368 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4369 }
4370
4371 if (offset > 0)
4372 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
4373 }
4374
/* Generates code that moves STR_PTR forward to the next position where a match
   could start, using the character sets that scan_prefix() reports for the
   first positions of the pattern.

   Two strategies are possible:
   - If there is a run of at least four consecutive positions whose character
     count is below 255, a 256 entry skip table is built for that run and the
     generated loop uses it to advance STR_PTR. One more position (offset) may
     then be verified with an exact comparison.
   - Otherwise, if some position allows at most two characters, the work is
     delegated to fast_forward_first_char2().

   Returns FALSE when neither strategy applies; nothing is emitted in that case.
   Returns TRUE otherwise. Note that TRUE is also returned, without emitting any
   code, when allocate_read_only_data() returns NULL. */
4375 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
4376 {
4377 DEFINE_COMPILER;
4378 struct sljit_label *start;
4379 struct sljit_jump *quit;
4380 struct sljit_jump *match;
4381 /* Each position i owns MAX_DIFF_CHARS entries of chars[]: chars[i * MAX_DIFF_CHARS]
4382 is the number of characters stored right after it, 255 represents any character. */
4383 pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
4384 sljit_s32 offset;
4385 pcre_uchar mask;
4386 pcre_uchar *char_set, *char_set_end;
4387 int i, max, from;
4388 int range_right = -1, range_len;
4389 sljit_u8 *update_table = NULL;
4390 BOOL in_range;
4391 sljit_u32 rec_count;
4392
     /* Start with an empty character set (count 0) at every position. */
4393 for (i = 0; i < MAX_N_CHARS; i++)
4394 chars[i * MAX_DIFF_CHARS] = 0;
4395
     /* rec_count is passed by address to scan_prefix().
        NOTE(review): presumably a work budget for the scan - confirm. */
4396 rec_count = 10000;
4397 max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
4398
4399 if (max < 1)
4400 return FALSE;
4401
     /* Find the longest run of consecutive positions whose count is below 255.
        A run is only recorded when it is longer than range_len, so the initial
        value below enforces the minimum length. range_right becomes the index
        of the last position of the chosen run, or stays -1 if there is none. */
4402 in_range = FALSE;
4403 /* Prevent compiler "uninitialized" warning */
4404 from = 0;
4405 range_len = 4 /* minimum length */ - 1;
4406 for (i = 0; i <= max; i++)
4407 {
4408 if (in_range && (i - from) > range_len && (chars[(i - 1) * MAX_DIFF_CHARS] < 255))
4409 {
4410 range_len = i - from;
4411 range_right = i - 1;
4412 }
4413
4414 if (i < max && chars[i * MAX_DIFF_CHARS] < 255)
4415 {
4416 SLJIT_ASSERT(chars[i * MAX_DIFF_CHARS] > 0);
4417 if (!in_range)
4418 {
4419 in_range = TRUE;
4420 from = i;
4421 }
4422 }
4423 else
4424 in_range = FALSE;
4425 }
4426
     /* Build the skip table, indexed by the low 8 bits of a character. Every
        entry starts as IN_UCHARS(range_len). For the position that is i steps
        to the left of range_right, each of its characters lowers the matching
        entry to at most IN_UCHARS(i); characters allowed at range_right itself
        therefore end up with IN_UCHARS(0). */
4427 if (range_right >= 0)
4428 {
4429 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
4430 if (update_table == NULL)
4431 return TRUE;
4432 memset(update_table, IN_UCHARS(range_len), 256);
4433
4434 for (i = 0; i < range_len; i++)
4435 {
4436 char_set = chars + ((range_right - i) * MAX_DIFF_CHARS);
4437 SLJIT_ASSERT(char_set[0] > 0 && char_set[0] < 255);
4438 char_set_end = char_set + char_set[0];
4439 char_set++;
4440 while (char_set <= char_set_end)
4441 {
4442 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
4443 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
4444 char_set++;
4445 }
4446 }
4447 }
4448
     /* Choose one position (offset) that can be verified with a cheap compare.
        The first position with at most two characters is taken. While the
        chosen position has two characters it is replaced by a later position
        that has a single character, or by a later two character position whose
        pair passes is_powerof2() on its XOR when the current pair does not. */
4449 offset = -1;
4450 /* Scan forward. */
4451 for (i = 0; i < max; i++)
4452 {
4453 if (offset == -1)
4454 {
4455 if (chars[i * MAX_DIFF_CHARS] <= 2)
4456 offset = i;
4457 }
4458 else if (chars[offset * MAX_DIFF_CHARS] == 2 && chars[i * MAX_DIFF_CHARS] <= 2)
4459 {
4460 if (chars[i * MAX_DIFF_CHARS] == 1)
4461 offset = i;
4462 else
4463 {
4464 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4465 if (!is_powerof2(mask))
4466 {
4467 mask = chars[i * MAX_DIFF_CHARS + 1] ^ chars[i * MAX_DIFF_CHARS + 2];
4468 if (is_powerof2(mask))
4469 offset = i;
4470 }
4471 }
4472 }
4473 }
4474
     /* No usable run: fall back to the one or two character search, if a
        suitable position was found. Here mask is reused to hold the last
        stored character of that position (the second one, or the first one
        again when there is only one). */
4475 if (range_right < 0)
4476 {
4477 if (offset < 0)
4478 return FALSE;
4479 SLJIT_ASSERT(chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2);
4480 /* Works regardless the value is 1 or 2. */
4481 mask = chars[offset * MAX_DIFF_CHARS + chars[offset * MAX_DIFF_CHARS]];
4482 fast_forward_first_char2(common, chars[offset * MAX_DIFF_CHARS + 1], mask, offset);
4483 return TRUE;
4484 }
4485
     /* The table lookup already covers position range_right, so an extra
        compare at the same position is dropped. */
4486 if (range_right == offset)
4487 offset = -1;
4488
4489 SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));
4490
     /* From here on max is the index of the last scanned position. STR_END is
        lowered by that many characters for the duration of the loop.
        NOTE(review): presumably this keeps the loads at the positive offsets
        range_right and offset inside the subject - confirm.
        When match_end_ptr is in use, the original STR_END is saved in TMP3 and
        STR_END becomes the smaller of the lowered value and the value stored
        at match_end_ptr. */
4491 max -= 1;
4492 SLJIT_ASSERT(max > 0);
4493 if (common->match_end_ptr != 0)
4494 {
4495 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4496 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4497 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4498 quit = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
4499 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
4500 JUMPHERE(quit);
4501 }
4502 else
4503 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4504
4505 SLJIT_ASSERT(range_right >= 0);
4506
     /* Except on x86-32, the table address is kept in RETURN_ADDR while the
        loop runs; on x86-32 it is used as an immediate displacement instead. */
4507 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4508 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
4509 #endif
4510
     /* Main loop: stop when STR_PTR reaches STR_END, otherwise load one byte of
        the character at position range_right, look up its skip distance and
        add it to STR_PTR. A non-zero distance restarts the loop. */
4511 start = LABEL();
4512 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4513
     /* The two variants differ only in which byte of a multi-byte code unit is
        read. NOTE(review): presumably both select the least significant byte,
        matching the "& 0xff" used when the table was filled - confirm. */
4514 #if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
4515 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
4516 #else
4517 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
4518 #endif
4519
4520 #if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4521 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
4522 #else
4523 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
4524 #endif
4525 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4526 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
4527
     /* Optional exact test of the character at position offset. STR_PTR is
        advanced by one character before the compare, so a mismatch restarts
        the loop one character further on. */
4528 if (offset >= 0)
4529 {
4530 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
4531 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4532
4533 if (chars[offset * MAX_DIFF_CHARS] == 1)
4534 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1], start);
4535 else
4536 {
4537 mask = chars[offset * MAX_DIFF_CHARS + 1] ^ chars[offset * MAX_DIFF_CHARS + 2];
4538 if (is_powerof2(mask))
4539 {
     /* OR-ing the XOR mask into the loaded character maps both candidates
        to the same value, so a single compare is enough. */
4540 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
4541 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1] | mask, start);
4542 }
4543 else
4544 {
4545 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 1]);
4546 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset * MAX_DIFF_CHARS + 2], start);
4547 JUMPHERE(match);
4548 }
4549 }
4550 }
4551
     /* In UTF mode (8 and 16 bit libraries only) the code unit at the candidate
        start is inspected, except when offset is 0. If it is 0x80 after masking
        with 0xc0 (8 bit), or 0xdc00 after masking with 0xfc00 (16 bit), the
        loop is restarted with STR_PTR one code unit further on. */
4552 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
4553 if (common->utf && offset != 0)
4554 {
4555 if (offset < 0)
4556 {
4557 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4558 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4559 }
4560 else
4561 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4562 #if defined COMPILE_PCRE8
4563 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4564 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, start);
4565 #elif defined COMPILE_PCRE16
4566 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4567 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, start);
4568 #else
4569 #error "Unknown code width"
4570 #endif
4571 if (offset < 0)
4572 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4573 }
4574 #endif
4575
     /* Undo the one character advance made for the exact test above. */
4576 if (offset >= 0)
4577 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4578
4579 JUMPHERE(quit);
4580
     /* Restore STR_END: from TMP3 when match_end_ptr is in use, otherwise by
        adding back the characters subtracted before the loop. In the former
        case STR_PTR is also limited to the value stored at match_end_ptr.
        range_right is always >= 0 here (asserted above). */
4581 if (common->match_end_ptr != 0)
4582 {
4583 if (range_right >= 0)
4584 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4585 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4586 if (range_right >= 0)
4587 {
4588 quit = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4589 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
4590 JUMPHERE(quit);
4591 }
4592 }
4593 else
4594 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
4595 return TRUE;
4596 }
4597
/* MAX_N_CHARS and MAX_DIFF_CHARS are undefined from this point on, so the code that follows cannot use them. */
4598 #undef MAX_N_CHARS
4599 #undef MAX_DIFF_CHARS
4600
/* Emits the fast-forward search for a known first character by calling
   fast_forward_first_char2() with offset 0.

   first_char  the character a match has to start with
   caseless    when non-zero, the other case of first_char is accepted too

   The second character passed on (oc) equals first_char unless caseless is
   set. In that case it is looked up with TABLE_GET() in common->fcc, and, when
   built with SUPPORT_UCP for the 16/32 bit libraries, it is replaced by
   UCD_OTHERCASE(first_char) for characters above 127 in UTF mode. */
4601 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
4602 {
4603 pcre_uchar oc;
4604
4605 oc = first_char;
4606 if (caseless)
4607 {
4608 oc = TABLE_GET(first_char, common->fcc, first_char);
4609 #if defined SUPPORT_UCP && !defined COMPILE_PCRE8
4610 if (first_char > 127 && common->utf)
4611 oc = UCD_OTHERCASE(first_char);
4612 #endif
4613 }
4614
4615 fast_forward_first_char2(common, first_char, oc, 0);
4616 }
4617
/* Emits code that advances STR_PTR until it stands right after a newline, or
   until STR_END is reached. Nothing is done when STR_PTR is not beyond the
   "str" field of jit_arguments.

   When common->match_end_ptr is in use, STR_END is saved in TMP3, replaced by
   the value stored at match_end_ptr while searching, and restored at the end.

   Two code shapes are generated:
   - a dedicated loop for a fixed newline whose value is above 255 (its two
     characters are taken from the high and low byte of common->newline);
   - a generic loop built from read_char_range() and check_newlinechar() for
     everything else. The start label of this loop is also stored in
     common->ff_newline_shortcut. */
4618 static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
4619 {
4620 DEFINE_COMPILER;
4621 struct sljit_label *loop;
4622 struct sljit_jump *lastchar;
4623 struct sljit_jump *firstchar;
4624 struct sljit_jump *quit;
4625 struct sljit_jump *foundcr = NULL;
4626 struct sljit_jump *notfoundnl;
4627 jump_list *newline = NULL;
4628
4629 if (common->match_end_ptr != 0)
4630 {
4631 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
4632 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4633 }
4634
4635 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4636 {
     /* Skip the search when already at the end, or when STR_PTR is not
        beyond the "str" field of jit_arguments. */
4637 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4638 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4639 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4640 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4641 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4642
     /* Step STR_PTR back by one character, but only if it is at least two
        characters past "begin": TMP2 becomes 1 or 0 from the comparison and
        is scaled to the character size before being subtracted. */
4643 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
4644 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
4645 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
4646 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4647 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4648 #endif
4649 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4650
     /* Advance one character at a time until the two characters just before
        STR_PTR equal the two newline characters, or STR_END is reached. */
4651 loop = LABEL();
4652 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4653 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4654 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4655 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4656 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
4657 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
4658
4659 JUMPHERE(quit);
4660 JUMPHERE(firstchar);
4661 JUMPHERE(lastchar);
4662
4663 if (common->match_end_ptr != 0)
4664 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4665 return;
4666 }
4667
     /* Generic case. Nothing to do when STR_PTR is not beyond "str";
        otherwise step back one character (skip_char_back) before the loop. */
4668 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4669 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4670 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
4671 skip_char_back(common);
4672
4673 loop = LABEL();
4674 common->ff_newline_shortcut = loop;
4675
     /* Read the next character and leave the loop at the end of the subject.
        The jumps collected by check_newlinechar() in "newline" are bound to
        the loop start. NOTE(review): with FALSE as last argument these are
        presumably the "not a newline" exits - confirm. */
4676 read_char_range(common, common->nlmin, common->nlmax, TRUE);
4677 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4678 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4679 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4680 check_newlinechar(common, common->nltype, &newline, FALSE);
4681 set_jumps(newline, loop);
4682
4683 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
4684 {
     /* After a CR: if there is a following character and it is NL, STR_PTR
        is advanced by one more character (the comparison result, 0 or 1,
        is scaled to the character size and added). */
4685 quit = JUMP(SLJIT_JUMP);
4686 JUMPHERE(foundcr);
4687 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4688 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4689 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
4690 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4691 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4692 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4693 #endif
4694 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4695 JUMPHERE(notfoundnl);
4696 JUMPHERE(quit);
4697 }
4698 JUMPHERE(lastchar);
4699 JUMPHERE(firstchar);
4700
4701 if (common->match_end_ptr != 0)
4702 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
4703 }
4704
/* Forward declaration: check_class_ranges() is defined further below but is already called by fast_forward_start_bits(). */
4705 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
4706
/* Emits a loop that advances STR_PTR until the character it points to is
   accepted by the start_bits bitmap, or until STR_END is reached.

   start_bits  bitmap indexed by character value; byte [c >> 3], bit (c & 7).
               Its last bit (start_bits[31] & 0x80) is passed to
               check_class_ranges() as the nclass argument.

   The membership test is first attempted with check_class_ranges(). If that
   returns FALSE, a direct bitmap lookup is emitted instead. When
   common->match_end_ptr is in use, STR_END is saved in RETURN_ADDR, replaced
   by the value stored at match_end_ptr during the loop, and restored
   afterwards. */
4707 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
4708 {
4709 DEFINE_COMPILER;
4710 struct sljit_label *start;
4711 struct sljit_jump *quit;
4712 struct sljit_jump *found = NULL;
4713 jump_list *matches = NULL;
4714 #ifndef COMPILE_PCRE8
4715 struct sljit_jump *jump;
4716 #endif
4717
4718 if (common->match_end_ptr != 0)
4719 {
4720 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
4721 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
4722 }
4723
4724 start = LABEL();
4725 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4726 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
     /* In UTF mode the loaded code unit is kept in TMP3, because the tests
        below modify TMP1 and the value is needed again to skip a whole
        character. */
4727 #ifdef SUPPORT_UTF
4728 if (common->utf)
4729 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4730 #endif
4731
     /* invert is TRUE here, so the jumps collected in "matches" are bound to
        the exit label at the end of this function. */
4732 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
4733 {
     /* Bitmap lookup. For the 16/32 bit libraries values of 255 and above
        are first clamped to 255. */
4734 #ifndef COMPILE_PCRE8
4735 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
4736 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
4737 JUMPHERE(jump);
4738 #endif
     /* TMP2 = 1 << (c & 7), TMP1 = start_bits[c >> 3]; a non-zero AND means
        the character is in the set. */
4739 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4740 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4741 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
4742 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4743 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4744 found = JUMP(SLJIT_NOT_ZERO);
4745 }
4746
     /* Not accepted: move to the next character and try again. */
4747 #ifdef SUPPORT_UTF
4748 if (common->utf)
4749 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4750 #endif
4751 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4752 #ifdef SUPPORT_UTF
4753 #if defined COMPILE_PCRE8
4754 if (common->utf)
4755 {
     /* UTF-8: for a first byte of 0xc0 or above, the extra number of bytes
        to skip is read from utf8_table4, indexed by (byte - 0xc0). */
4756 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
4757 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4758 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4759 }
4760 #elif defined COMPILE_PCRE16
4761 if (common->utf)
4762 {
     /* UTF-16: for a unit of 0xd800 or above, two more bytes are skipped
        when (unit & 0xfc00) equals 0xd800. */
4763 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
4764 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4765 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4766 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4767 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4768 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4769 }
4770 #endif /* COMPILE_PCRE[8|16] */
4771 #endif /* SUPPORT_UTF */
4772 JUMPTO(SLJIT_JUMP, start);
     /* All exits (bitmap hit, range hit, end of subject) meet here. */
4773 if (found != NULL)
4774 JUMPHERE(found);
4775 if (matches != NULL)
4776 set_jumps(matches, LABEL());
4777 JUMPHERE(quit);
4778
4779 if (common->match_end_ptr != 0)
4780 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
4781 }
4782
/* Emits code that searches forward from STR_PTR for the required character
   and remembers where it was found in the stack slot common->req_char_ptr.

   req_char       the character that has to occur in the subject
   caseless       when non-zero, the other case of req_char is accepted too
   has_firstchar  when non-zero, the search starts one character after STR_PTR

   The search is skipped when STR_PTR + REQ_BYTE_MAX is still below STR_END,
   or when STR_PTR is below the position stored at req_char_ptr.

   Returns the jump that is taken when STR_END is reached without finding the
   character; binding it is left to the caller. TMP1 and TMP2 are used as
   scratch registers. */
4783 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
4784 {
4785 DEFINE_COMPILER;
4786 struct sljit_label *loop;
4787 struct sljit_jump *toolong;
4788 struct sljit_jump *alreadyfound;
4789 struct sljit_jump *found;
4790 struct sljit_jump *foundoc = NULL;
4791 struct sljit_jump *notfound;
4792 sljit_u32 oc, bit;
4793
4794 SLJIT_ASSERT(common->req_char_ptr != 0);
4795 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
     /* Note that REQ_BYTE_MAX is added as a plain byte offset here, it is not
        scaled with IN_UCHARS(). */
4796 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
4797 toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
4798 alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4799
     /* TMP1 is the search cursor. */
4800 if (has_firstchar)
4801 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4802 else
4803 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
4804
4805 loop = LABEL();
4806 notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);
4807
4808 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
     /* oc is the other case of req_char (or req_char itself). This is
        computed at JIT compile time; only the compares are emitted. */
4809 oc = req_char;
4810 if (caseless)
4811 {
4812 oc = TABLE_GET(req_char, common->fcc, req_char);
4813 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
4814 if (req_char > 127 && common->utf)
4815 oc = UCD_OTHERCASE(req_char);
4816 #endif
4817 }
4818 if (req_char == oc)
4819 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4820 else
4821 {
4822 bit = req_char ^ oc;
4823 if (is_powerof2(bit))
4824 {
     /* OR-ing the XOR of the two cases into the loaded character maps both
        of them to the same value, so one compare is enough. */
4825 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
4826 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
4827 }
4828 else
4829 {
4830 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
4831 foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
4832 }
4833 }
4834 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4835 JUMPTO(SLJIT_JUMP, loop);
4836
     /* Found: store the position of the character for later calls. */
4837 JUMPHERE(found);
4838 if (foundoc)
4839 JUMPHERE(foundoc);
4840 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
4841 JUMPHERE(alreadyfound);
4842 JUMPHERE(toolong);
4843 return notfound;
4844 }
4845
/* Emits a helper routine (entered with sljit_emit_fast_enter, return address in
   RETURN_ADDR) that walks the records found just below STACK_TOP and writes
   the saved values back into the local area addressed through TMP1.

   The word at STACK_TOP[-1] selects the record type:
   - positive: offset of a two word local; both words are restored from
     STACK_TOP[-2] and STACK_TOP[-3], then three words are dropped;
   - negative: its negation is the offset of a one word local, restored from
     STACK_TOP[-2], then two words are dropped;
   - zero: end of the records.

   STACK_TOP is only moved while walking; its entry value is kept in TMP3 and
   restored before returning. TMP1 and TMP2 are used as scratch registers. */
4846 static void do_revertframes(compiler_common *common)
4847 {
4848 DEFINE_COMPILER;
4849 struct sljit_jump *jump;
4850 struct sljit_label *mainloop;
4851
4852 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4853 OP1(SLJIT_MOV, TMP3, 0, STACK_TOP, 0);
4854 GET_LOCAL_BASE(TMP1, 0, 0);
4855
4856 /* Drop frames until we reach STACK_TOP. */
4857 mainloop = LABEL();
4858 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
     /* Signed compare: zero and negative values leave the main path. */
4859 jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4860
     /* Positive offset: restore two consecutive words. */
4861 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4862 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
4863 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -3 * sizeof(sljit_sw));
4864 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
4865 JUMPTO(SLJIT_JUMP, mainloop);
4866
     /* The value is known to be <= 0 here, so "not zero" means negative. */
4867 JUMPHERE(jump);
4868 jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
4869 /* End of reverting values. */
4870 OP1(SLJIT_MOV, STACK_TOP, 0, TMP3, 0);
4871 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4872
     /* Negative offset: negate it and restore a single word. */
4873 JUMPHERE(jump);
4874 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
4875 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4876 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
4877 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
4878 JUMPTO(SLJIT_JUMP, mainloop);
4879 }
4880
/* Emits a helper routine that tests whether STR_PTR is at a word boundary.
   The return address is kept in the stack slot LOCALS0.

   The routine computes a 0/1 "is a word character" value for the character
   before STR_PTR (stored in LOCALS1, 0 when STR_PTR is not beyond the "begin"
   field of jit_arguments) and for the character at STR_PTR (kept in TMP2, 0
   when check_str_end() reports the end). The last emitted operation XORs the
   two values with SLJIT_SET_Z, so the zero flag tells whether they are equal.

   Classification:
   - with common->use_ucp: underscore counts as a word character; otherwise the
     value produced by the getucd routine is range checked against
     ucp_Ll..ucp_Lu and ucp_Nd..ucp_No;
   - otherwise: bit 0x10 (ctype_word) of the common->ctypes table entry;
     characters above 255 are not looked up and stay 0. */
4881 static void check_wordboundary(compiler_common *common)
4882 {
4883 DEFINE_COMPILER;
4884 struct sljit_jump *skipread;
4885 jump_list *skipread_list = NULL;
4886 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
4887 struct sljit_jump *jump;
4888 #endif
4889
     /* The shift by 4 used below depends on this value. */
4890 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
4891
4892 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4893 /* Get type of the previous char, and put it to LOCALS1. */
4894 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4895 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4896 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
     /* No previous character: LOCALS1 keeps the 0 stored above. */
4897 skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
4898 skip_char_back(common);
4899 check_start_used_ptr(common);
4900 read_char(common);
4901
4902 /* Testing char type. */
4903 #ifdef SUPPORT_UCP
4904 if (common->use_ucp)
4905 {
     /* TMP2 = 1 is the result for an underscore. For any other character it
        is overwritten by the first range check and OR-ed with the second. */
4906 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4907 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4908 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4909 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4910 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4911 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
4912 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4913 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4914 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
4915 JUMPHERE(jump);
4916 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
4917 }
4918 else
4919 #endif
4920 {
4921 #ifndef COMPILE_PCRE8
4922 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4923 #elif defined SUPPORT_UTF
4924 /* Here LOCALS1 has already been zeroed. */
4925 jump = NULL;
4926 if (common->utf)
4927 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4928 #endif /* COMPILE_PCRE8 */
     /* Table lookup: (ctypes[c] >> 4) & 1 extracts the ctype_word bit. */
4929 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
4930 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
4931 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4932 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
4933 #ifndef COMPILE_PCRE8
4934 JUMPHERE(jump);
4935 #elif defined SUPPORT_UTF
4936 if (jump != NULL)
4937 JUMPHERE(jump);
4938 #endif /* COMPILE_PCRE8 */
4939 }
4940 JUMPHERE(skipread);
4941
     /* Now the character at STR_PTR; its result is built in TMP2. The jumps
        collected by check_str_end() skip the classification, leaving 0. */
4942 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4943 check_str_end(common, &skipread_list);
4944 peek_char(common, READ_CHAR_MAX);
4945
4946 /* Testing char type. This is a code duplication. */
4947 #ifdef SUPPORT_UCP
4948 if (common->use_ucp)
4949 {
4950 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
4951 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
4952 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4953 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
4954 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4955 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
4956 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
4957 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4958 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
4959 JUMPHERE(jump);
4960 }
4961 else
4962 #endif
4963 {
4964 #ifndef COMPILE_PCRE8
4965 /* TMP2 may be destroyed by peek_char. */
4966 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4967 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4968 #elif defined SUPPORT_UTF
4969 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4970 jump = NULL;
4971 if (common->utf)
4972 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
4973 #endif
4974 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
4975 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
4976 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4977 #ifndef COMPILE_PCRE8
4978 JUMPHERE(jump);
4979 #elif defined SUPPORT_UTF
4980 if (jump != NULL)
4981 JUMPHERE(jump);
4982 #endif /* COMPILE_PCRE8 */
4983 }
4984 set_jumps(skipread_list, LABEL());
4985
     /* Compare the two results; only the flags are produced. */
4986 OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
4987 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
4988 }
4989
/* Tries to replace a lookup in the 256 bit class bitmap by a few compare
   instructions on the character in TMP1.

   bits        the bitmap; bit (c & 7) of bits[c >> 3] belongs to character c
   nclass      tells how the range above 255 is treated: a closing boundary at
               256 is recorded when the last run of the bitmap is 0 and nclass
               is set, or when it is 1 and nclass is not set
   invert      selects the polarity of the emitted jumps: when FALSE they are
               taken if TMP1 is not in the set, when TRUE if it is in the set
   backtracks  list that receives the emitted jumps

   Returns TRUE when the test was emitted. Returns FALSE, without emitting
   anything, when the bitmap needs more than four boundaries (or more than
   MAX_RANGE_SIZE); the caller then has to test the bitmap in another way.

   The subtract-then-compare pairs below test "low <= c < high" with a single
   compare. NOTE(review): this relies on SLJIT_LESS / SLJIT_GREATER_EQUAL being
   unsigned conditions (the file uses SLJIT_SIG_* for signed ones) - confirm. */
4990 static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
4991 {
4992 /* May destroy TMP1. */
4993 DEFINE_COMPILER;
4994 int ranges[MAX_RANGE_SIZE];
4995 sljit_u8 bit, cbit, all;
4996 int i, byte, length = 0;
4997
4998 bit = bits[0] & 0x1;
4999 /* All bits will be zero or one (since bit is zero or one). */
5000 all = -bit;
5001
     /* Collect in ranges[] every character value at which the bitmap changes
        from 0 to 1 or back, starting from the value of bit 0. A whole byte
        that equals the current run value is skipped in one step. */
5002 for (i = 0; i < 256; )
5003 {
5004 byte = i >> 3;
5005 if ((i & 0x7) == 0 && bits[byte] == all)
5006 i += 8;
5007 else
5008 {
5009 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
5010 if (cbit != bit)
5011 {
5012 if (length >= MAX_RANGE_SIZE)
5013 return FALSE;
5014 ranges[length] = i;
5015 length++;
5016 bit = cbit;
5017 all = -cbit;
5018 }
5019 i++;
5020 }
5021 }
5022
     /* Close the last run at 256 when required by nclass (see above). */
5023 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
5024 {
5025 if (length >= MAX_RANGE_SIZE)
5026 return FALSE;
5027 ranges[length] = 256;
5028 length++;
5029 }
5030
     /* Only up to four boundaries are handled; length cannot be negative. */
5031 if (length < 0 || length > 4)
5032 return FALSE;
5033
     /* From here on bit is the (possibly inverted) state of character 0. */
5034 bit = bits[0] & 0x1;
5035 if (invert) bit ^= 0x1;
5036
5037 /* No character is accepted. */
5038 if (length == 0 && bit == 0)
5039 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5040
5041 switch(length)
5042 {
5043 case 0:
5044 /* When bit != 0, all characters are accepted. */
5045 return TRUE;
5046
5047 case 1:
     /* One boundary: a single compare against it. */
5048 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5049 return TRUE;
5050
5051 case 2:
     /* One interval [ranges[0], ranges[1]); a plain (in)equality test is
        used when the interval holds a single character. */
5052 if (ranges[0] + 1 != ranges[1])
5053 {
5054 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5055 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5056 }
5057 else
5058 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5059 return TRUE;
5060
5061 case 3:
     /* Three boundaries: one open ended part plus one interval. Which of
        them are the jumping ones depends on bit. */
5062 if (bit != 0)
5063 {
5064 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5065 if (ranges[0] + 1 != ranges[1])
5066 {
5067 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5068 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5069 }
5070 else
5071 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5072 return TRUE;
5073 }
5074
5075 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
5076 if (ranges[1] + 1 != ranges[2])
5077 {
5078 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
5079 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5080 }
5081 else
5082 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
5083 return TRUE;
5084
5085 case 4:
     /* Special case: two intervals of equal length whose start values
        differ in exactly one bit. OR-ing that bit into TMP1 folds the
        first interval onto the second, so one interval test suffices. */
5086 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
5087 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
5088 && (ranges[1] & (ranges[2] - ranges[0])) == 0
5089 && is_powerof2(ranges[2] - ranges[0]))
5090 {
5091 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
5092 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
5093 if (ranges[2] + 1 != ranges[3])
5094 {
5095 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
5096 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5097 }
5098 else
5099 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
5100 return TRUE;
5101 }
5102
5103 if (bit != 0)
5104 {
     /* Jump for either interval. i remembers how much has already been
        subtracted from TMP1 by the first test, so the second test can
        compensate for it. */
5105 i = 0;
5106 if (ranges[0] + 1 != ranges[1])
5107 {
5108 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5109 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5110 i = ranges[0];
5111 }
5112 else
5113 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
5114
5115 if (ranges[2] + 1 != ranges[3])
5116 {
5117 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
5118 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
5119 }
5120 else
5121 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
5122 return TRUE;
5123 }
5124
     /* Jump when outside [ranges[0], ranges[3]) or inside the gap
        [ranges[1], ranges[2]) between the two intervals. */
5125 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
5126 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
5127 if (ranges[1] + 1 != ranges[2])
5128 {
5129 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
5130 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
5131 }
5132 else
5133 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
5134 return TRUE;
5135
5136 default:
5137 SLJIT_UNREACHABLE();
5138 return FALSE;
5139 }
5140 }
5141
/* Emits a helper routine (return address in RETURN_ADDR) that tests the
   character in TMP1 against 0x0a..0x0d and 0x85, and additionally against
   0x2028 and 0x2029 in UTF mode or in the 16/32 bit libraries. The combined
   0/1 result is left in TMP2 and the final OP_FLAGS also sets the zero flag
   from it. */
5142 static void check_anynewline(compiler_common *common)
5143 {
5144 /* Check whether TMP1 contains a newline character. TMP2 is destroyed, and TMP1 is modified as well. */
5145 DEFINE_COMPILER;
5146
5147 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5148
     /* After the subtraction a single compare covers 0x0a..0x0d. */
5149 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5150 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5151 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5152 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5153 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5154 #ifdef COMPILE_PCRE8
5155 if (common->utf)
5156 {
5157 #endif
     /* Setting the lowest bit lets one compare match both 0x2028 and 0x2029. */
5158 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5159 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5160 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5161 #ifdef COMPILE_PCRE8
5162 }
5163 #endif
5164 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
     /* Merges the result of the last compare emitted above. */
5165 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5166 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5167 }
5168
/* Emits a helper routine (return address in RETURN_ADDR) that tests the
   character in TMP1 against the horizontal space characters 0x09, 0x20 and
   0xa0, and additionally, in UTF mode or in the 16/32 bit libraries, against
   0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000. The combined 0/1
   result is left in TMP2 and the final OP_FLAGS also sets the zero flag from
   it. */
5169 static void check_hspace(compiler_common *common)
5170 {
5171 /* Check whether TMP1 contains a horizontal space character. TMP2 is destroyed; TMP1 is modified when the wide character tests are emitted. */
5172 DEFINE_COMPILER;
5173
5174 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5175
5176 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
5177 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5178 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
5179 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5180 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
5181 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5182 #ifdef COMPILE_PCRE8
5183 if (common->utf)
5184 {
5185 #endif
5186 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5187 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
5188 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5189 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
5190 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
     /* TMP1 is rebased to 0x2000 here, so the remaining constants are
        written relative to that value. */
5191 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
5192 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
5193 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5194 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
5195 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5196 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
5197 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5198 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
5199 #ifdef COMPILE_PCRE8
5200 }
5201 #endif
5202 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
     /* Merges the result of the last compare emitted above. */
5203 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5204
5205 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5206 }
5207
/* Emits a helper routine (return address in RETURN_ADDR) that tests the
   character in TMP1 against the vertical space characters 0x0a..0x0d and 0x85,
   and additionally against 0x2028 and 0x2029 in UTF mode or in the 16/32 bit
   libraries. The combined 0/1 result is left in TMP2 and the final OP_FLAGS
   also sets the zero flag from it. The emitted sequence is the same as the one
   in check_anynewline(). */
5208 static void check_vspace(compiler_common *common)
5209 {
5210 /* Check whether TMP1 contains a vertical space character. TMP2 is destroyed, and TMP1 is modified as well. */
5211 DEFINE_COMPILER;
5212
5213 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5214
     /* After the subtraction a single compare covers 0x0a..0x0d. */
5215 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
5216 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
5217 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
5218 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
5219 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5220 #ifdef COMPILE_PCRE8
5221 if (common->utf)
5222 {
5223 #endif
     /* Setting the lowest bit lets one compare match both 0x2028 and 0x2029. */
5224 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5225 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
5226 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
5227 #ifdef COMPILE_PCRE8
5228 }
5229 #endif
5230 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
     /* Merges the result of the last compare emitted above. */
5231 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5232
5233 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
5234 }
5235
/* Emits a helper routine that compares two character sequences code unit by
   code unit, without case folding. The return address is kept in the stack
   slot LOCALS0.

   On entry TMP1 points to the first sequence and TMP2 holds the length in
   bytes. STR_PTR is first moved back by TMP2 and then serves as the pointer
   to the second sequence.
   NOTE(review): presumably the caller has already advanced STR_PTR by the
   length - confirm.

   The loop ends at the first differing pair or when TMP2 reaches zero. The
   routine returns through TMP1, which is reloaded from LOCALS0.
   NOTE(review): the caller presumably inspects the flags left by the loop to
   tell the two outcomes apart - confirm.

   Two scratch registers hold the loaded characters. If TMP3 is not backed by
   a machine register (sljit_get_register_index(TMP3) < 0), STR_END and
   STACK_TOP are borrowed instead; their values are parked in TMP3 and
   RETURN_ADDR and restored before returning. */
5236 static void do_casefulcmp(compiler_common *common)
5237 {
5238 DEFINE_COMPILER;
5239 struct sljit_jump *jump;
5240 struct sljit_label *label;
5241 int char1_reg;
5242 int char2_reg;
5243
5244 if (sljit_get_register_index(TMP3) < 0)
5245 {
5246 char1_reg = STR_END;
5247 char2_reg = STACK_TOP;
5248 }
5249 else
5250 {
5251 char1_reg = TMP3;
5252 char2_reg = RETURN_ADDR;
5253 }
5254
5255 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5256 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5257
5258 if (char1_reg == STR_END)
5259 {
5260 OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
5261 OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
5262 }
5263
     /* Three loop variants follow. The sljit_emit_mem() calls with
        SLJIT_MEM_SUPP are used as a test: the variant is chosen when the call
        returns SLJIT_SUCCESS.
        NOTE(review): presumably such a call only queries support and emits
        nothing - confirm against the sljit documentation. */
5264 if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5265 {
     /* Variant 1: loads with SLJIT_MEM_POST. */
5266 label = LABEL();
5267 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5268 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5269 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5270 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5271 JUMPTO(SLJIT_NOT_ZERO, label);
5272
5273 JUMPHERE(jump);
5274 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5275 }
5276 else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5277 {
     /* Variant 2: loads with SLJIT_MEM_PRE. Both pointers are moved back by
        one character before the loop, and STR_PTR is moved forward by one
        character again after it. */
5278 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5279 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5280
5281 label = LABEL();
5282 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5283 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5284 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5285 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5286 JUMPTO(SLJIT_NOT_ZERO, label);
5287
5288 JUMPHERE(jump);
5289 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5290 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5291 }
5292 else
5293 {
     /* Variant 3: plain loads followed by explicit pointer increments. */
5294 label = LABEL();
5295 OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
5296 OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
5297 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5298 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5299 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5300 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5301 JUMPTO(SLJIT_NOT_ZERO, label);
5302
5303 JUMPHERE(jump);
5304 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5305 }
5306
     /* Give the borrowed registers their original values back. */
5307 if (char1_reg == STR_END)
5308 {
5309 OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
5310 OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
5311 }
5312
5313 sljit_emit_fast_return(compiler, TMP1, 0);
5314 }
5315
5316 static void do_caselesscmp(compiler_common *common)
5317 {
5318 DEFINE_COMPILER;
5319 struct sljit_jump *jump;
5320 struct sljit_label *label;
5321 int char1_reg = STR_END;
5322 int char2_reg;
5323 int lcc_table;
5324 int opt_type = 0;
5325
5326 if (sljit_get_register_index(TMP3) < 0)
5327 {
5328 char2_reg = STACK_TOP;
5329 lcc_table = STACK_LIMIT;
5330 }
5331 else
5332 {
5333 char2_reg = RETURN_ADDR;
5334 lcc_table = TMP3;
5335 }
5336
5337 if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5338 opt_type = 1;
5339 else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
5340 opt_type = 2;
5341
5342 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5343 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5344
5345 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
5346
5347 if (char2_reg == STACK_TOP)
5348 {
5349 OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
5350 OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
5351 }
5352
5353 OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
5354
5355 if (opt_type == 1)
5356 {
5357 label = LABEL();
5358 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5359 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5360 }
5361 else if (opt_type == 2)
5362 {
5363 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5364 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5365
5366 label = LABEL();
5367 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
5368 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5369 }
5370 else
5371 {
5372 label = LABEL();
5373 OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
5374 OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
5375 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
5376 }
5377
5378 #ifndef COMPILE_PCRE8
5379 jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
5380 #endif
5381 OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
5382 #ifndef COMPILE_PCRE8
5383 JUMPHERE(jump);
5384 jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
5385 #endif
5386 OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
5387 #ifndef COMPILE_PCRE8
5388 JUMPHERE(jump);
5389 #endif
5390
5391 if (opt_type == 0)
5392 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5393
5394 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
5395 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5396 JUMPTO(SLJIT_NOT_ZERO, label);
5397
5398 JUMPHERE(jump);
5399 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
5400
5401 if (opt_type == 2)
5402 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5403
5404 if (char2_reg == STACK_TOP)
5405 {
5406 OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
5407 OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
5408 }
5409
5410 OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
5411 sljit_emit_fast_return(compiler, TMP1, 0);
5412 }
5413
5414 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5415
5416 static const pcre_uchar * SLJIT_FUNC do_utf_caselesscmp(pcre_uchar *src1, pcre_uchar *src2, pcre_uchar *end1, pcre_uchar *end2)
5417 {
5418 /* This function would be ineffective to do in JIT level. */
5419 sljit_u32 c1, c2;
5420 const ucd_record *ur;
5421 const sljit_u32 *pp;
5422
5423 while (src1 < end1)
5424 {
5425 if (src2 >= end2)
5426 return (pcre_uchar*)1;
5427 GETCHARINC(c1, src1);
5428 GETCHARINC(c2, src2);
5429 ur = GET_UCD(c2);
5430 if (c1 != c2 && c1 != c2 + ur->other_case)
5431 {
5432 pp = PRIV(ucd_caseless_sets) + ur->caseset;
5433 for (;;)
5434 {
5435 if (c1 < *pp) return NULL;
5436 if (c1 == *pp++) break;
5437 }
5438 }
5439 }
5440 return src2;
5441 }
5442
5443 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5444
5445 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
5446 compare_context *context, jump_list **backtracks)
5447 {
5448 DEFINE_COMPILER;
5449 unsigned int othercasebit = 0;
5450 pcre_uchar *othercasechar = NULL;
5451 #ifdef SUPPORT_UTF
5452 int utflength;
5453 #endif
5454
5455 if (caseless && char_has_othercase(common, cc))
5456 {
5457 othercasebit = char_get_othercase_bit(common, cc);
5458 SLJIT_ASSERT(othercasebit);
5459 /* Extracting bit difference info. */
5460 #if defined COMPILE_PCRE8
5461 othercasechar = cc + (othercasebit >> 8);
5462 othercasebit &= 0xff;
5463 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5464 /* Note that this code only handles characters in the BMP. If there
5465 ever are characters outside the BMP whose othercase differs in only one
5466 bit from itself (there currently are none), this code will need to be
5467 revised for COMPILE_PCRE32. */
5468 othercasechar = cc + (othercasebit >> 9);
5469 if ((othercasebit & 0x100) != 0)
5470 othercasebit = (othercasebit & 0xff) << 8;
5471 else
5472 othercasebit &= 0xff;
5473 #endif /* COMPILE_PCRE[8|16|32] */
5474 }
5475
5476 if (context->sourcereg == -1)
5477 {
5478 #if defined COMPILE_PCRE8
5479 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5480 if (context->length >= 4)
5481 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5482 else if (context->length >= 2)
5483 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5484 else
5485 #endif
5486 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5487 #elif defined COMPILE_PCRE16
5488 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5489 if (context->length >= 4)
5490 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5491 else
5492 #endif
5493 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5494 #elif defined COMPILE_PCRE32
5495 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
5496 #endif /* COMPILE_PCRE[8|16|32] */
5497 context->sourcereg = TMP2;
5498 }
5499
5500 #ifdef SUPPORT_UTF
5501 utflength = 1;
5502 if (common->utf && HAS_EXTRALEN(*cc))
5503 utflength += GET_EXTRALEN(*cc);
5504
5505 do
5506 {
5507 #endif
5508
5509 context->length -= IN_UCHARS(1);
5510 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5511
5512 /* Unaligned read is supported. */
5513 if (othercasebit != 0 && othercasechar == cc)
5514 {
5515 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
5516 context->oc.asuchars[context->ucharptr] = othercasebit;
5517 }
5518 else
5519 {
5520 context->c.asuchars[context->ucharptr] = *cc;
5521 context->oc.asuchars[context->ucharptr] = 0;
5522 }
5523 context->ucharptr++;
5524
5525 #if defined COMPILE_PCRE8
5526 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
5527 #else
5528 if (context->ucharptr >= 2 || context->length == 0)
5529 #endif
5530 {
5531 if (context->length >= 4)
5532 OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5533 else if (context->length >= 2)
5534 OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5535 #if defined COMPILE_PCRE8
5536 else if (context->length >= 1)
5537 OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5538 #endif /* COMPILE_PCRE8 */
5539 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5540
5541 switch(context->ucharptr)
5542 {
5543 case 4 / sizeof(pcre_uchar):
5544 if (context->oc.asint != 0)
5545 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
5546 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
5547 break;
5548
5549 case 2 / sizeof(pcre_uchar):
5550 if (context->oc.asushort != 0)
5551 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
5552 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
5553 break;
5554
5555 #ifdef COMPILE_PCRE8
5556 case 1:
5557 if (context->oc.asbyte != 0)
5558 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
5559 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
5560 break;
5561 #endif
5562
5563 default:
5564 SLJIT_UNREACHABLE();
5565 break;
5566 }
5567 context->ucharptr = 0;
5568 }
5569
5570 #else
5571
5572 /* Unaligned read is unsupported or in 32 bit mode. */
5573 if (context->length >= 1)
5574 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
5575
5576 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
5577
5578 if (othercasebit != 0 && othercasechar == cc)
5579 {
5580 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
5581 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
5582 }
5583 else
5584 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
5585
5586 #endif
5587
5588 cc++;
5589 #ifdef SUPPORT_UTF
5590 utflength--;
5591 }
5592 while (utflength > 0);
5593 #endif
5594
5595 return cc;
5596 }
5597
5598 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5599
/* The two macros below keep a register biased by a compile time constant, so
that range checks can be emitted as a single unsigned comparison. They emit
the add/sub which moves the register from the current bias to the requested
one (nothing is emitted when the bias is unchanged) and record the new bias.

SET_TYPE_OFFSET works on typereg / typeoffset, SET_CHAR_OFFSET on
TMP1 / charoffset; these names must be visible where the macro is used.

Each macro is a single statement (do ... while (0)), so it is safe in an
unbraced if / else, and its argument is evaluated exactly once. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    const sljit_uw new_typeoffset_ = (value); \
    if (new_typeoffset_ < typeoffset) \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, (sljit_sw)(typeoffset - new_typeoffset_)); \
    else if (new_typeoffset_ > typeoffset) \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (sljit_sw)(new_typeoffset_ - typeoffset)); \
    typeoffset = new_typeoffset_; \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    const sljit_uw new_charoffset_ = (value); \
    if (new_charoffset_ < charoffset) \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - new_charoffset_)); \
    else if (new_charoffset_ > charoffset) \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(new_charoffset_ - charoffset)); \
    charoffset = new_charoffset_; \
    } \
  while (0)
5619
5620 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);
5621
5622 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5623 {
5624 DEFINE_COMPILER;
5625 jump_list *found = NULL;
5626 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
5627 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
5628 struct sljit_jump *jump = NULL;
5629 pcre_uchar *ccbegin;
5630 int compares, invertcmp, numberofcmps;
5631 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
5632 BOOL utf = common->utf;
5633 #endif
5634
5635 #ifdef SUPPORT_UCP
5636 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
5637 BOOL charsaved = FALSE;
5638 int typereg = TMP1;
5639 const sljit_u32 *other_cases;
5640 sljit_uw typeoffset;
5641 #endif
5642
5643 /* Scanning the necessary info. */
5644 cc++;
5645 ccbegin = cc;
5646 compares = 0;
5647 if (cc[-1] & XCL_MAP)
5648 {
5649 min = 0;
5650 cc += 32 / sizeof(pcre_uchar);
5651 }
5652
5653 while (*cc != XCL_END)
5654 {
5655 compares++;
5656 if (*cc == XCL_SINGLE)
5657 {
5658 cc ++;
5659 GETCHARINCTEST(c, cc);
5660 if (c > max) max = c;
5661 if (c < min) min = c;
5662 #ifdef SUPPORT_UCP
5663 needschar = TRUE;
5664 #endif
5665 }
5666 else if (*cc == XCL_RANGE)
5667 {
5668 cc ++;
5669 GETCHARINCTEST(c, cc);
5670 if (c < min) min = c;
5671 GETCHARINCTEST(c, cc);
5672 if (c > max) max = c;
5673 #ifdef SUPPORT_UCP
5674 needschar = TRUE;
5675 #endif
5676 }
5677 #ifdef SUPPORT_UCP
5678 else
5679 {
5680 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5681 cc++;
5682 if (*cc == PT_CLIST)
5683 {
5684 other_cases = PRIV(ucd_caseless_sets) + cc[1];
5685 while (*other_cases != NOTACHAR)
5686 {
5687 if (*other_cases > max) max = *other_cases;
5688 if (*other_cases < min) min = *other_cases;
5689 other_cases++;
5690 }
5691 }
5692 else
5693 {
5694 max = READ_CHAR_MAX;
5695 min = 0;
5696 }
5697
5698 switch(*cc)
5699 {
5700 case PT_ANY:
5701 /* Any either accepts everything or ignored. */
5702 if (cc[-1] == XCL_PROP)
5703 {
5704 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
5705 if (list == backtracks)
5706 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5707 return;
5708 }
5709 break;
5710
5711 case PT_LAMP:
5712 case PT_GC:
5713 case PT_PC:
5714 case PT_ALNUM:
5715 needstype = TRUE;
5716 break;
5717
5718 case PT_SC:
5719 needsscript = TRUE;
5720 break;
5721
5722 case PT_SPACE:
5723 case PT_PXSPACE:
5724 case PT_WORD:
5725 case PT_PXGRAPH:
5726 case PT_PXPRINT:
5727 case PT_PXPUNCT:
5728 needstype = TRUE;
5729 needschar = TRUE;
5730 break;
5731
5732 case PT_CLIST:
5733 case PT_UCNC:
5734 needschar = TRUE;
5735 break;
5736
5737 default:
5738 SLJIT_UNREACHABLE();
5739 break;
5740 }
5741 cc += 2;
5742 }
5743 #endif
5744 }
5745 SLJIT_ASSERT(compares > 0);
5746
5747 /* We are not necessary in utf mode even in 8 bit mode. */
5748 cc = ccbegin;
5749 read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);
5750
5751 if ((cc[-1] & XCL_HASPROP) == 0)
5752 {
5753 if ((cc[-1] & XCL_MAP) != 0)
5754 {
5755 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5756 if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
5757 {
5758 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5759 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5760 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5761 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5762 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5763 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
5764 }
5765
5766 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5767 JUMPHERE(jump);
5768
5769 cc += 32 / sizeof(pcre_uchar);
5770 }
5771 else
5772 {
5773 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
5774 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
5775 }
5776 }
5777 else if ((cc[-1] & XCL_MAP) != 0)
5778 {
5779 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5780 #ifdef SUPPORT_UCP
5781 charsaved = TRUE;
5782 #endif
5783 if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
5784 {
5785 #ifdef COMPILE_PCRE8
5786 jump = NULL;
5787 if (common->utf)
5788 #endif
5789 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
5790
5791 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5792 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5793 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5794 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5795 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5796 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
5797
5798 #ifdef COMPILE_PCRE8
5799 if (common->utf)
5800 #endif
5801 JUMPHERE(jump);
5802 }
5803
5804 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5805 cc += 32 / sizeof(pcre_uchar);
5806 }
5807
5808 #ifdef SUPPORT_UCP
5809 if (needstype || needsscript)
5810 {
5811 if (needschar && !charsaved)
5812 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
5813
5814 #ifdef COMPILE_PCRE32
5815 if (!common->utf)
5816 {
5817 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
5818 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5819 JUMPHERE(jump);
5820 }
5821 #endif
5822
5823 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5824 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5825 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5826 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5827 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5828 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5829 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5830
5831 /* Before anything else, we deal with scripts. */
5832 if (needsscript)
5833 {
5834 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
5835 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5836
5837 ccbegin = cc;
5838
5839 while (*cc != XCL_END)
5840 {
5841 if (*cc == XCL_SINGLE)
5842 {
5843 cc ++;
5844 GETCHARINCTEST(c, cc);
5845 }
5846 else if (*cc == XCL_RANGE)
5847 {
5848 cc ++;
5849 GETCHARINCTEST(c, cc);
5850 GETCHARINCTEST(c, cc);
5851 }
5852 else
5853 {
5854 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5855 cc++;
5856 if (*cc == PT_SC)
5857 {
5858 compares--;
5859 invertcmp = (compares == 0 && list != backtracks);
5860 if (cc[-1] == XCL_NOTPROP)
5861 invertcmp ^= 0x1;
5862 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
5863 add_jump(compiler, compares > 0 ? list : backtracks, jump);
5864 }
5865 cc += 2;
5866 }
5867 }
5868
5869 cc = ccbegin;
5870 }
5871
5872 if (needschar)
5873 {
5874 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
5875 }
5876
5877 if (needstype)
5878 {
5879 if (!needschar)
5880 {
5881 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5882 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5883 }
5884 else
5885 {
5886 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
5887 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5888 typereg = RETURN_ADDR;
5889 }
5890 }
5891 }
5892 #endif
5893
5894 /* Generating code. */
5895 charoffset = 0;
5896 numberofcmps = 0;
5897 #ifdef SUPPORT_UCP
5898 typeoffset = 0;
5899 #endif
5900
5901 while (*cc != XCL_END)
5902 {
5903 compares--;
5904 invertcmp = (compares == 0 && list != backtracks);
5905 jump = NULL;
5906
5907 if (*cc == XCL_SINGLE)
5908 {
5909 cc ++;
5910 GETCHARINCTEST(c, cc);
5911
5912 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5913 {
5914 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5915 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5916 numberofcmps++;
5917 }
5918 else if (numberofcmps > 0)
5919 {
5920 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5921 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5922 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5923 numberofcmps = 0;
5924 }
5925 else
5926 {
5927 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5928 numberofcmps = 0;
5929 }
5930 }
5931 else if (*cc == XCL_RANGE)
5932 {
5933 cc ++;
5934 GETCHARINCTEST(c, cc);
5935 SET_CHAR_OFFSET(c);
5936 GETCHARINCTEST(c, cc);
5937
5938 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
5939 {
5940 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5941 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
5942 numberofcmps++;
5943 }
5944 else if (numberofcmps > 0)
5945 {
5946 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5947 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
5948 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5949 numberofcmps = 0;
5950 }
5951 else
5952 {
5953 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
5954 numberofcmps = 0;
5955 }
5956 }
5957 #ifdef SUPPORT_UCP
5958 else
5959 {
5960 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
5961 if (*cc == XCL_NOTPROP)
5962 invertcmp ^= 0x1;
5963 cc++;
5964 switch(*cc)
5965 {
5966 case PT_ANY:
5967 if (!invertcmp)
5968 jump = JUMP(SLJIT_JUMP);
5969 break;
5970
5971 case PT_LAMP:
5972 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
5973 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
5974 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
5975 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
5976 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
5977 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
5978 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
5979 break;
5980
5981 case PT_GC:
5982 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
5983 SET_TYPE_OFFSET(c);
5984 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
5985 break;
5986
5987 case PT_PC:
5988 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
5989 break;
5990
5991 case PT_SC:
5992 compares++;
5993 /* Do nothing. */
5994 break;
5995
5996 case PT_SPACE:
5997 case PT_PXSPACE:
5998 SET_CHAR_OFFSET(9);
5999 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
6000 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6001
6002 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
6003 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6004
6005 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
6006 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6007
6008 SET_TYPE_OFFSET(ucp_Zl);
6009 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
6010 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6011 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6012 break;
6013
6014 case PT_WORD:
6015 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
6016 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6017 /* Fall through. */
6018
6019 case PT_ALNUM:
6020 SET_TYPE_OFFSET(ucp_Ll);
6021 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
6022 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6023 SET_TYPE_OFFSET(ucp_Nd);
6024 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
6025 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6026 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6027 break;
6028
6029 case PT_CLIST:
6030 other_cases = PRIV(ucd_caseless_sets) + cc[1];
6031
6032 /* At least three characters are required.
6033 Otherwise this case would be handled by the normal code path. */
6034 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
6035 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
6036
6037 /* Optimizing character pairs, if their difference is power of 2. */
6038 if (is_powerof2(other_cases[1] ^ other_cases[0]))
6039 {
6040 if (charoffset == 0)
6041 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6042 else
6043 {
6044 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
6045 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6046 }
6047 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
6048 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6049 other_cases += 2;
6050 }
6051 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
6052 {
6053 if (charoffset == 0)
6054 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
6055 else
6056 {
6057 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
6058 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
6059 }
6060 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
6061 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6062
6063 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
6064 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6065
6066 other_cases += 3;
6067 }
6068 else
6069 {
6070 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6071 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6072 }
6073
6074 while (*other_cases != NOTACHAR)
6075 {
6076 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
6077 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
6078 }
6079 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6080 break;
6081
6082 case PT_UCNC:
6083 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
6084 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6085 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
6086 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6087 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
6088 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6089
6090 SET_CHAR_OFFSET(0xa0);
6091 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
6092 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6093 SET_CHAR_OFFSET(0);
6094 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
6095 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
6096 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6097 break;
6098
6099 case PT_PXGRAPH:
6100 /* C and Z groups are the farthest two groups. */
6101 SET_TYPE_OFFSET(ucp_Ll);
6102 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6103 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6104
6105 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6106
6107 /* In case of ucp_Cf, we overwrite the result. */
6108 SET_CHAR_OFFSET(0x2066);
6109 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6110 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6111
6112 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6113 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6114
6115 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
6116 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6117
6118 JUMPHERE(jump);
6119 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6120 break;
6121
6122 case PT_PXPRINT:
6123 /* C and Z groups are the farthest two groups. */
6124 SET_TYPE_OFFSET(ucp_Ll);
6125 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
6126 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
6127
6128 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
6129 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
6130
6131 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
6132
6133 /* In case of ucp_Cf, we overwrite the result. */
6134 SET_CHAR_OFFSET(0x2066);
6135 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
6136 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6137
6138 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
6139 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6140
6141 JUMPHERE(jump);
6142 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
6143 break;
6144
6145 case PT_PXPUNCT:
6146 SET_TYPE_OFFSET(ucp_Sc);
6147 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
6148 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6149
6150 SET_CHAR_OFFSET(0);
6151 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
6152 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
6153
6154 SET_TYPE_OFFSET(ucp_Pc);
6155 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
6156 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
6157 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
6158 break;
6159
6160 default:
6161 SLJIT_UNREACHABLE();
6162 break;
6163 }
6164 cc += 2;
6165 }
6166 #endif
6167
6168 if (jump != NULL)
6169 add_jump(compiler, compares > 0 ? list : backtracks, jump);
6170 }
6171
6172 if (found != NULL)
6173 set_jumps(found, LABEL());
6174 }
6175
6176 #undef SET_TYPE_OFFSET
6177 #undef SET_CHAR_OFFSET
6178
6179 #endif
6180
6181 static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
6182 {
6183 DEFINE_COMPILER;
6184 int length;
6185 struct sljit_jump *jump[4];
6186 #ifdef SUPPORT_UTF
6187 struct sljit_label *label;
6188 #endif /* SUPPORT_UTF */
6189
6190 switch(type)
6191 {
6192 case OP_SOD:
6193 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6194 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6195 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6196 return cc;
6197
6198 case OP_SOM:
6199 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6200 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6201 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
6202 return cc;
6203
6204 case OP_NOT_WORD_BOUNDARY:
6205 case OP_WORD_BOUNDARY:
6206 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
6207 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6208 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6209 return cc;
6210
6211 case OP_EODN:
6212 /* Requires rather complex checks. */
6213 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6214 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6215 {
6216 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6217 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6218 if (common->mode == JIT_COMPILE)
6219 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6220 else
6221 {
6222 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
6223 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6224 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
6225 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6226 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
6227 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
6228 check_partial(common, TRUE);
6229 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6230 JUMPHERE(jump[1]);
6231 }
6232 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6233 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6234 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6235 }
6236 else if (common->nltype == NLTYPE_FIXED)
6237 {
6238 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6239 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6240 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
6241 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
6242 }
6243 else
6244 {
6245 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6246 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6247 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6248 OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
6249 jump[2] = JUMP(SLJIT_GREATER);
6250 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
6251 /* Equal. */
6252 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6253 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6254 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6255
6256 JUMPHERE(jump[1]);
6257 if (common->nltype == NLTYPE_ANYCRLF)
6258 {
6259 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6260 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
6261 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
6262 }
6263 else
6264 {
6265 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
6266 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6267 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
6268 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
6269 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6270 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6271 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
6272 }
6273 JUMPHERE(jump[2]);
6274 JUMPHERE(jump[3]);
6275 }
6276 JUMPHERE(jump[0]);
6277 check_partial(common, FALSE);
6278 return cc;
6279
6280 case OP_EOD:
6281 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6282 check_partial(common, FALSE);
6283 return cc;
6284
6285 case OP_DOLL:
6286 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6287 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6288 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6289
6290 if (!common->endonly)
6291 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
6292 else
6293 {
6294 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
6295 check_partial(common, FALSE);
6296 }
6297 return cc;
6298
6299 case OP_DOLLM:
6300 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6301 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6302 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
6303 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6304 check_partial(common, FALSE);
6305 jump[0] = JUMP(SLJIT_JUMP);
6306 JUMPHERE(jump[1]);
6307
6308 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6309 {
6310 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6311 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
6312 if (common->mode == JIT_COMPILE)
6313 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
6314 else
6315 {
6316 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
6317 /* STR_PTR = STR_END - IN_UCHARS(1) */
6318 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6319 check_partial(common, TRUE);
6320 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6321 JUMPHERE(jump[1]);
6322 }
6323
6324 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
6325 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6326 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6327 }
6328 else
6329 {
6330 peek_char(common, common->nlmax);
6331 check_newlinechar(common, common->nltype, backtracks, FALSE);
6332 }
6333 JUMPHERE(jump[0]);
6334 return cc;
6335
6336 case OP_CIRC:
6337 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6338 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6339 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
6340 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6341 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6342 return cc;
6343
6344 case OP_CIRCM:
6345 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
6346 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
6347 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
6348 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
6349 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6350 jump[0] = JUMP(SLJIT_JUMP);
6351 JUMPHERE(jump[1]);
6352
6353 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6354 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6355 {
6356 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6357 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
6358 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6359 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6360 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
6361 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
6362 }
6363 else
6364 {
6365 skip_char_back(common);
6366 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6367 check_newlinechar(common, common->nltype, backtracks, FALSE);
6368 }
6369 JUMPHERE(jump[0]);
6370 return cc;
6371
6372 case OP_REVERSE:
6373 length = GET(cc, 0);
6374 if (length == 0)
6375 return cc + LINK_SIZE;
6376 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6377 #ifdef SUPPORT_UTF
6378 if (common->utf)
6379 {
6380 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6381 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
6382 label = LABEL();
6383 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
6384 skip_char_back(common);
6385 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6386 JUMPTO(SLJIT_NOT_ZERO, label);
6387 }
6388 else
6389 #endif
6390 {
6391 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6392 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6393 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
6394 }
6395 check_start_used_ptr(common);
6396 return cc + LINK_SIZE;
6397 }
6398 SLJIT_UNREACHABLE();
6399 return cc;
6400 }
6401
/* Emits the matching-path code for one single-character opcode.

The function dispatches on "type" and, through the OP1/OP2/CMP/JUMP emitter
macros, generates the test for that opcode. Every jump that represents a
failed match is registered on *backtracks with add_jump().

Arguments:
  common         compiler state; this function reads its utf, mode, newline,
                 nltype, nlmin/nlmax, bsr_* and ctypes fields and appends to
                 its hspace/vspace/getucd jump lists
  type           the opcode to compile
  cc             the opcode's operand data (the code below reads *cc, cc[0]
                 and cc[1] as data, so cc is already past the opcode itself)
  backtracks     list that receives the "no match" jumps
  check_str_ptr  when non-zero, detect_partial_match() is emitted before the
                 subject is read (presumably the end-of-subject / partial
                 match check -- confirm against its definition)

Returns:         cc advanced past the operand data used by the opcode; cc
                 unchanged for opcodes that have no operand data.
*/
6402 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
6403 {
6404 DEFINE_COMPILER;
6405 int length;
6406 unsigned int c, oc, bit;
6407 compare_context context;
6408 struct sljit_jump *jump[3];
6409 jump_list *end_list;
6410 #ifdef SUPPORT_UTF
6411 struct sljit_label *label;
6412 #ifdef SUPPORT_UCP
/* Scratch item list handed to compile_xclass_matchingpath() by OP_PROP. */
6413 pcre_uchar propdata[5];
6414 #endif
6415 #endif /* SUPPORT_UTF */
6416
6417 switch(type)
6418 {
6419 case OP_NOT_DIGIT:
6420 case OP_DIGIT:
6421 /* Digits are usually 0-9, so it is worth to optimize them. */
6422 if (check_str_ptr)
6423 detect_partial_match(common, backtracks);
/* In 8-bit UTF mode the char7 reader is chosen when is_char7_bitset()
accepts the digit bitmap; otherwise the char8 reader is used. */
6424 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6425 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
6426 read_char7_type(common, type == OP_NOT_DIGIT);
6427 else
6428 #endif
6429 read_char8_type(common, type == OP_NOT_DIGIT);
6430 /* Flip the starting bit in the negative case. */
/* Test the ctype_digit bit of TMP1: OP_DIGIT fails when it is clear,
OP_NOT_DIGIT fails when it is set. */
6431 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
6432 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6433 return cc;
6434
/* Same scheme as the digit case, using the space bitmap and ctype_space. */
6435 case OP_NOT_WHITESPACE:
6436 case OP_WHITESPACE:
6437 if (check_str_ptr)
6438 detect_partial_match(common, backtracks);
6439 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6440 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
6441 read_char7_type(common, type == OP_NOT_WHITESPACE);
6442 else
6443 #endif
6444 read_char8_type(common, type == OP_NOT_WHITESPACE);
6445 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
6446 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6447 return cc;
6448
/* Same scheme as the digit case, using the word bitmap and ctype_word. */
6449 case OP_NOT_WORDCHAR:
6450 case OP_WORDCHAR:
6451 if (check_str_ptr)
6452 detect_partial_match(common, backtracks);
6453 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6454 if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
6455 read_char7_type(common, type == OP_NOT_WORDCHAR);
6456 else
6457 #endif
6458 read_char8_type(common, type == OP_NOT_WORDCHAR);
6459 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
6460 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
6461 return cc;
6462
6463 case OP_ANY:
6464 if (check_str_ptr)
6465 detect_partial_match(common, backtracks);
6466 read_char_range(common, common->nlmin, common->nlmax, TRUE);
6467 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6468 {
/* Fixed newline made of two units (newline > 255). The unit just read is
compared with the high byte; only when it matches is the next subject unit
compared with the low byte, and only the complete pair backtracks. */
6469 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
6470 end_list = NULL;
/* No second unit to inspect when STR_PTR has reached STR_END; in hard
partial mode check_str_end() is used for that case instead. */
6471 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6472 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6473 else
6474 check_str_end(common, &end_list);
6475
6476 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6477 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
6478 set_jumps(end_list, LABEL());
6479 JUMPHERE(jump[0]);
6480 }
6481 else
6482 check_newlinechar(common, common->nltype, backtracks, TRUE);
6483 return cc;
6484
6485 case OP_ALLANY:
6486 if (check_str_ptr)
6487 detect_partial_match(common, backtracks);
6488 #ifdef SUPPORT_UTF
6489 if (common->utf)
6490 {
/* UTF: load the first code unit, step over it, then add the length of any
trailing units without decoding the character. */
6491 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6492 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6493 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6494 #if defined COMPILE_PCRE8
/* 8-bit: units below 0xc0 need no further skip; otherwise STR_PTR is
advanced by utf8_table4[unit - 0xc0]. */
6495 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6496 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6497 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6498 #elif defined COMPILE_PCRE16
/* 16-bit: units below 0xd800 need no further skip; otherwise TMP1 becomes 1
when (unit & 0xfc00) == 0xd800 and 0 if not, and twice that value is added
to STR_PTR. */
6499 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
6500 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
6501 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
6502 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6503 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6504 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6505 #endif
6506 JUMPHERE(jump[0]);
6507 #endif /* COMPILE_PCRE[8|16] */
6508 return cc;
6509 }
6510 #endif
/* Non-UTF: a character is always exactly one unit. */
6511 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6512 return cc;
6513
6514 case OP_ANYBYTE:
6515 if (check_str_ptr)
6516 detect_partial_match(common, backtracks);
6517 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6518 return cc;
6519
6520 #ifdef SUPPORT_UTF
6521 #ifdef SUPPORT_UCP
6522 case OP_NOTPROP:
6523 case OP_PROP:
/* Wrap the two operand units (cc[0], cc[1]) in a temporary item list and
let compile_xclass_matchingpath() emit the test. */
6524 propdata[0] = XCL_HASPROP;
6525 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
6526 propdata[2] = cc[0];
6527 propdata[3] = cc[1];
6528 propdata[4] = XCL_END;
6529 if (check_str_ptr)
6530 detect_partial_match(common, backtracks);
6531 compile_xclass_matchingpath(common, propdata, backtracks);
6532 return cc + 2;
6533 #endif
6534 #endif
6535
6536 case OP_ANYNL:
6537 if (check_str_ptr)
6538 detect_partial_match(common, backtracks);
6539 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
/* jump[0]: the character is not CR, so it goes to check_newlinechar() below.
For CR, a directly following NL is consumed as well; a CR followed by
anything else, or by the end of the subject, is accepted on its own. */
6540 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6541 /* We don't need to handle soft partial matching case. */
6542 end_list = NULL;
6543 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
6544 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
6545 else
6546 check_str_end(common, &end_list);
6547 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6548 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
6549 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6550 jump[2] = JUMP(SLJIT_JUMP);
6551 JUMPHERE(jump[0]);
6552 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
6553 set_jumps(end_list, LABEL());
6554 JUMPHERE(jump[1]);
6555 JUMPHERE(jump[2]);
6556 return cc;
6557
6558 case OP_NOT_HSPACE:
6559 case OP_HSPACE:
6560 if (check_str_ptr)
6561 detect_partial_match(common, backtracks);
6562 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
/* Fast call to the routine collected on common->hspace; the zero flag it
leaves behind selects match or backtrack. */
6563 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
6564 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6565 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6566 return cc;
6567
6568 case OP_NOT_VSPACE:
6569 case OP_VSPACE:
6570 if (check_str_ptr)
6571 detect_partial_match(common, backtracks);
6572 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
/* Same scheme as the horizontal space case, through common->vspace. */
6573 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
6574 sljit_set_current_flags(compiler, SLJIT_SET_Z);
6575 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
6576 return cc;
6577
6578 #ifdef SUPPORT_UCP
6579 case OP_EXTUNI:
6580 if (check_str_ptr)
6581 detect_partial_match(common, backtracks);
/* Read the first character and fetch the gbprop field of its ucd record
(presumably getucd leaves the record index in TMP2 -- confirm). */
6582 read_char(common);
6583 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6584 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6585 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and borrowed to hold the gbprop value of
the previous character for the duration of the loop. */
6586 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
6587 OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6588
/* Loop: keep consuming characters for as long as ucp_gbtable[previous] has
the bit numbered by the next character's gbprop set. TMP3 remembers where
the next character starts so that it can be given back. */
6589 label = LABEL();
6590 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6591 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6592 read_char(common);
6593 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
6594 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
6595 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
6596
/* TMP1 = 32-bit table entry of the previous property; the current property
becomes the previous one; TMP2 = 1 << current property. */
6597 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
6598 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
6599 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
6600 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6601 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6602 JUMPTO(SLJIT_NOT_ZERO, label);
6603
/* The last character read does not continue the sequence: rewind to its
start, then restore the borrowed STACK_TOP register. */
6604 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6605 JUMPHERE(jump[0]);
6606 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6607
6608 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
6609 {
6610 jump[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
6611 /* Since we successfully read a char above, partial matching must occur. */
6612 check_partial(common, TRUE);
6613 JUMPHERE(jump[0]);
6614 }
6615 return cc;
6616 #endif
6617
6618 case OP_CHAR:
6619 case OP_CHARI:
/* length = number of code units occupied by the literal character. */
6620 length = 1;
6621 #ifdef SUPPORT_UTF
6622 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
6623 #endif
/* Fast path, plain JIT_COMPILE mode with check_str_ptr set only: advance
STR_PTR by the whole length, backtrack if that passes STR_END, and let
byte_sequence_compare() emit the comparison. It is skipped for a caseless
character whose othercase bit is 0. */
6624 if (common->mode == JIT_COMPILE && check_str_ptr
6625 && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
6626 {
6627 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
6628 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6629
6630 context.length = IN_UCHARS(length);
6631 context.sourcereg = -1;
6632 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
6633 context.ucharptr = 0;
6634 #endif
6635 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
6636 }
6637
/* General path: read one subject character into TMP1 and compare it with
the pattern character c (and its other case oc when applicable). */
6638 if (check_str_ptr)
6639 detect_partial_match(common, backtracks);
6640 #ifdef SUPPORT_UTF
6641 if (common->utf)
6642 {
6643 GETCHAR(c, cc);
6644 }
6645 else
6646 #endif
6647 c = *cc;
6648
6649 if (type == OP_CHAR || !char_has_othercase(common, cc))
6650 {
6651 read_char_range(common, c, c, FALSE);
6652 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6653 return cc + length;
6654 }
6655 oc = char_othercase(common, c);
6656 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
/* When both cases differ in exactly one bit, forcing that bit on in the
subject character allows a single comparison against c | bit. */
6657 bit = c ^ oc;
6658 if (is_powerof2(bit))
6659 {
6660 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6661 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6662 return cc + length;
6663 }
/* Otherwise accept c directly and backtrack unless the character is oc. */
6664 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
6665 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6666 JUMPHERE(jump[0]);
6667 return cc + length;
6668
6669 case OP_NOT:
6670 case OP_NOTI:
6671 if (check_str_ptr)
6672 detect_partial_match(common, backtracks);
6673 length = 1;
6674 #ifdef SUPPORT_UTF
6675 if (common->utf)
6676 {
6677 #ifdef COMPILE_PCRE8
6678 c = *cc;
6679 if (c < 128)
6680 {
/* UTF-8 with a pattern character below 128: only the first subject byte has
to be compared, so the subject character is not decoded. */
6681 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6682 if (type == OP_NOT || !char_has_othercase(common, cc))
6683 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6684 else
6685 {
6686 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
6687 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
6688 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
6689 }
6690 /* Skip the variable-length character. */
/* One byte always; for a first byte of 0xc0 or above, utf8_table4[byte - 0xc0]
more. */
6691 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6692 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
6693 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
6694 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6695 JUMPHERE(jump[0]);
6696 return cc + 1;
6697 }
6698 else
6699 #endif /* COMPILE_PCRE8 */
6700 {
6701 GETCHARLEN(c, cc, length);
6702 }
6703 }
6704 else
6705 #endif /* SUPPORT_UTF */
6706 c = *cc;
6707
/* General path: read the subject character into TMP1 and backtrack when it
equals c (or, for a caseless character, either case of c). */
6708 if (type == OP_NOT || !char_has_othercase(common, cc))
6709 {
6710 read_char_range(common, c, c, TRUE);
6711 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6712 }
6713 else
6714 {
6715 oc = char_othercase(common, c);
6716 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
6717 bit = c ^ oc;
6718 if (is_powerof2(bit))
6719 {
6720 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
6721 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
6722 }
6723 else
6724 {
6725 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
6726 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
6727 }
6728 }
6729 return cc + length;
6730
6731 case OP_CLASS:
6732 case OP_NCLASS:
6733 if (check_str_ptr)
6734 detect_partial_match(common, backtracks);
6735
/* "bit" is reused here as the largest character value looked up in the
bitmap: 127 in UTF mode when is_char7_bitset() accepts the class, 255
otherwise. */
6736 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6737 bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
6738 read_char_range(common, 0, bit, type == OP_NCLASS);
6739 #else
6740 read_char_range(common, 0, 255, type == OP_NCLASS);
6741 #endif
6742
/* A non-zero result from check_class_ranges() ends the case here;
presumably it has already emitted the complete test -- confirm. */
6743 if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
6744 return cc + 32 / sizeof(pcre_uchar);
6745
/* Characters above the bitmap limit: OP_CLASS backtracks on them, OP_NCLASS
jumps over the bitmap test (jump[0]) and accepts them. */
6746 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
6747 jump[0] = NULL;
6748 if (common->utf)
6749 {
6750 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
6751 if (type == OP_CLASS)
6752 {
6753 add_jump(compiler, backtracks, jump[0]);
6754 jump[0] = NULL;
6755 }
6756 }
6757 #elif !defined COMPILE_PCRE8
6758 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6759 if (type == OP_CLASS)
6760 {
6761 add_jump(compiler, backtracks, jump[0]);
6762 jump[0] = NULL;
6763 }
6764 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
6765
/* Bitmap lookup: load the byte at cc + (ch >> 3) and test bit (ch & 7);
backtrack when that bit is clear. */
6766 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6767 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6768 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
6769 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6770 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6771 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
6772
6773 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6774 if (jump[0] != NULL)
6775 JUMPHERE(jump[0]);
6776 #endif
/* Step over the 32-byte bitmap, counted in pcre_uchar units. */
6777 return cc + 32 / sizeof(pcre_uchar);
6778
6779 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6780 case OP_XCLASS:
6781 if (check_str_ptr)
6782 detect_partial_match(common, backtracks);
/* The class data begins LINK_SIZE units into the operand; GET(cc, 0) is
used as the length to skip (the - 1 presumably accounts for the opcode
unit that cc is already past -- confirm). */
6783 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
6784 return cc + GET(cc, 0) - 1;
6785 #endif
6786 }
/* Every opcode handled above returns from inside the switch. */
6787 SLJIT_UNREACHABLE();
6788 return cc;
6789 }
6790
6791 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
6792 {
6793 /* This function consumes at least one input character. */
6794 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
6795 DEFINE_COMPILER;
6796 pcre_uchar *ccbegin = cc;
6797 compare_context context;
6798 int size;
6799
6800 context.length = 0;
6801 do
6802 {
6803 if (cc >= ccend)
6804 break;
6805
6806 if (*cc == OP_CHAR)
6807 {
6808 size = 1;
6809 #ifdef SUPPORT_UTF
6810 if (common->utf && HAS_EXTRALEN(cc[1]))
6811 size += GET_EXTRALEN(cc[1]);
6812 #endif
6813 }
6814 else if (*cc == OP_CHARI)
6815 {
6816 size = 1;
6817 #ifdef SUPPORT_UTF
6818 if (common->utf)
6819 {
6820 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
6821 size = 0;
6822 else if (HAS_EXTRALEN(cc[1]))
6823 size += GET_EXTRALEN(cc[1]);
6824 }
6825 else
6826 #endif
6827 if (char_has_othercase(common, cc + 1) && char_get_othercas