/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1426 - (show annotations)
Wed Jan 1 13:14:19 2014 UTC (5 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 323185 byte(s)
Warning fixes and code refactoring (renamings).
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. The value is a byte count (32768 = 32 KiB). */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for the stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries.
   Only defined when SLJIT_DEBUG is defined and non-zero. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block for the generated code: the jit_function type declared
   further down takes a pointer to this structure. Members are grouped by
   kind (all pointers first, then the integer and byte-sized members); keep
   that grouping when adding members. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
   /* Byte-sized members; NOTE(review): presumably boolean match options - confirm against the caller. */
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Holds one executable pointer and one size per JIT compile mode (both
   arrays have JIT_NUMBER_OF_COMPILE_MODES entries), together with a
   callback / userdata pair and two 32-bit values (top_bracket, limit_match). */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 pcre_uint32 limit_match;
186 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187 } executable_functions;
188
/* Singly linked list node holding one sljit jump. */
189 typedef struct jump_list {
190 struct sljit_jump *jump;
191 struct jump_list *next;
192 } jump_list;
193
/* Singly linked list node pairing a jump (start) with a label (quit).
   NOTE(review): presumably describes an out-of-line stub - confirm at the
   place where the list is consumed. */
194 typedef struct stub_list {
195 struct sljit_jump *start;
196 struct sljit_label *quit;
197 struct stub_list *next;
198 } stub_list;
199
/* Negative sentinel values. NOTE(review): presumably returned in place of
   a frame size (the *_backtrack structures document "less than 0" as
   "frame not needed") - confirm against get_framesize. */
200 enum frame_types {
201 no_frame = -1,
202 no_stack = -2
203 };
204
/* Tags for control entries. NOTE(review): presumably stored in the control
   verb chain (see control_head_ptr in compiler_common) - confirm. */
205 enum control_types {
206 type_mark = 0,
207 type_then_trap = 1
208 };
209
/* Signature of a generated function: takes the jit_arguments block and
   returns an int, using the SLJIT_CALL calling convention. */
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type for the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the arguments for compile_backtrackingpath. Must be the first member
215 of its descendants. */
/* Base record embedded as the first member ("common") of every
   *_backtrack structure in this file. */
216 typedef struct backtrack_common {
217 /* Concatenation stack: link to the previous record and its jump list. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
/* Backtrack record for assertions; extends backtrack_common. */
227 typedef struct assert_backtrack {
228 backtrack_common common;
   /* Jump list. NOTE(review): presumably the jumps taken when the assertion condition fails - confirm. */
229 jump_list *condfailed;
230 /* Less than 0 if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
/* Backtrack record for brackets; extends backtrack_common. The union u
   holds exactly one of its members, chosen by the bracket kind as noted
   on each member. */
238 typedef struct bracket_backtrack {
239 backtrack_common common;
240 /* Where to continue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. Less than 0 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
/* Backtrack record carrying a private data slot, a frame size and an
   allocated stack size; extends backtrack_common.
   NOTE(review): the name suggests the possessive (*POS) bracket opcodes - confirm. */
258 typedef struct bracketpos_backtrack {
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
/* Backtrack record that adds a single matching-path label to
   backtrack_common. NOTE(review): presumably used for OP_BRAMINZERO - confirm. */
268 typedef struct braminzero_backtrack {
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
/* Backtrack record for iterators; extends backtrack_common with the label
   of the next iteration. */
273 typedef struct iterator_backtrack {
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
/* Singly linked list node describing one recursion target. */
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls until the function is created. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 sljit_sw start;
287 } recurse_entry;
288
/* Backtrack record for recursion; extends backtrack_common with a flag.
   NOTE(review): inlined_pattern presumably tells whether the recursed
   pattern was compiled in place - confirm. */
289 typedef struct recurse_backtrack {
290 backtrack_common common;
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
/* Extra opcode value local to this file, set to OP_TABLE_LENGTH.
   NOTE(review): presumably one past the last real opcode - confirm. */
294 #define OP_THEN_TRAP OP_TABLE_LENGTH
295
/* Backtrack record for a THEN trap; extends backtrack_common. */
296 typedef struct then_trap_backtrack {
297 backtrack_common common;
298 /* If then_trap is not NULL, this structure contains the real
299 then_trap for the backtracking path. */
300 struct then_trap_backtrack *then_trap;
301 /* Points to the starting opcode. */
302 sljit_sw start;
303 /* Exit point for the then opcodes of this alternative. */
304 jump_list *quit;
305 /* Frame size of the current alternative. */
306 int framesize;
307 } then_trap_backtrack;
308
/* NOTE(review): the use of this limit is not visible in this part of the
   file; presumably an upper bound on a character-range table - confirm. */
309 #define MAX_RANGE_SIZE 4
310
/* Compilation state passed to the helper functions of this file. The int
   members whose names end in _ptr (and a few others such as hit_start) hold
   offsets of private data slots, not C pointers; check_opcode_types treats
   0 as "not assigned yet" for several of them. */
311 typedef struct compiler_common {
312 /* The sljit generic compiler. */
313 struct sljit_compiler *compiler;
314 /* First byte code. */
315 pcre_uchar *start;
316 /* Maps private data offset to each opcode. Indexed by the distance of
   the opcode from start (see the PRIVATE_DATA macro). */
317 sljit_si *private_data_ptrs;
318 /* Tells whether the capturing bracket is optimized. */
319 pcre_uint8 *optimized_cbracket;
320 /* Tells whether the starting offset is a target of then. */
321 pcre_uint8 *then_offsets;
322 /* Current position where a THEN must jump. */
323 then_trap_backtrack *then_trap;
324 /* Starting offset of private data for capturing brackets. */
325 int cbra_ptr;
326 /* Output vector starting point. Must be divisible by 2. */
327 int ovector_start;
328 /* Last known position of the requested byte. */
329 int req_char_ptr;
330 /* Head of the last recursion. */
331 int recursive_head_ptr;
332 /* First inspected character for partial matching. */
333 int start_used_ptr;
334 /* Starting pointer for partial soft matches. */
335 int hit_start;
336 /* End pointer of the first line. */
337 int first_line_end;
338 /* Points to the marked string. */
339 int mark_ptr;
340 /* Recursive control verb management chain. */
341 int control_head_ptr;
342 /* Points to the last matched capture block index. */
343 int capture_last_ptr;
344 /* Points to the starting position of the current match. */
345 int start_ptr;
346
347 /* Flipped and lower case tables. */
348 const pcre_uint8 *fcc;
349 sljit_sw lcc;
350 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351 int mode;
352 /* \K is found in the pattern. */
353 BOOL has_set_som;
354 /* (*SKIP:arg) is found in the pattern. */
355 BOOL has_skip_arg;
356 /* (*THEN) is found in the pattern. */
357 BOOL has_then;
358 /* Needs to know the start position anytime. */
359 BOOL needs_start_ptr;
360 /* Currently in recurse or negative assert. */
361 BOOL local_exit;
362 /* Currently in a positive assert. */
363 BOOL positive_assert;
364 /* Newline control. */
365 int nltype;
366 pcre_uint32 nlmax;
367 pcre_uint32 nlmin;
368 int newline;
369 int bsr_nltype;
370 pcre_uint32 bsr_nlmax;
371 pcre_uint32 bsr_nlmin;
372 /* Dollar endonly. */
373 int endonly;
374 /* Tables. */
375 sljit_sw ctypes;
376 /* Named capturing brackets. */
377 pcre_uchar *name_table;
378 sljit_sw name_count;
379 sljit_sw name_entry_size;
380
381 /* Labels and jump lists. */
382 struct sljit_label *partialmatchlabel;
383 struct sljit_label *quit_label;
384 struct sljit_label *forced_quit_label;
385 struct sljit_label *accept_label;
386 stub_list *stubs;
387 recurse_entry *entries;
388 recurse_entry *currententry;
389 jump_list *partialmatch;
390 jump_list *quit;
391 jump_list *positive_assert_quit;
392 jump_list *forced_quit;
393 jump_list *accept;
394 jump_list *calllimit;
395 jump_list *stackalloc;
396 jump_list *revertframes;
397 jump_list *wordboundary;
398 jump_list *anynewline;
399 jump_list *hspace;
400 jump_list *vspace;
401 jump_list *casefulcmp;
402 jump_list *caselesscmp;
403 jump_list *reset_match;
404 BOOL jscript_compat;
   /* The members below exist only in builds with UTF / UCP support. */
405 #ifdef SUPPORT_UTF
406 BOOL utf;
407 #ifdef SUPPORT_UCP
408 BOOL use_ucp;
409 #endif
410 #ifdef COMPILE_PCRE8
411 jump_list *utfreadchar;
412 jump_list *utfreadchar16;
413 jump_list *utfreadtype8;
414 #endif
415 #endif /* SUPPORT_UTF */
416 #ifdef SUPPORT_UCP
417 jump_list *getucd;
418 #endif
419 } compiler_common;
420
421 /* For byte_sequence_compare. */
422
/* Context for byte_sequence_compare. When SLJIT_UNALIGNED is defined and
   non-zero it additionally carries two unions (c and oc) that overlay the
   same storage as an int, a 16-bit value, and an array of code units whose
   element type depends on the compiled code-unit width.
   NOTE(review): presumably c holds the characters and oc their other-case
   variants - confirm in byte_sequence_compare. */
423 typedef struct compare_context {
424 int length;
425 int sourcereg;
426 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
427 int ucharptr;
428 union {
429 sljit_si asint;
430 sljit_uh asushort;
431 #if defined COMPILE_PCRE8
432 sljit_ub asbyte;
433 sljit_ub asuchars[4];
434 #elif defined COMPILE_PCRE16
435 sljit_uh asuchars[2];
436 #elif defined COMPILE_PCRE32
437 sljit_ui asuchars[1];
438 #endif
439 } c;
440 union {
441 sljit_si asint;
442 sljit_uh asushort;
443 #if defined COMPILE_PCRE8
444 sljit_ub asbyte;
445 sljit_ub asuchars[4];
446 #elif defined COMPILE_PCRE16
447 sljit_uh asuchars[2];
448 #elif defined COMPILE_PCRE32
449 sljit_ui asuchars[1];
450 #endif
451 } oc;
452 #endif
453 } compare_context;
454
455 /* Undefine sljit macros. CMP is defined again below with this file's own
   meaning (a shortcut for sljit_emit_cmp). */
456 #undef CMP
457
458 /* Used for accessing the elements of the stack. Yields a negative byte
   offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw), ... */
459 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
460
   /* Symbolic names for the sljit registers used by the generated code. */
461 #define TMP1 SLJIT_SCRATCH_REG1
462 #define TMP2 SLJIT_SCRATCH_REG3
463 #define TMP3 SLJIT_TEMPORARY_EREG2
464 #define STR_PTR SLJIT_SAVED_REG1
465 #define STR_END SLJIT_SAVED_REG2
466 #define STACK_TOP SLJIT_SCRATCH_REG2
467 #define STACK_LIMIT SLJIT_SAVED_REG3
468 #define ARGUMENTS SLJIT_SAVED_EREG1
469 #define COUNT_MATCH SLJIT_SAVED_EREG2
470 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
471
472 /* Local space layout. All values are byte offsets in units of
   sizeof(sljit_sw). */
473 /* These two locals can be used by the current opcode. */
474 #define LOCALS0 (0 * sizeof(sljit_sw))
475 #define LOCALS1 (1 * sizeof(sljit_sw))
476 /* Two local variables for possessive quantifiers (char1 cannot use them). */
477 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
478 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
479 /* Max limit of recursions. */
480 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
481 /* The output vector is stored on the stack, and contains pointers
482 to characters. The vector data is divided into two groups: the first
483 group contains the start / end character pointers, and the second is
484 the start pointers when the end of the capturing group has not yet been
   reached. These macros expect a variable named common in scope. */
485 #define OVECTOR_START (common->ovector_start)
486 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
487 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
   /* Private data entry of the opcode at cc, indexed by its distance from common->start. */
488 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
489
/* Move opcodes matching the code-unit width of the build: UB for
   COMPILE_PCRE8, UH for COMPILE_PCRE16, UI for COMPILE_PCRE32. Any other
   configuration is a compile-time error. */
490 #if defined COMPILE_PCRE8
491 #define MOV_UCHAR SLJIT_MOV_UB
492 #define MOVU_UCHAR SLJIT_MOVU_UB
493 #elif defined COMPILE_PCRE16
494 #define MOV_UCHAR SLJIT_MOV_UH
495 #define MOVU_UCHAR SLJIT_MOVU_UH
496 #elif defined COMPILE_PCRE32
497 #define MOV_UCHAR SLJIT_MOV_UI
498 #define MOVU_UCHAR SLJIT_MOVU_UI
499 #else
500 #error Unsupported compiling mode
501 #endif
502
503 /* Shortcuts. Every emitter macro below expands to an sljit call on a
   local variable named compiler; DEFINE_COMPILER declares that variable
   from common->compiler, so it must appear first in a function using them. */
504 #define DEFINE_COMPILER \
505 struct sljit_compiler *compiler = common->compiler
506 #define OP1(op, dst, dstw, src, srcw) \
507 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
508 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
509 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
510 #define LABEL() \
511 sljit_emit_label(compiler)
512 #define JUMP(type) \
513 sljit_emit_jump(compiler, (type))
   /* Emits a jump and binds it to an existing label. */
514 #define JUMPTO(type, label) \
515 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
   /* Binds an earlier jump to a label emitted at the current position. */
516 #define JUMPHERE(jump) \
517 sljit_set_label((jump), sljit_emit_label(compiler))
518 #define SET_LABEL(jump, label) \
519 sljit_set_label((jump), (label))
520 #define CMP(type, src1, src1w, src2, src2w) \
521 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
   /* Emits a compare-and-jump and binds it to an existing label. */
522 #define CMPTO(type, src1, src1w, src2, src2w, label) \
523 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
524 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
525 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
526 #define GET_LOCAL_BASE(dst, dstw, offset) \
527 sljit_get_local_base(compiler, (dst), (dstw), (offset))
528
   /* Largest positive 32-bit signed value. NOTE(review): presumably means
   "no upper bound" for character reads - confirm at the use sites. */
529 #define READ_CHAR_MAX 0x7fffffff
530
531 static pcre_uchar* bracketend(pcre_uchar* cc)
532 {
533 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
534 do cc += GET(cc, 1); while (*cc == OP_ALT);
535 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
536 cc += 1 + LINK_SIZE;
537 return cc;
538 }
539
/* Number of set bits in each 4-bit value: ones_in_half_byte[v] is the
   population count of v for v in 0..15. The table is never meant to be
   written, so it is const-qualified. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
544
545 /* Functions which might need modification for all new supported opcodes:
546 next_opcode
547 check_opcode_types
548 set_private_data_ptrs
549 get_framesize
550 init_frame
551 get_private_data_copy_length
552 copy_private_data
553 compile_matchingpath
554 compile_backtrackingpath
555 */
556
557 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
558 {
559 SLJIT_UNUSED_ARG(common);
560 switch(*cc)
561 {
562 case OP_SOD:
563 case OP_SOM:
564 case OP_SET_SOM:
565 case OP_NOT_WORD_BOUNDARY:
566 case OP_WORD_BOUNDARY:
567 case OP_NOT_DIGIT:
568 case OP_DIGIT:
569 case OP_NOT_WHITESPACE:
570 case OP_WHITESPACE:
571 case OP_NOT_WORDCHAR:
572 case OP_WORDCHAR:
573 case OP_ANY:
574 case OP_ALLANY:
575 case OP_NOTPROP:
576 case OP_PROP:
577 case OP_ANYNL:
578 case OP_NOT_HSPACE:
579 case OP_HSPACE:
580 case OP_NOT_VSPACE:
581 case OP_VSPACE:
582 case OP_EXTUNI:
583 case OP_EODN:
584 case OP_EOD:
585 case OP_CIRC:
586 case OP_CIRCM:
587 case OP_DOLL:
588 case OP_DOLLM:
589 case OP_CRSTAR:
590 case OP_CRMINSTAR:
591 case OP_CRPLUS:
592 case OP_CRMINPLUS:
593 case OP_CRQUERY:
594 case OP_CRMINQUERY:
595 case OP_CRRANGE:
596 case OP_CRMINRANGE:
597 case OP_CRPOSSTAR:
598 case OP_CRPOSPLUS:
599 case OP_CRPOSQUERY:
600 case OP_CRPOSRANGE:
601 case OP_CLASS:
602 case OP_NCLASS:
603 case OP_REF:
604 case OP_REFI:
605 case OP_DNREF:
606 case OP_DNREFI:
607 case OP_RECURSE:
608 case OP_CALLOUT:
609 case OP_ALT:
610 case OP_KET:
611 case OP_KETRMAX:
612 case OP_KETRMIN:
613 case OP_KETRPOS:
614 case OP_REVERSE:
615 case OP_ASSERT:
616 case OP_ASSERT_NOT:
617 case OP_ASSERTBACK:
618 case OP_ASSERTBACK_NOT:
619 case OP_ONCE:
620 case OP_ONCE_NC:
621 case OP_BRA:
622 case OP_BRAPOS:
623 case OP_CBRA:
624 case OP_CBRAPOS:
625 case OP_COND:
626 case OP_SBRA:
627 case OP_SBRAPOS:
628 case OP_SCBRA:
629 case OP_SCBRAPOS:
630 case OP_SCOND:
631 case OP_CREF:
632 case OP_DNCREF:
633 case OP_RREF:
634 case OP_DNRREF:
635 case OP_DEF:
636 case OP_BRAZERO:
637 case OP_BRAMINZERO:
638 case OP_BRAPOSZERO:
639 case OP_PRUNE:
640 case OP_SKIP:
641 case OP_THEN:
642 case OP_COMMIT:
643 case OP_FAIL:
644 case OP_ACCEPT:
645 case OP_ASSERT_ACCEPT:
646 case OP_CLOSE:
647 case OP_SKIPZERO:
648 return cc + PRIV(OP_lengths)[*cc];
649
650 case OP_CHAR:
651 case OP_CHARI:
652 case OP_NOT:
653 case OP_NOTI:
654 case OP_STAR:
655 case OP_MINSTAR:
656 case OP_PLUS:
657 case OP_MINPLUS:
658 case OP_QUERY:
659 case OP_MINQUERY:
660 case OP_UPTO:
661 case OP_MINUPTO:
662 case OP_EXACT:
663 case OP_POSSTAR:
664 case OP_POSPLUS:
665 case OP_POSQUERY:
666 case OP_POSUPTO:
667 case OP_STARI:
668 case OP_MINSTARI:
669 case OP_PLUSI:
670 case OP_MINPLUSI:
671 case OP_QUERYI:
672 case OP_MINQUERYI:
673 case OP_UPTOI:
674 case OP_MINUPTOI:
675 case OP_EXACTI:
676 case OP_POSSTARI:
677 case OP_POSPLUSI:
678 case OP_POSQUERYI:
679 case OP_POSUPTOI:
680 case OP_NOTSTAR:
681 case OP_NOTMINSTAR:
682 case OP_NOTPLUS:
683 case OP_NOTMINPLUS:
684 case OP_NOTQUERY:
685 case OP_NOTMINQUERY:
686 case OP_NOTUPTO:
687 case OP_NOTMINUPTO:
688 case OP_NOTEXACT:
689 case OP_NOTPOSSTAR:
690 case OP_NOTPOSPLUS:
691 case OP_NOTPOSQUERY:
692 case OP_NOTPOSUPTO:
693 case OP_NOTSTARI:
694 case OP_NOTMINSTARI:
695 case OP_NOTPLUSI:
696 case OP_NOTMINPLUSI:
697 case OP_NOTQUERYI:
698 case OP_NOTMINQUERYI:
699 case OP_NOTUPTOI:
700 case OP_NOTMINUPTOI:
701 case OP_NOTEXACTI:
702 case OP_NOTPOSSTARI:
703 case OP_NOTPOSPLUSI:
704 case OP_NOTPOSQUERYI:
705 case OP_NOTPOSUPTOI:
706 cc += PRIV(OP_lengths)[*cc];
707 #ifdef SUPPORT_UTF
708 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
709 #endif
710 return cc;
711
712 /* Special cases. */
713 case OP_TYPESTAR:
714 case OP_TYPEMINSTAR:
715 case OP_TYPEPLUS:
716 case OP_TYPEMINPLUS:
717 case OP_TYPEQUERY:
718 case OP_TYPEMINQUERY:
719 case OP_TYPEUPTO:
720 case OP_TYPEMINUPTO:
721 case OP_TYPEEXACT:
722 case OP_TYPEPOSSTAR:
723 case OP_TYPEPOSPLUS:
724 case OP_TYPEPOSQUERY:
725 case OP_TYPEPOSUPTO:
726 return cc + PRIV(OP_lengths)[*cc] - 1;
727
728 case OP_ANYBYTE:
729 #ifdef SUPPORT_UTF
730 if (common->utf) return NULL;
731 #endif
732 return cc + 1;
733
734 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
735 case OP_XCLASS:
736 return cc + GET(cc, 1);
737 #endif
738
739 case OP_MARK:
740 case OP_PRUNE_ARG:
741 case OP_SKIP_ARG:
742 case OP_THEN_ARG:
743 return cc + 1 + 2 + cc[1];
744
745 default:
746 /* All opcodes are supported now! */
747 SLJIT_ASSERT_STOP();
748 return NULL;
749 }
750 }
751
752 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
753 {
754 int count;
755 pcre_uchar *slot;
756
757 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
758 while (cc < ccend)
759 {
760 switch(*cc)
761 {
762 case OP_SET_SOM:
763 common->has_set_som = TRUE;
764 cc += 1;
765 break;
766
767 case OP_REF:
768 case OP_REFI:
769 common->optimized_cbracket[GET2(cc, 1)] = 0;
770 cc += 1 + IMM2_SIZE;
771 break;
772
773 case OP_CBRAPOS:
774 case OP_SCBRAPOS:
775 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
776 cc += 1 + LINK_SIZE + IMM2_SIZE;
777 break;
778
779 case OP_COND:
780 case OP_SCOND:
781 /* Only AUTO_CALLOUT can insert this opcode. We do
782 not intend to support this case. */
783 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
784 return FALSE;
785 cc += 1 + LINK_SIZE;
786 break;
787
788 case OP_CREF:
789 common->optimized_cbracket[GET2(cc, 1)] = 0;
790 cc += 1 + IMM2_SIZE;
791 break;
792
793 case OP_DNREF:
794 case OP_DNREFI:
795 case OP_DNCREF:
796 count = GET2(cc, 1 + IMM2_SIZE);
797 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
798 while (count-- > 0)
799 {
800 common->optimized_cbracket[GET2(slot, 0)] = 0;
801 slot += common->name_entry_size;
802 }
803 cc += 1 + 2 * IMM2_SIZE;
804 break;
805
806 case OP_RECURSE:
807 /* Set its value only once. */
808 if (common->recursive_head_ptr == 0)
809 {
810 common->recursive_head_ptr = common->ovector_start;
811 common->ovector_start += sizeof(sljit_sw);
812 }
813 cc += 1 + LINK_SIZE;
814 break;
815
816 case OP_CALLOUT:
817 if (common->capture_last_ptr == 0)
818 {
819 common->capture_last_ptr = common->ovector_start;
820 common->ovector_start += sizeof(sljit_sw);
821 }
822 cc += 2 + 2 * LINK_SIZE;
823 break;
824
825 case OP_THEN_ARG:
826 common->has_then = TRUE;
827 common->control_head_ptr = 1;
828 /* Fall through. */
829
830 case OP_PRUNE_ARG:
831 common->needs_start_ptr = TRUE;
832 /* Fall through. */
833
834 case OP_MARK:
835 if (common->mark_ptr == 0)
836 {
837 common->mark_ptr = common->ovector_start;
838 common->ovector_start += sizeof(sljit_sw);
839 }
840 cc += 1 + 2 + cc[1];
841 break;
842
843 case OP_THEN:
844 common->has_then = TRUE;
845 common->control_head_ptr = 1;
846 /* Fall through. */
847
848 case OP_PRUNE:
849 case OP_SKIP:
850 common->needs_start_ptr = TRUE;
851 cc += 1;
852 break;
853
854 case OP_SKIP_ARG:
855 common->control_head_ptr = 1;
856 common->has_skip_arg = TRUE;
857 cc += 1 + 2 + cc[1];
858 break;
859
860 default:
861 cc = next_opcode(common, cc);
862 if (cc == NULL)
863 return FALSE;
864 break;
865 }
866 }
867 return TRUE;
868 }
869
870 static int get_class_iterator_size(pcre_uchar *cc)
871 {
872 switch(*cc)
873 {
874 case OP_CRSTAR:
875 case OP_CRPLUS:
876 return 2;
877
878 case OP_CRMINSTAR:
879 case OP_CRMINPLUS:
880 case OP_CRQUERY:
881 case OP_CRMINQUERY:
882 return 1;
883
884 case OP_CRRANGE:
885 case OP_CRMINRANGE:
886 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
887 return 0;
888 return 2;
889
890 default:
891 return 0;
892 }
893 }
894
895 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
896 {
897 pcre_uchar *end = bracketend(begin);
898 pcre_uchar *next;
899 pcre_uchar *next_end;
900 pcre_uchar *max_end;
901 pcre_uchar type;
902 sljit_sw length = end - begin;
903 int min, max, i;
904
905 /* Detect fixed iterations first. */
906 if (end[-(1 + LINK_SIZE)] != OP_KET)
907 return FALSE;
908
909 /* Already detected repeat. */
910 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
911 return TRUE;
912
913 next = end;
914 min = 1;
915 while (1)
916 {
917 if (*next != *begin)
918 break;
919 next_end = bracketend(next);
920 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
921 break;
922 next = next_end;
923 min++;
924 }
925
926 if (min == 2)
927 return FALSE;
928
929 max = 0;
930 max_end = next;
931 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
932 {
933 type = *next;
934 while (1)
935 {
936 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
937 break;
938 next_end = bracketend(next + 2 + LINK_SIZE);
939 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
940 break;
941 next = next_end;
942 max++;
943 }
944
945 if (next[0] == type && next[1] == *begin && max >= 1)
946 {
947 next_end = bracketend(next + 1);
948 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
949 {
950 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
951 if (*next_end != OP_KET)
952 break;
953
954 if (i == max)
955 {
956 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
957 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
958 /* +2 the original and the last. */
959 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
960 if (min == 1)
961 return TRUE;
962 min--;
963 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
964 }
965 }
966 }
967 }
968
969 if (min >= 3)
970 {
971 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
972 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
973 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
974 return TRUE;
975 }
976
977 return FALSE;
978 }
979
/* Groups of case labels for use inside a switch on an opcode. Each macro
   expands to a run of "case X:" labels with no statement, so the code that
   follows the macro is the shared handler. In set_private_data_ptrs the _1
   groups request one private data word, the _2A and _2B groups request two. */
/* Single-character iterators handled with one word. */
980 #define CASE_ITERATOR_PRIVATE_DATA_1 \
981 case OP_MINSTAR: \
982 case OP_MINPLUS: \
983 case OP_QUERY: \
984 case OP_MINQUERY: \
985 case OP_MINSTARI: \
986 case OP_MINPLUSI: \
987 case OP_QUERYI: \
988 case OP_MINQUERYI: \
989 case OP_NOTMINSTAR: \
990 case OP_NOTMINPLUS: \
991 case OP_NOTQUERY: \
992 case OP_NOTMINQUERY: \
993 case OP_NOTMINSTARI: \
994 case OP_NOTMINPLUSI: \
995 case OP_NOTQUERYI: \
996 case OP_NOTMINQUERYI:
997
/* Greedy star / plus single-character iterators. */
998 #define CASE_ITERATOR_PRIVATE_DATA_2A \
999 case OP_STAR: \
1000 case OP_PLUS: \
1001 case OP_STARI: \
1002 case OP_PLUSI: \
1003 case OP_NOTSTAR: \
1004 case OP_NOTPLUS: \
1005 case OP_NOTSTARI: \
1006 case OP_NOTPLUSI:
1007
/* Bounded (upto) single-character iterators. */
1008 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1009 case OP_UPTO: \
1010 case OP_MINUPTO: \
1011 case OP_UPTOI: \
1012 case OP_MINUPTOI: \
1013 case OP_NOTUPTO: \
1014 case OP_NOTMINUPTO: \
1015 case OP_NOTUPTOI: \
1016 case OP_NOTMINUPTOI:
1017
/* Character-type iterator counterparts of the three groups above. */
1018 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1019 case OP_TYPEMINSTAR: \
1020 case OP_TYPEMINPLUS: \
1021 case OP_TYPEQUERY: \
1022 case OP_TYPEMINQUERY:
1023
1024 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1025 case OP_TYPESTAR: \
1026 case OP_TYPEPLUS:
1027
1028 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1029 case OP_TYPEUPTO: \
1030 case OP_TYPEMINUPTO:
1031
1032 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1033 {
1034 pcre_uchar *cc = common->start;
1035 pcre_uchar *alternative;
1036 pcre_uchar *end = NULL;
1037 int private_data_ptr = *private_data_start;
1038 int space, size, bracketlen;
1039
1040 while (cc < ccend)
1041 {
1042 space = 0;
1043 size = 0;
1044 bracketlen = 0;
1045 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1046 return;
1047
1048 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1049 if (detect_repeat(common, cc))
1050 {
1051 /* These brackets are converted to repeats, so no global
1052 based single character repeat is allowed. */
1053 if (cc >= end)
1054 end = bracketend(cc);
1055 }
1056
1057 switch(*cc)
1058 {
1059 case OP_KET:
1060 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1061 {
1062 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1063 private_data_ptr += sizeof(sljit_sw);
1064 cc += common->private_data_ptrs[cc + 1 - common->start];
1065 }
1066 cc += 1 + LINK_SIZE;
1067 break;
1068
1069 case OP_ASSERT:
1070 case OP_ASSERT_NOT:
1071 case OP_ASSERTBACK:
1072 case OP_ASSERTBACK_NOT:
1073 case OP_ONCE:
1074 case OP_ONCE_NC:
1075 case OP_BRAPOS:
1076 case OP_SBRA:
1077 case OP_SBRAPOS:
1078 case OP_SCOND:
1079 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1080 private_data_ptr += sizeof(sljit_sw);
1081 bracketlen = 1 + LINK_SIZE;
1082 break;
1083
1084 case OP_CBRAPOS:
1085 case OP_SCBRAPOS:
1086 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1087 private_data_ptr += sizeof(sljit_sw);
1088 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1089 break;
1090
1091 case OP_COND:
1092 /* Might be a hidden SCOND. */
1093 alternative = cc + GET(cc, 1);
1094 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1095 {
1096 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1097 private_data_ptr += sizeof(sljit_sw);
1098 }
1099 bracketlen = 1 + LINK_SIZE;
1100 break;
1101
1102 case OP_BRA:
1103 bracketlen = 1 + LINK_SIZE;
1104 break;
1105
1106 case OP_CBRA:
1107 case OP_SCBRA:
1108 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1109 break;
1110
1111 CASE_ITERATOR_PRIVATE_DATA_1
1112 space = 1;
1113 size = -2;
1114 break;
1115
1116 CASE_ITERATOR_PRIVATE_DATA_2A
1117 space = 2;
1118 size = -2;
1119 break;
1120
1121 CASE_ITERATOR_PRIVATE_DATA_2B
1122 space = 2;
1123 size = -(2 + IMM2_SIZE);
1124 break;
1125
1126 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1127 space = 1;
1128 size = 1;
1129 break;
1130
1131 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1132 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1133 space = 2;
1134 size = 1;
1135 break;
1136
1137 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1138 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1139 space = 2;
1140 size = 1 + IMM2_SIZE;
1141 break;
1142
1143 case OP_CLASS:
1144 case OP_NCLASS:
1145 size += 1 + 32 / sizeof(pcre_uchar);
1146 space = get_class_iterator_size(cc + size);
1147 break;
1148
1149 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1150 case OP_XCLASS:
1151 size = GET(cc, 1);
1152 space = get_class_iterator_size(cc + size);
1153 break;
1154 #endif
1155
1156 default:
1157 cc = next_opcode(common, cc);
1158 SLJIT_ASSERT(cc != NULL);
1159 break;
1160 }
1161
1162 /* Character iterators, which are not inside a repeated bracket,
1163 gets a private slot instead of allocating it on the stack. */
1164 if (space > 0 && cc >= end)
1165 {
1166 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1167 private_data_ptr += sizeof(sljit_sw) * space;
1168 }
1169
1170 if (size != 0)
1171 {
1172 if (size < 0)
1173 {
1174 cc += -size;
1175 #ifdef SUPPORT_UTF
1176 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1177 #endif
1178 }
1179 else
1180 cc += size;
1181 }
1182
1183 if (bracketlen > 0)
1184 {
1185 if (cc >= end)
1186 {
1187 end = bracketend(cc);
1188 if (end[-1 - LINK_SIZE] == OP_KET)
1189 end = NULL;
1190 }
1191 cc += bracketlen;
1192 }
1193 }
1194 *private_data_start = private_data_ptr;
1195 }
1196
1197 /* Returns with a frame_types (always < 0) if no need for frame. */
1198 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1199 {
1200 int length = 0;
1201 int possessive = 0;
1202 BOOL stack_restore = FALSE;
1203 BOOL setsom_found = recursive;
1204 BOOL setmark_found = recursive;
1205 /* The last capture is a local variable even for recursions. */
1206 BOOL capture_last_found = FALSE;
1207
1208 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1209 SLJIT_ASSERT(common->control_head_ptr != 0);
1210 *needs_control_head = TRUE;
1211 #else
1212 *needs_control_head = FALSE;
1213 #endif
1214
1215 if (ccend == NULL)
1216 {
1217 ccend = bracketend(cc) - (1 + LINK_SIZE);
1218 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1219 {
1220 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1221 /* This is correct regardless of common->capture_last_ptr. */
1222 capture_last_found = TRUE;
1223 }
1224 cc = next_opcode(common, cc);
1225 }
1226
1227 SLJIT_ASSERT(cc != NULL);
1228 while (cc < ccend)
1229 switch(*cc)
1230 {
1231 case OP_SET_SOM:
1232 SLJIT_ASSERT(common->has_set_som);
1233 stack_restore = TRUE;
1234 if (!setsom_found)
1235 {
1236 length += 2;
1237 setsom_found = TRUE;
1238 }
1239 cc += 1;
1240 break;
1241
1242 case OP_MARK:
1243 case OP_PRUNE_ARG:
1244 case OP_THEN_ARG:
1245 SLJIT_ASSERT(common->mark_ptr != 0);
1246 stack_restore = TRUE;
1247 if (!setmark_found)
1248 {
1249 length += 2;
1250 setmark_found = TRUE;
1251 }
1252 if (common->control_head_ptr != 0)
1253 *needs_control_head = TRUE;
1254 cc += 1 + 2 + cc[1];
1255 break;
1256
1257 case OP_RECURSE:
1258 stack_restore = TRUE;
1259 if (common->has_set_som && !setsom_found)
1260 {
1261 length += 2;
1262 setsom_found = TRUE;
1263 }
1264 if (common->mark_ptr != 0 && !setmark_found)
1265 {
1266 length += 2;
1267 setmark_found = TRUE;
1268 }
1269 if (common->capture_last_ptr != 0 && !capture_last_found)
1270 {
1271 length += 2;
1272 capture_last_found = TRUE;
1273 }
1274 cc += 1 + LINK_SIZE;
1275 break;
1276
1277 case OP_CBRA:
1278 case OP_CBRAPOS:
1279 case OP_SCBRA:
1280 case OP_SCBRAPOS:
1281 stack_restore = TRUE;
1282 if (common->capture_last_ptr != 0 && !capture_last_found)
1283 {
1284 length += 2;
1285 capture_last_found = TRUE;
1286 }
1287 length += 3;
1288 cc += 1 + LINK_SIZE + IMM2_SIZE;
1289 break;
1290
1291 default:
1292 stack_restore = TRUE;
1293 /* Fall through. */
1294
1295 case OP_NOT_WORD_BOUNDARY:
1296 case OP_WORD_BOUNDARY:
1297 case OP_NOT_DIGIT:
1298 case OP_DIGIT:
1299 case OP_NOT_WHITESPACE:
1300 case OP_WHITESPACE:
1301 case OP_NOT_WORDCHAR:
1302 case OP_WORDCHAR:
1303 case OP_ANY:
1304 case OP_ALLANY:
1305 case OP_ANYBYTE:
1306 case OP_NOTPROP:
1307 case OP_PROP:
1308 case OP_ANYNL:
1309 case OP_NOT_HSPACE:
1310 case OP_HSPACE:
1311 case OP_NOT_VSPACE:
1312 case OP_VSPACE:
1313 case OP_EXTUNI:
1314 case OP_EODN:
1315 case OP_EOD:
1316 case OP_CIRC:
1317 case OP_CIRCM:
1318 case OP_DOLL:
1319 case OP_DOLLM:
1320 case OP_CHAR:
1321 case OP_CHARI:
1322 case OP_NOT:
1323 case OP_NOTI:
1324
1325 case OP_EXACT:
1326 case OP_POSSTAR:
1327 case OP_POSPLUS:
1328 case OP_POSQUERY:
1329 case OP_POSUPTO:
1330
1331 case OP_EXACTI:
1332 case OP_POSSTARI:
1333 case OP_POSPLUSI:
1334 case OP_POSQUERYI:
1335 case OP_POSUPTOI:
1336
1337 case OP_NOTEXACT:
1338 case OP_NOTPOSSTAR:
1339 case OP_NOTPOSPLUS:
1340 case OP_NOTPOSQUERY:
1341 case OP_NOTPOSUPTO:
1342
1343 case OP_NOTEXACTI:
1344 case OP_NOTPOSSTARI:
1345 case OP_NOTPOSPLUSI:
1346 case OP_NOTPOSQUERYI:
1347 case OP_NOTPOSUPTOI:
1348
1349 case OP_TYPEEXACT:
1350 case OP_TYPEPOSSTAR:
1351 case OP_TYPEPOSPLUS:
1352 case OP_TYPEPOSQUERY:
1353 case OP_TYPEPOSUPTO:
1354
1355 case OP_CLASS:
1356 case OP_NCLASS:
1357 case OP_XCLASS:
1358
1359 cc = next_opcode(common, cc);
1360 SLJIT_ASSERT(cc != NULL);
1361 break;
1362 }
1363
1364 /* Possessive quantifiers can use a special case. */
1365 if (SLJIT_UNLIKELY(possessive == length))
1366 return stack_restore ? no_frame : no_stack;
1367
1368 if (length > 0)
1369 return length + 1;
1370 return stack_restore ? no_frame : no_stack;
1371 }
1372
1373 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1374 {
1375 DEFINE_COMPILER;
1376 BOOL setsom_found = recursive;
1377 BOOL setmark_found = recursive;
1378 /* The last capture is a local variable even for recursions. */
1379 BOOL capture_last_found = FALSE;
1380 int offset;
1381
1382 /* >= 1 + shortest item size (2) */
1383 SLJIT_UNUSED_ARG(stacktop);
1384 SLJIT_ASSERT(stackpos >= stacktop + 2);
1385
1386 stackpos = STACK(stackpos);
1387 if (ccend == NULL)
1388 {
1389 ccend = bracketend(cc) - (1 + LINK_SIZE);
1390 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1391 cc = next_opcode(common, cc);
1392 }
1393
1394 SLJIT_ASSERT(cc != NULL);
1395 while (cc < ccend)
1396 switch(*cc)
1397 {
1398 case OP_SET_SOM:
1399 SLJIT_ASSERT(common->has_set_som);
1400 if (!setsom_found)
1401 {
1402 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1404 stackpos += (int)sizeof(sljit_sw);
1405 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1406 stackpos += (int)sizeof(sljit_sw);
1407 setsom_found = TRUE;
1408 }
1409 cc += 1;
1410 break;
1411
1412 case OP_MARK:
1413 case OP_PRUNE_ARG:
1414 case OP_THEN_ARG:
1415 SLJIT_ASSERT(common->mark_ptr != 0);
1416 if (!setmark_found)
1417 {
1418 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1419 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1420 stackpos += (int)sizeof(sljit_sw);
1421 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1422 stackpos += (int)sizeof(sljit_sw);
1423 setmark_found = TRUE;
1424 }
1425 cc += 1 + 2 + cc[1];
1426 break;
1427
1428 case OP_RECURSE:
1429 if (common->has_set_som && !setsom_found)
1430 {
1431 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1432 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1433 stackpos += (int)sizeof(sljit_sw);
1434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1435 stackpos += (int)sizeof(sljit_sw);
1436 setsom_found = TRUE;
1437 }
1438 if (common->mark_ptr != 0 && !setmark_found)
1439 {
1440 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1441 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1442 stackpos += (int)sizeof(sljit_sw);
1443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1444 stackpos += (int)sizeof(sljit_sw);
1445 setmark_found = TRUE;
1446 }
1447 if (common->capture_last_ptr != 0 && !capture_last_found)
1448 {
1449 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1450 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1451 stackpos += (int)sizeof(sljit_sw);
1452 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1453 stackpos += (int)sizeof(sljit_sw);
1454 capture_last_found = TRUE;
1455 }
1456 cc += 1 + LINK_SIZE;
1457 break;
1458
1459 case OP_CBRA:
1460 case OP_CBRAPOS:
1461 case OP_SCBRA:
1462 case OP_SCBRAPOS:
1463 if (common->capture_last_ptr != 0 && !capture_last_found)
1464 {
1465 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1467 stackpos += (int)sizeof(sljit_sw);
1468 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1469 stackpos += (int)sizeof(sljit_sw);
1470 capture_last_found = TRUE;
1471 }
1472 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1474 stackpos += (int)sizeof(sljit_sw);
1475 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1476 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1477 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1478 stackpos += (int)sizeof(sljit_sw);
1479 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1480 stackpos += (int)sizeof(sljit_sw);
1481
1482 cc += 1 + LINK_SIZE + IMM2_SIZE;
1483 break;
1484
1485 default:
1486 cc = next_opcode(common, cc);
1487 SLJIT_ASSERT(cc != NULL);
1488 break;
1489 }
1490
1491 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1492 SLJIT_ASSERT(stackpos == STACK(stacktop));
1493 }
1494
1495 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1496 {
1497 int private_data_length = needs_control_head ? 3 : 2;
1498 int size;
1499 pcre_uchar *alternative;
1500 /* Calculate the sum of the private machine words. */
1501 while (cc < ccend)
1502 {
1503 size = 0;
1504 switch(*cc)
1505 {
1506 case OP_KET:
1507 if (PRIVATE_DATA(cc) != 0)
1508 private_data_length++;
1509 cc += 1 + LINK_SIZE;
1510 break;
1511
1512 case OP_ASSERT:
1513 case OP_ASSERT_NOT:
1514 case OP_ASSERTBACK:
1515 case OP_ASSERTBACK_NOT:
1516 case OP_ONCE:
1517 case OP_ONCE_NC:
1518 case OP_BRAPOS:
1519 case OP_SBRA:
1520 case OP_SBRAPOS:
1521 case OP_SCOND:
1522 private_data_length++;
1523 cc += 1 + LINK_SIZE;
1524 break;
1525
1526 case OP_CBRA:
1527 case OP_SCBRA:
1528 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1529 private_data_length++;
1530 cc += 1 + LINK_SIZE + IMM2_SIZE;
1531 break;
1532
1533 case OP_CBRAPOS:
1534 case OP_SCBRAPOS:
1535 private_data_length += 2;
1536 cc += 1 + LINK_SIZE + IMM2_SIZE;
1537 break;
1538
1539 case OP_COND:
1540 /* Might be a hidden SCOND. */
1541 alternative = cc + GET(cc, 1);
1542 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1543 private_data_length++;
1544 cc += 1 + LINK_SIZE;
1545 break;
1546
1547 CASE_ITERATOR_PRIVATE_DATA_1
1548 if (PRIVATE_DATA(cc))
1549 private_data_length++;
1550 cc += 2;
1551 #ifdef SUPPORT_UTF
1552 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1553 #endif
1554 break;
1555
1556 CASE_ITERATOR_PRIVATE_DATA_2A
1557 if (PRIVATE_DATA(cc))
1558 private_data_length += 2;
1559 cc += 2;
1560 #ifdef SUPPORT_UTF
1561 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1562 #endif
1563 break;
1564
1565 CASE_ITERATOR_PRIVATE_DATA_2B
1566 if (PRIVATE_DATA(cc))
1567 private_data_length += 2;
1568 cc += 2 + IMM2_SIZE;
1569 #ifdef SUPPORT_UTF
1570 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1571 #endif
1572 break;
1573
1574 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1575 if (PRIVATE_DATA(cc))
1576 private_data_length++;
1577 cc += 1;
1578 break;
1579
1580 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1581 if (PRIVATE_DATA(cc))
1582 private_data_length += 2;
1583 cc += 1;
1584 break;
1585
1586 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1587 if (PRIVATE_DATA(cc))
1588 private_data_length += 2;
1589 cc += 1 + IMM2_SIZE;
1590 break;
1591
1592 case OP_CLASS:
1593 case OP_NCLASS:
1594 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1595 case OP_XCLASS:
1596 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1597 #else
1598 size = 1 + 32 / (int)sizeof(pcre_uchar);
1599 #endif
1600 if (PRIVATE_DATA(cc))
1601 private_data_length += get_class_iterator_size(cc + size);
1602 cc += size;
1603 break;
1604
1605 default:
1606 cc = next_opcode(common, cc);
1607 SLJIT_ASSERT(cc != NULL);
1608 break;
1609 }
1610 }
1611 SLJIT_ASSERT(cc == ccend);
1612 return private_data_length;
1613 }
1614
1615 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1616 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1617 {
1618 DEFINE_COMPILER;
1619 int srcw[2];
1620 int count, size;
1621 BOOL tmp1next = TRUE;
1622 BOOL tmp1empty = TRUE;
1623 BOOL tmp2empty = TRUE;
1624 pcre_uchar *alternative;
1625 enum {
1626 start,
1627 loop,
1628 end
1629 } status;
1630
1631 status = save ? start : loop;
1632 stackptr = STACK(stackptr - 2);
1633 stacktop = STACK(stacktop - 1);
1634
1635 if (!save)
1636 {
1637 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1638 if (stackptr < stacktop)
1639 {
1640 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1641 stackptr += sizeof(sljit_sw);
1642 tmp1empty = FALSE;
1643 }
1644 if (stackptr < stacktop)
1645 {
1646 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1647 stackptr += sizeof(sljit_sw);
1648 tmp2empty = FALSE;
1649 }
1650 /* The tmp1next must be TRUE in either way. */
1651 }
1652
1653 do
1654 {
1655 count = 0;
1656 switch(status)
1657 {
1658 case start:
1659 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1660 count = 1;
1661 srcw[0] = common->recursive_head_ptr;
1662 if (needs_control_head)
1663 {
1664 SLJIT_ASSERT(common->control_head_ptr != 0);
1665 count = 2;
1666 srcw[1] = common->control_head_ptr;
1667 }
1668 status = loop;
1669 break;
1670
1671 case loop:
1672 if (cc >= ccend)
1673 {
1674 status = end;
1675 break;
1676 }
1677
1678 switch(*cc)
1679 {
1680 case OP_KET:
1681 if (PRIVATE_DATA(cc) != 0)
1682 {
1683 count = 1;
1684 srcw[0] = PRIVATE_DATA(cc);
1685 }
1686 cc += 1 + LINK_SIZE;
1687 break;
1688
1689 case OP_ASSERT:
1690 case OP_ASSERT_NOT:
1691 case OP_ASSERTBACK:
1692 case OP_ASSERTBACK_NOT:
1693 case OP_ONCE:
1694 case OP_ONCE_NC:
1695 case OP_BRAPOS:
1696 case OP_SBRA:
1697 case OP_SBRAPOS:
1698 case OP_SCOND:
1699 count = 1;
1700 srcw[0] = PRIVATE_DATA(cc);
1701 SLJIT_ASSERT(srcw[0] != 0);
1702 cc += 1 + LINK_SIZE;
1703 break;
1704
1705 case OP_CBRA:
1706 case OP_SCBRA:
1707 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1708 {
1709 count = 1;
1710 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1711 }
1712 cc += 1 + LINK_SIZE + IMM2_SIZE;
1713 break;
1714
1715 case OP_CBRAPOS:
1716 case OP_SCBRAPOS:
1717 count = 2;
1718 srcw[0] = PRIVATE_DATA(cc);
1719 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1720 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1721 cc += 1 + LINK_SIZE + IMM2_SIZE;
1722 break;
1723
1724 case OP_COND:
1725 /* Might be a hidden SCOND. */
1726 alternative = cc + GET(cc, 1);
1727 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1728 {
1729 count = 1;
1730 srcw[0] = PRIVATE_DATA(cc);
1731 SLJIT_ASSERT(srcw[0] != 0);
1732 }
1733 cc += 1 + LINK_SIZE;
1734 break;
1735
1736 CASE_ITERATOR_PRIVATE_DATA_1
1737 if (PRIVATE_DATA(cc))
1738 {
1739 count = 1;
1740 srcw[0] = PRIVATE_DATA(cc);
1741 }
1742 cc += 2;
1743 #ifdef SUPPORT_UTF
1744 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1745 #endif
1746 break;
1747
1748 CASE_ITERATOR_PRIVATE_DATA_2A
1749 if (PRIVATE_DATA(cc))
1750 {
1751 count = 2;
1752 srcw[0] = PRIVATE_DATA(cc);
1753 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1754 }
1755 cc += 2;
1756 #ifdef SUPPORT_UTF
1757 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1758 #endif
1759 break;
1760
1761 CASE_ITERATOR_PRIVATE_DATA_2B
1762 if (PRIVATE_DATA(cc))
1763 {
1764 count = 2;
1765 srcw[0] = PRIVATE_DATA(cc);
1766 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1767 }
1768 cc += 2 + IMM2_SIZE;
1769 #ifdef SUPPORT_UTF
1770 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1771 #endif
1772 break;
1773
1774 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1775 if (PRIVATE_DATA(cc))
1776 {
1777 count = 1;
1778 srcw[0] = PRIVATE_DATA(cc);
1779 }
1780 cc += 1;
1781 break;
1782
1783 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1784 if (PRIVATE_DATA(cc))
1785 {
1786 count = 2;
1787 srcw[0] = PRIVATE_DATA(cc);
1788 srcw[1] = srcw[0] + sizeof(sljit_sw);
1789 }
1790 cc += 1;
1791 break;
1792
1793 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1794 if (PRIVATE_DATA(cc))
1795 {
1796 count = 2;
1797 srcw[0] = PRIVATE_DATA(cc);
1798 srcw[1] = srcw[0] + sizeof(sljit_sw);
1799 }
1800 cc += 1 + IMM2_SIZE;
1801 break;
1802
1803 case OP_CLASS:
1804 case OP_NCLASS:
1805 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1806 case OP_XCLASS:
1807 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1808 #else
1809 size = 1 + 32 / (int)sizeof(pcre_uchar);
1810 #endif
1811 if (PRIVATE_DATA(cc))
1812 switch(get_class_iterator_size(cc + size))
1813 {
1814 case 1:
1815 count = 1;
1816 srcw[0] = PRIVATE_DATA(cc);
1817 break;
1818
1819 case 2:
1820 count = 2;
1821 srcw[0] = PRIVATE_DATA(cc);
1822 srcw[1] = srcw[0] + sizeof(sljit_sw);
1823 break;
1824
1825 default:
1826 SLJIT_ASSERT_STOP();
1827 break;
1828 }
1829 cc += size;
1830 break;
1831
1832 default:
1833 cc = next_opcode(common, cc);
1834 SLJIT_ASSERT(cc != NULL);
1835 break;
1836 }
1837 break;
1838
1839 case end:
1840 SLJIT_ASSERT_STOP();
1841 break;
1842 }
1843
1844 while (count > 0)
1845 {
1846 count--;
1847 if (save)
1848 {
1849 if (tmp1next)
1850 {
1851 if (!tmp1empty)
1852 {
1853 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1854 stackptr += sizeof(sljit_sw);
1855 }
1856 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1857 tmp1empty = FALSE;
1858 tmp1next = FALSE;
1859 }
1860 else
1861 {
1862 if (!tmp2empty)
1863 {
1864 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1865 stackptr += sizeof(sljit_sw);
1866 }
1867 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1868 tmp2empty = FALSE;
1869 tmp1next = TRUE;
1870 }
1871 }
1872 else
1873 {
1874 if (tmp1next)
1875 {
1876 SLJIT_ASSERT(!tmp1empty);
1877 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1878 tmp1empty = stackptr >= stacktop;
1879 if (!tmp1empty)
1880 {
1881 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1882 stackptr += sizeof(sljit_sw);
1883 }
1884 tmp1next = FALSE;
1885 }
1886 else
1887 {
1888 SLJIT_ASSERT(!tmp2empty);
1889 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1890 tmp2empty = stackptr >= stacktop;
1891 if (!tmp2empty)
1892 {
1893 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1894 stackptr += sizeof(sljit_sw);
1895 }
1896 tmp1next = TRUE;
1897 }
1898 }
1899 }
1900 }
1901 while (status != end);
1902
1903 if (save)
1904 {
1905 if (tmp1next)
1906 {
1907 if (!tmp1empty)
1908 {
1909 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1910 stackptr += sizeof(sljit_sw);
1911 }
1912 if (!tmp2empty)
1913 {
1914 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1915 stackptr += sizeof(sljit_sw);
1916 }
1917 }
1918 else
1919 {
1920 if (!tmp2empty)
1921 {
1922 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1923 stackptr += sizeof(sljit_sw);
1924 }
1925 if (!tmp1empty)
1926 {
1927 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1928 stackptr += sizeof(sljit_sw);
1929 }
1930 }
1931 }
1932 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1933 }
1934
1935 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1936 {
1937 pcre_uchar *end = bracketend(cc);
1938 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1939
1940 /* Assert captures then. */
1941 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1942 current_offset = NULL;
1943 /* Conditional block does not. */
1944 if (*cc == OP_COND || *cc == OP_SCOND)
1945 has_alternatives = FALSE;
1946
1947 cc = next_opcode(common, cc);
1948 if (has_alternatives)
1949 current_offset = common->then_offsets + (cc - common->start);
1950
1951 while (cc < end)
1952 {
1953 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1954 cc = set_then_offsets(common, cc, current_offset);
1955 else
1956 {
1957 if (*cc == OP_ALT && has_alternatives)
1958 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1959 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1960 *current_offset = 1;
1961 cc = next_opcode(common, cc);
1962 }
1963 }
1964
1965 return end;
1966 }
1967
/* The case-label helper macros are removed here, so they cannot be used
   by the code which follows. */
1968 #undef CASE_ITERATOR_PRIVATE_DATA_1
1969 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1970 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1971 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1972 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1973 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1974
1975 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1976 {
1977 return (value & (value - 1)) == 0;
1978 }
1979
1980 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1981 {
1982 while (list)
1983 {
1984 /* sljit_set_label is clever enough to do nothing
1985 if either the jump or the label is NULL. */
1986 SET_LABEL(list->jump, label);
1987 list = list->next;
1988 }
1989 }
1990
1991 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1992 {
1993 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1994 if (list_item)
1995 {
1996 list_item->next = *list;
1997 list_item->jump = jump;
1998 *list = list_item;
1999 }
2000 }
2001
2002 static void add_stub(compiler_common *common, struct sljit_jump *start)
2003 {
2004 DEFINE_COMPILER;
2005 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2006
2007 if (list_item)
2008 {
2009 list_item->start = start;
2010 list_item->quit = LABEL();
2011 list_item->next = common->stubs;
2012 common->stubs = list_item;
2013 }
2014 }
2015
2016 static void flush_stubs(compiler_common *common)
2017 {
2018 DEFINE_COMPILER;
2019 stub_list* list_item = common->stubs;
2020
2021 while (list_item)
2022 {
2023 JUMPHERE(list_item->start);
2024 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2025 JUMPTO(SLJIT_JUMP, list_item->quit);
2026 list_item = list_item->next;
2027 }
2028 common->stubs = NULL;
2029 }
2030
2031 static SLJIT_INLINE void count_match(compiler_common *common)
2032 {
2033 DEFINE_COMPILER;
2034
2035 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2036 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2037 }
2038
2039 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2040 {
2041 /* May destroy all locals and registers except TMP2. */
2042 DEFINE_COMPILER;
2043
2044 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2045 #ifdef DESTROY_REGISTERS
2046 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2047 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2048 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2049 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2050 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2051 #endif
2052 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2053 }
2054
2055 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2056 {
2057 DEFINE_COMPILER;
2058 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2059 }
2060
2061 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2062 {
2063 DEFINE_COMPILER;
2064 struct sljit_label *loop;
2065 int i;
2066
2067 /* At this point we can freely use all temporary registers. */
2068 SLJIT_ASSERT(length > 1);
2069 /* TMP1 returns with begin - 1. */
2070 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2071 if (length < 8)
2072 {
2073 for (i = 1; i < length; i++)
2074 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2075 }
2076 else
2077 {
2078 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2079 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2080 loop = LABEL();
2081 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2082 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2083 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2084 }
2085 }
2086
2087 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2088 {
2089 DEFINE_COMPILER;
2090 struct sljit_label *loop;
2091 int i;
2092
2093 SLJIT_ASSERT(length > 1);
2094 /* OVECTOR(1) contains the "string begin - 1" constant. */
2095 if (length > 2)
2096 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2097 if (length < 8)
2098 {
2099 for (i = 2; i < length; i++)
2100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2101 }
2102 else
2103 {
2104 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2105 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2106 loop = LABEL();
2107 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2108 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2109 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2110 }
2111
2112 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2113 if (common->mark_ptr != 0)
2114 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2115 if (common->control_head_ptr != 0)
2116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2117 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2118 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2119 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2120 }
2121
2122 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2123 {
2124 while (current != NULL)
2125 {
2126 switch (current[-2])
2127 {
2128 case type_then_trap:
2129 break;
2130
2131 case type_mark:
2132 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2133 return current[-4];
2134 break;
2135
2136 default:
2137 SLJIT_ASSERT_STOP();
2138 break;
2139 }
2140 current = (sljit_sw*)current[-1];
2141 }
2142 return -1;
2143 }
2144
2145 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2146 {
2147 DEFINE_COMPILER;
2148 struct sljit_label *loop;
2149 struct sljit_jump *early_quit;
2150
2151 /* At this point we can freely use all registers. */
2152 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2153 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2154
2155 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2156 if (common->mark_ptr != 0)
2157 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2158 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2159 if (common->mark_ptr != 0)
2160 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2161 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2162 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2163 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2164 /* Unlikely, but possible */
2165 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2166 loop = LABEL();
2167 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2168 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2169 /* Copy the integer value to the output buffer */
2170 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2171 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2172 #endif
2173 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2174 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2175 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2176 JUMPHERE(early_quit);
2177
2178 /* Calculate the return value, which is the maximum ovector value. */
2179 if (topbracket > 1)
2180 {
2181 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2182 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2183
2184 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2185 loop = LABEL();
2186 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2187 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2188 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2189 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2190 }
2191 else
2192 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2193 }
2194
2195 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2196 {
2197 DEFINE_COMPILER;
2198 struct sljit_jump *jump;
2199
2200 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2201 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2202 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2203
2204 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2205 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2206 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2207 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2208
2209 /* Store match begin and end. */
2210 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2211 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2212
2213 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2214 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2215 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2216 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2217 #endif
2218 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2219 JUMPHERE(jump);
2220
2221 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2222 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2223 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2224 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2225 #endif
2226 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2227
2228 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2229 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2230 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2231 #endif
2232 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2233
2234 JUMPTO(SLJIT_JUMP, quit);
2235 }
2236
2237 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2238 {
2239 /* May destroy TMP1. */
2240 DEFINE_COMPILER;
2241 struct sljit_jump *jump;
2242
2243 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2244 {
2245 /* The value of -1 must be kept for start_used_ptr! */
2246 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2247 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2248 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2249 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2250 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2251 JUMPHERE(jump);
2252 }
2253 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2254 {
2255 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2256 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2257 JUMPHERE(jump);
2258 }
2259 }
2260
2261 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2262 {
2263 /* Detects if the character has an othercase. */
2264 unsigned int c;
2265
2266 #ifdef SUPPORT_UTF
2267 if (common->utf)
2268 {
2269 GETCHAR(c, cc);
2270 if (c > 127)
2271 {
2272 #ifdef SUPPORT_UCP
2273 return c != UCD_OTHERCASE(c);
2274 #else
2275 return FALSE;
2276 #endif
2277 }
2278 #ifndef COMPILE_PCRE8
2279 return common->fcc[c] != c;
2280 #endif
2281 }
2282 else
2283 #endif
2284 c = *cc;
2285 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2286 }
2287
2288 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2289 {
2290 /* Returns with the othercase. */
2291 #ifdef SUPPORT_UTF
2292 if (common->utf && c > 127)
2293 {
2294 #ifdef SUPPORT_UCP
2295 return UCD_OTHERCASE(c);
2296 #else
2297 return c;
2298 #endif
2299 }
2300 #endif
2301 return TABLE_GET(c, common->fcc, c);
2302 }
2303
2304 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2305 {
2306 /* Detects if the character and its othercase has only 1 bit difference. */
2307 unsigned int c, oc, bit;
2308 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2309 int n;
2310 #endif
2311
2312 #ifdef SUPPORT_UTF
2313 if (common->utf)
2314 {
2315 GETCHAR(c, cc);
2316 if (c <= 127)
2317 oc = common->fcc[c];
2318 else
2319 {
2320 #ifdef SUPPORT_UCP
2321 oc = UCD_OTHERCASE(c);
2322 #else
2323 oc = c;
2324 #endif
2325 }
2326 }
2327 else
2328 {
2329 c = *cc;
2330 oc = TABLE_GET(c, common->fcc, c);
2331 }
2332 #else
2333 c = *cc;
2334 oc = TABLE_GET(c, common->fcc, c);
2335 #endif
2336
2337 SLJIT_ASSERT(c != oc);
2338
2339 bit = c ^ oc;
2340 /* Optimized for English alphabet. */
2341 if (c <= 127 && bit == 0x20)
2342 return (0 << 8) | 0x20;
2343
2344 /* Since c != oc, they must have at least 1 bit difference. */
2345 if (!is_powerof2(bit))
2346 return 0;
2347
2348 #if defined COMPILE_PCRE8
2349
2350 #ifdef SUPPORT_UTF
2351 if (common->utf && c > 127)
2352 {
2353 n = GET_EXTRALEN(*cc);
2354 while ((bit & 0x3f) == 0)
2355 {
2356 n--;
2357 bit >>= 6;
2358 }
2359 return (n << 8) | bit;
2360 }
2361 #endif /* SUPPORT_UTF */
2362 return (0 << 8) | bit;
2363
2364 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2365
2366 #ifdef SUPPORT_UTF
2367 if (common->utf && c > 65535)
2368 {
2369 if (bit >= (1 << 10))
2370 bit >>= 10;
2371 else
2372 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2373 }
2374 #endif /* SUPPORT_UTF */
2375 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2376
2377 #endif /* COMPILE_PCRE[8|16|32] */
2378 }
2379
2380 static void check_partial(compiler_common *common, BOOL force)
2381 {
2382 /* Checks whether a partial matching is occurred. Does not modify registers. */
2383 DEFINE_COMPILER;
2384 struct sljit_jump *jump = NULL;
2385
2386 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2387
2388 if (common->mode == JIT_COMPILE)
2389 return;
2390
2391 if (!force)
2392 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2393 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2394 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2395
2396 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2397 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2398 else
2399 {
2400 if (common->partialmatchlabel != NULL)
2401 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2402 else
2403 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2404 }
2405
2406 if (jump != NULL)
2407 JUMPHERE(jump);
2408 }
2409
2410 static void check_str_end(compiler_common *common, jump_list **end_reached)
2411 {
2412 /* Does not affect registers. Usually used in a tight spot. */
2413 DEFINE_COMPILER;
2414 struct sljit_jump *jump;
2415
2416 if (common->mode == JIT_COMPILE)
2417 {
2418 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2419 return;
2420 }
2421
2422 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2423 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2424 {
2425 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2426 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2427 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2428 }
2429 else
2430 {
2431 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2432 if (common->partialmatchlabel != NULL)
2433 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2434 else
2435 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2436 }
2437 JUMPHERE(jump);
2438 }
2439
2440 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2441 {
2442 DEFINE_COMPILER;
2443 struct sljit_jump *jump;
2444
2445 if (common->mode == JIT_COMPILE)
2446 {
2447 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2448 return;
2449 }
2450
2451 /* Partial matching mode. */
2452 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2453 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2454 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2455 {
2456 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2457 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2458 }
2459 else
2460 {
2461 if (common->partialmatchlabel != NULL)
2462 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2463 else
2464 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2465 }
2466 JUMPHERE(jump);
2467 }
2468
2469 static void peek_char(compiler_common *common, pcre_uint32 max)
2470 {
2471 /* Reads the character into TMP1, keeps STR_PTR.
2472 Does not check STR_END. TMP2 Destroyed. */
2473 DEFINE_COMPILER;
2474 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2475 struct sljit_jump *jump;
2476 #endif
2477
2478 SLJIT_UNUSED_ARG(max);
2479
2480 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2481 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2482 if (common->utf)
2483 {
2484 if (max < 128) return;
2485
2486 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2487 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2488 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2489 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2490 JUMPHERE(jump);
2491 }
2492 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2493
2494 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2495 if (common->utf)
2496 {
2497 if (max < 0xd800) return;
2498
2499 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2500 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2501 /* TMP2 contains the high surrogate. */
2502 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2503 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2504 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2505 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2506 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2507 JUMPHERE(jump);
2508 }
2509 #endif
2510 }
2511
2512 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2513
2514 static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
2515 {
2516 /* Tells whether the character codes below 128 are enough
2517 to determine a match. */
2518 const pcre_uint8 value = nclass ? 0xff : 0;
2519 const pcre_uint8* end = bitset + 32;
2520
2521 bitset += 16;
2522 do
2523 {
2524 if (*bitset++ != value)
2525 return FALSE;
2526 }
2527 while (bitset < end);
2528 return TRUE;
2529 }
2530
2531 static void read_char7_type(compiler_common *common, BOOL full_read)
2532 {
2533 /* Reads the precise character type of a character into TMP1, if the character
2534 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2535 full_read argument tells whether characters above max are accepted or not. */
2536 DEFINE_COMPILER;
2537 struct sljit_jump *jump;
2538
2539 SLJIT_ASSERT(common->utf);
2540
2541 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2542 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2543
2544 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2545
2546 if (full_read)
2547 {
2548 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2549 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2550 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2551 JUMPHERE(jump);
2552 }
2553 }
2554
2555 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2556
2557 static void read_char_range(compiler_common *common, pcre_uint32 min, pcre_uint32 max, BOOL update_str_ptr)
2558 {
2559 /* Reads the precise value of a character into TMP1, if the character is
2560 between min and max (c >= min && c <= max). Otherwise it returns with a value
2561 outside the range. Does not check STR_END. */
/* The first code unit is always loaded and STR_PTR always moves at least one
unit forward. When update_str_ptr is set, the UTF paths below also move
STR_PTR past the remaining units of the character even if its value is not
decoded. TMP2 is used as scratch in the UTF paths; RETURN_ADDR is used as
scratch in the 8 bit path for 128 <= max < 0x800. Without UTF support (and in
32 bit builds) nothing more than the load and the advance is emitted. */
2562 DEFINE_COMPILER;
2563 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2564 struct sljit_jump *jump;
2565 #endif
2566
2567 SLJIT_UNUSED_ARG(update_str_ptr);
2568 SLJIT_UNUSED_ARG(min);
2569 SLJIT_UNUSED_ARG(max);
2570 SLJIT_ASSERT(min <= max);
2571
2572 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2573 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2574
2575 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2576 if (common->utf)
2577 {
/* With max < 128 and no STR_PTR update requested, the byte already loaded
is returned as is. */
2578 if (max < 128 && !update_str_ptr) return;
2579
/* First bytes below 0xc0 skip the multi-byte handling. */
2580 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2581 if (max >= 0x800)
2582 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2583 else if (max < 128)
2584 {
/* update_str_ptr is set here (see the early return above): only skip the
extra bytes, using the length from utf8_table4. */
2585 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2586 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2587 }
2588 else
2589 {
/* 128 <= max < 0x800: the first two bytes are combined inline (6 bits
each). With update_str_ptr the extra length from utf8_table4 is kept in
RETURN_ADDR and added to STR_PTR at the end; otherwise STR_PTR moves just
one more byte. */
2590 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2591 if (!update_str_ptr)
2592 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2593 else
2594 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2595 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2596 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2597 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2598 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2599 if (update_str_ptr)
2600 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2601 }
2602 JUMPHERE(jump);
2603 }
2604 #endif
2605
2606 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2607 if (common->utf)
2608 {
2609 if (max >= 0x10000)
2610 {
/* Full decode: a unit in 0xd800..0xdbff is combined with the unit that
follows it, and STR_PTR is moved past that second unit. STR_PTR was already
advanced above, so the second unit is at offset 0. */
2611 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2612 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2613 /* TMP2 contains the high surrogate. */
2614 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2615 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2616 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2617 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2618 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2619 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2620 JUMPHERE(jump);
2621 return;
2622 }
2623
2624 if (max < 0xd800 && !update_str_ptr) return;
2625
2626 /* Skip low surrogate if necessary. */
/* For a unit in 0xd800..0xdbff: STR_PTR moves one more unit when
update_str_ptr is set, and TMP1 is replaced by 0x10000 when max >= 0xd800. */
2627 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2628 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2629 if (update_str_ptr)
2630 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2631 if (max >= 0xd800)
2632 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2633 JUMPHERE(jump);
2634 }
2635 #endif
2636 }
2637
2638 static SLJIT_INLINE void read_char(compiler_common *common)
2639 {
/* Reads a character through read_char_range with the range 0..READ_CHAR_MAX
and with update_str_ptr set to TRUE. */
2640 read_char_range(common, 0, READ_CHAR_MAX, TRUE);
2641 }
2642
2643 static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
2644 {
2645 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
/* The first code unit is loaded into TMP2 and STR_PTR is advanced by one
unit. The type is the common->ctypes entry of the character; characters above
255 get type 0 in the 16/32 bit paths and in the 8 bit UTF paths emitted
here. TMP2 is destroyed. */
2646 DEFINE_COMPILER;
2647 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2648 struct sljit_jump *jump;
2649 #endif
2650 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2651 struct sljit_jump *jump2;
2652 #endif
2653
2654 SLJIT_UNUSED_ARG(update_str_ptr);
2655
2656 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2658
2659 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2660 if (common->utf)
2661 {
2662 /* This can be an extra read in some situations, but hopefully
2663 it is needed in most cases. */
2664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2665 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2666 if (!update_str_ptr)
2667 {
/* Without update_str_ptr the sequence is decoded as two bytes: the second
byte is read, STR_PTR moves one more unit, and the combined value is looked
up only if it is not above 255 (type 0 otherwise). */
2668 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2669 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2670 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2671 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2672 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2673 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2674 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2675 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2676 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2677 JUMPHERE(jump2);
2678 }
2679 else
/* With update_str_ptr the call is queued on the common->utfreadtype8 list. */
2680 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2681 JUMPHERE(jump);
2682 return;
2683 }
2684 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2685
2686 #if !defined COMPILE_PCRE8
2687 /* The ctypes array contains only 256 values. */
2688 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2689 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2690 #endif
2691 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2692 #if !defined COMPILE_PCRE8
2693 JUMPHERE(jump);
2694 #endif
2695
2696 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2697 if (common->utf && update_str_ptr)
2698 {
2699 /* Skip low surrogate if necessary. */
/* STR_PTR moves one more unit when the unit just read is in 0xd800..0xdbff. */
2700 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2701 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2702 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2703 JUMPHERE(jump);
2704 }
2705 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2706 }
2707
2708 static void skip_char_back(compiler_common *common)
2709 {
2710 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
/* Without UTF support, in 32 bit builds, and when common->utf is not set,
this is a single subtraction of one code unit from STR_PTR. */
2711 DEFINE_COMPILER;
2712 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2713 #if defined COMPILE_PCRE8
2714 struct sljit_label *label;
2715
2716 if (common->utf)
2717 {
/* Step back one byte at a time, repeating while the byte stepped over has
the form 10xxxxxx ((byte & 0xc0) == 0x80). */
2718 label = LABEL();
2719 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2720 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2721 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2722 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2723 return;
2724 }
2725 #elif defined COMPILE_PCRE16
2726 if (common->utf)
2727 {
2728 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2729 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2730 /* Skip low surrogate if necessary. */
/* TMP1 receives the flag of the test (unit & 0xfc00) == 0xdc00; shifted left
by one it is subtracted from STR_PTR, so a unit in 0xdc00..0xdfff causes a
second step back. */
2731 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2732 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2733 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2734 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2735 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2736 return;
2737 }
2738 #endif /* COMPILE_PCRE[8|16] */
2739 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2740 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2741 }
2742
2743 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
2744 {
2745 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2746 DEFINE_COMPILER;
2747 struct sljit_jump *jump;
2748
2749 if (nltype == NLTYPE_ANY)
2750 {
2751 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2752 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2753 }
2754 else if (nltype == NLTYPE_ANYCRLF)
2755 {
2756 if (jumpifmatch)
2757 {
2758 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
2759 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2760 }
2761 else
2762 {
2763 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2764 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
2765 JUMPHERE(jump);
2766 }
2767 }
2768 else
2769 {
2770 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2771 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2772 }
2773 }
2774
2775 #ifdef SUPPORT_UTF
2776
2777 #if defined COMPILE_PCRE8
2778 static void do_utfreadchar(compiler_common *common)
2779 {
2780 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2781 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
/* Emits a fast-call helper (fast enter / fast return through RETURN_ADDR).
The following bytes are read from STR_PTR + 0, + 1 and + 2, so STR_PTR is
expected to point just after the first byte on entry. STR_PTR is advanced
past the bytes that were consumed (1, 2 or 3 more). */
2782 DEFINE_COMPILER;
2783 struct sljit_jump *jump;
2784
2785 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f). */
2786 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2787 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2788 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2789 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2790 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2791
2792 /* Searching for the first zero. */
/* Bit 0x800 of the combined value is bit 5 of the first byte; it is clear
for a two byte sequence. */
2793 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2794 jump = JUMP(SLJIT_C_NOT_ZERO);
2795 /* Two byte sequence. */
2796 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2797 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2798 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2799
2800 JUMPHERE(jump);
/* Remove the length marker bit (0x800), then append the third byte. */
2801 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2802 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2803 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2804 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2805 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2806
/* Bit 0x10000 is now bit 4 of the first byte; it is clear for a three byte
sequence. */
2807 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2808 jump = JUMP(SLJIT_C_NOT_ZERO);
2809 /* Three byte sequence. */
2810 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2811 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2812 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2813
2814 /* Four byte sequence. */
2815 JUMPHERE(jump);
2816 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2817 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2818 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2819 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2820 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2821 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2822 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2823 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2824 }
2825
2826 static void do_utfreadchar16(compiler_common *common)
2827 {
2828 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2829 of the character (>= 0xc0). Return value in TMP1. */
/* Emits a fast-call helper. It is the variant that read_char_range queues
when 0x800 <= max < 0x10000. At most the first three bytes are combined into
TMP1; STR_PTR, expected to point just after the first byte on entry, is
advanced past the consumed bytes. TMP2 is destroyed and, unlike
do_utfreadchar, does not hold a length on return. */
2830 DEFINE_COMPILER;
2831 struct sljit_jump *jump;
2832
2833 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2834 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2835 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2836 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2837 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2838 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2839
2840 /* Searching for the first zero. */
2841 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2842 jump = JUMP(SLJIT_C_NOT_ZERO);
2843 /* Two byte sequence. */
2844 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2845 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2846
2847 JUMPHERE(jump);
/* Bit 0x400 is bit 4 of the first byte. TMP2 receives the flag of this test
and is added to STR_PTR, i.e. one extra byte is skipped when the bit is set.
That bit is not cleared afterwards, so in that case the value left in TMP1
has bit 0x10000 set after the shift below. */
2848 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
2849 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_NOT_ZERO);
2850 /* This code runs only in 8 bit mode. No need to shift the value. */
2851 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2852 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2853 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2854 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2855 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2856 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2857 /* Three byte sequence. */
2858 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2859 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2860 }
2861
2862 static void do_utfreadtype8(compiler_common *common)
2863 {
2864 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2865 of the character (>= 0xc0). Return value in TMP1. */
/* Emits a fast-call helper. The result is the common->ctypes entry of the
decoded character when its value is below 256, and 0 otherwise. STR_PTR is
advanced past the remaining bytes of the character; TMP2 is destroyed. */
2866 DEFINE_COMPILER;
2867 struct sljit_jump *jump;
2868 struct sljit_jump *compare;
2869
2870 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2871
/* Bit 0x20 of the first byte is set for sequences longer than two bytes. */
2872 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2873 jump = JUMP(SLJIT_C_NOT_ZERO);
2874 /* Two byte sequence. */
2875 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2876 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2877 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2878 /* The upper 5 bits are known at this point. */
/* A value above 3 here means the character is 256 or more: type 0. */
2879 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2880 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2881 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2882 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2883 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2884 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2885
2886 JUMPHERE(compare);
2887 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2888 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2889
2890 /* We only have types for characters less than 256. */
/* Longer sequences: skip the remaining bytes using the length read from
utf8_table4 and return type 0. */
2891 JUMPHERE(jump);
2892 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2893 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2894 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2895 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2896 }
2897
2898 #endif /* COMPILE_PCRE8 */
2899
2900 #endif /* SUPPORT_UTF */
2901
2902 #ifdef SUPPORT_UCP
2903
2904 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift that split a character code into a block number (code >> 7)
and an index inside the block (code & 127). */
2905 #define UCD_BLOCK_MASK 127
2906 #define UCD_BLOCK_SHIFT 7
2907
2908 static void do_getucd(compiler_common *common)
2909 {
2910 /* Search the UCD record for the character comes in TMP1.
2911 Returns chartype in TMP1 and UCD offset in TMP2. */
/* Emits a fast-call helper that performs the two stage table lookup
(ucd_stage1, ucd_stage2) and then reads the chartype field of the selected
ucd_records entry. */
2912 DEFINE_COMPILER;
2913
2914 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2915
2916 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (unsigned byte load). */
2917 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2918 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (TMP1 & UCD_BLOCK_MASK) + (TMP2 << UCD_BLOCK_SHIFT). */
2919 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2920 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2921 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = unsigned 16 bit load from ucd_stage2 indexed by TMP1. The trailing
1 is presumably the index shift of SLJIT_MEM2 (x2) - confirm against the
SLJIT documentation. */
2922 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2923 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = chartype byte of the record selected by TMP2. The trailing 3 would
be a x8 index shift, which matches the sizeof(ucd_record) == 8 assert. */
2924 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2925 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2926 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2927 }
2928 #endif
2929
/* Emits the entry of the main matching loop and returns the label placed in
front of the code that advances STR_PTR by one character. The code emitted
here first jumps over that advance step (the start jump is bound at the very
end), so STR_PTR is not moved on the first pass.

When firstline is set, code that finds the end of the first line and stores
it in the first_line_end local is emitted before the loop; STR_PTR is saved
in TMP3 during that scan and restored afterwards. TMP1 and TMP2 are used as
scratch registers. */
2930 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2931 {
2932 DEFINE_COMPILER;
2933 struct sljit_label *mainloop;
2934 struct sljit_label *newlinelabel = NULL;
2935 struct sljit_jump *start;
2936 struct sljit_jump *end = NULL;
2937 struct sljit_jump *nl = NULL;
2938 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2939 struct sljit_jump *singlechar;
2940 #endif
2941 jump_list *newline = NULL;
2942 BOOL newlinecheck = FALSE;
2943 BOOL readuchar = FALSE;
2944
/* The extra two-unit newline handling in the advance step is emitted only
when neither hascrorlf nor firstline is set and the newline type is ANY,
ANYCRLF, or a fixed newline above 255. */
2945 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2946 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2947 newlinecheck = TRUE;
2948
2949 if (firstline)
2950 {
2951 /* Search for the end of the first line. */
2952 SLJIT_ASSERT(common->first_line_end != 0);
2953 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2954
2955 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2956 {
/* Fixed newline above 255: compare two consecutive code units against
(newline >> 8) & 0xff and newline & 0xff. mainloop is used here only as the
label of this scanning loop; it is reassigned further down. */
2957 mainloop = LABEL();
2958 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2959 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2960 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2961 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2962 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2963 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2964 JUMPHERE(end);
2965 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2966 }
2967 else
2968 {
/* Other newline types: first_line_end is stored before every character is
read, so it holds the position of the newline when one is found. If the
subject ends first, the store after JUMPHERE(end) records that position. */
2969 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2970 mainloop = LABEL();
2971 /* Continual stores does not cause data dependency. */
2972 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2973 read_char_range(common, common->nlmin, common->nlmax, TRUE);
2974 check_newlinechar(common, common->nltype, &newline, TRUE);
2975 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2976 JUMPHERE(end);
2977 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2978 set_jumps(newline, LABEL());
2979 }
2980
2981 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2982 }
2983
/* Skip the advance step on the first pass. */
2984 start = JUMP(SLJIT_JUMP);
2985
2986 if (newlinecheck)
2987 {
/* Reached from the advance step when the current unit equals
(newline >> 8) & 0xff: step over it and, unless the subject ends there, also
over the next unit when that one equals newline & 0xff. */
2988 newlinelabel = LABEL();
2989 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2990 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2991 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2992 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2993 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2994 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2995 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2996 #endif
2997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2998 nl = JUMP(SLJIT_JUMP);
2999 }
3000
/* This is the label returned to the caller. */
3001 mainloop = LABEL();
3002
3003 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3004 #ifdef SUPPORT_UTF
3005 if (common->utf) readuchar = TRUE;
3006 #endif
3007 if (newlinecheck) readuchar = TRUE;
3008
/* The current unit is loaded only when the UTF or newline handling below
needs it. */
3009 if (readuchar)
3010 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3011
3012 if (newlinecheck)
3013 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3014
3015 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3016 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3017 #if defined COMPILE_PCRE8
3018 if (common->utf)
3019 {
/* UTF-8: for a first byte of 0xc0 or above, also skip the extra bytes, using
the length from utf8_table4. */
3020 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3021 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3022 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3023 JUMPHERE(singlechar);
3024 }
3025 #elif defined COMPILE_PCRE16
3026 if (common->utf)
3027 {
/* UTF-16: when (unit & 0xfc00) == 0xd800 the flag value shifted left by one
is added to STR_PTR, skipping one more unit. */
3028 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3029 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3030 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3031 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3032 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3033 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3034 JUMPHERE(singlechar);
3035 }
3036 #endif /* COMPILE_PCRE[8|16] */
3037 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* The initial jump and both exits of the newline handling land here. */
3038 JUMPHERE(start);
3039
3040 if (newlinecheck)
3041 {
3042 JUMPHERE(end);
3043 JUMPHERE(nl);
3044 }
3045
3046 return mainloop;
3047 }
3048
3049 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
3050 {
3051 /* Recursive function, which scans prefix literals. */
3052 int len, repeat, len_save, consumed = 0;
3053 pcre_uint32 caseless, chr, mask;
3054 pcre_uchar *alternative, *cc_save;
3055 BOOL last, any;
3056
3057 repeat = 1;
3058 while (TRUE)
3059 {
3060 last = TRUE;
3061 any = FALSE;
3062 caseless = 0;
3063 switch (*cc)
3064 {
3065 case OP_CHARI:
3066 caseless = 1;
3067 case OP_CHAR:
3068 last = FALSE;
3069 cc++;
3070 break;
3071
3072 case OP_SOD:
3073 case OP_SOM:
3074 case OP_SET_SOM:
3075 case OP_NOT_WORD_BOUNDARY:
3076 case OP_WORD_BOUNDARY:
3077 case OP_EODN:
3078 case OP_EOD:
3079 case OP_CIRC:
3080 case OP_CIRCM:
3081 case OP_DOLL:
3082 case OP_DOLLM:
3083 /* Zero width assertions. */
3084 cc++;
3085 continue;
3086
3087 case OP_PLUS:
3088 case OP_MINPLUS:
3089 case OP_POSPLUS:
3090 cc++;
3091 break;
3092
3093 case OP_EXACTI:
3094 caseless = 1;
3095 case OP_EXACT:
3096 repeat = GET2(cc, 1);
3097 last = FALSE;
3098 cc += 1 + IMM2_SIZE;
3099 break;
3100
3101 case OP_PLUSI:
3102 case OP_MINPLUSI:
3103 case OP_POSPLUSI:
3104 caseless = 1;
3105 cc++;
3106 break;
3107
3108 case OP_KET:
3109 cc += 1 + LINK_SIZE;
3110 continue;
3111
3112 case OP_ALT:
3113 cc += GET(cc, 1);
3114 continue;
3115
3116 case OP_ONCE:
3117 case OP_ONCE_NC:
3118 case OP_BRA:
3119 case OP_BRAPOS:
3120 case OP_CBRA:
3121 case OP_CBRAPOS:
3122 alternative = cc + GET(cc, 1);
3123 while (*alternative == OP_ALT)
3124 {
3125 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3126 if (max_chars == 0)
3127 return consumed;
3128 alternative += GET(alternative, 1);
3129 }
3130
3131 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3132 cc += IMM2_SIZE;
3133 cc += 1 + LINK_SIZE;
3134 continue;
3135
3136 case OP_CLASS:
3137 case OP_NCLASS:
3138 any = TRUE;
3139 cc += 1 + 32 / sizeof(pcre_uchar);
3140 break;
3141
3142 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3143 case OP_XCLASS:
3144 any = TRUE;
3145 cc += GET(cc, 1);
3146 break;
3147 #endif
3148
3149 case OP_NOT_DIGIT:
3150 case OP_DIGIT:
3151 case OP_NOT_WHITESPACE:
3152 case OP_WHITESPACE:
3153 case OP_NOT_WORDCHAR:
3154 case OP_WORDCHAR:
3155 case OP_ANY:
3156 case OP_ALLANY:
3157 any = TRUE;
3158 cc++;
3159 break;
3160
3161 #ifdef SUPPORT_UCP
3162 case OP_NOTPROP:
3163 case OP_PROP:
3164 any = TRUE;
3165 cc += 1 + 2;
3166 break;
3167 #endif
3168
3169 case OP_TYPEEXACT:
3170 repeat = GET2(cc, 1);
3171 cc += 1 + IMM2_SIZE;
3172 continue;
3173
3174 default:
3175 return consumed;
3176 }
3177
3178 if (any)
3179 {
3180 #ifdef SUPPORT_UTF
3181 if (common->utf) return consumed;
3182 #endif
3183 #if defined COMPILE_PCRE8
3184 mask = 0xff;
3185 #elif defined COMPILE_PCRE16
3186 mask = 0xffff;
3187 #elif defined COMPILE_PCRE32
3188 mask = 0xffffffff;
3189 #else
3190 SLJIT_ASSERT_STOP();
3191 #endif
3192
3193 do
3194 {
3195 chars[0] = mask;
3196 chars[1] = mask;
3197
3198 if (--max_chars == 0)
3199 return consumed;
3200 consumed++;
3201 chars += 2;
3202 }
3203 while (--repeat > 0);
3204
3205 repeat = 1;
3206 continue;
3207 }
3208
3209 len = 1;
3210 #ifdef SUPPORT_UTF
3211 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3212 #endif
3213
3214 if (caseless != 0 && char_has_othercase(common, cc))
3215 {
3216 caseless = char_get_othercase_bit(common, cc);
3217 if (caseless == 0)
3218 return consumed;
3219 #ifdef COMPILE_PCRE8
3220 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3221 #else
3222 if ((caseless & 0x100) != 0)
3223 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
3224 else
3225 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
3226 #endif
3227 }
3228 else
3229 caseless = 0;
3230
3231 len_save = len;
3232 cc_save = cc;
3233 while (TRUE)
3234 {
3235 do
3236 {
3237 chr = *cc;
3238 #ifdef COMPILE_PCRE32
3239 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3240 return consumed;
3241 #endif
3242 mask = 0;
3243 if ((pcre_uint32)len == (caseless & 0xff))
3244 {
3245 mask = caseless >> 8;
3246 chr |= mask;
3247 }
3248
3249 if (chars[0] == NOTACHAR)
3250 {
3251 chars[0] = chr;
3252 chars[1] = mask;
3253 }
3254 else
3255 {
3256 mask |= chars[0] ^ chr;
3257 chr |= mask;
3258 chars[0] = chr;
3259 chars[1] |= mask;
3260 }
3261
3262 len--;
3263 if (--max_chars == 0)
3264 return consumed;
3265 consumed++;
3266 chars += 2;
3267 cc++;
3268 }
3269 while (len > 0);
3270
3271 if (--repeat == 0)
3272 break;
3273
3274 len = len_save;
3275 cc = cc_save;
3276 }
3277
3278 repeat = 1;
3279 if (last)
3280 return consumed;
3281 }
3282 }
3283
3284 #define MAX_N_CHARS 16
3285
3286 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3287 {
3288 DEFINE_COMPILER;
3289 struct sljit_label *start;
3290 struct sljit_jump *quit;
3291 pcre_uint32 chars[MAX_N_CHARS * 2];
3292 pcre_uint8 ones[MAX_N_CHARS];
3293 pcre_uint32 mask;
3294 int i, max;
3295 int offsets[3];
3296
3297 for (i = 0; i < MAX_N_CHARS; i++)
3298 {
3299 chars[i << 1] = NOTACHAR;
3300 chars[(i << 1) + 1] = 0;
3301 }
3302
3303 max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3304
3305 if (max <= 1)
3306 return FALSE;
3307
3308 for (i = 0; i < max; i++)
3309 {
3310 mask = chars[(i << 1) + 1];
3311 ones[i] = ones_in_half_byte[mask & 0xf];
3312 mask >>= 4;
3313 while (mask != 0)
3314 {
3315 ones[i] += ones_in_half_byte[mask & 0xf];
3316 mask >>= 4;
3317 }
3318 }
3319
3320 offsets[0] = -1;
3321 /* Scan forward. */
3322 for (i = 0; i < max; i++)
3323 if (ones[i] <= 2) {
3324 offsets[0] = i;
3325 break;
3326 }
3327
3328 if (offsets[0] == -1)
3329 return FALSE;
3330
3331 /* Scan backward. */
3332 offsets[1] = -1;
3333 for (i = max - 1; i > offsets[0]; i--)
3334 if (ones[i] <= 2) {
3335 offsets[1] = i;
3336 break;
3337 }
3338
3339 offsets[2] = -1;
3340 if (offsets[1] >= 0)
3341 {
3342 /* Scan from middle. */
3343 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3344 if (ones[i] <= 2)
3345 {
3346 offsets[2] = i;
3347 break;
3348 }
3349
3350 if (offsets[2] == -1)
3351 {
3352 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3353 if (ones[i] <= 2)
3354 {
3355 offsets[2] = i;
3356 break;
3357 }
3358 }
3359 }
3360
3361 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3362 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3363
3364 chars[0] = chars[offsets[0] << 1];
3365 chars[1] = chars[(offsets[0] << 1) + 1];
3366 if (offsets[2] >= 0)
3367 {
3368 chars[2] = chars[offsets[2] << 1];
3369 chars[3] = chars[(offsets[2] << 1) + 1];
3370 }
3371 if (offsets[1] >= 0)
3372 {
3373 chars[4] = chars[offsets[1] << 1];
3374 chars[5] = chars[(offsets[1] << 1) + 1];
3375 }
3376
3377 max -= 1;
3378 if (firstline)
3379 {
3380 SLJIT_ASSERT(common->first_line_end != 0);
3381 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3382 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3383 }
3384 else
3385 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3386
3387 start = LABEL();
3388 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3389
3390 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3391 if (offsets[1] >= 0)
3392 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3393 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3394
3395 if (chars[1] != 0)
3396 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3397 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3398 if (offsets[2] >= 0)
3399 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3400
3401 if (offsets[1] >= 0)
3402 {
3403 if (chars[5] != 0)
3404 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3405 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3406 }
3407
3408 if (offsets[2] >= 0)
3409 {
3410 if (chars[3] != 0)
3411 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3412 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3413 }
3414 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3415
3416 JUMPHERE(quit);
3417
3418 if (firstline)
3419 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3420 else
3421 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3422 return TRUE;
3423 }
3424
3425 #undef MAX_N_CHARS
3426
3427 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3428 {
3429 DEFINE_COMPILER;
3430 struct sljit_label *start;
3431 struct sljit_jump *quit;
3432 struct sljit_jump *found;
3433 pcre_uchar oc, bit;
3434
3435 if (firstline)
3436 {
3437 SLJIT_ASSERT(common->first_line_end != 0);
3438 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3439 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3440 }
3441
3442 start = LABEL();
3443 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3444 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3445
3446 oc = first_char;
3447 if (caseless)
3448 {
3449 oc = TABLE_GET(first_char, common->fcc, first_char);
3450 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3451 if (first_char > 127 && common->utf)
3452 oc = UCD_OTHERCASE(first_char);
3453 #endif
3454 }
3455 if (first_char == oc)
3456 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3457 else
3458 {
3459 bit = first_char ^ oc;
3460 if (is_powerof2(bit))
3461 {
3462 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3463 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3464 }
3465 else
3466 {
3467 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3468 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3469 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3470 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3471 found = JUMP(SLJIT_C_NOT_ZERO);
3472 }
3473 }
3474
3475 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3476 JUMPTO(SLJIT_JUMP, start);
3477 JUMPHERE(found);
3478 JUMPHERE(quit);
3479
3480 if (firstline)
3481 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3482 }
3483
3484 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3485 {
3486 DEFINE_COMPILER;
3487 struct sljit_label *loop;
3488 struct sljit_jump *lastchar;
3489 struct sljit_jump *firstchar;
3490 struct sljit_jump *quit;
3491 struct sljit_jump *foundcr = NULL;
3492 struct sljit_jump *notfoundnl;
3493 jump_list *newline = NULL;
3494
3495 if (firstline)
3496 {
3497 SLJIT_ASSERT(common->first_line_end != 0);
3498 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3499 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3500 }
3501
3502 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3503 {
3504 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3505 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3506 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3507 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3508 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3509
3510 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3511 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3512 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3513 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3514 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3515 #endif
3516 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3517
3518 loop = LABEL();
3519 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3520 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3521 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3522 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3523 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3524 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3525
3526 JUMPHERE(quit);
3527 JUMPHERE(firstchar);
3528 JUMPHERE(lastchar);
3529
3530 if (firstline)
3531 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3532 return;
3533 }
3534
3535 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3536 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3537 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3538 skip_char_back(common);
3539
3540 loop = LABEL();
3541 read_char_range(common, common->nlmin, common->nlmax, TRUE);
3542 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3543 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3544 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3545 check_newlinechar(common, common->nltype, &newline, FALSE);
3546 set_jumps(newline, loop);
3547
3548 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3549 {
3550 quit = JUMP(SLJIT_JUMP);
3551 JUMPHERE(foundcr);
3552 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3553 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3554 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3555 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3556 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3557 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3558 #endif
3559 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3560 JUMPHERE(notfoundnl);
3561 JUMPHERE(quit);
3562 }
3563 JUMPHERE(lastchar);
3564 JUMPHERE(firstchar);
3565
3566 if (firstline)
3567 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3568 }
3569
3570 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3571
3572 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3573 {
3574 DEFINE_COMPILER;
3575 struct sljit_label *start;
3576 struct sljit_jump *quit;
3577 struct sljit_jump *found = NULL;
3578 jump_list *matches = NULL;
3579 #ifndef COMPILE_PCRE8
3580 struct sljit_jump *jump;
3581 #endif
3582
3583 if (firstline)
3584 {
3585 SLJIT_ASSERT(common->first_line_end != 0);
3586 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3587 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3588 }
3589
3590 start = LABEL();
3591 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3592 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3593 #ifdef SUPPORT_UTF
3594 if (common->utf)
3595 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3596 #endif
3597
3598 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3599 {
3600 #ifndef COMPILE_PCRE8
3601 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3602 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3603 JUMPHERE(jump);
3604 #endif
3605 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3606 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3607 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3608 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3609 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3610 found = JUMP(SLJIT_C_NOT_ZERO);
3611 }
3612
3613 #ifdef SUPPORT_UTF
3614 if (common->utf)
3615 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3616 #endif
3617 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3618 #ifdef SUPPORT_UTF
3619 #if defined COMPILE_PCRE8
3620 if (common->utf)
3621 {
3622 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3623 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3624 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3625 }
3626 #elif defined COMPILE_PCRE16
3627 if (common->utf)
3628 {
3629 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3630 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3631 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3632 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3633 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3634 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3635 }
3636 #endif /* COMPILE_PCRE[8|16] */
3637 #endif /* SUPPORT_UTF */
3638 JUMPTO(SLJIT_JUMP, start);
3639 if (found != NULL)
3640 JUMPHERE(found);
3641 if (matches != NULL)
3642 set_jumps(matches, LABEL());
3643 JUMPHERE(quit);
3644
3645 if (firstline)
3646 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3647 }
3648
3649 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3650 {
3651 DEFINE_COMPILER;
3652 struct sljit_label *loop;
3653 struct sljit_jump *toolong;
3654 struct sljit_jump *alreadyfound;
3655 struct sljit_jump *found;
3656 struct sljit_jump *foundoc = NULL;
3657 struct sljit_jump *notfound;
3658 pcre_uint32 oc, bit;
3659
3660 SLJIT_ASSERT(common->req_char_ptr != 0);
3661 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3662 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3663 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3664 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3665
3666 if (has_firstchar)
3667 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3668 else
3669 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3670
3671 loop = LABEL();
3672 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3673
3674 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3675 oc = req_char;
3676 if (caseless)
3677 {
3678 oc = TABLE_GET(req_char, common->fcc, req_char);
3679 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3680 if (req_char > 127 && common->utf)
3681 oc = UCD_OTHERCASE(req_char);
3682 #endif
3683 }
3684 if (req_char == oc)
3685 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3686 else
3687 {
3688 bit = req_char ^ oc;
3689 if (is_powerof2(bit))
3690 {
3691 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3692 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3693 }
3694 else
3695 {
3696 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3697 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3698 }
3699 }
3700 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3701 JUMPTO(SLJIT_JUMP, loop);
3702
3703 JUMPHERE(found);
3704 if (foundoc)
3705 JUMPHERE(foundoc);
3706 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3707 JUMPHERE(alreadyfound);
3708 JUMPHERE(toolong);
3709 return notfound;
3710 }
3711
3712 static void do_revertframes(compiler_common *common)
3713 {
3714 DEFINE_COMPILER;
3715 struct sljit_jump *jump;
3716 struct sljit_label *mainloop;
3717
3718 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3719 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3720 GET_LOCAL_BASE(TMP3, 0, 0);
3721
3722 /* Drop frames until we reach STACK_TOP. */
3723 mainloop = LABEL();
3724 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3725 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3726 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3727
3728 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3729 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3730 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3731 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3732 JUMPTO(SLJIT_JUMP, mainloop);
3733
3734 JUMPHERE(jump);
3735 jump = JUMP(SLJIT_C_SIG_LESS);
3736 /* End of dropping frames. */
3737 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3738
3739 JUMPHERE(jump);
3740 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3741 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3742 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3743 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3744 JUMPTO(SLJIT_JUMP, mainloop);
3745 }
3746
3747 static void check_wordboundary(compiler_common *common)
3748 {
3749 DEFINE_COMPILER;
3750 struct sljit_jump *skipread;
3751 jump_list *skipread_list = NULL;
3752 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3753 struct sljit_jump *jump;
3754 #endif
3755
3756 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3757
3758 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3759 /* Get type of the previous char, and put it to LOCALS1. */
3760 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3761 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3762 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3763 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3764 skip_char_back(common);
3765 check_start_used_ptr(common);
3766 read_char(common);
3767
3768 /* Testing char type. */
3769 #ifdef SUPPORT_UCP
3770 if (common->use_ucp)
3771 {
3772 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3773 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3774 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3775 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3776 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3777 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3778 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3779 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3780 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3781 JUMPHERE(jump);
3782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3783 }
3784 else
3785 #endif
3786 {
3787 #ifndef COMPILE_PCRE8
3788 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3789 #elif defined SUPPORT_UTF
3790 /* Here LOCALS1 has already been zeroed. */
3791 jump = NULL;
3792 if (common->utf)
3793 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3794 #endif /* COMPILE_PCRE8 */
3795 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3796 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3797 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3798 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3799 #ifndef COMPILE_PCRE8
3800 JUMPHERE(jump);
3801 #elif defined SUPPORT_UTF
3802 if (jump != NULL)
3803 JUMPHERE(jump);
3804 #endif /* COMPILE_PCRE8 */
3805 }
3806 JUMPHERE(skipread);
3807
3808 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3809 check_str_end(common, &skipread_list);
3810 peek_char(common, READ_CHAR_MAX);
3811
3812 /* Testing char type. This is a code duplication. */
3813 #ifdef SUPPORT_UCP
3814 if (common->use_ucp)
3815 {
3816 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3817 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3818 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3819 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3820 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3821 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3822 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3823 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3824 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3825 JUMPHERE(jump);
3826 }
3827 else
3828 #endif
3829 {
3830 #ifndef COMPILE_PCRE8
3831 /* TMP2 may be destroyed by peek_char. */
3832 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3833 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3834 #elif defined SUPPORT_UTF
3835 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3836 jump = NULL;
3837 if (common->utf)
3838 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3839 #endif
3840 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3841 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3842 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3843 #ifndef COMPILE_PCRE8
3844 JUMPHERE(jump);
3845 #elif defined SUPPORT_UTF
3846 if (jump != NULL)
3847 JUMPHERE(jump);
3848 #endif /* COMPILE_PCRE8 */
3849 }
3850 set_jumps(skipread_list, LABEL());
3851
3852 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3853 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3854 }
3855
3856 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
3857 {
3858 DEFINE_COMPILER;
3859 int ranges[MAX_RANGE_SIZE];
3860 pcre_uint8 bit, cbit, all;
3861 int i, byte, length = 0;
3862
3863 bit = bits[0] & 0x1;
3864 /* All bits will be zero or one (since bit is zero or one). */
3865 all = -bit;
3866
3867 for (i = 0; i < 256; )
3868 {
3869 byte = i >> 3;
3870 if ((i & 0x7) == 0 && bits[byte] == all)
3871 i += 8;
3872 else
3873 {
3874 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3875 if (cbit != bit)
3876 {
3877 if (length >= MAX_RANGE_SIZE)
3878 return FALSE;
3879 ranges[length] = i;
3880 length++;
3881 bit = cbit;
3882 all = -cbit;
3883 }
3884 i++;
3885 }
3886 }
3887
3888 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3889 {
3890 if (length >= MAX_RANGE_SIZE)
3891 return FALSE;
3892 ranges[length] = 256;
3893 length++;
3894 }
3895
3896 if (length < 0 || length > 4)
3897 return FALSE;
3898
3899 bit = bits[0] & 0x1;
3900 if (invert) bit ^= 0x1;
3901
3902 /* No character is accepted. */
3903 if (length == 0 && bit == 0)
3904 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3905
3906 switch(length)
3907 {
3908 case 0:
3909 /* When bit != 0, all characters are accepted. */
3910 return TRUE;
3911
3912 case 1:
3913 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3914 return TRUE;
3915
3916 case 2:
3917 if (ranges[0] + 1 != ranges[1])
3918 {
3919 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3920 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3921 }
3922 else
3923 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3924 return TRUE;
3925
3926 case 3:
3927 if (bit != 0)
3928 {
3929 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3930 if (ranges[0] + 1 != ranges[1])
3931 {
3932 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3933 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3934 }
3935 else
3936 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3937 return TRUE;
3938 }
3939
3940 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
3941 if (ranges[1] + 1 != ranges[2])
3942 {
3943 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
3944 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3945 }
3946 else
3947 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
3948 return TRUE;
3949
3950 case 4:
3951 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
3952 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
3953 && is_powerof2(ranges[2] - ranges[0]))
3954 {
3955 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
3956 if (ranges[2] + 1 != ranges[3])
3957 {
3958 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3959 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3960 }
3961 else
3962 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3963 return TRUE;
3964 }
3965
3966 if (bit != 0)
3967 {
3968 i = 0;
3969 if (ranges[0] + 1 != ranges[1])
3970 {
3971 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3972 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3973 i = ranges[0];
3974 }
3975 else
3976 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3977
3978 if (ranges[2] + 1 != ranges[3])
3979 {
3980 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
3981 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3982 }
3983 else
3984 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
3985 return TRUE;
3986 }
3987
3988 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3989 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
3990 if (ranges[1] + 1 != ranges[2])
3991 {
3992 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
3993 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3994 }
3995 else
3996 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3997 return TRUE;
3998
3999 default:
4000 SLJIT_ASSERT_STOP();
4001 return FALSE;
4002 }
4003 }
4004
4005 static void check_anynewline(compiler_common *common)
4006 {
4007 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4008 DEFINE_COMPILER;
4009
4010 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4011
4012 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4013 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4014 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4015 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4016 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4017 #ifdef COMPILE_PCRE8
4018 if (common->utf)
4019 {
4020 #endif
4021 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4022 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4023 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4024 #ifdef COMPILE_PCRE8
4025 }
4026 #endif
4027 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4028 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4029 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4030 }
4031
4032 static void check_hspace(compiler_common *common)
4033 {
4034 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4035 DEFINE_COMPILER;
4036
4037 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4038
4039 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4040 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4041 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4042 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4043 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4044 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4045 #ifdef COMPILE_PCRE8
4046 if (common->utf)
4047 {
4048 #endif
4049 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4050 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4051 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4052 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4053 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4054 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4055 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4056 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4057 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4058 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4059 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4060 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4061 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4062 #ifdef COMPILE_PCRE8
4063 }
4064 #endif
4065 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4066 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4067
4068 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4069 }
4070
4071 static void check_vspace(compiler_common *common)
4072 {
4073 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4074 DEFINE_COMPILER;
4075
4076 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4077
4078 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4079 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4080 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4081 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4082 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4083 #ifdef COMPILE_PCRE8
4084 if (common->utf)
4085 {
4086 #endif
4087 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4088 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4089 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4090 #ifdef COMPILE_PCRE8
4091 }
4092 #endif
4093 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4094 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4095
4096 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4097 }
4098
4099 #define CHAR1 STR_END
4100 #define CHAR2 STACK_TOP
4101
4102 static void do_casefulcmp(compiler_common *common)
4103 {
4104 DEFINE_COMPILER;
4105 struct sljit_jump *jump;
4106 struct sljit_label *label;
4107
4108 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4109 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4110 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4111 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
4112 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4113 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4114
4115 label = LABEL();
4116 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4117 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4118 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4119 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4120 JUMPTO(SLJIT_C_NOT_ZERO, label);
4121
4122 JUMPHERE(jump);
4123 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4124 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4125 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4126 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4127 }
4128
4129 #define LCC_TABLE STACK_LIMIT
4130
4131 static void do_caselesscmp(compiler_common *common)
4132 {
4133 DEFINE_COMPILER;
4134 struct sljit_jump *jump;
4135 struct sljit_label *label;
4136
4137 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4138 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4139
4140 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4141 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
4142 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
4143 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4144 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4145 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4146
4147 label = LABEL();
4148 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4149 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4150 #ifndef COMPILE_PCRE8
4151 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4152 #endif
4153 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4154 #ifndef COMPILE_PCRE8
4155 JUMPHERE(jump);
4156 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4157 #endif
4158 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4159 #ifndef COMPILE_PCRE8
4160 JUMPHERE(jump);
4161 #endif
4162 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4163 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4164 JUMPTO(SLJIT_C_NOT_ZERO, label);
4165
4166 JUMPHERE(jump);
4167 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4168 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4169 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4170 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4171 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4172 }
4173
4174 #undef LCC_TABLE
4175 #undef CHAR1
4176 #undef CHAR2
4177
4178 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4179
4180 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4181 {
4182 /* This function would be ineffective to do in JIT level. */
4183 pcre_uint32 c1, c2;
4184 const pcre_uchar *src2 = args->uchar_ptr;
4185 const pcre_uchar *end2 = args->end;
4186 const ucd_record *ur;
4187 const pcre_uint32 *pp;
4188
4189 while (src1 < end1)
4190 {
4191 if (src2 >= end2)
4192 return (pcre_uchar*)1;
4193 GETCHARINC(c1, src1);
4194 GETCHARINC(c2, src2);
4195 ur = GET_UCD(c2);
4196 if (c1 != c2 && c1 != c2 + ur->other_case)
4197 {
4198 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4199 for (;;)
4200 {
4201 if (c1 < *pp) return NULL;
4202 if (c1 == *pp++) break;
4203 }
4204 }
4205 }
4206 return src2;
4207 }
4208
4209 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4210
4211 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4212 compare_context* context, jump_list **backtracks)
4213 {
4214 DEFINE_COMPILER;
4215 unsigned int othercasebit = 0;
4216 pcre_uchar *othercasechar = NULL;
4217 #ifdef SUPPORT_UTF
4218 int utflength;
4219 #endif
4220
4221 if (caseless && char_has_othercase(common, cc))
4222 {
4223 othercasebit = char_get_othercase_bit(common, cc);
4224 SLJIT_ASSERT(othercasebit);
4225 /* Extracting bit difference info. */
4226 #if defined COMPILE_PCRE8
4227 othercasechar = cc + (othercasebit >> 8);
4228 othercasebit &= 0xff;
4229 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4230 /* Note that this code only handles characters in the BMP. If there
4231 ever are characters outside the BMP whose othercase differs in only one
4232 bit from itself (there currently are none), this code will need to be
4233 revised for COMPILE_PCRE32. */
4234 othercasechar = cc + (othercasebit >> 9);
4235 if ((othercasebit & 0x100) != 0)
4236 othercasebit = (othercasebit & 0xff) << 8;
4237 else
4238 othercasebit &= 0xff;
4239 #endif /* COMPILE_PCRE[8|16|32] */
4240 }
4241
4242 if (context->sourcereg == -1)
4243 {
4244 #if defined COMPILE_PCRE8
4245 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4246 if (context->length >= 4)
4247 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4248 else if (context->length >= 2)
4249 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4250 else
4251 #endif
4252 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4253 #elif defined COMPILE_PCRE16
4254 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4255 if (context->length >= 4)
4256 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4257 else
4258 #endif
4259 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4260 #elif defined COMPILE_PCRE32
4261 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4262 #endif /* COMPILE_PCRE[8|16|32] */
4263 context->sourcereg = TMP2;
4264 }
4265
4266 #ifdef SUPPORT_UTF
4267 utflength = 1;
4268 if (common->utf && HAS_EXTRALEN(*cc))
4269 utflength += GET_EXTRALEN(*cc);
4270
4271 do
4272 {
4273 #endif
4274
4275 context->length -= IN_UCHARS(1);
4276 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4277
4278 /* Unaligned read is supported. */
4279 if (othercasebit != 0 && othercasechar == cc)
4280 {
4281 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4282 context->oc.asuchars[context->ucharptr] = othercasebit;
4283 }
4284 else
4285 {
4286 context->c.asuchars[context->ucharptr] = *cc;
4287 context->oc.asuchars[context->ucharptr] = 0;
4288 }
4289 context->ucharptr++;
4290
4291 #if defined COMPILE_PCRE8
4292 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4293 #else
4294 if (context->ucharptr >= 2 || context->length == 0)
4295 #endif
4296 {
4297 if (context->length >= 4)
4298 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4299 else if (context->length >= 2)
4300 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4301 #if defined COMPILE_PCRE8
4302 else if (context->length >= 1)
4303 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4304 #endif /* COMPILE_PCRE8 */
4305 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4306
4307 switch(context->ucharptr)
4308 {
4309 case 4 / sizeof(pcre_uchar):
4310 if (context->oc.asint != 0)
4311 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4312 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4313 break;
4314
4315 case 2 / sizeof(pcre_uchar):
4316 if (context->oc.asushort != 0)
4317 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4318 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4319 break;
4320
4321 #ifdef COMPILE_PCRE8
4322 case 1:
4323 if (context->oc.asbyte != 0)
4324 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4325 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4326 break;
4327 #endif
4328
4329 default:
4330 SLJIT_ASSERT_STOP();
4331 break;
4332 }
4333 context->ucharptr = 0;
4334 }
4335
4336 #else
4337
4338 /* Unaligned read is unsupported or in 32 bit mode. */
4339 if (context->length >= 1)
4340 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4341
4342 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4343
4344 if (othercasebit != 0 && othercasechar == cc)
4345 {
4346 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4347 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4348 }
4349 else
4350 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4351
4352 #endif
4353
4354 cc++;
4355 #ifdef SUPPORT_UTF
4356 utflength--;
4357 }
4358 while (utflength > 0);
4359 #endif
4360
4361 return cc;
4362 }
4363
4364 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4365
/* Both macros keep a register biased by a known constant so that range checks
can be emitted as a single unsigned compare. They emit an ADD or SUB only when
the requested bias differs from the current one, then record the new bias.

SET_TYPE_OFFSET works on typereg / typeoffset, SET_CHAR_OFFSET on TMP1 /
charoffset; these names must be in scope at the point of use. The value
argument is evaluated more than once, so it must not have side effects.

The bodies are wrapped in do { } while (0) so that each macro expands to a
single statement and is safe as the body of an unbraced if / else. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4385
4386 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4387 {
4388 DEFINE_COMPILER;
4389 jump_list *found = NULL;
4390 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4391 sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
4392 struct sljit_jump *jump = NULL;
4393 pcre_uchar *ccbegin;
4394 int compares, invertcmp, numberofcmps;
4395 #if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4396 BOOL utf = common->utf;
4397 #endif
4398
4399 #ifdef SUPPORT_UCP
4400 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4401 BOOL charsaved = FALSE;
4402 int typereg = TMP1, scriptreg = TMP1;
4403 const pcre_uint32 *other_cases;
4404 sljit_uw typeoffset;
4405 #endif
4406
4407 /* Scanning the necessary info. */
4408 cc++;
4409 ccbegin = cc;
4410 compares = 0;
4411 if (cc[-1] & XCL_MAP)
4412 {
4413 min = 0;
4414 cc += 32 / sizeof(pcre_uchar);
4415 }
4416
4417 while (*cc != XCL_END)
4418 {
4419 compares++;
4420 if (*cc == XCL_SINGLE)
4421 {
4422 cc ++;
4423 GETCHARINCTEST(c, cc);
4424 if (c > max) max = c;
4425 if (c < min) min = c;
4426 #ifdef SUPPORT_UCP
4427 needschar = TRUE;
4428 #endif
4429 }
4430 else if (*cc == XCL_RANGE)
4431 {
4432 cc ++;
4433 GETCHARINCTEST(c, cc);
4434 if (c < min) min = c;
4435 GETCHARINCTEST(c, cc);
4436 if (c > max) max = c;
4437 #ifdef SUPPORT_UCP
4438 needschar = TRUE;
4439 #endif
4440 }
4441 #ifdef SUPPORT_UCP
4442 else
4443 {
4444 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4445 cc++;
4446 if (*cc == PT_CLIST)
4447 {
4448 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4449 while (*other_cases != NOTACHAR)
4450 {
4451 if (*other_cases > max) max = *other_cases;
4452 if (*other_cases < min) min = *other_cases;
4453 other_cases++;
4454 }
4455 }
4456 else
4457 {
4458 max = READ_CHAR_MAX;
4459 min = 0;
4460 }
4461
4462 switch(*cc)
4463 {
4464 case PT_ANY:
4465 break;
4466
4467 case PT_LAMP:
4468 case PT_GC:
4469 case PT_PC:
4470 case PT_ALNUM:
4471 needstype = TRUE;
4472 break;
4473
4474 case PT_SC:
4475 needsscript = TRUE;
4476 break;
4477
4478 case PT_SPACE:
4479 case PT_PXSPACE:
4480 case PT_WORD:
4481 case PT_PXGRAPH:
4482 case PT_PXPRINT:
4483 case PT_PXPUNCT:
4484 needstype = TRUE;
4485 needschar = TRUE;
4486 break;
4487
4488 case PT_CLIST:
4489 case PT_UCNC:
4490 needschar = TRUE;
4491 break;
4492
4493 default:
4494 SLJIT_ASSERT_STOP();
4495 break;
4496 }
4497 cc += 2;
4498 }
4499 #endif
4500 }
4501
4502 /* We are not necessary in utf mode even in 8 bit mode. */
4503 cc = ccbegin;
4504 detect_partial_match(common, backtracks);
4505 read_char_range(common, min, max, (cc[0] & XCL_NOT) != 0);
4506
4507 if ((cc[-1] & XCL_HASPROP) == 0)
4508 {
4509 if ((cc[-1] & XCL_MAP) != 0)
4510 {
4511 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4512 if (!check_class_ranges(common, (const pcre_uint8 *)cc, (((const pcre_uint8 *)cc)[31] & 0x80) != 0, TRUE, &found))
4513 {
4514 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4515 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4516 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4517 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4518 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4519 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4520 }
4521
4522 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4523 JUMPHERE(jump);
4524
4525 cc += 32 / sizeof(pcre_uchar);
4526 }
4527 else
4528 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));
4529 }
4530 else if ((cc[-1] & XCL_MAP) != 0)
4531 {
4532 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4533 #ifdef SUPPORT_UCP
4534 charsaved = TRUE;
4535 #endif
4536 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4537 {
4538 #ifdef COMPILE_PCRE8
4539 SLJIT_ASSERT(common->utf);
4540 #endif
4541 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4542
4543 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4544 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4545 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4546 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4547 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4548 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4549
4550 JUMPHERE(jump);
4551 }
4552
4553 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4554 cc += 32 / sizeof(pcre_uchar);
4555 }
4556
4557 #ifdef SUPPORT_UCP
4558 /* Simple register allocation. TMP1 is preferred if possible. */
4559 if (needstype || needsscript)
4560 {
4561 if (needschar && !charsaved)
4562 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4563 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4564 if (needschar)
4565 {
4566 if (needstype)
4567 {
4568 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4569 typereg = RETURN_ADDR;
4570 }
4571
4572 if (needsscript)
4573 scriptreg = TMP3;
4574 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4575 }
4576 else if (needstype && needsscript)
4577 scriptreg = TMP3;
4578 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4579
4580 if (needsscript)
4581 {
4582 if (scriptreg == TMP1)
4583 {
4584 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4585 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4586 }
4587 else
4588 {
4589 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4590 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4591 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4592 }
4593 }
4594 }
4595 #endif
4596
4597 /* Generating code. */
4598 charoffset = 0;
4599 numberofcmps = 0;
4600 #ifdef SUPPORT_UCP
4601 typeoffset = 0;
4602 #endif
4603
4604 while (*cc != XCL_END)
4605 {
4606 compares--;
4607 invertcmp = (compares == 0 && list != backtracks);
4608 jump = NULL;
4609
4610 if (*cc == XCL_SINGLE)
4611 {
4612 cc ++;
4613 GETCHARINCTEST(c, cc);
4614
4615 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4616 {
4617 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4618 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4619 numberofcmps++;
4620 }
4621 else if (numberofcmps > 0)
4622 {
4623 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4624 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4625 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4626 numberofcmps = 0;
4627 }
4628 else
4629 {
4630 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4631 numberofcmps = 0;
4632 }
4633 }
4634 else if (*cc == XCL_RANGE)
4635 {
4636 cc ++;
4637 GETCHARINCTEST(c, cc);
4638 SET_CHAR_OFFSET(c);
4639 GETCHARINCTEST(c, cc);
4640
4641 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4642 {
4643 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4644 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4645 numberofcmps++;
4646 }
4647 else if (numberofcmps > 0)
4648 {
4649 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4650 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4651 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4652 numberofcmps = 0;
4653 }
4654 else
4655 {
4656 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
4657 numberofcmps = 0;
4658 }
4659 }
4660 #ifdef SUPPORT_UCP
4661 else
4662 {
4663 if (*cc == XCL_NOTPROP)
4664 invertcmp ^= 0x1;
4665 cc++;
4666 switch(*cc)
4667 {
4668 case PT_ANY:
4669 if (list != backtracks)
4670 {
4671 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4672 continue;
4673 }
4674 else if (cc[-1] == XCL_NOTPROP)
4675 continue;
4676 jump = JUMP(SLJIT_JUMP);
4677 break;
4678
4679 case PT_LAMP:
4680 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4681 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4682 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4683 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4684 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4685 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4686 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4687 break;
4688
4689 case PT_GC:
4690 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4691 SET_TYPE_OFFSET(c);
4692 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4693 break;
4694
4695 case PT_PC:
4696 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4697 break;
4698
4699 case PT_SC:
4700 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4701 break;
4702
4703 case PT_SPACE:
4704 case PT_PXSPACE:
4705 SET_CHAR_OFFSET(9);
4706 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4707 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4708
4709 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4710 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4711
4712 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4713 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4714
4715 SET_TYPE_OFFSET(ucp_Zl);
4716 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4717 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4718 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4719 break;
4720
4721 case PT_WORD:
4722 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
4723 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4724 /* Fall through. */
4725
4726 case PT_ALNUM:
4727 SET_TYPE_OFFSET(ucp_Ll);
4728 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4729 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4730 SET_TYPE_OFFSET(ucp_Nd);
4731 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4732 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4733 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4734 break;
4735
4736 case PT_CLIST:
4737 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4738
4739 /* At least three characters are required.
4740 Otherwise this case would be handled by the normal code path. */
4741 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4742 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4743
4744 /* Optimizing character pairs, if their difference is power of 2. */
4745 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4746 {
4747 if (charoffset == 0)
4748 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4749 else
4750 {
4751 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4752 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4753 }
4754 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4755 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4756 other_cases += 2;
4757 }
4758 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4759 {
4760 if (charoffset == 0)
4761 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4762 else
4763 {
4764 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4765 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4766 }
4767 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4768 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4769
4770 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
4771 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4772
4773 other_cases += 3;
4774 }
4775 else
4776 {
4777 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4778 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4779 }
4780
4781 while (*other_cases != NOTACHAR)
4782 {
4783 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
4784 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4785 }
4786 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4787 break;
4788
4789 case PT_UCNC:
4790 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
4791 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4792 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
4793 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4794 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
4795 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4796
4797 SET_CHAR_OFFSET(0xa0);
4798 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
4799 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4800 SET_CHAR_OFFSET(0);
4801 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4802 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4803 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4804 break;
4805
4806 case PT_PXGRAPH:
4807 /* C and Z groups are the farthest two groups. */
4808 SET_TYPE_OFFSET(ucp_Ll);
4809 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4810 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4811
4812 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4813
4814 /* In case of ucp_Cf, we overwrite the result. */
4815 SET_CHAR_OFFSET(0x2066);
4816 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4817 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4818
4819 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4820 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4821
4822 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4823 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4824
4825 JUMPHERE(jump);
4826 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4827 break;
4828
4829 case PT_PXPRINT:
4830 /* C and Z groups are the farthest two groups. */
4831 SET_TYPE_OFFSET(ucp_Ll);
4832 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4833 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4834
4835 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4836 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4837
4838 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4839
4840 /* In case of ucp_Cf, we overwrite the result. */
4841 SET_CHAR_OFFSET(0x2066);
4842 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4843 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4844
4845 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4846 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4847
4848 JUMPHERE(jump);
4849 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4850 break;
4851
4852 case PT_PXPUNCT:
4853 SET_TYPE_OFFSET(ucp_Sc);
4854 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4855 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4856
4857 SET_CHAR_OFFSET(0);
4858 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4859 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4860
4861 SET_TYPE_OFFSET(ucp_Pc);
4862 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4863 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4864 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4865 break;
4866 }
4867 cc += 2;
4868 }
4869 #endif
4870
4871 if (jump != NULL)
4872 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4873 }
4874
4875 if (found != NULL)
4876 set_jumps(found, LABEL());
4877 }
4878
4879 #undef SET_TYPE_OFFSET
4880 #undef SET_CHAR_OFFSET
4881
4882 #endif
4883
4884 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4885 {
4886 DEFINE_COMPILER;
4887 int length;
4888 unsigned int c, oc, bit;
4889 compare_context context;
4890 struct sljit_jump *jump[4];
4891 jump_list *end_list;
4892 #ifdef SUPPORT_UTF
4893 struct sljit_label *label;
4894 #ifdef SUPPORT_UCP
4895 pcre_uchar propdata[5];
4896 #endif
4897 #endif /* SUPPORT_UTF */
4898
4899 switch(type)
4900 {
4901 case OP_SOD:
4902 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4903 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4904 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4905 return cc;
4906
4907 case OP_SOM:
4908 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4909 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4910 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4911 return cc;
4912
4913 case OP_NOT_WORD_BOUNDARY:
4914 case OP_WORD_BOUNDARY:
4915 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4916 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4917 return cc;
4918
4919 case OP_NOT_DIGIT:
4920 case OP_DIGIT:
4921 /* Digits are usually 0-9, so it is worth to optimize them. */
4922 detect_partial_match(common, backtracks);
4923 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4924 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
4925 read_char7_type(common, type == OP_NOT_DIGIT);
4926 else
4927 #endif
4928 read_char8_type(common, type == OP_NOT_DIGIT);
4929 /* Flip the starting bit in the negative case. */
4930 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4931 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4932 return cc;
4933
4934 case OP_NOT_WHITESPACE:
4935 case OP_WHITESPACE:
4936 detect_partial_match(common, backtracks);
4937 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4938 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
4939 read_char7_type(common, type == OP_NOT_WHITESPACE);
4940 else
4941 #endif
4942 read_char8_type(common, type == OP_NOT_WHITESPACE);
4943 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4944 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4945 return cc;
4946
4947 case OP_NOT_WORDCHAR:
4948 case OP_WORDCHAR:
4949 detect_partial_match(common, backtracks);
4950 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4951 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
4952 read_char7_type(common, type == OP_NOT_WORDCHAR);
4953 else
4954 #endif
4955 read_char8_type(common, type == OP_NOT_WORDCHAR);
4956 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4957 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4958 return cc;
4959
4960 case OP_ANY:
4961 detect_partial_match(common, backtracks);
4962 read_char_range(common, common->nlmin, common->nlmax, TRUE);
4963 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4964 {
4965 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4966 end_list = NULL;
4967 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4968 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4969 else
4970 check_str_end(common, &end_list);
4971
4972 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4973 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4974 set_jumps(end_list, LABEL());
4975 JUMPHERE(jump[0]);
4976 }
4977 else
4978 check_newlinechar(common, common->nltype, backtracks, TRUE);
4979 return cc;
4980
4981 case OP_ALLANY:
4982 detect_partial_match(common, backtracks);
4983 #ifdef SUPPORT_UTF
4984 if (common->utf)
4985 {
4986 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4987 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4988 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4989 #if defined COMPILE_PCRE8
4990 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4991 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4992 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4993 #elif defined COMPILE_PCRE16
4994 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4995 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4996 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4997 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4998 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4999 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5000 #endif
5001 JUMPHERE(jump[0]);
5002 #endif /* COMPILE_PCRE[8|16] */
5003 return cc;
5004 }
5005 #endif
5006 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5007 return cc;
5008
5009 case OP_ANYBYTE:
5010 detect_partial_match(common, backtracks);
5011 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5012 return cc;
5013
5014 #ifdef SUPPORT_UTF
5015 #ifdef SUPPORT_UCP
5016 case OP_NOTPROP:
5017 case OP_PROP:
5018 propdata[0] = XCL_HASPROP;
5019 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5020 propdata[2] = cc[0];
5021 propdata[3] = cc[1];
5022 propdata[4] = XCL_END;
5023 compile_xclass_matchingpath(common, propdata, backtracks);
5024 return cc + 2;
5025 #endif
5026 #endif
5027
5028 case OP_ANYNL:
5029 detect_partial_match(common, backtracks);
5030 read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
5031 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5032 /* We don't need to handle soft partial matching case. */
5033 end_list = NULL;
5034 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5035 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5036 else
5037 check_str_end(common, &end_list);
5038 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5039 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5040 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5041 jump[2] = JUMP(SLJIT_JUMP);
5042 JUMPHERE(jump[0]);
5043 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5044 set_jumps(end_list, LABEL());
5045 JUMPHERE(jump[1]);
5046 JUMPHERE(jump[2]);
5047 return cc;
5048
5049 case OP_NOT_HSPACE:
5050 case OP_HSPACE:
5051 detect_partial_match(common, backtracks);
5052 read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
5053 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5054 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5055 return cc;
5056
5057 case OP_NOT_VSPACE:
5058 case OP_VSPACE:
5059 detect_partial_match(common, backtracks);
5060 read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
5061 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5062 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5063 return cc;
5064
5065 #ifdef SUPPORT_UCP
5066 case OP_EXTUNI:
5067 detect_partial_match(common, backtracks);
5068 read_char(common);
5069 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5070 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5071 /* Optimize register allocation: use a real register. */
5072 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5073 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5074
5075 label = LABEL();
5076 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5077 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5078 read_char(common);
5079 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5080 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5081 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5082
5083 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5084 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5085 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5086 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5087 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5088 JUMPTO(SLJIT_C_NOT_ZERO, label);
5089
5090 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5091 JUMPHERE(jump[0]);
5092 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5093
5094 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5095 {
5096 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5097 /* Since we successfully read a char above, partial matching must occur. */
5098 check_partial(common, TRUE);
5099 JUMPHERE(jump[0]);
5100 }
5101 return cc;
5102 #endif
5103
5104 case OP_EODN:
5105 /* Requires rather complex checks. */
5106 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5107 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5108 {
5109 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5110 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5111 if (common->mode == JIT_COMPILE)
5112 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5113 else
5114 {
5115 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5116 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5117 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5118 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5119 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5120 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5121 check_partial(common, TRUE);
5122 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5123 JUMPHERE(jump[1]);
5124 }
5125 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5126 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5127 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5128 }
5129 else if (common->nltype == NLTYPE_FIXED)
5130 {
5131 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5132 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5133 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5134 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5135 }
5136 else
5137 {
5138 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5139 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5140 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5141 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5142 jump[2] = JUMP(SLJIT_C_GREATER);
5143 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5144 /* Equal. */
5145 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5146 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5147 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5148
5149 JUMPHERE(jump[1]);
5150 if (common->nltype == NLTYPE_ANYCRLF)
5151 {
5152 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5153 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5154 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5155 }
5156 else
5157 {
5158 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5159 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5160 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5161 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5162 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5163 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
5164 }
5165 JUMPHERE(jump[2]);
5166 JUMPHERE(jump[3]);
5167 }
5168 JUMPHERE(jump[0]);
5169 check_partial(common, FALSE);
5170 return cc;
5171
5172 case OP_EOD:
5173 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5174 check_partial(common, FALSE);
5175 return cc;
5176
5177 case OP_CIRC:
5178 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5179 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5180 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5181 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5182 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5183 return cc;
5184
5185 case OP_CIRCM:
5186 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5187 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5188 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5189 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5190 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5191 jump[0] = JUMP(SLJIT_JUMP);
5192 JUMPHERE(jump[1]);
5193
5194 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5195 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5196 {
5197 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5198 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5199 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5200 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5201 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5202 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5203 }
5204 else
5205 {
5206 skip_char_back(common);
5207 read_char_range(common, common->nlmin, common->nlmax, TRUE);
5208 check_newlinechar(common, common->nltype, backtracks, FALSE);
5209 }
5210 JUMPHERE(jump[0]);
5211 return cc;
5212
5213 case OP_DOLL:
5214 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5215 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5216 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5217
5218 if (!common->endonly)
5219 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5220 else
5221 {
5222 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5223 check_partial(common, FALSE);
5224 }
5225 return cc;
5226
5227 case OP_DOLLM:
5228 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5229 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5230 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5231 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5232 check_partial(common, FALSE);
5233 jump[0] = JUMP(SLJIT_JUMP);
5234 JUMPHERE(jump[1]);
5235
5236 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5237 {
5238 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5239 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5240 if (common->mode == JIT_COMPILE)
5241 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5242 else
5243 {
5244 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5245 /* STR_PTR = STR_END - IN_UCHARS(1) */
5246 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5247 check_partial(common, TRUE);
5248 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5249 JUMPHERE(jump[1]);
5250 }
5251
5252 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5253 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5254 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5255 }
5256 else
5257 {
5258 peek_char(common, common->nlmax);
5259 check_newlinechar(common, common->nltype, backtracks, FALSE);
5260 }
5261 JUMPHERE(jump[0]);
5262 return cc;
5263
5264 case OP_CHAR:
5265 case OP_CHARI:
5266 length = 1;
5267 #ifdef SUPPORT_UTF
5268 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5269 #endif
5270 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5271 {
5272 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5273 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5274
5275 context.length = IN_UCHARS(length);
5276 context.sourcereg = -1;
5277 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5278 context.ucharptr = 0;
5279 #endif
5280 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5281 }
5282
5283 detect_partial_match(common, backtracks);
5284 #ifdef SUPPORT_UTF
5285 if (common->utf)
5286 {
5287 GETCHAR(c, cc);
5288 }
5289 else
5290 #endif
5291 c = *cc;
5292
5293 if (type == OP_CHAR || !char_has_othercase(common, cc))
5294 {
5295 read_char_range(common, c, c, FALSE);
5296 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5297 return cc + length;
5298 }
5299 oc = char_othercase(common, c);
5300 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
5301 bit = c ^ oc;
5302 if (is_powerof2(bit))
5303 {
5304 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5305 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5306 return cc + length;
5307 }
5308 jump[0] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c);
5309 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5310 JUMPHERE(jump[0]);
5311 return cc + length;
5312
5313 case OP_NOT:
5314 case OP_NOTI:
5315 detect_partial_match(common, backtracks);
5316 length = 1;
5317 #ifdef SUPPORT_UTF
5318 if (common->utf)
5319 {
5320 #ifdef COMPILE_PCRE8
5321 c = *cc;
5322 if (c < 128)
5323 {
5324 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5325 if (type == OP_NOT || !char_has_othercase(common, cc))
5326 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5327 else
5328 {
5329 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5330 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5331 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5332 }
5333 /* Skip the variable-length character. */
5334 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5335 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5336 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5337 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5338 JUMPHERE(jump[0]);
5339 return cc + 1;
5340 }
5341 else
5342 #endif /* COMPILE_PCRE8 */
5343 {
5344 GETCHARLEN(c, cc, length);
5345 }
5346 }
5347 else
5348 #endif /* SUPPORT_UTF */
5349 c = *cc;
5350
5351 if (type == OP_NOT || !char_has_othercase(common, cc))
5352 {
5353 read_char_range(common, c, c, TRUE);
5354 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5355 }
5356 else
5357 {
5358 oc = char_othercase(common, c);
5359 read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
5360 bit = c ^ oc;
5361 if (is_powerof2(bit))
5362 {
5363 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5364 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5365 }
5366 else
5367 {
5368 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5369 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5370 }
5371 }
5372 return cc + length;
5373
5374 case OP_CLASS:
5375 case OP_NCLASS:
5376 detect_partial_match(common, backtracks);
5377
5378 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5379 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5380 read_char_range(common, 0, bit, type == OP_NCLASS);
5381 #else
5382 read_char_range(common, 0, 255, type == OP_NCLASS);
5383 #endif
5384
5385 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5386 return cc + 32 / sizeof(pcre_uchar);
5387
5388 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5389 jump[0] = NULL;
5390 if (common->utf)
5391 {
5392 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5393 if (type == OP_CLASS)
5394 {
5395 add_jump(compiler, backtracks, jump[0]);
5396 jump[0] = NULL;
5397 }
5398 }
5399 #elif !defined COMPILE_PCRE8
5400 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5401 if (type == OP_CLASS)
5402 {
5403 add_jump(compiler, backtracks, jump[0]);
5404 jump[0] = NULL;
5405 }
5406 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5407
5408 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5409 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5410 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5411 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5412 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5413 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5414
5415 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5416 if (jump[0] != NULL)
5417 JUMPHERE(jump[0]);
5418 #endif
5419
5420 return cc + 32 / sizeof(pcre_uchar);
5421
5422 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5423 case OP_XCLASS:
5424 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5425 return cc + GET(cc, 0) - 1;
5426 #endif
5427
5428 case OP_REVERSE:
5429 length = GET(cc, 0);
5430 if (length == 0)
5431 return cc + LINK_SIZE;
5432 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5433 #ifdef SUPPORT_UTF
5434 if (common->utf)
5435 {
5436 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5437 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5438 label = LABEL();
5439 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5440 skip_char_back(common);
5441 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5442 JUMPTO(SLJIT_C_NOT_ZERO, label);
5443 }
5444 else
5445 #endif
5446 {
5447 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5448 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5449 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5450 }
5451 check_start_used_ptr(common);
5452 return cc + LINK_SIZE;
5453 }
5454 SLJIT_ASSERT_STOP();
5455 return cc;
5456 }
5457
5458 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5459 {
5460 /* This function consumes at least one input character. */
5461 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5462 DEFINE_COMPILER;
5463 pcre_uchar *ccbegin = cc;
5464 compare_context context;
5465 int size;
5466
5467 context.length = 0;
5468 do
5469 {
5470 if (cc >= ccend)
5471 break;
5472
5473 if (*cc == OP_CHAR)
5474 {
5475 size = 1;
5476 #ifdef SUPPORT_UTF
5477 if (common->utf && HAS_EXTRALEN(cc[1]))
5478 size += GET_EXTRALEN(cc[1]);
5479 #endif
5480 }
5481 else if (*cc == OP_CHARI)
5482 {
5483 size = 1;
5484 #ifdef SUPPORT_UTF
5485 if (common->utf)
5486 {
5487 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5488 size = 0;
5489 else if (HAS_EXTRALEN(cc[1]))
5490 size += GET_EXTRALEN(cc[1]);
5491 }
5492 else
5493 #endif
5494 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5495 size = 0;
5496 }
5497 else
5498 size = 0;
5499
5500 cc += 1 + size;
5501 context.length += IN_UCHARS(size);
5502 }
5503 while (size > 0 && context.length <= 128);
5504
5505 cc = ccbegin;
5506 if (context.length > 0)
5507 {
5508 /* We have a fixed-length byte sequence. */
5509 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5510 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5511
5512 context.sourcereg = -1;
5513 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5514 context.ucharptr = 0;
5515 #endif
5516 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5517 return cc;
5518 }
5519
5520 /* A non-fixed length character will be checked if length == 0. */
5521 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5522 }
5523
5524 /* Forward declarations. compile_matchingpath() is called further down (for example by compile_recurse_matchingpath) before its definition appears; both definitions are outside this part of the file. */
5525 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5526 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5527
/* Allocates a zero-filled backtrack record of `size` bytes and pushes it on
top of the parent's backtrack chain.

The macro relies on three locals of the enclosing function: `compiler`,
`backtrack` (assigned here) and `parent`. If the compiler is in an error
state after the allocation, the enclosing function returns `error`.

Note that `size` is expanded twice, so it must not have side effects. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
5540
/* Variant of PUSH_BACKTRACK for enclosing functions that return void: it
allocates a zero-filled backtrack record of `size` bytes and pushes it on top
of the parent's backtrack chain, and simply returns if the compiler is in an
error state after the allocation.

The macro relies on the `compiler`, `backtrack` and `parent` locals of the
enclosing function. `size` is expanded twice, so it must not have side
effects. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
5553
/* Casts the enclosing function's local `backtrack` pointer to a pointer to
the more specific backtrack record type `type`. */
5554 #define BACKTRACK_AS(type) ((type *)backtrack)
5555
5556 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5557 {
5558 /* The OVECTOR offset goes to TMP2. */
5559 DEFINE_COMPILER;
5560 int count = GET2(cc, 1 + IMM2_SIZE);
5561 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5562 unsigned int offset;
5563 jump_list *found = NULL;
5564
5565 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5566
5567 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5568
5569 count--;
5570 while (count-- > 0)
5571 {
5572 offset = GET2(slot, 0) << 1;
5573 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5574 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5575 slot += common->name_entry_size;
5576 }
5577
5578 offset = GET2(slot, 0) << 1;
5579 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5580 if (backtracks != NULL && !common->jscript_compat)
5581 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5582
5583 set_jumps(found, LABEL());
5584 }
5585
5586 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5587 {
5588 DEFINE_COMPILER;
5589 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5590 int offset = 0;
5591 struct sljit_jump *jump = NULL;
5592 struct sljit_jump *partial;
5593 struct sljit_jump *nopartial;
5594
5595 if (ref)
5596 {
5597 offset = GET2(cc, 1) << 1;
5598 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5599 /* OVECTOR(1) contains the "string begin - 1" constant. */
5600 if (withchecks && !common->jscript_compat)
5601 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5602 }
5603 else
5604 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5605
5606 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5607 if (common->utf && *cc == OP_REFI)
5608 {
5609 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5610 if (ref)
5611 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5612 else
5613 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5614
5615 if (withchecks)
5616 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5617
5618 /* Needed to save important temporary registers. */
5619 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5620 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5621 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5622 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5623 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5624 if (common->mode == JIT_COMPILE)
5625 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5626 else
5627 {
5628 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5629 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5630 check_partial(common, FALSE);
5631 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5632 JUMPHERE(nopartial);
5633 }
5634 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5635 }
5636 else
5637 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5638 {
5639 if (ref)
5640 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5641 else
5642 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5643
5644 if (withchecks)
5645 jump = JUMP(SLJIT_C_ZERO);
5646
5647 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5648 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5649 if (common->mode == JIT_COMPILE)
5650 add_jump(compiler, backtracks, partial);
5651
5652 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5653 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5654
5655 if (common->mode != JIT_COMPILE)
5656 {
5657 nopartial = JUMP(SLJIT_JUMP);
5658 JUMPHERE(partial);
5659 /* TMP2 -= STR_END - STR_PTR */
5660 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5661 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5662 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5663 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5664 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5665 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5666 JUMPHERE(partial);
5667 check_partial(common, FALSE);
5668 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5669 JUMPHERE(nopartial);
5670 }
5671 }
5672
5673 if (jump != NULL)
5674 {
5675 if (emptyfail)
5676 add_jump(compiler, backtracks, jump);
5677 else
5678 JUMPHERE(jump);
5679 }
5680 }
5681
5682 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5683 {
5684 DEFINE_COMPILER;
5685 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5686 backtrack_common *backtrack;
5687 pcre_uchar type;
5688 int offset = 0;
5689 struct sljit_label *label;
5690 struct sljit_jump *zerolength;
5691 struct sljit_jump *jump = NULL;
5692 pcre_uchar *ccbegin = cc;
5693 int min = 0, max = 0;
5694 BOOL minimize;
5695
5696 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5697
5698 if (ref)
5699 offset = GET2(cc, 1) << 1;
5700 else
5701 cc += IMM2_SIZE;
5702 type = cc[1 + IMM2_SIZE];
5703
5704 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5705 minimize = (type & 0x1) != 0;
5706 switch(type)
5707 {
5708 case OP_CRSTAR:
5709 case OP_CRMINSTAR:
5710 min = 0;
5711 max = 0;
5712 cc += 1 + IMM2_SIZE + 1;
5713 break;
5714 case OP_CRPLUS:
5715 case OP_CRMINPLUS:
5716 min = 1;
5717 max = 0;
5718 cc += 1 + IMM2_SIZE + 1;
5719 break;
5720 case OP_CRQUERY:
5721 case OP_CRMINQUERY:
5722 min = 0;
5723 max = 1;
5724 cc += 1 + IMM2_SIZE + 1;
5725 break;
5726 case OP_CRRANGE:
5727 case OP_CRMINRANGE:
5728 min = GET2(cc, 1 + IMM2_SIZE + 1);
5729 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5730 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5731 break;
5732 default:
5733 SLJIT_ASSERT_STOP();
5734 break;
5735 }
5736
5737 if (!minimize)
5738 {
5739 if (min == 0)
5740 {
5741 allocate_stack(common, 2);
5742 if (ref)
5743 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5744 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5745 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5746 /* Temporary release of STR_PTR. */
5747 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5748 /* Handles both invalid and empty cases. Since the minimum repeat,
5749 is zero the invalid case is basically the same as an empty case. */
5750 if (ref)
5751 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5752 else
5753 {
5754 compile_dnref_search(common, ccbegin, NULL);
5755 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5756 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5757 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5758 }
5759 /* Restore if not zero length. */
5760 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5761 }
5762 else
5763 {
5764 allocate_stack(common, 1);
5765 if (ref)
5766 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5767 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5768 if (ref)
5769 {
5770 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5771 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5772 }
5773 else
5774 {
5775 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5776 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5777 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5778 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5779 }
5780 }
5781
5782 if (min > 1 || max > 1)
5783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5784
5785 label = LABEL();
5786 if (!ref)
5787 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5788 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5789
5790 if (min > 1 || max > 1)
5791 {
5792 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5793 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5794 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5795 if (min > 1)
5796 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5797 if (max > 1)
5798 {
5799 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5800 allocate_stack(common, 1);
5801 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5802 JUMPTO(SLJIT_JUMP, label);
5803 JUMPHERE(jump);
5804 }
5805 }
5806
5807 if (max == 0)
5808 {
5809 /* Includes min > 1 case as well. */
5810 allocate_stack(common, 1);
5811 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5812 JUMPTO(SLJIT_JUMP, label);
5813 }
5814
5815 JUMPHERE(zerolength);
5816 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5817
5818 count_match(common);
5819 return cc;
5820 }
5821
5822 allocate_stack(common, ref ? 2 : 3);
5823 if (ref)
5824 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5825 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5826 if (type != OP_CRMINSTAR)
5827 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5828
5829 if (min == 0)
5830 {
5831 /* Handles both invalid and empty cases. Since the minimum repeat,
5832 is zero the invalid case is basically the same as an empty case. */
5833 if (ref)
5834 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5835 else
5836 {
5837 compile_dnref_search(common, ccbegin, NULL);
5838 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5839 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5840 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5841 }
5842 /* Length is non-zero, we can match real repeats. */
5843 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5844 jump = JUMP(SLJIT_JUMP);
5845 }
5846 else
5847 {
5848 if (ref)
5849 {
5850 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5851 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5852 }
5853 else
5854 {
5855 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5856 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5857 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5858 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5859 }
5860 }
5861
5862 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5863 if (max > 0)
5864 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5865
5866 if (!ref)
5867 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5868 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5869 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5870
5871 if (min > 1)
5872 {
5873 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5874 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5875 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5876 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5877 }
5878 else if (max > 0)
5879 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5880
5881 if (jump != NULL)
5882 JUMPHERE(jump);
5883 JUMPHERE(zerolength);
5884
5885 count_match(common);
5886 return cc;
5887 }
5888
5889 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5890 {
5891 DEFINE_COMPILER;
5892 backtrack_common *backtrack;
5893 recurse_entry *entry = common->entries;
5894 recurse_entry *prev = NULL;
5895 sljit_sw start = GET(cc, 1);
5896 pcre_uchar *start_cc;
5897 BOOL needs_control_head;
5898
5899 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5900
5901 /* Inlining simple patterns. */
5902 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5903 {
5904 start_cc = common->start + start;
5905 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5906 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5907 return cc + 1 + LINK_SIZE;
5908 }
5909
5910 while (entry != NULL)
5911 {
5912 if (entry->start == start)
5913 break;
5914 prev = entry;
5915 entry = entry->next;
5916 }
5917
5918 if (entry == NULL)
5919 {
5920 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5921 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5922 return NULL;
5923 entry->next = NULL;
5924 entry->entry = NULL;
5925 entry->calls = NULL;
5926 entry->start = start;
5927
5928 if (prev != NULL)
5929 prev->next = entry;
5930 else
5931 common->entries = entry;
5932 }
5933
5934 if (common->has_set_som && common->mark_ptr != 0)
5935 {
5936 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5937 allocate_stack(common, 2);
5938 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5939 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5940 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5941 }
5942 else if (common->has_set_som || common->mark_ptr != 0)
5943 {
5944 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5945 allocate_stack(common, 1);
5946 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5947 }
5948
5949 if (entry->entry == NULL)
5950 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5951 else
5952 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5953 /* Leave if the match is failed. */
5954 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5955 return cc + 1 + LINK_SIZE;
5956 }
5957
5958 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5959 {
5960 const pcre_uchar *begin = arguments->begin;
5961 int *offset_vector = arguments->offsets;
5962 int offset_count = arguments->offset_count;
5963 int i;
5964
5965 if (PUBL(callout) == NULL)
5966 return 0;
5967
5968 callout_block->version = 2;
5969 callout_block->callout_data = arguments->callout_data;
5970
5971 /* Offsets in subject. */
5972 callout_block->subject_length = arguments->end - arguments->begin;
5973 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5974 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5975 #if defined COMPILE_PCRE8
5976 callout_block->subject = (PCRE_SPTR)begin;
5977 #elif defined COMPILE_PCRE16
5978 callout_block->subject = (PCRE_SPTR16)begin;
5979 #elif defined COMPILE_PCRE32
5980 callout_block->subject = (PCRE_SPTR32)begin;
5981 #endif
5982
5983 /* Convert and copy the JIT offset vector to the offset_vector array. */
5984 callout_block->capture_top = 0;
5985 callout_block->offset_vector = offset_vector;
5986 for (i = 2; i < offset_count; i += 2)
5987 {
5988 offset_vector[i] = jit_ovector[i] - begin;
5989 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5990 if (jit_ovector[i] >= begin)
5991 callout_block->capture_top = i;
5992 }
5993
5994 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5995 if (offset_count > 0)
5996 offset_vector[0] = -1;
5997 if (offset_count > 1)
5998 offset_vector[1] = -1;
5999 return (*PUBL(callout))(callout_block);
6000 }
6001
6002 /* Aligning to 8 byte. */
6003 #define CALLOUT_ARG_SIZE \
6004 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6005
6006 #define CALLOUT_ARG_OFFSET(arg) \
6007 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6008
6009 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6010 {
6011 DEFINE_COMPILER;
6012 backtrack_common *backtrack;
6013
6014 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6015
6016 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6017
6018 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6019 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6020 SLJIT_ASSERT(common->capture_last_ptr != 0);
6021 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6022 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6023
6024 /* These pointer sized fields temporarly stores internal variables. */
6025 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6026 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6027 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6028
6029 if (common->mark_ptr != 0)
6030 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6031 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6032 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6033 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6034
6035 /* Needed to save important temporary registers. */
6036 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
6037 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6038 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
6039 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6040 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6041 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6042 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6043
6044 /* Check return value. */
6045 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6046 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
6047 if (common->forced_quit_label == NULL)
6048 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
6049 else
6050 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
6051 return cc + 2 + 2 * LINK_SIZE;
6052 }
6053
6054 #undef CALLOUT_ARG_SIZE
6055 #undef CALLOUT_ARG_OFFSET
6056
6057 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6058 {
6059 DEFINE_COMPILER;
6060 int framesize;
6061 int extrasize;
6062 BOOL needs_control_head;
6063 int private_data_ptr;
6064 backtrack_common altbacktrack;
6065 pcre_uchar *ccbegin;
6066 pcre_uchar opcode;
6067 pcre_uchar bra = OP_BRA;
6068 jump_list *tmp = NULL;
6069 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6070 jump_list **found;
6071 /* Saving previous accept variables. */
6072 BOOL save_local_exit = common->local_exit;
6073 BOOL save_positive_assert = common->positive_assert;
6074 then_trap_backtrack *save_then_trap = common->then_trap;
6075 struct sljit_label *save_quit_label = common->quit_label;
6076 struct sljit_label *save_accept_label = common->accept_label;
6077 jump_list *save_quit = common->quit;
6078 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6079 jump_list *save_accept = common->accept;
6080 struct sljit_jump *jump;
6081 struct sljit_jump *brajump = NULL;
6082
6083 /* Assert captures then. */
6084 common->then_trap = NULL;
6085
6086 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6087 {
6088 SLJIT_ASSERT(!conditional);
6089 bra = *cc;
6090 cc++;
6091 }
6092 private_data_ptr = PRIVATE_DATA(cc);
6093 SLJIT_ASSERT(private_data_ptr != 0);
6094 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6095 backtrack->framesize = framesize;
6096 backtrack->private_data_ptr = private_data_ptr;
6097 opcode = *cc;
6098 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
6099 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6100 ccbegin = cc;
6101 cc += GET(cc, 1);
6102
6103 if (bra == OP_BRAMINZERO)
6104 {
6105 /* This is a braminzero backtrack path. */
6106 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6107 free_stack(common, 1);
6108 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6109 }
6110
6111 if (framesize < 0)
6112 {
6113 extrasize = needs_control_head ? 2 : 1;
6114 if (framesize == no_frame)
6115 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6116 allocate_stack(common, extrasize);
6117 if (needs_control_head)
6118 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6119 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6120 if (needs_control_head)
6121 {
6122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6123 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6124 }
6125 }
6126 else
6127 {
6128 extrasize = needs_control_head ? 3 : 2;
6129 allocate_stack(common, framesize + extrasize);
6130 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6131 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6132 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6133 if (needs_control_head)
6134 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6135 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6136 if (needs_control_head)
6137 {
6138 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6139 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6140 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6141 }
6142 else
6143 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6144 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6145 }
6146
6147 memset(&altbacktrack, 0, sizeof(backtrack_common));
6148 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6149 {
6150 /* Negative assert is stronger than positive assert. */
6151 common->local_exit = TRUE;
6152 common->quit_label = NULL;
6153 common->quit = NULL;
6154 common->positive_assert = FALSE;
6155 }
6156 else
6157 common->positive_assert = TRUE;
6158 common->positive_assert_quit = NULL;
6159
6160 while (1)
6161 {
6162 common->accept_label = NULL;
6163 common->accept = NULL;
6164 altbacktrack.top = NULL;
6165 altbacktrack.topbacktracks = NULL;
6166
6167 if (*ccbegin == OP_ALT)
6168 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6169
6170 altbacktrack.cc = ccbegin;
6171 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6172 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6173 {
6174 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6175 {
6176 common->local_exit = save_local_exit;
6177 common->quit_label = save_quit_label;
6178 common->quit = save_quit;
6179 }
6180 common->positive_assert = save_positive_assert;
6181 common->then_trap = save_then_trap;
6182 common->accept_label = save_accept_label;
6183 common->positive_assert_quit = save_positive_assert_quit;
6184 common->accept = save_accept;
6185 return NULL;
6186 }
6187 common->accept_label = LABEL();
6188 if (common->accept != NULL)
6189 set_jumps(common->accept, common->accept_label);
6190
6191 /* Reset stack. */
6192 if (framesize < 0)
6193 {
6194 if (framesize == no_frame)
6195 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6196 else
6197 free_stack(common, extrasize);
6198 if (needs_control_head)
6199 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6200 }
6201 else
6202 {
6203 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6204 {
6205 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6206 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6207 if (needs_control_head)
6208 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6209 }
6210 else
6211 {
6212 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6213 if (needs_control_head)
6214 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6215 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6216 }
6217 }
6218
6219 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6220 {
6221 /* We know that STR_PTR was stored on the top of the stack. */
6222 if (conditional)
6223 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6224 else if (bra == OP_BRAZERO)
6225 {
6226 if (framesize < 0)
6227 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6228 else
6229 {
6230 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6231 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6232 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6233 }
6234 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6235 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6236 }
6237 else if (framesize >= 0)
6238 {
6239 /* For OP_BRA and OP_BRAMINZERO. */
6240 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6241 }
6242 }
6243 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6244
6245 compile_backtrackingpath(common, altbacktrack.top);
6246 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6247 {
6248 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6249 {
6250 common->local_exit = save_local_exit;
6251 common->quit_label = save_quit_label;
6252 common->quit = save_quit;
6253 }
6254 common->positive_assert = save_positive_assert;
6255 common->then_trap = save_then_trap;
6256 common->accept_label = save_accept_label;
6257 common->positive_assert_quit = save_positive_assert_quit;
6258 common->accept = save_accept;
6259 return NULL;
6260 }
6261 set_jumps(altbacktrack.topbacktracks, LABEL());
6262
6263 if (*cc != OP_ALT)
6264 break;
6265
6266 ccbegin = cc;
6267 cc += GET(cc, 1);
6268 }
6269
6270 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6271 {
6272 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6273 /* Makes the check less complicated below. */
6274 common->positive_assert_quit = common->quit;
6275 }
6276
6277 /* None of them matched. */
6278 if (common->positive_assert_quit != NULL)
6279 {
6280 jump = JUMP(SLJIT_JUMP);
6281 set_jumps(common->positive_assert_quit, LABEL());
6282 SLJIT_ASSERT(framesize != no_stack);
6283 if (framesize < 0)
6284 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6285 else
6286 {
6287 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6288 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6289 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6290 }
6291 JUMPHERE(jump);
6292 }
6293
6294 if (needs_control_head)
6295 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6296
6297 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6298 {
6299 /* Assert is failed. */
6300 if (conditional || bra == OP_BRAZERO)
6301 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6302
6303 if (framesize < 0)
6304 {
6305 /* The topmost item should be 0. */
6306 if (bra == OP_BRAZERO)
6307 {
6308 if (extrasize == 2)
6309 free_stack(common, 1);
6310 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6311 }
6312 else
6313 free_stack(common, extrasize);
6314 }
6315 else
6316 {
6317 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6318 /* The topmost item should be 0. */
6319 if (bra == OP_BRAZERO)
6320 {
6321 free_stack(common, framesize + extrasize - 1);
6322 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6323 }
6324 else
6325 free_stack(common, framesize + extrasize);
6326 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6327 }
6328 jump = JUMP(SLJIT_JUMP);
6329 if (bra != OP_BRAZERO)
6330 add_jump(compiler, target, jump);
6331
6332 /* Assert is successful. */
6333 set_jumps(tmp, LABEL());
6334 if (framesize < 0)
6335 {
6336 /* We know that STR_PTR was stored on the top of the stack. */
6337 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6338 /* Keep the STR_PTR on the top of the stack. */
6339 if (bra == OP_BRAZERO)
6340 {
6341 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6342 if (extrasize == 2)
6343 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6344 }
6345 else if (bra == OP_BRAMINZERO)
6346 {
6347 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6348 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6349 }
6350 }
6351 else
6352 {
6353 if (bra == OP_BRA)
6354 {
6355 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6356 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6357 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6358 }
6359 else
6360 {
6361 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6362 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6363 if (extrasize == 2)
6364 {
6365 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6366 if (bra == OP_BRAMINZERO)
6367 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6368 }
6369 else
6370 {
6371 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6372 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6373 }
6374 }
6375 }
6376
6377 if (bra == OP_BRAZERO)
6378 {
6379 backtrack->matchingpath = LABEL();
6380 SET_LABEL(jump, backtrack->matchingpath);
6381 }
6382 else if (bra == OP_BRAMINZERO)
6383 {
6384 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6385 JUMPHERE(brajump);
6386 if (framesize >= 0)
6387 {
6388 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6389 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6390 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6391 }
6392 set_jumps(backtrack->common.topbacktracks, LABEL());
6393 }
6394 }
6395 else
6396 {
6397 /* AssertNot is successful. */
6398 if (framesize < 0)
6399 {
6400 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6401 if (bra != OP_BRA)
6402 {
6403 if (extrasize == 2)
6404 free_stack(common, 1);
6405 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6406 }
6407 else
6408 free_stack(common, extrasize);
6409 }
6410 else
6411 {
6412 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6413 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6414 /* The topmost item should be 0. */
6415 if (bra != OP_BRA)
6416 {
6417 free_stack(common, framesize + extrasize - 1);
6418 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6419 }
6420 else
6421 free_stack(common, framesize + extrasize);
6422 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6423 }
6424
6425 if (bra == OP_BRAZERO)
6426 backtrack->matchingpath = LABEL();
6427 else if (bra == OP_BRAMINZERO)
6428 {
6429 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6430 JUMPHERE(brajump);
6431 }
6432
6433 if (bra != OP_BRA)
6434 {
6435 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6436 set_jumps(backtrack->common.topbacktracks, LABEL());
6437 backtrack->common.topbacktracks = NULL;
6438 }
6439 }
6440
6441 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6442 {
6443 common->local_exit = save_local_exit;
6444 common->quit_label = save_quit_label;
6445 common->quit = save_quit;
6446 }
6447 common->positive_assert = save_positive_assert;
6448 common->then_trap = save_then_trap;
6449 common->accept_label = save_accept_label;
6450 common->positive_assert_quit = save_positive_assert_quit;
6451 common->accept = save_accept;
6452 return cc + 1 + LINK_SIZE;
6453 }
6454
6455 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6456 {
6457 DEFINE_COMPILER;
6458 int stacksize;
6459
6460 if (framesize < 0)
6461 {
6462 if (framesize == no_frame)
6463 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6464 else
6465 {
6466 stacksize = needs_control_head ? 1 : 0;
6467 if (ket != OP_KET || has_alternatives)
6468 stacksize++;
6469 free_stack(common, stacksize);
6470 }
6471
6472 if (needs_control_head)
6473 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6474
6475 /* TMP2 which is set here used by OP_KETRMAX below. */
6476 if (ket == OP_KETRMAX)
6477 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6478 else if (ket == OP_KETRMIN)
6479 {
6480 /* Move the STR_PTR to the private_data_ptr. */
6481 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6482 }
6483 }
6484 else
6485 {
6486 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6487 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6488 if (needs_control_head)
6489 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6490
6491 if (ket == OP_KETRMAX)
6492 {
6493 /* TMP2 which is set here used by OP_KETRMAX below. */
6494 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6495 }
6496 }
6497 if (needs_control_head)
6498 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6499 }
6500
6501 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6502 {
6503 DEFINE_COMPILER;
6504
6505 if (common->capture_last_ptr != 0)
6506 {
6507 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6508 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6509 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6510 stacksize++;
6511 }
6512 if (common->optimized_cbracket[offset >> 1] == 0)
6513 {
6514 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6515 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6516 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6517 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6518 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6519 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6520 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6521 stacksize += 2;
6522 }
6523 return stacksize;
6524 }
6525
6526 /*
6527 Handling bracketed expressions is probably the most complex part.
6528
6529 Stack layout naming characters:
6530 S - Push the current STR_PTR
6531 0 - Push a 0 (NULL)
6532 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6533 before the next alternative. Not pushed if there are no alternatives.
6534 M - Any values pushed by the current alternative. Can be empty, or anything.
6535 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6536 L - Push the previous local (pointed by localptr) to the stack
6537 () - optional values stored on the stack
6538 ()* - optional, can be stored multiple times
6539
6540 The following list shows the regular expression templates, their PCRE byte codes
6541 and stack layout supported by pcre-sljit.
6542
6543 (?:) OP_BRA | OP_KET A M
6544 () OP_CBRA | OP_KET C M
6545 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6546 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6547 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6548 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6549 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6550 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6551 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6552 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6553 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6554 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6555 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6556 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6557 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6558 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6559 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6560 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6561 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6562 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6563 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6564 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6565
6566
6567 Stack layout naming characters:
6568 A - Push the alternative index (starting from 0) on the stack.
6569 Not pushed if there are no alternatives.
6570 M - Any values pushed by the current alternative. Can be empty, or anything.
6571
6572 The next list shows the possible content of a bracket:
6573 (|) OP_*BRA | OP_ALT ... M A
6574 (?()|) OP_*COND | OP_ALT M A
6575 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6576 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6577 Or nothing, if trace is unnecessary
6578 */
6579
6580 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6581 {
6582 DEFINE_COMPILER;
6583 backtrack_common *backtrack;
6584 pcre_uchar opcode;
6585 int private_data_ptr = 0;
6586 int offset = 0;
6587 int i, stacksize;
6588 int repeat_ptr = 0, repeat_length = 0;
6589 int repeat_type = 0, repeat_count = 0;
6590 pcre_uchar *ccbegin;
6591 pcre_uchar *matchingpath;
6592 pcre_uchar *slot;
6593 pcre_uchar bra = OP_BRA;
6594 pcre_uchar ket;
6595 assert_backtrack *assert;
6596 BOOL has_alternatives;
6597 BOOL needs_control_head = FALSE;
6598 struct sljit_jump *jump;
6599 struct sljit_jump *skip;
6600 struct sljit_label *rmax_label = NULL;
6601 struct sljit_jump *braminzero = NULL;
6602
6603 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6604
6605 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6606 {
6607 bra = *cc;
6608 cc++;
6609 opcode = *cc;
6610 }
6611
6612 opcode = *cc;
6613 ccbegin = cc;
6614 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6615 ket = *matchingpath;
6616 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6617 {
6618 repeat_ptr = PRIVATE_DATA(matchingpath);
6619 repeat_length = PRIVATE_DATA(matchingpath + 1);
6620 repeat_type = PRIVATE_DATA(matchingpath + 2);
6621 repeat_count = PRIVATE_DATA(matchingpath + 3);
6622 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6623 if (repeat_type == OP_UPTO)
6624 ket = OP_KETRMAX;
6625 if (repeat_type == OP_MINUPTO)
6626 ket = OP_KETRMIN;
6627 }
6628
6629 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6630 {
6631 /* Drop this bracket_backtrack. */
6632 parent->top = backtrack->prev;
6633 return matchingpath + 1 + LINK_SIZE + repeat_length;
6634 }
6635
6636 matchingpath = ccbegin + 1 + LINK_SIZE;
6637 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6638 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6639 cc += GET(cc, 1);
6640
6641 has_alternatives = *cc == OP_ALT;
6642 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6643 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
6644
6645 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6646 opcode = OP_SCOND;
6647 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6648 opcode = OP_ONCE;
6649
6650 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6651 {
6652 /* Capturing brackets has a pre-allocated space. */
6653 offset = GET2(ccbegin, 1 + LINK_SIZE);
6654 if (common->optimized_cbracket[offset] == 0)
6655 {
6656 private_data_ptr = OVECTOR_PRIV(offset);
6657 offset <<= 1;
6658 }
6659 else
6660 {
6661 offset <<= 1;
6662 private_data_ptr = OVECTOR(offset);
6663 }
6664 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6665 matchingpath += IMM2_SIZE;
6666 }
6667 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6668 {
6669 /* Other brackets simply allocate the next entry. */
6670 private_data_ptr = PRIVATE_DATA(ccbegin);
6671 SLJIT_ASSERT(private_data_ptr != 0);
6672 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6673 if (opcode == OP_ONCE)
6674 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6675 }
6676
6677 /* Instructions before the first alternative. */
6678 stacksize = 0;
6679 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6680 stacksize++;
6681 if (bra == OP_BRAZERO)
6682 stacksize++;
6683
6684 if (stacksize > 0)
6685 allocate_stack(common, stacksize);
6686
6687 stacksize = 0;
6688 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6689 {
6690 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6691 stacksize++;
6692 }
6693
6694 if (bra == OP_BRAZERO)
6695 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6696
6697 if (bra == OP_BRAMINZERO)
6698 {
6699 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6700 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6701 if (ket != OP_KETRMIN)
6702 {
6703 free_stack(common, 1);
6704 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6705 }
6706 else
6707 {
6708 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6709 {
6710 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6711 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6712 /* Nothing stored during the first run. */
6713 skip = JUMP(SLJIT_JUMP);
6714 JUMPHERE(jump);
6715 /* Checking zero-length iteration. */
6716 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6717 {
6718 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6719 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6720 }
6721 else
6722 {
6723 /* Except when the whole stack frame must be saved. */
6724 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6725 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6726 }
6727 JUMPHERE(skip);
6728 }
6729 else
6730 {
6731 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6732 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6733 JUMPHERE(jump);
6734 }
6735 }
6736 }
6737
6738 if (repeat_type != 0)
6739 {
6740 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6741 if (repeat_type == OP_EXACT)
6742 rmax_label = LABEL();
6743 }
6744
6745 if (ket == OP_KETRMIN)