/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1421 - (show annotations)
Mon Dec 30 13:25:20 2013 UTC (5 years, 11 months ago) by zherczeg
File MIME type: text/plain
File size: 322329 byte(s)
Error occurred while calculating annotation data.
More character range checks.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
/* Size of the regex stack that is allocated on the real machine stack.
Fast, but limited in size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate of the stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* With sljit debugging switched on, also enable the check that an
allocation could destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 pcre_uint32 limit_match;
186 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187 } executable_functions;
188
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;   /* The jump stored in this node. */
  struct jump_list *next;    /* Following node. */
} jump_list;
193
/* Node of a singly linked list that pairs a start jump with a quit label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;    /* Following node. */
} stub_list;
199
/* Negative sentinel values.
NOTE(review): presumably used in place of a real frame size - confirm
against the frame size computation. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
204
/* Entry kinds: a mark or a then trap.
NOTE(review): presumably the tags of the control verb chain - confirm. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
209
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type for the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the arguments for compile_backtrackingpath. Must be the first member
215 of its descendants. */
216 typedef struct backtrack_common {
217 /* Concatenation stack. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
227 typedef struct assert_backtrack {
228 backtrack_common common;
229 jump_list *condfailed;
230 /* Less than 0 if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
238 typedef struct bracket_backtrack {
239 backtrack_common common;
240 /* Where to coninue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. Less than 0 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
258 typedef struct bracketpos_backtrack {
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
268 typedef struct braminzero_backtrack {
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
273 typedef struct iterator_backtrack {
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls until the function is not created. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 sljit_sw start;
287 } recurse_entry;
288
289 typedef struct recurse_backtrack {
290 backtrack_common common;
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
294 #define OP_THEN_TRAP OP_TABLE_LENGTH
295
296 typedef struct then_trap_backtrack {
297 backtrack_common common;
298 /* If then_trap is not NULL, this structure contains the real
299 then_trap for the backtracking path. */
300 struct then_trap_backtrack *then_trap;
301 /* Points to the starting opcode. */
302 sljit_sw start;
303 /* Exit point for the then opcodes of this alternative. */
304 jump_list *quit;
305 /* Frame size of the current alternative. */
306 int framesize;
307 } then_trap_backtrack;
308
309 #define MAX_RANGE_SIZE 4
310
311 typedef struct compiler_common {
312 /* The sljit ceneric compiler. */
313 struct sljit_compiler *compiler;
314 /* First byte code. */
315 pcre_uchar *start;
316 /* Maps private data offset to each opcode. */
317 sljit_si *private_data_ptrs;
318 /* Tells whether the capturing bracket is optimized. */
319 pcre_uint8 *optimized_cbracket;
320 /* Tells whether the starting offset is a target of then. */
321 pcre_uint8 *then_offsets;
322 /* Current position where a THEN must jump. */
323 then_trap_backtrack *then_trap;
324 /* Starting offset of private data for capturing brackets. */
325 int cbra_ptr;
326 /* Output vector starting point. Must be divisible by 2. */
327 int ovector_start;
328 /* Last known position of the requested byte. */
329 int req_char_ptr;
330 /* Head of the last recursion. */
331 int recursive_head_ptr;
332 /* First inspected character for partial matching. */
333 int start_used_ptr;
334 /* Starting pointer for partial soft matches. */
335 int hit_start;
336 /* End pointer of the first line. */
337 int first_line_end;
338 /* Points to the marked string. */
339 int mark_ptr;
340 /* Recursive control verb management chain. */
341 int control_head_ptr;
342 /* Points to the last matched capture block index. */
343 int capture_last_ptr;
344 /* Points to the starting position of the current match. */
345 int start_ptr;
346
347 /* Flipped and lower case tables. */
348 const pcre_uint8 *fcc;
349 sljit_sw lcc;
350 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351 int mode;
352 /* \K is found in the pattern. */
353 BOOL has_set_som;
354 /* (*SKIP:arg) is found in the pattern. */
355 BOOL has_skip_arg;
356 /* (*THEN) is found in the pattern. */
357 BOOL has_then;
358 /* Needs to know the start position anytime. */
359 BOOL needs_start_ptr;
360 /* Currently in recurse or negative assert. */
361 BOOL local_exit;
362 /* Currently in a positive assert. */
363 BOOL positive_assert;
364 /* Newline control. */
365 int nltype;
366 int newline;
367 int bsr_nltype;
368 /* Dollar endonly. */
369 int endonly;
370 /* Tables. */
371 sljit_sw ctypes;
372 /* Named capturing brackets. */
373 pcre_uchar *name_table;
374 sljit_sw name_count;
375 sljit_sw name_entry_size;
376
377 /* Labels and jump lists. */
378 struct sljit_label *partialmatchlabel;
379 struct sljit_label *quit_label;
380 struct sljit_label *forced_quit_label;
381 struct sljit_label *accept_label;
382 stub_list *stubs;
383 recurse_entry *entries;
384 recurse_entry *currententry;
385 jump_list *partialmatch;
386 jump_list *quit;
387 jump_list *positive_assert_quit;
388 jump_list *forced_quit;
389 jump_list *accept;
390 jump_list *calllimit;
391 jump_list *stackalloc;
392 jump_list *revertframes;
393 jump_list *wordboundary;
394 jump_list *anynewline;
395 jump_list *hspace;
396 jump_list *vspace;
397 jump_list *casefulcmp;
398 jump_list *caselesscmp;
399 jump_list *reset_match;
400 BOOL jscript_compat;
401 #ifdef SUPPORT_UTF
402 BOOL utf;
403 #ifdef SUPPORT_UCP
404 BOOL use_ucp;
405 #endif
406 #ifndef COMPILE_PCRE32
407 jump_list *utfreadchar;
408 #endif
409 #ifdef COMPILE_PCRE8
410 jump_list *utfreadchar11;
411 jump_list *utfreadtype8;
412 #endif
413 #endif /* SUPPORT_UTF */
414 #ifdef SUPPORT_UCP
415 jump_list *getucd;
416 #endif
417 } compiler_common;
418
/* Context used by byte_sequence_compare. When unaligned access is
available (SLJIT_UNALIGNED), it also carries two identically shaped
unions, c and oc, whose contents can be read as an int, a short, or as
separate characters. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c, oc;
#endif
} compare_context;
452
/* Remove the CMP macro of sljit; this file defines its own CMP shortcut. */
#undef CMP

/* Byte offset of the i-th element of the stack: element 0 is one word
below the base, element 1 is two words below, and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Symbolic names of the sljit registers used by the generator. */
#define TMP1          SLJIT_SCRATCH_REG1
#define TMP2          SLJIT_SCRATCH_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_SCRATCH_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define COUNT_MATCH   SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
469
/* Layout of the local space, expressed as byte offsets. */

/* Two locals that the current opcode is free to use. */
#define LOCALS0       (0 * sizeof(sljit_sw))
#define LOCALS1       (1 * sizeof(sljit_sw))
/* Two locals for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0   (2 * sizeof(sljit_sw))
#define POSSESSIVE1   (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH   (4 * sizeof(sljit_sw))

/* The output vector lives on the stack and holds character pointers.
Its data forms two groups: the first one holds the start / end character
pointers, the second one holds the start pointers of capturing groups
whose end has not been reached yet. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
487
488 #if defined COMPILE_PCRE8
489 #define MOV_UCHAR SLJIT_MOV_UB
490 #define MOVU_UCHAR SLJIT_MOVU_UB
491 #elif defined COMPILE_PCRE16
492 #define MOV_UCHAR SLJIT_MOV_UH
493 #define MOVU_UCHAR SLJIT_MOVU_UH
494 #elif defined COMPILE_PCRE32
495 #define MOV_UCHAR SLJIT_MOV_UI
496 #define MOVU_UCHAR SLJIT_MOVU_UI
497 #else
498 #error Unsupported compiling mode
499 #endif
500
/* Shortcuts for the sljit emitter calls. All of them, except
DEFINE_COMPILER itself, expect a local variable called "compiler";
DEFINE_COMPILER declares it from common->compiler. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler

#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))

#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))

#define LABEL() sljit_emit_label(compiler)

#define JUMP(type) sljit_emit_jump(compiler, (type))

/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))

/* Binds an earlier jump to a label emitted at the current position. */
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))

#define SET_LABEL(jump, label) sljit_set_label((jump), (label))

#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))

/* Emits a compare-and-jump and binds it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))

#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
526
527 static pcre_uchar* bracketend(pcre_uchar* cc)
528 {
529 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
530 do cc += GET(cc, 1); while (*cc == OP_ALT);
531 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
532 cc += 1 + LINK_SIZE;
533 return cc;
534 }
535
/* Number of set bits in every 4 bit value; the index is the value
itself. The table is read-only, hence const. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
540
541 /* Functions which might need modification for each newly supported opcode:
542 next_opcode
543 check_opcode_types
544 set_private_data_ptrs
545 get_framesize
546 init_frame
547 get_private_data_copy_length
548 copy_private_data
549 compile_matchingpath
550 compile_backtrackingpath
551 */
552
553 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
554 {
555 SLJIT_UNUSED_ARG(common);
556 switch(*cc)
557 {
558 case OP_SOD:
559 case OP_SOM:
560 case OP_SET_SOM:
561 case OP_NOT_WORD_BOUNDARY:
562 case OP_WORD_BOUNDARY:
563 case OP_NOT_DIGIT:
564 case OP_DIGIT:
565 case OP_NOT_WHITESPACE:
566 case OP_WHITESPACE:
567 case OP_NOT_WORDCHAR:
568 case OP_WORDCHAR:
569 case OP_ANY:
570 case OP_ALLANY:
571 case OP_NOTPROP:
572 case OP_PROP:
573 case OP_ANYNL:
574 case OP_NOT_HSPACE:
575 case OP_HSPACE:
576 case OP_NOT_VSPACE:
577 case OP_VSPACE:
578 case OP_EXTUNI:
579 case OP_EODN:
580 case OP_EOD:
581 case OP_CIRC:
582 case OP_CIRCM:
583 case OP_DOLL:
584 case OP_DOLLM:
585 case OP_CRSTAR:
586 case OP_CRMINSTAR:
587 case OP_CRPLUS:
588 case OP_CRMINPLUS:
589 case OP_CRQUERY:
590 case OP_CRMINQUERY:
591 case OP_CRRANGE:
592 case OP_CRMINRANGE:
593 case OP_CRPOSSTAR:
594 case OP_CRPOSPLUS:
595 case OP_CRPOSQUERY:
596 case OP_CRPOSRANGE:
597 case OP_CLASS:
598 case OP_NCLASS:
599 case OP_REF:
600 case OP_REFI:
601 case OP_DNREF:
602 case OP_DNREFI:
603 case OP_RECURSE:
604 case OP_CALLOUT:
605 case OP_ALT:
606 case OP_KET:
607 case OP_KETRMAX:
608 case OP_KETRMIN:
609 case OP_KETRPOS:
610 case OP_REVERSE:
611 case OP_ASSERT:
612 case OP_ASSERT_NOT:
613 case OP_ASSERTBACK:
614 case OP_ASSERTBACK_NOT:
615 case OP_ONCE:
616 case OP_ONCE_NC:
617 case OP_BRA:
618 case OP_BRAPOS:
619 case OP_CBRA:
620 case OP_CBRAPOS:
621 case OP_COND:
622 case OP_SBRA:
623 case OP_SBRAPOS:
624 case OP_SCBRA:
625 case OP_SCBRAPOS:
626 case OP_SCOND:
627 case OP_CREF:
628 case OP_DNCREF:
629 case OP_RREF:
630 case OP_DNRREF:
631 case OP_DEF:
632 case OP_BRAZERO:
633 case OP_BRAMINZERO:
634 case OP_BRAPOSZERO:
635 case OP_PRUNE:
636 case OP_SKIP:
637 case OP_THEN:
638 case OP_COMMIT:
639 case OP_FAIL:
640 case OP_ACCEPT:
641 case OP_ASSERT_ACCEPT:
642 case OP_CLOSE:
643 case OP_SKIPZERO:
644 return cc + PRIV(OP_lengths)[*cc];
645
646 case OP_CHAR:
647 case OP_CHARI:
648 case OP_NOT:
649 case OP_NOTI:
650 case OP_STAR:
651 case OP_MINSTAR:
652 case OP_PLUS:
653 case OP_MINPLUS:
654 case OP_QUERY:
655 case OP_MINQUERY:
656 case OP_UPTO:
657 case OP_MINUPTO:
658 case OP_EXACT:
659 case OP_POSSTAR:
660 case OP_POSPLUS:
661 case OP_POSQUERY:
662 case OP_POSUPTO:
663 case OP_STARI:
664 case OP_MINSTARI:
665 case OP_PLUSI:
666 case OP_MINPLUSI:
667 case OP_QUERYI:
668 case OP_MINQUERYI:
669 case OP_UPTOI:
670 case OP_MINUPTOI:
671 case OP_EXACTI:
672 case OP_POSSTARI:
673 case OP_POSPLUSI:
674 case OP_POSQUERYI:
675 case OP_POSUPTOI:
676 case OP_NOTSTAR:
677 case OP_NOTMINSTAR:
678 case OP_NOTPLUS:
679 case OP_NOTMINPLUS:
680 case OP_NOTQUERY:
681 case OP_NOTMINQUERY:
682 case OP_NOTUPTO:
683 case OP_NOTMINUPTO:
684 case OP_NOTEXACT:
685 case OP_NOTPOSSTAR:
686 case OP_NOTPOSPLUS:
687 case OP_NOTPOSQUERY:
688 case OP_NOTPOSUPTO:
689 case OP_NOTSTARI:
690 case OP_NOTMINSTARI:
691 case OP_NOTPLUSI:
692 case OP_NOTMINPLUSI:
693 case OP_NOTQUERYI:
694 case OP_NOTMINQUERYI:
695 case OP_NOTUPTOI:
696 case OP_NOTMINUPTOI:
697 case OP_NOTEXACTI:
698 case OP_NOTPOSSTARI:
699 case OP_NOTPOSPLUSI:
700 case OP_NOTPOSQUERYI:
701 case OP_NOTPOSUPTOI:
702 cc += PRIV(OP_lengths)[*cc];
703 #ifdef SUPPORT_UTF
704 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
705 #endif
706 return cc;
707
708 /* Special cases. */
709 case OP_TYPESTAR:
710 case OP_TYPEMINSTAR:
711 case OP_TYPEPLUS:
712 case OP_TYPEMINPLUS:
713 case OP_TYPEQUERY:
714 case OP_TYPEMINQUERY:
715 case OP_TYPEUPTO:
716 case OP_TYPEMINUPTO:
717 case OP_TYPEEXACT:
718 case OP_TYPEPOSSTAR:
719 case OP_TYPEPOSPLUS:
720 case OP_TYPEPOSQUERY:
721 case OP_TYPEPOSUPTO:
722 return cc + PRIV(OP_lengths)[*cc] - 1;
723
724 case OP_ANYBYTE:
725 #ifdef SUPPORT_UTF
726 if (common->utf) return NULL;
727 #endif
728 return cc + 1;
729
730 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
731 case OP_XCLASS:
732 return cc + GET(cc, 1);
733 #endif
734
735 case OP_MARK:
736 case OP_PRUNE_ARG:
737 case OP_SKIP_ARG:
738 case OP_THEN_ARG:
739 return cc + 1 + 2 + cc[1];
740
741 default:
742 /* All opcodes are supported now! */
743 SLJIT_ASSERT_STOP();
744 return NULL;
745 }
746 }
747
748 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
749 {
750 int count;
751 pcre_uchar *slot;
752
753 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
754 while (cc < ccend)
755 {
756 switch(*cc)
757 {
758 case OP_SET_SOM:
759 common->has_set_som = TRUE;
760 cc += 1;
761 break;
762
763 case OP_REF:
764 case OP_REFI:
765 common->optimized_cbracket[GET2(cc, 1)] = 0;
766 cc += 1 + IMM2_SIZE;
767 break;
768
769 case OP_CBRAPOS:
770 case OP_SCBRAPOS:
771 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
772 cc += 1 + LINK_SIZE + IMM2_SIZE;
773 break;
774
775 case OP_COND:
776 case OP_SCOND:
777 /* Only AUTO_CALLOUT can insert this opcode. We do
778 not intend to support this case. */
779 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
780 return FALSE;
781 cc += 1 + LINK_SIZE;
782 break;
783
784 case OP_CREF:
785 common->optimized_cbracket[GET2(cc, 1)] = 0;
786 cc += 1 + IMM2_SIZE;
787 break;
788
789 case OP_DNREF:
790 case OP_DNREFI:
791 case OP_DNCREF:
792 count = GET2(cc, 1 + IMM2_SIZE);
793 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
794 while (count-- > 0)
795 {
796 common->optimized_cbracket[GET2(slot, 0)] = 0;
797 slot += common->name_entry_size;
798 }
799 cc += 1 + 2 * IMM2_SIZE;
800 break;
801
802 case OP_RECURSE:
803 /* Set its value only once. */
804 if (common->recursive_head_ptr == 0)
805 {
806 common->recursive_head_ptr = common->ovector_start;
807 common->ovector_start += sizeof(sljit_sw);
808 }
809 cc += 1 + LINK_SIZE;
810 break;
811
812 case OP_CALLOUT:
813 if (common->capture_last_ptr == 0)
814 {
815 common->capture_last_ptr = common->ovector_start;
816 common->ovector_start += sizeof(sljit_sw);
817 }
818 cc += 2 + 2 * LINK_SIZE;
819 break;
820
821 case OP_THEN_ARG:
822 common->has_then = TRUE;
823 common->control_head_ptr = 1;
824 /* Fall through. */
825
826 case OP_PRUNE_ARG:
827 common->needs_start_ptr = TRUE;
828 /* Fall through. */
829
830 case OP_MARK:
831 if (common->mark_ptr == 0)
832 {
833 common->mark_ptr = common->ovector_start;
834 common->ovector_start += sizeof(sljit_sw);
835 }
836 cc += 1 + 2 + cc[1];
837 break;
838
839 case OP_THEN:
840 common->has_then = TRUE;
841 common->control_head_ptr = 1;
842 /* Fall through. */
843
844 case OP_PRUNE:
845 case OP_SKIP:
846 common->needs_start_ptr = TRUE;
847 cc += 1;
848 break;
849
850 case OP_SKIP_ARG:
851 common->control_head_ptr = 1;
852 common->has_skip_arg = TRUE;
853 cc += 1 + 2 + cc[1];
854 break;
855
856 default:
857 cc = next_opcode(common, cc);
858 if (cc == NULL)
859 return FALSE;
860 break;
861 }
862 }
863 return TRUE;
864 }
865
866 static int get_class_iterator_size(pcre_uchar *cc)
867 {
868 switch(*cc)
869 {
870 case OP_CRSTAR:
871 case OP_CRPLUS:
872 return 2;
873
874 case OP_CRMINSTAR:
875 case OP_CRMINPLUS:
876 case OP_CRQUERY:
877 case OP_CRMINQUERY:
878 return 1;
879
880 case OP_CRRANGE:
881 case OP_CRMINRANGE:
882 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
883 return 0;
884 return 2;
885
886 default:
887 return 0;
888 }
889 }
890
891 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
892 {
893 pcre_uchar *end = bracketend(begin);
894 pcre_uchar *next;
895 pcre_uchar *next_end;
896 pcre_uchar *max_end;
897 pcre_uchar type;
898 sljit_sw length = end - begin;
899 int min, max, i;
900
901 /* Detect fixed iterations first. */
902 if (end[-(1 + LINK_SIZE)] != OP_KET)
903 return FALSE;
904
905 /* Already detected repeat. */
906 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
907 return TRUE;
908
909 next = end;
910 min = 1;
911 while (1)
912 {
913 if (*next != *begin)
914 break;
915 next_end = bracketend(next);
916 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
917 break;
918 next = next_end;
919 min++;
920 }
921
922 if (min == 2)
923 return FALSE;
924
925 max = 0;
926 max_end = next;
927 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
928 {
929 type = *next;
930 while (1)
931 {
932 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
933 break;
934 next_end = bracketend(next + 2 + LINK_SIZE);
935 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
936 break;
937 next = next_end;
938 max++;
939 }
940
941 if (next[0] == type && next[1] == *begin && max >= 1)
942 {
943 next_end = bracketend(next + 1);
944 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
945 {
946 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
947 if (*next_end != OP_KET)
948 break;
949
950 if (i == max)
951 {
952 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
953 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
954 /* +2 the original and the last. */
955 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
956 if (min == 1)
957 return TRUE;
958 min--;
959 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
960 }
961 }
962 }
963 }
964
965 if (min >= 3)
966 {
967 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
968 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
969 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
970 return TRUE;
971 }
972
973 return FALSE;
974 }
975
/* Case label groups for the single character iterators. Each macro
expands to a run of "case" labels and is meant to be placed directly in
front of the statements that handle the group. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: case OP_NOTQUERYI: case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* The same three groups for the character type iterators. */

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1027
1028 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1029 {
1030 pcre_uchar *cc = common->start;
1031 pcre_uchar *alternative;
1032 pcre_uchar *end = NULL;
1033 int private_data_ptr = *private_data_start;
1034 int space, size, bracketlen;
1035
1036 while (cc < ccend)
1037 {
1038 space = 0;
1039 size = 0;
1040 bracketlen = 0;
1041 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1042 return;
1043
1044 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1045 if (detect_repeat(common, cc))
1046 {
1047 /* These brackets are converted to repeats, so no global
1048 based single character repeat is allowed. */
1049 if (cc >= end)
1050 end = bracketend(cc);
1051 }
1052
1053 switch(*cc)
1054 {
1055 case OP_KET:
1056 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1057 {
1058 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1059 private_data_ptr += sizeof(sljit_sw);
1060 cc += common->private_data_ptrs[cc + 1 - common->start];
1061 }
1062 cc += 1 + LINK_SIZE;
1063 break;
1064
1065 case OP_ASSERT:
1066 case OP_ASSERT_NOT:
1067 case OP_ASSERTBACK:
1068 case OP_ASSERTBACK_NOT:
1069 case OP_ONCE:
1070 case OP_ONCE_NC:
1071 case OP_BRAPOS:
1072 case OP_SBRA:
1073 case OP_SBRAPOS:
1074 case OP_SCOND:
1075 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1076 private_data_ptr += sizeof(sljit_sw);
1077 bracketlen = 1 + LINK_SIZE;
1078 break;
1079
1080 case OP_CBRAPOS:
1081 case OP_SCBRAPOS:
1082 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1083 private_data_ptr += sizeof(sljit_sw);
1084 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1085 break;
1086
1087 case OP_COND:
1088 /* Might be a hidden SCOND. */
1089 alternative = cc + GET(cc, 1);
1090 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1091 {
1092 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1093 private_data_ptr += sizeof(sljit_sw);
1094 }
1095 bracketlen = 1 + LINK_SIZE;
1096 break;
1097
1098 case OP_BRA:
1099 bracketlen = 1 + LINK_SIZE;
1100 break;
1101
1102 case OP_CBRA:
1103 case OP_SCBRA:
1104 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1105 break;
1106
1107 CASE_ITERATOR_PRIVATE_DATA_1
1108 space = 1;
1109 size = -2;
1110 break;
1111
1112 CASE_ITERATOR_PRIVATE_DATA_2A
1113 space = 2;
1114 size = -2;
1115 break;
1116
1117 CASE_ITERATOR_PRIVATE_DATA_2B
1118 space = 2;
1119 size = -(2 + IMM2_SIZE);
1120 break;
1121
1122 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1123 space = 1;
1124 size = 1;
1125 break;
1126
1127 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1128 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1129 space = 2;
1130 size = 1;
1131 break;
1132
1133 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1134 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1135 space = 2;
1136 size = 1 + IMM2_SIZE;
1137 break;
1138
1139 case OP_CLASS:
1140 case OP_NCLASS:
1141 size += 1 + 32 / sizeof(pcre_uchar);
1142 space = get_class_iterator_size(cc + size);
1143 break;
1144
1145 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1146 case OP_XCLASS:
1147 size = GET(cc, 1);
1148 space = get_class_iterator_size(cc + size);
1149 break;
1150 #endif
1151
1152 default:
1153 cc = next_opcode(common, cc);
1154 SLJIT_ASSERT(cc != NULL);
1155 break;
1156 }
1157
1158 /* Character iterators, which are not inside a repeated bracket,
1159 gets a private slot instead of allocating it on the stack. */
1160 if (space > 0 && cc >= end)
1161 {
1162 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1163 private_data_ptr += sizeof(sljit_sw) * space;
1164 }
1165
1166 if (size != 0)
1167 {
1168 if (size < 0)
1169 {
1170 cc += -size;
1171 #ifdef SUPPORT_UTF
1172 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1173 #endif
1174 }
1175 else
1176 cc += size;
1177 }
1178
1179 if (bracketlen > 0)
1180 {
1181 if (cc >= end)
1182 {
1183 end = bracketend(cc);
1184 if (end[-1 - LINK_SIZE] == OP_KET)
1185 end = NULL;
1186 }
1187 cc += bracketlen;
1188 }
1189 }
1190 *private_data_start = private_data_ptr;
1191 }
1192
1193 /* Returns with a frame_types (always < 0) if no need for frame. */
1194 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1195 {
1196 int length = 0;
1197 int possessive = 0;
1198 BOOL stack_restore = FALSE;
1199 BOOL setsom_found = recursive;
1200 BOOL setmark_found = recursive;
1201 /* The last capture is a local variable even for recursions. */
1202 BOOL capture_last_found = FALSE;
1203
1204 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1205 SLJIT_ASSERT(common->control_head_ptr != 0);
1206 *needs_control_head = TRUE;
1207 #else
1208 *needs_control_head = FALSE;
1209 #endif
1210
1211 if (ccend == NULL)
1212 {
1213 ccend = bracketend(cc) - (1 + LINK_SIZE);
1214 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1215 {
1216 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1217 /* This is correct regardless of common->capture_last_ptr. */
1218 capture_last_found = TRUE;
1219 }
1220 cc = next_opcode(common, cc);
1221 }
1222
1223 SLJIT_ASSERT(cc != NULL);
1224 while (cc < ccend)
1225 switch(*cc)
1226 {
1227 case OP_SET_SOM:
1228 SLJIT_ASSERT(common->has_set_som);
1229 stack_restore = TRUE;
1230 if (!setsom_found)
1231 {
1232 length += 2;
1233 setsom_found = TRUE;
1234 }
1235 cc += 1;
1236 break;
1237
1238 case OP_MARK:
1239 case OP_PRUNE_ARG:
1240 case OP_THEN_ARG:
1241 SLJIT_ASSERT(common->mark_ptr != 0);
1242 stack_restore = TRUE;
1243 if (!setmark_found)
1244 {
1245 length += 2;
1246 setmark_found = TRUE;
1247 }
1248 if (common->control_head_ptr != 0)
1249 *needs_control_head = TRUE;
1250 cc += 1 + 2 + cc[1];
1251 break;
1252
1253 case OP_RECURSE:
1254 stack_restore = TRUE;
1255 if (common->has_set_som && !setsom_found)
1256 {
1257 length += 2;
1258 setsom_found = TRUE;
1259 }
1260 if (common->mark_ptr != 0 && !setmark_found)
1261 {
1262 length += 2;
1263 setmark_found = TRUE;
1264 }
1265 if (common->capture_last_ptr != 0 && !capture_last_found)
1266 {
1267 length += 2;
1268 capture_last_found = TRUE;
1269 }
1270 cc += 1 + LINK_SIZE;
1271 break;
1272
1273 case OP_CBRA:
1274 case OP_CBRAPOS:
1275 case OP_SCBRA:
1276 case OP_SCBRAPOS:
1277 stack_restore = TRUE;
1278 if (common->capture_last_ptr != 0 && !capture_last_found)
1279 {
1280 length += 2;
1281 capture_last_found = TRUE;
1282 }
1283 length += 3;
1284 cc += 1 + LINK_SIZE + IMM2_SIZE;
1285 break;
1286
1287 default:
1288 stack_restore = TRUE;
1289 /* Fall through. */
1290
1291 case OP_NOT_WORD_BOUNDARY:
1292 case OP_WORD_BOUNDARY:
1293 case OP_NOT_DIGIT:
1294 case OP_DIGIT:
1295 case OP_NOT_WHITESPACE:
1296 case OP_WHITESPACE:
1297 case OP_NOT_WORDCHAR:
1298 case OP_WORDCHAR:
1299 case OP_ANY:
1300 case OP_ALLANY:
1301 case OP_ANYBYTE:
1302 case OP_NOTPROP:
1303 case OP_PROP:
1304 case OP_ANYNL:
1305 case OP_NOT_HSPACE:
1306 case OP_HSPACE:
1307 case OP_NOT_VSPACE:
1308 case OP_VSPACE:
1309 case OP_EXTUNI:
1310 case OP_EODN:
1311 case OP_EOD:
1312 case OP_CIRC:
1313 case OP_CIRCM:
1314 case OP_DOLL:
1315 case OP_DOLLM:
1316 case OP_CHAR:
1317 case OP_CHARI:
1318 case OP_NOT:
1319 case OP_NOTI:
1320
1321 case OP_EXACT:
1322 case OP_POSSTAR:
1323 case OP_POSPLUS:
1324 case OP_POSQUERY:
1325 case OP_POSUPTO:
1326
1327 case OP_EXACTI:
1328 case OP_POSSTARI:
1329 case OP_POSPLUSI:
1330 case OP_POSQUERYI:
1331 case OP_POSUPTOI:
1332
1333 case OP_NOTEXACT:
1334 case OP_NOTPOSSTAR:
1335 case OP_NOTPOSPLUS:
1336 case OP_NOTPOSQUERY:
1337 case OP_NOTPOSUPTO:
1338
1339 case OP_NOTEXACTI:
1340 case OP_NOTPOSSTARI:
1341 case OP_NOTPOSPLUSI:
1342 case OP_NOTPOSQUERYI:
1343 case OP_NOTPOSUPTOI:
1344
1345 case OP_TYPEEXACT:
1346 case OP_TYPEPOSSTAR:
1347 case OP_TYPEPOSPLUS:
1348 case OP_TYPEPOSQUERY:
1349 case OP_TYPEPOSUPTO:
1350
1351 case OP_CLASS:
1352 case OP_NCLASS:
1353 case OP_XCLASS:
1354
1355 cc = next_opcode(common, cc);
1356 SLJIT_ASSERT(cc != NULL);
1357 break;
1358 }
1359
1360 /* Possessive quantifiers can use a special case. */
1361 if (SLJIT_UNLIKELY(possessive == length))
1362 return stack_restore ? no_frame : no_stack;
1363
1364 if (length > 0)
1365 return length + 1;
1366 return stack_restore ? no_frame : no_stack;
1367 }
1368
1369 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1370 {
1371 DEFINE_COMPILER;
1372 BOOL setsom_found = recursive;
1373 BOOL setmark_found = recursive;
1374 /* The last capture is a local variable even for recursions. */
1375 BOOL capture_last_found = FALSE;
1376 int offset;
1377
1378 /* >= 1 + shortest item size (2) */
1379 SLJIT_UNUSED_ARG(stacktop);
1380 SLJIT_ASSERT(stackpos >= stacktop + 2);
1381
1382 stackpos = STACK(stackpos);
1383 if (ccend == NULL)
1384 {
1385 ccend = bracketend(cc) - (1 + LINK_SIZE);
1386 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1387 cc = next_opcode(common, cc);
1388 }
1389
1390 SLJIT_ASSERT(cc != NULL);
1391 while (cc < ccend)
1392 switch(*cc)
1393 {
1394 case OP_SET_SOM:
1395 SLJIT_ASSERT(common->has_set_som);
1396 if (!setsom_found)
1397 {
1398 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1399 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1400 stackpos += (int)sizeof(sljit_sw);
1401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1402 stackpos += (int)sizeof(sljit_sw);
1403 setsom_found = TRUE;
1404 }
1405 cc += 1;
1406 break;
1407
1408 case OP_MARK:
1409 case OP_PRUNE_ARG:
1410 case OP_THEN_ARG:
1411 SLJIT_ASSERT(common->mark_ptr != 0);
1412 if (!setmark_found)
1413 {
1414 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1416 stackpos += (int)sizeof(sljit_sw);
1417 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1418 stackpos += (int)sizeof(sljit_sw);
1419 setmark_found = TRUE;
1420 }
1421 cc += 1 + 2 + cc[1];
1422 break;
1423
1424 case OP_RECURSE:
1425 if (common->has_set_som && !setsom_found)
1426 {
1427 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1429 stackpos += (int)sizeof(sljit_sw);
1430 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1431 stackpos += (int)sizeof(sljit_sw);
1432 setsom_found = TRUE;
1433 }
1434 if (common->mark_ptr != 0 && !setmark_found)
1435 {
1436 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1437 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1438 stackpos += (int)sizeof(sljit_sw);
1439 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1440 stackpos += (int)sizeof(sljit_sw);
1441 setmark_found = TRUE;
1442 }
1443 if (common->capture_last_ptr != 0 && !capture_last_found)
1444 {
1445 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1446 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1447 stackpos += (int)sizeof(sljit_sw);
1448 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1449 stackpos += (int)sizeof(sljit_sw);
1450 capture_last_found = TRUE;
1451 }
1452 cc += 1 + LINK_SIZE;
1453 break;
1454
1455 case OP_CBRA:
1456 case OP_CBRAPOS:
1457 case OP_SCBRA:
1458 case OP_SCBRAPOS:
1459 if (common->capture_last_ptr != 0 && !capture_last_found)
1460 {
1461 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1463 stackpos += (int)sizeof(sljit_sw);
1464 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1465 stackpos += (int)sizeof(sljit_sw);
1466 capture_last_found = TRUE;
1467 }
1468 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1470 stackpos += (int)sizeof(sljit_sw);
1471 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1472 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1474 stackpos += (int)sizeof(sljit_sw);
1475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1476 stackpos += (int)sizeof(sljit_sw);
1477
1478 cc += 1 + LINK_SIZE + IMM2_SIZE;
1479 break;
1480
1481 default:
1482 cc = next_opcode(common, cc);
1483 SLJIT_ASSERT(cc != NULL);
1484 break;
1485 }
1486
1487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1488 SLJIT_ASSERT(stackpos == STACK(stacktop));
1489 }
1490
1491 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1492 {
1493 int private_data_length = needs_control_head ? 3 : 2;
1494 int size;
1495 pcre_uchar *alternative;
1496 /* Calculate the sum of the private machine words. */
1497 while (cc < ccend)
1498 {
1499 size = 0;
1500 switch(*cc)
1501 {
1502 case OP_KET:
1503 if (PRIVATE_DATA(cc) != 0)
1504 private_data_length++;
1505 cc += 1 + LINK_SIZE;
1506 break;
1507
1508 case OP_ASSERT:
1509 case OP_ASSERT_NOT:
1510 case OP_ASSERTBACK:
1511 case OP_ASSERTBACK_NOT:
1512 case OP_ONCE:
1513 case OP_ONCE_NC:
1514 case OP_BRAPOS:
1515 case OP_SBRA:
1516 case OP_SBRAPOS:
1517 case OP_SCOND:
1518 private_data_length++;
1519 cc += 1 + LINK_SIZE;
1520 break;
1521
1522 case OP_CBRA:
1523 case OP_SCBRA:
1524 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1525 private_data_length++;
1526 cc += 1 + LINK_SIZE + IMM2_SIZE;
1527 break;
1528
1529 case OP_CBRAPOS:
1530 case OP_SCBRAPOS:
1531 private_data_length += 2;
1532 cc += 1 + LINK_SIZE + IMM2_SIZE;
1533 break;
1534
1535 case OP_COND:
1536 /* Might be a hidden SCOND. */
1537 alternative = cc + GET(cc, 1);
1538 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1539 private_data_length++;
1540 cc += 1 + LINK_SIZE;
1541 break;
1542
1543 CASE_ITERATOR_PRIVATE_DATA_1
1544 if (PRIVATE_DATA(cc))
1545 private_data_length++;
1546 cc += 2;
1547 #ifdef SUPPORT_UTF
1548 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1549 #endif
1550 break;
1551
1552 CASE_ITERATOR_PRIVATE_DATA_2A
1553 if (PRIVATE_DATA(cc))
1554 private_data_length += 2;
1555 cc += 2;
1556 #ifdef SUPPORT_UTF
1557 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1558 #endif
1559 break;
1560
1561 CASE_ITERATOR_PRIVATE_DATA_2B
1562 if (PRIVATE_DATA(cc))
1563 private_data_length += 2;
1564 cc += 2 + IMM2_SIZE;
1565 #ifdef SUPPORT_UTF
1566 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1567 #endif
1568 break;
1569
1570 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1571 if (PRIVATE_DATA(cc))
1572 private_data_length++;
1573 cc += 1;
1574 break;
1575
1576 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1577 if (PRIVATE_DATA(cc))
1578 private_data_length += 2;
1579 cc += 1;
1580 break;
1581
1582 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1583 if (PRIVATE_DATA(cc))
1584 private_data_length += 2;
1585 cc += 1 + IMM2_SIZE;
1586 break;
1587
1588 case OP_CLASS:
1589 case OP_NCLASS:
1590 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1591 case OP_XCLASS:
1592 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1593 #else
1594 size = 1 + 32 / (int)sizeof(pcre_uchar);
1595 #endif
1596 if (PRIVATE_DATA(cc))
1597 private_data_length += get_class_iterator_size(cc + size);
1598 cc += size;
1599 break;
1600
1601 default:
1602 cc = next_opcode(common, cc);
1603 SLJIT_ASSERT(cc != NULL);
1604 break;
1605 }
1606 }
1607 SLJIT_ASSERT(cc == ccend);
1608 return private_data_length;
1609 }
1610
1611 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1612 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1613 {
1614 DEFINE_COMPILER;
1615 int srcw[2];
1616 int count, size;
1617 BOOL tmp1next = TRUE;
1618 BOOL tmp1empty = TRUE;
1619 BOOL tmp2empty = TRUE;
1620 pcre_uchar *alternative;
1621 enum {
1622 start,
1623 loop,
1624 end
1625 } status;
1626
1627 status = save ? start : loop;
1628 stackptr = STACK(stackptr - 2);
1629 stacktop = STACK(stacktop - 1);
1630
1631 if (!save)
1632 {
1633 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1634 if (stackptr < stacktop)
1635 {
1636 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1637 stackptr += sizeof(sljit_sw);
1638 tmp1empty = FALSE;
1639 }
1640 if (stackptr < stacktop)
1641 {
1642 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1643 stackptr += sizeof(sljit_sw);
1644 tmp2empty = FALSE;
1645 }
1646 /* The tmp1next must be TRUE in either way. */
1647 }
1648
1649 do
1650 {
1651 count = 0;
1652 switch(status)
1653 {
1654 case start:
1655 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1656 count = 1;
1657 srcw[0] = common->recursive_head_ptr;
1658 if (needs_control_head)
1659 {
1660 SLJIT_ASSERT(common->control_head_ptr != 0);
1661 count = 2;
1662 srcw[1] = common->control_head_ptr;
1663 }
1664 status = loop;
1665 break;
1666
1667 case loop:
1668 if (cc >= ccend)
1669 {
1670 status = end;
1671 break;
1672 }
1673
1674 switch(*cc)
1675 {
1676 case OP_KET:
1677 if (PRIVATE_DATA(cc) != 0)
1678 {
1679 count = 1;
1680 srcw[0] = PRIVATE_DATA(cc);
1681 }
1682 cc += 1 + LINK_SIZE;
1683 break;
1684
1685 case OP_ASSERT:
1686 case OP_ASSERT_NOT:
1687 case OP_ASSERTBACK:
1688 case OP_ASSERTBACK_NOT:
1689 case OP_ONCE:
1690 case OP_ONCE_NC:
1691 case OP_BRAPOS:
1692 case OP_SBRA:
1693 case OP_SBRAPOS:
1694 case OP_SCOND:
1695 count = 1;
1696 srcw[0] = PRIVATE_DATA(cc);
1697 SLJIT_ASSERT(srcw[0] != 0);
1698 cc += 1 + LINK_SIZE;
1699 break;
1700
1701 case OP_CBRA:
1702 case OP_SCBRA:
1703 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1704 {
1705 count = 1;
1706 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1707 }
1708 cc += 1 + LINK_SIZE + IMM2_SIZE;
1709 break;
1710
1711 case OP_CBRAPOS:
1712 case OP_SCBRAPOS:
1713 count = 2;
1714 srcw[0] = PRIVATE_DATA(cc);
1715 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1716 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1717 cc += 1 + LINK_SIZE + IMM2_SIZE;
1718 break;
1719
1720 case OP_COND:
1721 /* Might be a hidden SCOND. */
1722 alternative = cc + GET(cc, 1);
1723 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1724 {
1725 count = 1;
1726 srcw[0] = PRIVATE_DATA(cc);
1727 SLJIT_ASSERT(srcw[0] != 0);
1728 }
1729 cc += 1 + LINK_SIZE;
1730 break;
1731
1732 CASE_ITERATOR_PRIVATE_DATA_1
1733 if (PRIVATE_DATA(cc))
1734 {
1735 count = 1;
1736 srcw[0] = PRIVATE_DATA(cc);
1737 }
1738 cc += 2;
1739 #ifdef SUPPORT_UTF
1740 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1741 #endif
1742 break;
1743
1744 CASE_ITERATOR_PRIVATE_DATA_2A
1745 if (PRIVATE_DATA(cc))
1746 {
1747 count = 2;
1748 srcw[0] = PRIVATE_DATA(cc);
1749 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1750 }
1751 cc += 2;
1752 #ifdef SUPPORT_UTF
1753 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1754 #endif
1755 break;
1756
1757 CASE_ITERATOR_PRIVATE_DATA_2B
1758 if (PRIVATE_DATA(cc))
1759 {
1760 count = 2;
1761 srcw[0] = PRIVATE_DATA(cc);
1762 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1763 }
1764 cc += 2 + IMM2_SIZE;
1765 #ifdef SUPPORT_UTF
1766 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1767 #endif
1768 break;
1769
1770 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1771 if (PRIVATE_DATA(cc))
1772 {
1773 count = 1;
1774 srcw[0] = PRIVATE_DATA(cc);
1775 }
1776 cc += 1;
1777 break;
1778
1779 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1780 if (PRIVATE_DATA(cc))
1781 {
1782 count = 2;
1783 srcw[0] = PRIVATE_DATA(cc);
1784 srcw[1] = srcw[0] + sizeof(sljit_sw);
1785 }
1786 cc += 1;
1787 break;
1788
1789 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1790 if (PRIVATE_DATA(cc))
1791 {
1792 count = 2;
1793 srcw[0] = PRIVATE_DATA(cc);
1794 srcw[1] = srcw[0] + sizeof(sljit_sw);
1795 }
1796 cc += 1 + IMM2_SIZE;
1797 break;
1798
1799 case OP_CLASS:
1800 case OP_NCLASS:
1801 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1802 case OP_XCLASS:
1803 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1804 #else
1805 size = 1 + 32 / (int)sizeof(pcre_uchar);
1806 #endif
1807 if (PRIVATE_DATA(cc))
1808 switch(get_class_iterator_size(cc + size))
1809 {
1810 case 1:
1811 count = 1;
1812 srcw[0] = PRIVATE_DATA(cc);
1813 break;
1814
1815 case 2:
1816 count = 2;
1817 srcw[0] = PRIVATE_DATA(cc);
1818 srcw[1] = srcw[0] + sizeof(sljit_sw);
1819 break;
1820
1821 default:
1822 SLJIT_ASSERT_STOP();
1823 break;
1824 }
1825 cc += size;
1826 break;
1827
1828 default:
1829 cc = next_opcode(common, cc);
1830 SLJIT_ASSERT(cc != NULL);
1831 break;
1832 }
1833 break;
1834
1835 case end:
1836 SLJIT_ASSERT_STOP();
1837 break;
1838 }
1839
1840 while (count > 0)
1841 {
1842 count--;
1843 if (save)
1844 {
1845 if (tmp1next)
1846 {
1847 if (!tmp1empty)
1848 {
1849 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1850 stackptr += sizeof(sljit_sw);
1851 }
1852 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1853 tmp1empty = FALSE;
1854 tmp1next = FALSE;
1855 }
1856 else
1857 {
1858 if (!tmp2empty)
1859 {
1860 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1861 stackptr += sizeof(sljit_sw);
1862 }
1863 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1864 tmp2empty = FALSE;
1865 tmp1next = TRUE;
1866 }
1867 }
1868 else
1869 {
1870 if (tmp1next)
1871 {
1872 SLJIT_ASSERT(!tmp1empty);
1873 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1874 tmp1empty = stackptr >= stacktop;
1875 if (!tmp1empty)
1876 {
1877 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1878 stackptr += sizeof(sljit_sw);
1879 }
1880 tmp1next = FALSE;
1881 }
1882 else
1883 {
1884 SLJIT_ASSERT(!tmp2empty);
1885 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1886 tmp2empty = stackptr >= stacktop;
1887 if (!tmp2empty)
1888 {
1889 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1890 stackptr += sizeof(sljit_sw);
1891 }
1892 tmp1next = TRUE;
1893 }
1894 }
1895 }
1896 }
1897 while (status != end);
1898
1899 if (save)
1900 {
1901 if (tmp1next)
1902 {
1903 if (!tmp1empty)
1904 {
1905 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1906 stackptr += sizeof(sljit_sw);
1907 }
1908 if (!tmp2empty)
1909 {
1910 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1911 stackptr += sizeof(sljit_sw);
1912 }
1913 }
1914 else
1915 {
1916 if (!tmp2empty)
1917 {
1918 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1919 stackptr += sizeof(sljit_sw);
1920 }
1921 if (!tmp1empty)
1922 {
1923 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1924 stackptr += sizeof(sljit_sw);
1925 }
1926 }
1927 }
1928 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1929 }
1930
1931 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1932 {
1933 pcre_uchar *end = bracketend(cc);
1934 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1935
1936 /* Assert captures then. */
1937 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1938 current_offset = NULL;
1939 /* Conditional block does not. */
1940 if (*cc == OP_COND || *cc == OP_SCOND)
1941 has_alternatives = FALSE;
1942
1943 cc = next_opcode(common, cc);
1944 if (has_alternatives)
1945 current_offset = common->then_offsets + (cc - common->start);
1946
1947 while (cc < end)
1948 {
1949 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1950 cc = set_then_offsets(common, cc, current_offset);
1951 else
1952 {
1953 if (*cc == OP_ALT && has_alternatives)
1954 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1955 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1956 *current_offset = 1;
1957 cc = next_opcode(common, cc);
1958 }
1959 }
1960
1961 return end;
1962 }
1963
1964 #undef CASE_ITERATOR_PRIVATE_DATA_1
1965 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1966 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1967 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1968 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1969 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1970
1971 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1972 {
1973 return (value & (value - 1)) == 0;
1974 }
1975
1976 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1977 {
1978 while (list)
1979 {
1980 /* sljit_set_label is clever enough to do nothing
1981 if either the jump or the label is NULL. */
1982 SET_LABEL(list->jump, label);
1983 list = list->next;
1984 }
1985 }
1986
1987 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1988 {
1989 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1990 if (list_item)
1991 {
1992 list_item->next = *list;
1993 list_item->jump = jump;
1994 *list = list_item;
1995 }
1996 }
1997
1998 static void add_stub(compiler_common *common, struct sljit_jump *start)
1999 {
2000 DEFINE_COMPILER;
2001 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2002
2003 if (list_item)
2004 {
2005 list_item->start = start;
2006 list_item->quit = LABEL();
2007 list_item->next = common->stubs;
2008 common->stubs = list_item;
2009 }
2010 }
2011
2012 static void flush_stubs(compiler_common *common)
2013 {
2014 DEFINE_COMPILER;
2015 stub_list* list_item = common->stubs;
2016
2017 while (list_item)
2018 {
2019 JUMPHERE(list_item->start);
2020 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2021 JUMPTO(SLJIT_JUMP, list_item->quit);
2022 list_item = list_item->next;
2023 }
2024 common->stubs = NULL;
2025 }
2026
/* Emits the code which decrements COUNT_MATCH by one and jumps to the
handlers collected in common->calllimit when the counter reaches zero. */
2027 static SLJIT_INLINE void count_match(compiler_common *common)
2028 {
2029 DEFINE_COMPILER;
2030
/* SLJIT_SET_E is requested so that the zero test below can use the result. */
2031 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2032 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2033 }
2034
/* Emits the code which advances STACK_TOP by size machine words. A stub is
registered for the case when STACK_TOP becomes greater than STACK_LIMIT.
(NOTE(review): the stub presumably enlarges the stack through the
common->stackalloc handler - confirm against that handler.) */
2035 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2036 {
2037 /* May destroy all locals and registers except TMP2. */
2038 DEFINE_COMPILER;
2039
2040 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
/* Debug aid: overwrites TMP1, TMP3, RETURN_ADDR, LOCALS0 and LOCALS1 with a
recognisable constant, i.e. everything the comment above allows to be lost. */
2041 #ifdef DESTROY_REGISTERS
2042 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2043 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2044 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2047 #endif
2048 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2049 }
2050
/* Emits the code which moves STACK_TOP back by size machine words; the
counterpart of allocate_stack(). No limit check is emitted here. */
2051 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2052 {
2053 DEFINE_COMPILER;
2054 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2055 }
2056
2057 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2058 {
2059 DEFINE_COMPILER;
2060 struct sljit_label *loop;
2061 int i;
2062
2063 /* At this point we can freely use all temporary registers. */
2064 SLJIT_ASSERT(length > 1);
2065 /* TMP1 returns with begin - 1. */
2066 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2067 if (length < 8)
2068 {
2069 for (i = 1; i < length; i++)
2070 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2071 }
2072 else
2073 {
2074 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2075 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2076 loop = LABEL();
2077 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2078 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2079 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2080 }
2081 }
2082
2083 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2084 {
2085 DEFINE_COMPILER;
2086 struct sljit_label *loop;
2087 int i;
2088
2089 SLJIT_ASSERT(length > 1);
2090 /* OVECTOR(1) contains the "string begin - 1" constant. */
2091 if (length > 2)
2092 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2093 if (length < 8)
2094 {
2095 for (i = 2; i < length; i++)
2096 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2097 }
2098 else
2099 {
2100 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2101 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2102 loop = LABEL();
2103 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2104 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2105 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2106 }
2107
2108 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2109 if (common->mark_ptr != 0)
2110 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2111 if (common->control_head_ptr != 0)
2112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2113 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2114 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2115 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2116 }
2117
2118 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2119 {
2120 while (current != NULL)
2121 {
2122 switch (current[-2])
2123 {
2124 case type_then_trap:
2125 break;
2126
2127 case type_mark:
2128 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2129 return current[-4];
2130 break;
2131
2132 default:
2133 SLJIT_ASSERT_STOP();
2134 break;
2135 }
2136 current = (sljit_sw*)current[-1];
2137 }
2138 return -1;
2139 }
2140
/* Emits the code which copies the ovector items from the local area into the
offsets array of the jit_arguments structure, and which computes the return
value of the match.

Arguments:
  common      compiler state
  topbracket  when greater than 1, the emitted code scans the capture pairs
              downwards to compute the return value; otherwise the return
              value is the constant 1 */
2141 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2142 {
2143 DEFINE_COMPILER;
2144 struct sljit_label *loop;
2145 struct sljit_jump *early_quit;
2146
2147 /* At this point we can freely use all registers. */
/* The previous value of OVECTOR(1) is kept in SLJIT_SAVED_REG3 (it is compared
with the capture pairs below), and STR_PTR is stored in its place. */
2148 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2149 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2150
2151 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
/* The mark local, if used, is copied to the mark_ptr field of the arguments. */
2152 if (common->mark_ptr != 0)
2153 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2154 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2155 if (common->mark_ptr != 0)
2156 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* SCRATCH_REG3 = offsets - sizeof(int). The store in the loop is a SLJIT_MOVU
with a sizeof(int) displacement (NOTE(review): presumably an updating store
which advances the base register - confirm against the sljit documentation). */
2157 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2158 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2159 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2160 /* Unlikely, but possible */
2161 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
/* One round per output item (SCRATCH_REG2 holds offset_count and counts down):
the stored value minus "begin" is computed, and SAVED_REG1 moves to the next
ovector item. */
2162 loop = LABEL();
2163 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2164 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2165 /* Copy the integer value to the output buffer */
/* In the 16 and 32 bit libraries the difference is shifted right by
UCHAR_SHIFT first. */
2166 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2167 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2168 #endif
2169 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2170 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2171 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2172 JUMPHERE(early_quit);
2173
2174 /* Calculate the return value, which is the maximum ovector value. */
2175 if (topbracket > 1)
2176 {
/* The pairs are visited downwards, starting with the pair of topbracket.
SCRATCH_REG2 is decremented for every visited pair, and the scan stops at
the first pair whose first item differs from the value in SAVED_REG3. */
2177 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2178 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2179
2180 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2181 loop = LABEL();
2182 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2183 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2184 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2185 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2186 }
2187 else
2188 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2189 }
2190
/* Emits the code which returns with PCRE_ERROR_PARTIAL. If the
real_offset_count field of the arguments is at least 2, the first items of
the offsets array are filled in as well, before jumping to quit.

Arguments:
  common  compiler state; the mode must be a partial matching mode with
          start_used_ptr and start_ptr allocated (checked by assertion)
  quit    label of the exit code */
2191 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2192 {
2193 DEFINE_COMPILER;
2194 struct sljit_jump *jump;
2195
/* SLJIT_SAVED_REG2 is used as a temporary below although it is the same
register as STR_END; the compile time assertion documents this. */
2196 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2197 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2198 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2199
2200 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2201 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
/* Nothing is stored if fewer than two offsets are available. */
2202 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2203 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2204
2205 /* Store match begin and end. */
2206 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2207 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2208
/* offsets[2] is written only if at least three offsets are available. Its
value is read from start_ptr in hard partial mode, and from the word after
hit_start otherwise, and is made relative to "begin". */
2209 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2210 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2211 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2212 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2213 #endif
2214 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2215 JUMPHERE(jump);
2216
/* offsets[1] = STR_END - begin. The value which becomes offsets[0] is loaded
first, because SAVED_REG2 (STR_END) is overwritten by the subtraction. */
2217 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2218 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2219 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2220 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2221 #endif
2222 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2223
/* offsets[0] = (start_used_ptr or hit_start value) - begin. */
2224 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2225 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2226 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2227 #endif
2228 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2229
2230 JUMPTO(SLJIT_JUMP, quit);
2231 }
2232
2233 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2234 {
2235 /* May destroy TMP1. */
2236 DEFINE_COMPILER;
2237 struct sljit_jump *jump;
2238
2239 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2240 {
2241 /* The value of -1 must be kept for start_used_ptr! */
2242 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2243 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2244 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2245 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2246 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2247 JUMPHERE(jump);
2248 }
2249 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2250 {
2251 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2252 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2253 JUMPHERE(jump);
2254 }
2255 }
2256
2257 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2258 {
2259 /* Detects if the character has an othercase. */
2260 unsigned int c;
2261
2262 #ifdef SUPPORT_UTF
2263 if (common->utf)
2264 {
2265 GETCHAR(c, cc);
2266 if (c > 127)
2267 {
2268 #ifdef SUPPORT_UCP
2269 return c != UCD_OTHERCASE(c);
2270 #else
2271 return FALSE;
2272 #endif
2273 }
2274 #ifndef COMPILE_PCRE8
2275 return common->fcc[c] != c;
2276 #endif
2277 }
2278 else
2279 #endif
2280 c = *cc;
2281 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2282 }
2283
2284 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2285 {
2286 /* Returns with the othercase. */
2287 #ifdef SUPPORT_UTF
2288 if (common->utf && c > 127)
2289 {
2290 #ifdef SUPPORT_UCP
2291 return UCD_OTHERCASE(c);
2292 #else
2293 return c;
2294 #endif
2295 }
2296 #endif
2297 return TABLE_GET(c, common->fcc, c);
2298 }
2299
2300 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2301 {
2302 /* Detects if the character and its othercase has only 1 bit difference. */
2303 unsigned int c, oc, bit;
2304 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2305 int n;
2306 #endif
2307
2308 #ifdef SUPPORT_UTF
2309 if (common->utf)
2310 {
2311 GETCHAR(c, cc);
2312 if (c <= 127)
2313 oc = common->fcc[c];
2314 else
2315 {
2316 #ifdef SUPPORT_UCP
2317 oc = UCD_OTHERCASE(c);
2318 #else
2319 oc = c;
2320 #endif
2321 }
2322 }
2323 else
2324 {
2325 c = *cc;
2326 oc = TABLE_GET(c, common->fcc, c);
2327 }
2328 #else
2329 c = *cc;
2330 oc = TABLE_GET(c, common->fcc, c);
2331 #endif
2332
2333 SLJIT_ASSERT(c != oc);
2334
2335 bit = c ^ oc;
2336 /* Optimized for English alphabet. */
2337 if (c <= 127 && bit == 0x20)
2338 return (0 << 8) | 0x20;
2339
2340 /* Since c != oc, they must have at least 1 bit difference. */
2341 if (!is_powerof2(bit))
2342 return 0;
2343
2344 #if defined COMPILE_PCRE8
2345
2346 #ifdef SUPPORT_UTF
2347 if (common->utf && c > 127)
2348 {
2349 n = GET_EXTRALEN(*cc);
2350 while ((bit & 0x3f) == 0)
2351 {
2352 n--;
2353 bit >>= 6;
2354 }
2355 return (n << 8) | bit;
2356 }
2357 #endif /* SUPPORT_UTF */
2358 return (0 << 8) | bit;
2359
2360 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2361
2362 #ifdef SUPPORT_UTF
2363 if (common->utf && c > 65535)
2364 {
2365 if (bit >= (1 << 10))
2366 bit >>= 10;
2367 else
2368 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2369 }
2370 #endif /* SUPPORT_UTF */
2371 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2372
2373 #endif /* COMPILE_PCRE[8|16|32] */
2374 }
2375
2376 static void check_partial(compiler_common *common, BOOL force)
2377 {
2378 /* Checks whether a partial matching is occurred. Does not modify registers. */
2379 DEFINE_COMPILER;
2380 struct sljit_jump *jump = NULL;
2381
2382 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2383
2384 if (common->mode == JIT_COMPILE)
2385 return;
2386
2387 if (!force)
2388 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2389 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2390 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2391
2392 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2393 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2394 else
2395 {
2396 if (common->partialmatchlabel != NULL)
2397 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2398 else
2399 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2400 }
2401
2402 if (jump != NULL)
2403 JUMPHERE(jump);
2404 }
2405
2406 static void check_str_end(compiler_common *common, jump_list **end_reached)
2407 {
2408 /* Does not affect registers. Usually used in a tight spot. */
2409 DEFINE_COMPILER;
2410 struct sljit_jump *jump;
2411
2412 if (common->mode == JIT_COMPILE)
2413 {
2414 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2415 return;
2416 }
2417
2418 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2419 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2420 {
2421 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2422 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2423 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2424 }
2425 else
2426 {
2427 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2428 if (common->partialmatchlabel != NULL)
2429 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2430 else
2431 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2432 }
2433 JUMPHERE(jump);
2434 }
2435
2436 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2437 {
2438 DEFINE_COMPILER;
2439 struct sljit_jump *jump;
2440
2441 if (common->mode == JIT_COMPILE)
2442 {
2443 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2444 return;
2445 }
2446
2447 /* Partial matching mode. */
2448 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2449 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2450 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2451 {
2452 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2453 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2454 }
2455 else
2456 {
2457 if (common->partialmatchlabel != NULL)
2458 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2459 else
2460 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2461 }
2462 JUMPHERE(jump);
2463 }
2464
2465 static void read_char(compiler_common *common)
2466 {
2467 /* Reads the character into TMP1, updates STR_PTR.
2468 Does not check STR_END. TMP2 Destroyed. */
2469 DEFINE_COMPILER;
2470 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2471 struct sljit_jump *jump;
2472 #endif
2473
2474 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2475 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2476 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2477 if (common->utf)
2478 {
2479 #if defined COMPILE_PCRE8
2480 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2481 #elif defined COMPILE_PCRE16
2482 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2483 #endif /* COMPILE_PCRE[8|16] */
2484 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2485 JUMPHERE(jump);
2486 }
2487 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2488 }
2489
2490 static void peek_char(compiler_common *common)
2491 {
2492 /* Reads the character into TMP1, keeps STR_PTR.
2493 Does not check STR_END. TMP2 Destroyed. */
2494 DEFINE_COMPILER;
2495 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2496 struct sljit_jump *jump;
2497 #endif
2498
2499 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2500 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2501 if (common->utf)
2502 {
2503 #if defined COMPILE_PCRE8
2504 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2505 #elif defined COMPILE_PCRE16
2506 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2507 #endif /* COMPILE_PCRE[8|16] */
2508 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2509 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2510 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2511 JUMPHERE(jump);
2512 }
2513 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2514 }
2515
2516 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2517
2518 static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
2519 {
2520 const pcre_uint8 value = nclass ? 0xff : 0;
2521 const pcre_uint8* end = bitset + 32;
2522
2523 bitset += 16;
2524 do
2525 {
2526 if (*bitset++ != value)
2527 return FALSE;
2528 }
2529 while (bitset < end);
2530 return TRUE;
2531 }
2532
2533 static void read_char7_type(compiler_common *common, BOOL full_read)
2534 {
2535 /* Reads the precise character type of a character into TMP1, if the character is
2536 less than 128. Otherwise it returns with zero. */
2537 DEFINE_COMPILER;
2538 struct sljit_jump *jump;
2539
2540 SLJIT_ASSERT(common->utf);
2541
2542 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2543 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2544
2545 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2546
2547 if (full_read)
2548 {
2549 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2550 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2551 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2552 JUMPHERE(jump);
2553 }
2554 }
2555
2556 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2557
2558 static void read_char_max(compiler_common *common, pcre_uint32 max, BOOL full_read)
2559 {
2560 /* Reads the precise value of a character into TMP1, if the character is
2561 less than or equal to max. Otherwise it returns with a value greater than max. */
2562 DEFINE_COMPILER;
2563 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2564 struct sljit_jump *jump;
2565 #endif
2566
2567 SLJIT_UNUSED_ARG(full_read);
2568 SLJIT_UNUSED_ARG(max);
2569
2570 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2571 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2572
2573 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2574 if (common->utf)
2575 {
2576 if (max < 128 && !full_read)
2577 return;
2578
2579 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2580 if (max < 128)
2581 {
2582 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2583 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2584 }
2585 else if (max < 0x400)
2586 {
2587 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2588 if (!full_read)
2589 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2590 else
2591 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2592 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2593 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2594 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2595 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2596 if (full_read)
2597 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2598 }
2599 else
2600 add_jump(compiler, (max < 0x800) ? &common->utfreadchar11 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2601 JUMPHERE(jump);
2602 }
2603 #endif
2604
2605 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2606 if (common->utf)
2607 {
2608 if (max < 0xd800 && !full_read)
2609 return;
2610
2611 if (max >= 0x10000)
2612 {
2613 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2614 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2615 JUMPHERE(jump);
2616 return;
2617 }
2618
2619 /* Skip low surrogate if necessary. */
2620 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2621 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2622 if (full_read)
2623 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2624 if (max >= 0xd800)
2625 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2626 JUMPHERE(jump);
2627 }
2628 #endif
2629 }
2630
2631 static void read_char8_type(compiler_common *common, BOOL full_read)
2632 {
2633 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2634 DEFINE_COMPILER;
2635 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2636 struct sljit_jump *jump;
2637 #endif
2638 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2639 struct sljit_jump *jump2;
2640 #endif
2641
2642 SLJIT_UNUSED_ARG(full_read);
2643
2644 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2645 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2646
2647 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2648 if (common->utf)
2649 {
2650 /* This can be an extra read in some situations, but hopefully
2651 it is needed in most cases. */
2652 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2653 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2654 if (!full_read)
2655 {
2656 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2658 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2659 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2660 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2661 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2662 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2663 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2664 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2665 JUMPHERE(jump2);
2666 }
2667 else
2668 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2669 JUMPHERE(jump);
2670 return;
2671 }
2672 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2673
2674 #if !defined COMPILE_PCRE8
2675 /* The ctypes array contains only 256 values. */
2676 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2677 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2678 #endif
2679 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2680 #if !defined COMPILE_PCRE8
2681 JUMPHERE(jump);
2682 #endif
2683
2684 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2685 if (common->utf && full_read)
2686 {
2687 /* Skip low surrogate if necessary. */
2688 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2689 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2690 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2691 JUMPHERE(jump);
2692 }
2693 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2694 }
2695
2696 static void skip_char_back(compiler_common *common)
2697 {
2698 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2699 DEFINE_COMPILER;
2700 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2701 #if defined COMPILE_PCRE8
2702 struct sljit_label *label;
2703
2704 if (common->utf)
2705 {
2706 label = LABEL();
2707 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2708 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2709 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2710 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2711 return;
2712 }
2713 #elif defined COMPILE_PCRE16
2714 if (common->utf)
2715 {
2716 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2717 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2718 /* Skip low surrogate if necessary. */
2719 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2720 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2721 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2722 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2723 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2724 return;
2725 }
2726 #endif /* COMPILE_PCRE[8|16] */
2727 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2728 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2729 }
2730
2731 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2732 {
2733 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2734 DEFINE_COMPILER;
2735
2736 if (nltype == NLTYPE_ANY)
2737 {
2738 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2739 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2740 }
2741 else if (nltype == NLTYPE_ANYCRLF)
2742 {
2743 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2744 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2745 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2746 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2747 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2748 }
2749 else
2750 {
2751 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2752 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2753 }
2754 }
2755
2756 #ifdef SUPPORT_UTF
2757
2758 #if defined COMPILE_PCRE8
2759 static void do_utfreadchar(compiler_common *common)
2760 {
2761 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2762 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2763 DEFINE_COMPILER;
2764 struct sljit_jump *jump;
2765
2766 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2767 /* Searching for the first zero. */
2768 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2769 jump = JUMP(SLJIT_C_NOT_ZERO);
2770 /* Two byte sequence. */
2771 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2772 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2773 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2774 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2775 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2776 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2777 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2778 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2779 JUMPHERE(jump);
2780
2781 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2782 jump = JUMP(SLJIT_C_NOT_ZERO);
2783 /* Three byte sequence. */
2784 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2785 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2786 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2787 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2788 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2789 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2790 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2791 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2792 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2793 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2794 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2795 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2796 JUMPHERE(jump);
2797
2798 /* Four byte sequence. */
2799 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2800 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2801 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2802 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2803 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2804 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2805 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2806 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2807 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2808 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2809 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2810 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2811 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2812 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2813 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2814 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2815 }
2816
2817 static void do_utfreadchar11(compiler_common *common)
2818 {
2819 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2820 of the character (>= 0xc0). Return value in TMP1. */
2821 DEFINE_COMPILER;
2822 struct sljit_jump *jump;
2823
2824 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2825
2826 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2827 jump = JUMP(SLJIT_C_NOT_ZERO);
2828 /* Two byte sequence. */
2829 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2830 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2831 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2832 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2833 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2834 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2835 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2836
2837 JUMPHERE(jump);
2838 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2839 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x800);
2840 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2841 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2842 }
2843
2844 static void do_utfreadtype8(compiler_common *common)
2845 {
2846 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2847 of the character (>= 0xc0). Return value in TMP1. */
2848 DEFINE_COMPILER;
2849 struct sljit_jump *jump;
2850 struct sljit_jump *compare;
2851
2852 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2853
2854 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2855 jump = JUMP(SLJIT_C_NOT_ZERO);
2856 /* Two byte sequence. */
2857 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2858 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2859 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2860 /* The upper 5 bits are known at this point. */
2861 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2862 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2863 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2864 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2865 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2866 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2867
2868 JUMPHERE(compare);
2869 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2870 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2871
2872 /* We only have types for characters less than 256. */
2873 JUMPHERE(jump);
2874 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2875 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2876 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2877 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2878 }
2879
2880 #elif defined COMPILE_PCRE16
2881
2882 static void do_utfreadchar(compiler_common *common)
2883 {
2884 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2885 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2886 DEFINE_COMPILER;
2887 struct sljit_jump *jump;
2888
2889 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2890 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2891 /* Do nothing, only return. */
2892 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2893
2894 JUMPHERE(jump);
2895 /* Combine two 16 bit characters. */
2896 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2897 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2898 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2899 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2900 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2901 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2902 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2903 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2904 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2905 }
2906
2907 #endif /* COMPILE_PCRE[8|16] */
2908
2909 #endif /* SUPPORT_UTF */
2910
2911 #ifdef SUPPORT_UCP
2912
2913 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2914 #define UCD_BLOCK_MASK 127
2915 #define UCD_BLOCK_SHIFT 7
2916
2917 static void do_getucd(compiler_common *common)
2918 {
2919 /* Search the UCD record for the character comes in TMP1.
2920 Returns chartype in TMP1 and UCD offset in TMP2. */
2921 DEFINE_COMPILER;
2922
2923 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2924
2925 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2926 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2927 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2928 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2929 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2930 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2931 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2932 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2933 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2934 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2935 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2936 }
2937 #endif
2938
2939 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2940 {
2941 DEFINE_COMPILER;
2942 struct sljit_label *mainloop;
2943 struct sljit_label *newlinelabel = NULL;
2944 struct sljit_jump *start;
2945 struct sljit_jump *end = NULL;
2946 struct sljit_jump *nl = NULL;
2947 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2948 struct sljit_jump *singlechar;
2949 #endif
2950 jump_list *newline = NULL;
2951 BOOL newlinecheck = FALSE;
2952 BOOL readuchar = FALSE;
2953
2954 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2955 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2956 newlinecheck = TRUE;
2957
2958 if (firstline)
2959 {
2960 /* Search for the end of the first line. */
2961 SLJIT_ASSERT(common->first_line_end != 0);
2962 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2963
2964 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2965 {
2966 mainloop = LABEL();
2967 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2968 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2969 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2970 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2971 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2972 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2973 JUMPHERE(end);
2974 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2975 }
2976 else
2977 {
2978 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2979 mainloop = LABEL();
2980 /* Continual stores does not cause data dependency. */
2981 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2982 read_char(common);
2983 check_newlinechar(common, common->nltype, &newline, TRUE);
2984 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2985 JUMPHERE(end);
2986 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2987 set_jumps(newline, LABEL());
2988 }
2989
2990 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2991 }
2992
2993 start = JUMP(SLJIT_JUMP);
2994
2995 if (newlinecheck)
2996 {
2997 newlinelabel = LABEL();
2998 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2999 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3000 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3001 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
3002 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3003 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3004 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3005 #endif
3006 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3007 nl = JUMP(SLJIT_JUMP);
3008 }
3009
3010 mainloop = LABEL();
3011
3012 /* Increasing the STR_PTR here requires one less jump in the most common case. */
3013 #ifdef SUPPORT_UTF
3014 if (common->utf) readuchar = TRUE;
3015 #endif
3016 if (newlinecheck) readuchar = TRUE;
3017
3018 if (readuchar)
3019 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3020
3021 if (newlinecheck)
3022 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
3023
3024 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3025 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
3026 #if defined COMPILE_PCRE8
3027 if (common->utf)
3028 {
3029 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3030 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3031 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3032 JUMPHERE(singlechar);
3033 }
3034 #elif defined COMPILE_PCRE16
3035 if (common->utf)
3036 {
3037 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3038 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3039 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3040 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3041 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3042 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3043 JUMPHERE(singlechar);
3044 }
3045 #endif /* COMPILE_PCRE[8|16] */
3046 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3047 JUMPHERE(start);
3048
3049 if (newlinecheck)
3050 {
3051 JUMPHERE(end);
3052 JUMPHERE(nl);
3053 }
3054
3055 return mainloop;
3056 }
3057
/* Collects information about the characters at the start of the compiled
pattern pointed to by cc.

"chars" is used as an array of (value, mask) pairs, one pair per character
position. The mask has a bit set wherever the possible code units at that
position differ, and the value always has those mask bits set too. A value of
NOTACHAR marks a position that has not been written yet.

At most max_chars positions are examined. The return value is "consumed", the
number of positions counted so far; note that the position which brings
max_chars down to zero is written but is not counted. */
3058 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
3059 {
3060 /* Recursive function, which scans prefix literals. */
3061 int len, repeat, len_save, consumed = 0;
3062 pcre_uint32 caseless, chr, mask;
3063 pcre_uchar *alternative, *cc_save;
3064 BOOL last, any;
3065
3066 repeat = 1;
3067 while (TRUE)
3068 {
/* Defaults for each opcode: stop after it (last), it is a literal rather
than a class/type (any), and it is caseful. */
3069 last = TRUE;
3070 any = FALSE;
3071 caseless = 0;
3072 switch (*cc)
3073 {
3074 case OP_CHARI:
3075 caseless = 1;
/* Fall through. */
3076 case OP_CHAR:
3077 last = FALSE;
3078 cc++;
3079 break;
3080
3081 case OP_SOD:
3082 case OP_SOM:
3083 case OP_SET_SOM:
3084 case OP_NOT_WORD_BOUNDARY:
3085 case OP_WORD_BOUNDARY:
3086 case OP_EODN:
3087 case OP_EOD:
3088 case OP_CIRC:
3089 case OP_CIRCM:
3090 case OP_DOLL:
3091 case OP_DOLLM:
3092 /* Zero width assertions. */
3093 cc++;
3094 continue;
3095
/* "last" stays TRUE here, so scanning stops once this character has been
recorded. */
3096 case OP_PLUS:
3097 case OP_MINPLUS:
3098 case OP_POSPLUS:
3099 cc++;
3100 break;
3101
3102 case OP_EXACTI:
3103 caseless = 1;
/* Fall through. */
3104 case OP_EXACT:
/* The character that follows is recorded "repeat" times. */
3105 repeat = GET2(cc, 1);
3106 last = FALSE;
3107 cc += 1 + IMM2_SIZE;
3108 break;
3109
3110 case OP_PLUSI:
3111 case OP_MINPLUSI:
3112 case OP_POSPLUSI:
3113 caseless = 1;
3114 cc++;
3115 break;
3116
3117 case OP_KET:
3118 cc += 1 + LINK_SIZE;
3119 continue;
3120
3121 case OP_ALT:
/* Advance by the link value stored after the opcode. */
3122 cc += GET(cc, 1);
3123 continue;
3124
3125 case OP_ONCE:
3126 case OP_ONCE_NC:
3127 case OP_BRA:
3128 case OP_BRAPOS:
3129 case OP_CBRA:
3130 case OP_CBRAPOS:
/* Every further alternative (each OP_ALT reached through the link chain)
is scanned recursively into the same chars array. The number of positions
an alternative yields becomes the new limit for everything that follows. */
3131 alternative = cc + GET(cc, 1);
3132 while (*alternative == OP_ALT)
3133 {
3134 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3135 if (max_chars == 0)
3136 return consumed;
3137 alternative += GET(alternative, 1);
3138 }
3139
/* Step into the first alternative; capturing brackets carry an extra
IMM2_SIZE operand. */
3140 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3141 cc += IMM2_SIZE;
3142 cc += 1 + LINK_SIZE;
3143 continue;
3144
3145 case OP_CLASS:
3146 case OP_NCLASS:
3147 any = TRUE;
3148 cc += 1 + 32 / sizeof(pcre_uchar);
3149 break;
3150
3151 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3152 case OP_XCLASS:
3153 any = TRUE;
3154 cc += GET(cc, 1);
3155 break;
3156 #endif
3157
3158 case OP_NOT_DIGIT:
3159 case OP_DIGIT:
3160 case OP_NOT_WHITESPACE:
3161 case OP_WHITESPACE:
3162 case OP_NOT_WORDCHAR:
3163 case OP_WORDCHAR:
3164 case OP_ANY:
3165 case OP_ALLANY:
3166 any = TRUE;
3167 cc++;
3168 break;
3169
3170 #ifdef SUPPORT_UCP
3171 case OP_NOTPROP:
3172 case OP_PROP:
3173 any = TRUE;
3174 cc += 1 + 2;
3175 break;
3176 #endif
3177
3178 case OP_TYPEEXACT:
/* Only the repeat count is taken here; the type opcode that follows is
handled by the next iteration. */
3179 repeat = GET2(cc, 1);
3180 cc += 1 + IMM2_SIZE;
3181 continue;
3182
3183 default:
3184 return consumed;
3185 }
3186
3187 if (any)
3188 {
/* Classes and character types are recorded as "every bit may differ".
This is not attempted in UTF mode. */
3189 #ifdef SUPPORT_UTF
3190 if (common->utf) return consumed;
3191 #endif
3192 #if defined COMPILE_PCRE8
3193 mask = 0xff;
3194 #elif defined COMPILE_PCRE16
3195 mask = 0xffff;
3196 #elif defined COMPILE_PCRE32
3197 mask = 0xffffffff;
3198 #else
3199 SLJIT_ASSERT_STOP();
3200 #endif
3201
3202 do
3203 {
3204 chars[0] = mask;
3205 chars[1] = mask;
3206
3207 if (--max_chars == 0)
3208 return consumed;
3209 consumed++;
3210 chars += 2;
3211 }
3212 while (--repeat > 0);
3213
3214 repeat = 1;
3215 continue;
3216 }
3217
/* Literal character: len is its length in code units. */
3218 len = 1;
3219 #ifdef SUPPORT_UTF
3220 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3221 #endif
3222
3223 if (caseless != 0 && char_has_othercase(common, cc))
3224 {
3225 caseless = char_get_othercase_bit(common, cc);
3226 if (caseless == 0)
3227 return consumed;
/* Repack the result: the loop below compares the low byte against the
remaining "len" to pick the affected code unit and uses caseless >> 8 as
that unit's mask.
NOTE(review): the layout of the value returned by char_get_othercase_bit
is defined elsewhere - confirm the shifts against that helper. */
3228 #ifdef COMPILE_PCRE8
3229 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3230 #else
3231 if ((caseless & 0x100) != 0)
3232 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
3233 else
3234 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
3235 #endif
3236 }
3237 else
3238 caseless = 0;
3239
/* The character is recorded "repeat" times; cc and len are rewound to the
saved values before each further copy. */
3240 len_save = len;
3241 cc_save = cc;
3242 while (TRUE)
3243 {
3244 do
3245 {
3246 chr = *cc;
3247 #ifdef COMPILE_PCRE32
3248 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3249 return consumed;
3250 #endif
3251 mask = 0;
3252 if (len == (caseless & 0xff))
3253 {
3254 mask = caseless >> 8;
3255 chr |= mask;
3256 }
3257
3258 if (chars[0] == NOTACHAR)
3259 {
/* First value seen at this position. */
3260 chars[0] = chr;
3261 chars[1] = mask;
3262 }
3263 else
3264 {
/* Merge with what an earlier alternative stored: every bit on which the
two values disagree becomes part of the mask. */
3265 mask |= chars[0] ^ chr;
3266 chr |= mask;
3267 chars[0] = chr;
3268 chars[1] |= mask;
3269 }
3270
3271 len--;
3272 if (--max_chars == 0)
3273 return consumed;
3274 consumed++;
3275 chars += 2;
3276 cc++;
3277 }
3278 while (len > 0);
3279
3280 if (--repeat == 0)
3281 break;
3282
3283 len = len_save;
3284 cc = cc_save;
3285 }
3286
3287 repeat = 1;
3288 if (last)
3289 return consumed;
3290 }
3291 }
3292
3293 #define MAX_N_CHARS 16
3294
3295 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3296 {
3297 DEFINE_COMPILER;
3298 struct sljit_label *start;
3299 struct sljit_jump *quit;
3300 pcre_uint32 chars[MAX_N_CHARS * 2];
3301 pcre_uint8 ones[MAX_N_CHARS];
3302 pcre_uint32 mask;
3303 int i, max;
3304 int offsets[3];
3305
3306 for (i = 0; i < MAX_N_CHARS; i++)
3307 {
3308 chars[i << 1] = NOTACHAR;
3309 chars[(i << 1) + 1] = 0;
3310 }
3311
3312 max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3313
3314 if (max <= 1)
3315 return FALSE;
3316
3317 for (i = 0; i < max; i++)
3318 {
3319 mask = chars[(i << 1) + 1];
3320 ones[i] = ones_in_half_byte[mask & 0xf];
3321 mask >>= 4;
3322 while (mask != 0)
3323 {
3324 ones[i] += ones_in_half_byte[mask & 0xf];
3325 mask >>= 4;
3326 }
3327 }
3328
3329 offsets[0] = -1;
3330 /* Scan forward. */
3331 for (i = 0; i < max; i++)
3332 if (ones[i] <= 2) {
3333 offsets[0] = i;
3334 break;
3335 }
3336
3337 if (offsets[0] == -1)
3338 return FALSE;
3339
3340 /* Scan backward. */
3341 offsets[1] = -1;
3342 for (i = max - 1; i > offsets[0]; i--)
3343 if (ones[i] <= 2) {
3344 offsets[1] = i;
3345 break;
3346 }
3347
3348 offsets[2] = -1;
3349 if (offsets[1] >= 0)
3350 {
3351 /* Scan from middle. */
3352 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3353 if (ones[i] <= 2)
3354 {
3355 offsets[2] = i;
3356 break;
3357 }
3358
3359 if (offsets[2] == -1)
3360 {
3361 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3362 if (ones[i] <= 2)
3363 {
3364 offsets[2] = i;
3365 break;
3366 }
3367 }
3368 }
3369
3370 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3371 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3372
3373 chars[0] = chars[offsets[0] << 1];
3374 chars[1] = chars[(offsets[0] << 1) + 1];
3375 if (offsets[2] >= 0)
3376 {
3377 chars[2] = chars[offsets[2] << 1];
3378 chars[3] = chars[(offsets[2] << 1) + 1];
3379 }
3380 if (offsets[1] >= 0)
3381 {
3382 chars[4] = chars[offsets[1] << 1];
3383 chars[5] = chars[(offsets[1] << 1) + 1];
3384 }
3385
3386 max -= 1;
3387 if (firstline)
3388 {
3389 SLJIT_ASSERT(common->first_line_end != 0);
3390 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3391 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3392 }
3393 else
3394 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3395
3396 start = LABEL();
3397 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3398
3399 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3400 if (offsets[1] >= 0)
3401 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3402 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3403
3404 if (chars[1] != 0)
3405 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3406 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3407 if (offsets[2] >= 0)
3408 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3409
3410 if (offsets[1] >= 0)
3411 {
3412 if (chars[5] != 0)
3413 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3414 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3415 }
3416
3417 if (offsets[2] >= 0)
3418 {
3419 if (chars[3] != 0)
3420 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3421 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3422 }
3423 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3424
3425 JUMPHERE(quit);
3426
3427 if (firstline)
3428 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3429 else
3430 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3431 return TRUE;
3432 }
3433
3434 #undef MAX_N_CHARS
3435
3436 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3437 {
3438 DEFINE_COMPILER;
3439 struct sljit_label *start;
3440 struct sljit_jump *quit;
3441 struct sljit_jump *found;
3442 pcre_uchar oc, bit;
3443
3444 if (firstline)
3445 {
3446 SLJIT_ASSERT(common->first_line_end != 0);
3447 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3448 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3449 }
3450
3451 start = LABEL();
3452 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3453 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3454
3455 oc = first_char;
3456 if (caseless)
3457 {
3458 oc = TABLE_GET(first_char, common->fcc, first_char);
3459 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3460 if (first_char > 127 && common->utf)
3461 oc = UCD_OTHERCASE(first_char);
3462 #endif
3463 }
3464 if (first_char == oc)
3465 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3466 else
3467 {
3468 bit = first_char ^ oc;
3469 if (is_powerof2(bit))
3470 {
3471 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3472 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3473 }
3474 else
3475 {
3476 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3477 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3478 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3479 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3480 found = JUMP(SLJIT_C_NOT_ZERO);
3481 }
3482 }
3483
3484 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3485 JUMPTO(SLJIT_JUMP, start);
3486 JUMPHERE(found);
3487 JUMPHERE(quit);
3488
3489 if (firstline)
3490 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3491 }
3492
3493 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3494 {
3495 DEFINE_COMPILER;
3496 struct sljit_label *loop;
3497 struct sljit_jump *lastchar;
3498 struct sljit_jump *firstchar;
3499 struct sljit_jump *quit;
3500 struct sljit_jump *foundcr = NULL;
3501 struct sljit_jump *notfoundnl;
3502 jump_list *newline = NULL;
3503
3504 if (firstline)
3505 {
3506 SLJIT_ASSERT(common->first_line_end != 0);
3507 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3508 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3509 }
3510
3511 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3512 {
3513 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3514 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3515 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3516 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3517 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3518
3519 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3520 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3521 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3522 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3523 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3524 #endif
3525 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3526
3527 loop = LABEL();
3528 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3529 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3530 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3531 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3532 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3533 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3534
3535 JUMPHERE(quit);
3536 JUMPHERE(firstchar);
3537 JUMPHERE(lastchar);
3538
3539 if (firstline)
3540 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3541 return;
3542 }
3543
3544 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3545 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3546 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3547 skip_char_back(common);
3548
3549 loop = LABEL();
3550 read_char(common);
3551 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3552 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3553 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3554 check_newlinechar(common, common->nltype, &newline, FALSE);
3555 set_jumps(newline, loop);
3556
3557 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3558 {
3559 quit = JUMP(SLJIT_JUMP);
3560 JUMPHERE(foundcr);
3561 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3562 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3563 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3564 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3565 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3566 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3567 #endif
3568 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3569 JUMPHERE(notfoundnl);
3570 JUMPHERE(quit);
3571 }
3572 JUMPHERE(lastchar);
3573 JUMPHERE(firstchar);
3574
3575 if (firstline)
3576 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3577 }
3578
3579 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3580
3581 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3582 {
3583 DEFINE_COMPILER;
3584 struct sljit_label *start;
3585 struct sljit_jump *quit;
3586 struct sljit_jump *found = NULL;
3587 jump_list *matches = NULL;
3588 #ifndef COMPILE_PCRE8
3589 struct sljit_jump *jump;
3590 #endif
3591
3592 if (firstline)
3593 {
3594 SLJIT_ASSERT(common->first_line_end != 0);
3595 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3596 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3597 }
3598
3599 start = LABEL();
3600 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3601 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3602 #ifdef SUPPORT_UTF
3603 if (common->utf)
3604 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3605 #endif
3606
3607 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3608 {
3609 #ifndef COMPILE_PCRE8
3610 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3611 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3612 JUMPHERE(jump);
3613 #endif
3614 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3615 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3616 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3617 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3618 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3619 found = JUMP(SLJIT_C_NOT_ZERO);
3620 }
3621
3622 #ifdef SUPPORT_UTF
3623 if (common->utf)
3624 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3625 #endif
3626 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3627 #ifdef SUPPORT_UTF
3628 #if defined COMPILE_PCRE8
3629 if (common->utf)
3630 {
3631 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3632 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3633 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3634 }
3635 #elif defined COMPILE_PCRE16
3636 if (common->utf)
3637 {
3638 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3639 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3640 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3641 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3642 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3643 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3644 }
3645 #endif /* COMPILE_PCRE[8|16] */
3646 #endif /* SUPPORT_UTF */
3647 JUMPTO(SLJIT_JUMP, start);
3648 if (found != NULL)
3649 JUMPHERE(found);
3650 if (matches != NULL)
3651 set_jumps(matches, LABEL());
3652 JUMPHERE(quit);
3653
3654 if (firstline)
3655 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3656 }
3657
3658 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3659 {
3660 DEFINE_COMPILER;
3661 struct sljit_label *loop;
3662 struct sljit_jump *toolong;
3663 struct sljit_jump *alreadyfound;
3664 struct sljit_jump *found;
3665 struct sljit_jump *foundoc = NULL;
3666 struct sljit_jump *notfound;
3667 pcre_uint32 oc, bit;
3668
3669 SLJIT_ASSERT(common->req_char_ptr != 0);
3670 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3671 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3672 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3673 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3674
3675 if (has_firstchar)
3676 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3677 else
3678 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3679
3680 loop = LABEL();
3681 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3682
3683 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3684 oc = req_char;
3685 if (caseless)
3686 {
3687 oc = TABLE_GET(req_char, common->fcc, req_char);
3688 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3689 if (req_char > 127 && common->utf)
3690 oc = UCD_OTHERCASE(req_char);
3691 #endif
3692 }
3693 if (req_char == oc)
3694 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3695 else
3696 {
3697 bit = req_char ^ oc;
3698 if (is_powerof2(bit))
3699 {
3700 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3701 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3702 }
3703 else
3704 {
3705 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3706 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3707 }
3708 }
3709 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3710 JUMPTO(SLJIT_JUMP, loop);
3711
3712 JUMPHERE(found);
3713 if (foundoc)
3714 JUMPHERE(foundoc);
3715 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3716 JUMPHERE(alreadyfound);
3717 JUMPHERE(toolong);
3718 return notfound;
3719 }
3720
3721 static void do_revertframes(compiler_common *common)
3722 {
3723 DEFINE_COMPILER;
3724 struct sljit_jump *jump;
3725 struct sljit_label *mainloop;
3726
3727 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3728 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3729 GET_LOCAL_BASE(TMP3, 0, 0);
3730
3731 /* Drop frames until we reach STACK_TOP. */
3732 mainloop = LABEL();
3733 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3734 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3735 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3736
3737 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3738 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3739 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3740 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3741 JUMPTO(SLJIT_JUMP, mainloop);
3742
3743 JUMPHERE(jump);
3744 jump = JUMP(SLJIT_C_SIG_LESS);
3745 /* End of dropping frames. */
3746 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3747
3748 JUMPHERE(jump);
3749 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3750 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3751 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3752 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3753 JUMPTO(SLJIT_JUMP, mainloop);
3754 }
3755
/* Emits a fast-call routine that compares the "word character" status of the
character before STR_PTR with that of the character at STR_PTR. The return
address is kept in LOCALS0 and the status of the previous character (0 or 1)
in LOCALS1; the status of the current character is built in TMP2. The final
XOR with SLJIT_SET_E leaves the flags describing whether the two values
differ. */
3756 static void check_wordboundary(compiler_common *common)
3757 {
3758 DEFINE_COMPILER;
3759 struct sljit_jump *skipread;
3760 jump_list *skipread_list = NULL;
3761 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3762 struct sljit_jump *jump;
3763 #endif
3764
/* The table lookups below shift by 4 to isolate the ctype_word bit. */
3765 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3766
3767 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3768 /* Get type of the previous char, and put it to LOCALS1. */
3769 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3770 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
/* LOCALS1 defaults to 0; it keeps that value when STR_PTR is at (or before)
"begin" and there is no previous character to read. */
3771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3772 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3773 skip_char_back(common);
3774 check_start_used_ptr(common);
3775 read_char(common);
3776
3777 /* Testing char type. */
3778 #ifdef SUPPORT_UCP
3779 if (common->use_ucp)
3780 {
/* TMP2 starts as 1 so that an underscore counts as a word character without
calling getucd. Otherwise the value left in TMP1 by getucd is range-checked
twice: ucp_Ll..ucp_Lu and ucp_Nd..ucp_No. */
3781 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3782 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3783 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3784 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3785 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3786 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3787 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3788 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3789 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3790 JUMPHERE(jump);
3791 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3792 }
3793 else
3794 #endif
3795 {
/* Table based test: characters above 255 skip the lookup and keep the 0
already stored in LOCALS1. */
3796 #ifndef COMPILE_PCRE8
3797 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3798 #elif defined SUPPORT_UTF
3799 /* Here LOCALS1 has already been zeroed. */
3800 jump = NULL;
3801 if (common->utf)
3802 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3803 #endif /* COMPILE_PCRE8 */
3804 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3805 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3806 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3807 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3808 #ifndef COMPILE_PCRE8
3809 JUMPHERE(jump);
3810 #elif defined SUPPORT_UTF
3811 if (jump != NULL)
3812 JUMPHERE(jump);
3813 #endif /* COMPILE_PCRE8 */
3814 }
3815 JUMPHERE(skipread);
3816
/* Now the character at STR_PTR. TMP2 defaults to 0, which is the value used
when check_str_end() adds a jump to skipread_list and the read is skipped. */
3817 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3818 check_str_end(common, &skipread_list);
3819 peek_char(common);
3820
3821 /* Testing char type. This is a code duplication. */
3822 #ifdef SUPPORT_UCP
3823 if (common->use_ucp)
3824 {
3825 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3826 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3827 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3828 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3829 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3830 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3831 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3832 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3833 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3834 JUMPHERE(jump);
3835 }
3836 else
3837 #endif
3838 {
3839 #ifndef COMPILE_PCRE8
3840 /* TMP2 may be destroyed by peek_char. */
3841 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3842 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3843 #elif defined SUPPORT_UTF
3844 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3845 jump = NULL;
3846 if (common->utf)
3847 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3848 #endif
3849 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3850 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3851 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3852 #ifndef COMPILE_PCRE8
3853 JUMPHERE(jump);
3854 #elif defined SUPPORT_UTF
3855 if (jump != NULL)
3856 JUMPHERE(jump);
3857 #endif /* COMPILE_PCRE8 */
3858 }
3859 set_jumps(skipread_list, LABEL());
3860
/* The flags are set from (current status XOR previous status). */
3861 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3862 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3863 }
3864
/* Tries to express the 256-bit class bitmap "bits" as a few range
comparisons on the character held in TMP1.

The bitmap is converted to a list of positions where membership flips; when
that list has at most four entries, compare-and-jump instructions are emitted
and added to *backtracks. Without "invert" the jumps are taken for characters
that are not in the set; with "invert" the sense is reversed. TMP1 may be
modified by the emitted code.

Returns TRUE when code was emitted (or no test is needed) and FALSE when the
bitmap is too complex; nothing is emitted before a FALSE return.

NOTE(review): "nclass" presumably says that characters above 255 belong to
the set - confirm against the callers. */
3865 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
3866 {
3867 DEFINE_COMPILER;
3868 int ranges[MAX_RANGE_SIZE];
3869 pcre_uint8 bit, cbit, all;
3870 int i, byte, length = 0;
3871
3872 bit = bits[0] & 0x1;
3873 /* All bits will be zero or one (since bit is zero or one). */
3874 all = -bit;
3875
/* Record every position where the bit value changes. A whole byte equal to
"all" cannot contain a change and is skipped in one step. */
3876 for (i = 0; i < 256; )
3877 {
3878 byte = i >> 3;
3879 if ((i & 0x7) == 0 && bits[byte] == all)
3880 i += 8;
3881 else
3882 {
3883 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3884 if (cbit != bit)
3885 {
3886 if (length >= MAX_RANGE_SIZE)
3887 return FALSE;
3888 ranges[length] = i;
3889 length++;
3890 bit = cbit;
3891 all = -cbit;
3892 }
3893 i++;
3894 }
3895 }
3896
/* "bit" is now the value for character 255. If membership above 255 differs
from it, one more change is recorded at 256. */
3897 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3898 {
3899 if (length >= MAX_RANGE_SIZE)
3900 return FALSE;
3901 ranges[length] = 256;
3902 length++;
3903 }
3904
/* Only up to four boundaries are handled by the code below. */
3905 if (length < 0 || length > 4)
3906 return FALSE;
3907
/* From here on "bit" is the (possibly inverted) membership of character 0. */
3908 bit = bits[0] & 0x1;
3909 if (invert) bit ^= 0x1;
3910
3911 /* No character is accepted. */
3912 if (length == 0 && bit == 0)
3913 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3914
3915 switch(length)
3916 {
3917 case 0:
3918 /* When bit != 0, all characters are accepted. */
3919 return TRUE;
3920
3921 case 1:
/* A single boundary: one unsigned comparison against it. */
3922 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3923 return TRUE;
3924
3925 case 2:
/* One interval [ranges[0], ranges[1]): rebase TMP1 to ranges[0] and compare
against the length, or use a plain equality test for a single value. */
3926 if (ranges[0] + 1 != ranges[1])
3927 {
3928 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3929 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3930 }
3931 else
3932 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3933 return TRUE;
3934
3935 case 3:
/* Three boundaries: one open-ended comparison plus one interval test. */
3936 if (bit != 0)
3937 {
3938 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3939 if (ranges[0] + 1 != ranges[1])
3940 {
3941 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3942 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3943 }
3944 else
3945 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3946 return TRUE;
3947 }
3948
3949 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
3950 if (ranges[1] + 1 != ranges[2])
3951 {
3952 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
3953 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3954 }
3955 else
3956 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
3957 return TRUE;
3958
3959 case 4:
/* Two intervals of equal length whose starts differ in exactly one bit that
is clear in ranges[0]: OR that bit into TMP1 and test a single interval. */
3960 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
3961 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
3962 && is_powerof2(ranges[2] - ranges[0]))
3963 {
3964 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
3965 if (ranges[2] + 1 != ranges[3])
3966 {
3967 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3968 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3969 }
3970 else
3971 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3972 return TRUE;
3973 }
3974
3975 if (bit != 0)
3976 {
/* Two separate interval tests. "i" remembers how much has already been
subtracted from TMP1 so the second test can be rebased correctly. */
3977 i = 0;
3978 if (ranges[0] + 1 != ranges[1])
3979 {
3980 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3981 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3982 i = ranges[0];
3983 }
3984 else
3985 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3986
3987 if (ranges[2] + 1 != ranges[3])
3988 {
3989 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
3990 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3991 }
3992 else
3993 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
3994 return TRUE;
3995 }
3996
/* bit == 0: first test the outer span [ranges[0], ranges[3]), then the gap
[ranges[1], ranges[2]) inside it. TMP1 is already rebased to ranges[0]. */
3997 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3998 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
3999 if (ranges[1] + 1 != ranges[2])
4000 {
4001 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
4002 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
4003 }
4004 else
4005 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
4006 return TRUE;
4007
4008 default:
/* Not reachable: length was limited to 0..4 above. */
4009 SLJIT_ASSERT_STOP();
4010 return FALSE;
4011 }
4012 }
4013
/* Emits a fast-call routine that tests the character in TMP1 against
0x0a-0x0d and 0x85 and, outside 8-bit non-UTF mode, 0x2028 and 0x2029. The
individual results are OR-ed together in TMP2, and the last OP_FLAGS (with
SLJIT_SET_E) sets the flags from the combined result. TMP1 is modified. */
4014 static void check_anynewline(compiler_common *common)
4015 {
4016 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4017 DEFINE_COMPILER;
4018
4019 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4020
/* TMP1 is rebased to 0x0a, so 0x0a..0x0d becomes one unsigned range test. */
4021 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4022 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4023 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4024 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4025 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4026 #ifdef COMPILE_PCRE8
4027 if (common->utf)
4028 {
4029 #endif
/* Setting the lowest bit lets a single comparison cover two adjacent
values (0x2028 and 0x2029 before rebasing). */
4030 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4031 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4032 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4033 #ifdef COMPILE_PCRE8
4034 }
4035 #endif
4036 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4037 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4038 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4039 }
4040
/* Emits a fast-call routine that tests the character in TMP1 against 0x09,
0x20 and 0xa0 and, outside 8-bit non-UTF mode, 0x1680, 0x180e, 0x2000-0x200a,
0x202f, 0x205f and 0x3000. The individual results are OR-ed together in TMP2,
and the last OP_FLAGS (with SLJIT_SET_E) sets the flags from the combined
result. TMP1 is modified in the wide-character part. */
4041 static void check_hspace(compiler_common *common)
4042 {
4043 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
4044 DEFINE_COMPILER;
4045
4046 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4047
4048 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4049 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4050 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4051 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4052 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4053 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4054 #ifdef COMPILE_PCRE8
4055 if (common->utf)
4056 {
4057 #endif
4058 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4059 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4060 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4061 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4062 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here TMP1 is rebased to 0x2000; the remaining constants are
relative to that value. */
4063 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4064 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4065 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4066 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4067 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4068 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4069 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4070 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4071 #ifdef COMPILE_PCRE8
4072 }
4073 #endif
4074 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Folds in the result of the last comparison emitted above. */
4075 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4076
4077 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4078 }
4079
/* Emits a fast-call routine that tests the character in TMP1 against
0x0a-0x0d and 0x85 and, outside 8-bit non-UTF mode, 0x2028 and 0x2029. The
individual results are OR-ed together in TMP2, and the last OP_FLAGS (with
SLJIT_SET_E) sets the flags from the combined result. TMP1 is modified. */
4080 static void check_vspace(compiler_common *common)
4081 {
4082 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
4083 DEFINE_COMPILER;
4084
4085 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4086
/* TMP1 is rebased to 0x0a, so 0x0a..0x0d becomes one unsigned range test. */
4087 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4088 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4089 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4090 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4091 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4092 #ifdef COMPILE_PCRE8
4093 if (common->utf)
4094 {
4095 #endif
/* Setting the lowest bit lets a single comparison cover two adjacent
values (0x2028 and 0x2029 before rebasing). */
4096 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4097 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4098 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4099 #ifdef COMPILE_PCRE8
4100 }
4101 #endif
4102 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4103 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4104
4105 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4106 }
4107
4108 #define CHAR1 STR_END
4109 #define CHAR2 STACK_TOP
4110
4111 static void do_casefulcmp(compiler_common *common)
4112 {
4113 DEFINE_COMPILER;
4114 struct sljit_jump *jump;
4115 struct sljit_label *label;
4116
4117 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4118 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4119 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4120 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
4121 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4122 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4123
4124 label = LABEL();
4125 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4126 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4127 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4128 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4129 JUMPTO(SLJIT_C_NOT_ZERO, label);
4130
4131 JUMPHERE(jump);
4132 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4133 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4134 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4135 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4136 }
4137
4138 #define LCC_TABLE STACK_LIMIT
4139
4140 static void do_caselesscmp(compiler_common *common)
4141 {
4142 DEFINE_COMPILER;
4143 struct sljit_jump *jump;
4144 struct sljit_label *label;
4145
4146 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4147 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4148
4149 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4150 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
4151 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
4152 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4153 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4154 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4155
4156 label = LABEL();
4157 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4158 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4159 #ifndef COMPILE_PCRE8
4160 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4161 #endif
4162 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4163 #ifndef COMPILE_PCRE8
4164 JUMPHERE(jump);
4165 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4166 #endif
4167 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4168 #ifndef COMPILE_PCRE8
4169 JUMPHERE(jump);
4170 #endif
4171 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4172 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4173 JUMPTO(SLJIT_C_NOT_ZERO, label);
4174
4175 JUMPHERE(jump);
4176 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4177 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4178 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4179 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4180 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4181 }
4182
4183 #undef LCC_TABLE
4184 #undef CHAR1
4185 #undef CHAR2
4186
4187 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4188
4189 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4190 {
4191 /* This function would be ineffective to do in JIT level. */
4192 pcre_uint32 c1, c2;
4193 const pcre_uchar *src2 = args->uchar_ptr;
4194 const pcre_uchar *end2 = args->end;
4195 const ucd_record *ur;
4196 const pcre_uint32 *pp;
4197
4198 while (src1 < end1)
4199 {
4200 if (src2 >= end2)
4201 return (pcre_uchar*)1;
4202 GETCHARINC(c1, src1);
4203 GETCHARINC(c2, src2);
4204 ur = GET_UCD(c2);
4205 if (c1 != c2 && c1 != c2 + ur->other_case)
4206 {
4207 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4208 for (;;)
4209 {
4210 if (c1 < *pp) return NULL;
4211 if (c1 == *pp++) break;
4212 }
4213 }
4214 }
4215 return src2;
4216 }
4217
4218 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4219
4220 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4221 compare_context* context, jump_list **backtracks)
4222 {
4223 DEFINE_COMPILER;
4224 unsigned int othercasebit = 0;
4225 pcre_uchar *othercasechar = NULL;
4226 #ifdef SUPPORT_UTF
4227 int utflength;
4228 #endif
4229
4230 if (caseless && char_has_othercase(common, cc))
4231 {
4232 othercasebit = char_get_othercase_bit(common, cc);
4233 SLJIT_ASSERT(othercasebit);
4234 /* Extracting bit difference info. */
4235 #if defined COMPILE_PCRE8
4236 othercasechar = cc + (othercasebit >> 8);
4237 othercasebit &= 0xff;
4238 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4239 /* Note that this code only handles characters in the BMP. If there
4240 ever are characters outside the BMP whose othercase differs in only one
4241 bit from itself (there currently are none), this code will need to be
4242 revised for COMPILE_PCRE32. */
4243 othercasechar = cc + (othercasebit >> 9);
4244 if ((othercasebit & 0x100) != 0)
4245 othercasebit = (othercasebit & 0xff) << 8;
4246 else
4247 othercasebit &= 0xff;
4248 #endif /* COMPILE_PCRE[8|16|32] */
4249 }
4250
4251 if (context->sourcereg == -1)
4252 {
4253 #if defined COMPILE_PCRE8
4254 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4255 if (context->length >= 4)
4256 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4257 else if (context->length >= 2)
4258 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4259 else
4260 #endif
4261 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4262 #elif defined COMPILE_PCRE16
4263 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4264 if (context->length >= 4)
4265 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4266 else
4267 #endif
4268 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4269 #elif defined COMPILE_PCRE32
4270 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4271 #endif /* COMPILE_PCRE[8|16|32] */
4272 context->sourcereg = TMP2;
4273 }
4274
4275 #ifdef SUPPORT_UTF
4276 utflength = 1;
4277 if (common->utf && HAS_EXTRALEN(*cc))
4278 utflength += GET_EXTRALEN(*cc);
4279
4280 do
4281 {
4282 #endif
4283
4284 context->length -= IN_UCHARS(1);
4285 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4286
4287 /* Unaligned read is supported. */
4288 if (othercasebit != 0 && othercasechar == cc)
4289 {
4290 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4291 context->oc.asuchars[context->ucharptr] = othercasebit;
4292 }
4293 else
4294 {
4295 context->c.asuchars[context->ucharptr] = *cc;
4296 context->oc.asuchars[context->ucharptr] = 0;
4297 }
4298 context->ucharptr++;
4299
4300 #if defined COMPILE_PCRE8
4301 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4302 #else
4303 if (context->ucharptr >= 2 || context->length == 0)
4304 #endif
4305 {
4306 if (context->length >= 4)
4307 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4308 else if (context->length >= 2)
4309 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4310 #if defined COMPILE_PCRE8
4311 else if (context->length >= 1)
4312 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4313 #endif /* COMPILE_PCRE8 */
4314 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4315
4316 switch(context->ucharptr)
4317 {
4318 case 4 / sizeof(pcre_uchar):
4319 if (context->oc.asint != 0)
4320 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4321 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4322 break;
4323
4324 case 2 / sizeof(pcre_uchar):
4325 if (context->oc.asushort != 0)
4326 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4327 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4328 break;
4329
4330 #ifdef COMPILE_PCRE8
4331 case 1:
4332 if (context->oc.asbyte != 0)
4333 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4334 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4335 break;
4336 #endif
4337
4338 default:
4339 SLJIT_ASSERT_STOP();
4340 break;
4341 }
4342 context->ucharptr = 0;
4343 }
4344
4345 #else
4346
4347 /* Unaligned read is unsupported or in 32 bit mode. */
4348 if (context->length >= 1)
4349 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4350
4351 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4352
4353 if (othercasebit != 0 && othercasechar == cc)
4354 {
4355 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4356 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4357 }
4358 else
4359 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4360
4361 #endif
4362
4363 cc++;
4364 #ifdef SUPPORT_UTF
4365 utflength--;
4366 }
4367 while (utflength > 0);
4368 #endif
4369
4370 return cc;
4371 }
4372
4373 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4374
/* The following two macros keep a register biased by a compile time known
offset: typereg holds "type - typeoffset" and TMP1 holds "char - charoffset".
When the requested bias differs from the current one, a single SUB or ADD is
emitted to rebias the register, and the tracking variable is updated. The
variables typereg / typeoffset / charoffset must be in scope at the point of
use. The argument is evaluated more than once, so it must not have side
effects. Each macro expands to a single statement (do / while (0)), which
makes it safe inside unbraced if / else bodies. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4394
4395 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4396 {
4397 DEFINE_COMPILER;
4398 jump_list *found = NULL;
4399 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4400 pcre_int32 c, charoffset;
4401 struct sljit_jump *jump = NULL;
4402 pcre_uchar *ccbegin;
4403 int compares, invertcmp, numberofcmps;
4404
4405 #ifdef SUPPORT_UCP
4406 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4407 BOOL charsaved = FALSE;
4408 int typereg = TMP1, scriptreg = TMP1;
4409 const pcre_uint32 *other_cases;
4410 pcre_int32 typeoffset;
4411 #endif
4412
4413 /* Although SUPPORT_UTF must be defined, we are
4414 not necessary in utf mode even in 8 bit mode. */
4415 detect_partial_match(common, backtracks);
4416 read_char(common);
4417
4418 cc++;
4419 if ((cc[-1] & XCL_HASPROP) == 0)
4420 {
4421 if ((cc[-1] & XCL_MAP) != 0)
4422 {
4423 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4424 #ifdef SUPPORT_UCP
4425 charsaved = TRUE;
4426 #endif
4427 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, FALSE, backtracks))
4428 {
4429 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4430
4431 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4432 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4433 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4434 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4435 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4436 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4437 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4438
4439 JUMPHERE(jump);
4440 }
4441 else
4442 add_jump(compiler, &found, CMP(SLJIT_C_LESS_EQUAL, TMP3, 0, SLJIT_IMM, 0xff));
4443
4444 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4445 cc += 32 / sizeof(pcre_uchar);
4446 }
4447 else
4448 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));
4449 }
4450 else if ((cc[-1] & XCL_MAP) != 0)
4451 {
4452 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4453 #ifdef SUPPORT_UCP
4454 charsaved = TRUE;
4455 #endif
4456 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4457 {
4458 #ifdef COMPILE_PCRE8
4459 SLJIT_ASSERT(common->utf);
4460 #endif
4461 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4462
4463 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4464 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4465 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4466 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4467 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4468 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4469
4470 JUMPHERE(jump);
4471 }
4472
4473 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4474 cc += 32 / sizeof(pcre_uchar);
4475 }
4476
4477 /* Scanning the necessary info. */
4478 ccbegin = cc;
4479 compares = 0;
4480 while (*cc != XCL_END)
4481 {
4482 compares++;
4483 if (*cc == XCL_SINGLE)
4484 {
4485 cc += 2;
4486 #ifdef SUPPORT_UTF
4487 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4488 #endif
4489 #ifdef SUPPORT_UCP
4490 needschar = TRUE;
4491 #endif
4492 }
4493 else if (*cc == XCL_RANGE)
4494 {
4495 cc += 2;
4496 #ifdef SUPPORT_UTF
4497 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4498 #endif
4499 cc++;
4500 #ifdef SUPPORT_UTF
4501 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4502 #endif
4503 #ifdef SUPPORT_UCP
4504 needschar = TRUE;
4505 #endif
4506 }
4507 #ifdef SUPPORT_UCP
4508 else
4509 {
4510 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4511 cc++;
4512 switch(*cc)
4513 {
4514 case PT_ANY:
4515 break;
4516
4517 case PT_LAMP:
4518 case PT_GC:
4519 case PT_PC:
4520 case PT_ALNUM:
4521 needstype = TRUE;
4522 break;
4523
4524 case PT_SC:
4525 needsscript = TRUE;
4526 break;
4527
4528 case PT_SPACE:
4529 case PT_PXSPACE:
4530 case PT_WORD:
4531 case PT_PXGRAPH:
4532 case PT_PXPRINT:
4533 case PT_PXPUNCT:
4534 needstype = TRUE;
4535 needschar = TRUE;
4536 break;
4537
4538 case PT_CLIST:
4539 case PT_UCNC:
4540 needschar = TRUE;
4541 break;
4542
4543 default:
4544 SLJIT_ASSERT_STOP();
4545 break;
4546 }
4547 cc += 2;
4548 }
4549 #endif
4550 }
4551
4552 #ifdef SUPPORT_UCP
4553 /* Simple register allocation. TMP1 is preferred if possible. */
4554 if (needstype || needsscript)
4555 {
4556 if (needschar && !charsaved)
4557 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4558 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4559 if (needschar)
4560 {
4561 if (needstype)
4562 {
4563 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4564 typereg = RETURN_ADDR;
4565 }
4566
4567 if (needsscript)
4568 scriptreg = TMP3;
4569 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4570 }
4571 else if (needstype && needsscript)
4572 scriptreg = TMP3;
4573 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4574
4575 if (needsscript)
4576 {
4577 if (scriptreg == TMP1)
4578 {
4579 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4580 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4581 }
4582 else
4583 {
4584 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4585 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4586 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4587 }
4588 }
4589 }
4590 #endif
4591
4592 /* Generating code. */
4593 cc = ccbegin;
4594 charoffset = 0;
4595 numberofcmps = 0;
4596 #ifdef SUPPORT_UCP
4597 typeoffset = 0;
4598 #endif
4599
4600 while (*cc != XCL_END)
4601 {
4602 compares--;
4603 invertcmp = (compares == 0 && list != backtracks);
4604 jump = NULL;
4605
4606 if (*cc == XCL_SINGLE)
4607 {
4608 cc ++;
4609 #ifdef SUPPORT_UTF
4610 if (common->utf)
4611 {
4612 GETCHARINC(c, cc);
4613 }
4614 else
4615 #endif
4616 c = *cc++;
4617
4618 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4619 {
4620 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4621 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4622 numberofcmps++;
4623 }
4624 else if (numberofcmps > 0)
4625 {
4626 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4627 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4628 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4629 numberofcmps = 0;
4630 }
4631 else
4632 {
4633 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4634 numberofcmps = 0;
4635 }
4636 }
4637 else if (*cc == XCL_RANGE)
4638 {
4639 cc ++;
4640 #ifdef SUPPORT_UTF
4641 if (common->utf)
4642 {
4643 GETCHARINC(c, cc);
4644 }
4645 else
4646 #endif
4647 c = *cc++;
4648 SET_CHAR_OFFSET(c);
4649 #ifdef SUPPORT_UTF
4650 if (common->utf)
4651 {
4652 GETCHARINC(c, cc);
4653 }
4654 else
4655 #endif
4656 c = *cc++;
4657 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4658 {
4659 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4660 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4661 numberofcmps++;
4662 }
4663 else if (numberofcmps > 0)
4664 {
4665 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4666 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4667 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4668 numberofcmps = 0;
4669 }
4670 else
4671 {
4672 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4673 numberofcmps = 0;
4674 }
4675 }
4676 #ifdef SUPPORT_UCP
4677 else
4678 {
4679 if (*cc == XCL_NOTPROP)
4680 invertcmp ^= 0x1;
4681 cc++;
4682 switch(*cc)
4683 {
4684 case PT_ANY:
4685 if (list != backtracks)
4686 {
4687 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4688 continue;
4689 }
4690 else if (cc[-1] == XCL_NOTPROP)
4691 continue;
4692 jump = JUMP(SLJIT_JUMP);
4693 break;
4694
4695 case PT_LAMP:
4696 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4697 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4698 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4699 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4700 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4701 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4702 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4703 break;
4704
4705 case PT_GC:
4706 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4707 SET_TYPE_OFFSET(c);
4708 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4709 break;
4710
4711 case PT_PC:
4712 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4713 break;
4714
4715 case PT_SC:
4716 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4717 break;
4718
4719 case PT_SPACE:
4720 case PT_PXSPACE:
4721 SET_CHAR_OFFSET(9);
4722 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4723 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4724
4725 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4726 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4727
4728 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4729 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4730
4731 SET_TYPE_OFFSET(ucp_Zl);
4732 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4733 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4734 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4735 break;
4736
4737 case PT_WORD:
4738 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4739 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4740 /* Fall through. */
4741
4742 case PT_ALNUM:
4743 SET_TYPE_OFFSET(ucp_Ll);
4744 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4745 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4746 SET_TYPE_OFFSET(ucp_Nd);
4747 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4748 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4749 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4750 break;
4751
4752 case PT_CLIST:
4753 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4754
4755 /* At least three characters are required.
4756 Otherwise this case would be handled by the normal code path. */
4757 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4758 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4759
4760 /* Optimizing character pairs, if their difference is power of 2. */
4761 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4762 {
4763 if (charoffset == 0)
4764 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4765 else
4766 {
4767 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4768 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4769 }
4770 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4771 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4772 other_cases += 2;
4773 }
4774 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4775 {
4776 if (charoffset == 0)
4777 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4778 else
4779 {
4780 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4781 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4782 }
4783 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4784 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4785
4786 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4787 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4788
4789 other_cases += 3;
4790 }
4791 else
4792 {
4793 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4794 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4795 }
4796
4797 while (*other_cases != NOTACHAR)
4798 {
4799 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4800 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4801 }
4802 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4803 break;
4804
4805 case PT_UCNC:
4806 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4807 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4808 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4809 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4810 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4811 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4812
4813 SET_CHAR_OFFSET(0xa0);
4814 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4815 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4816 SET_CHAR_OFFSET(0);
4817 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4818 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4819 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4820 break;
4821
4822 case PT_PXGRAPH:
4823 /* C and Z groups are the farthest two groups. */
4824 SET_TYPE_OFFSET(ucp_Ll);
4825 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4826 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4827
4828 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4829
4830 /* In case of ucp_Cf, we overwrite the result. */
4831 SET_CHAR_OFFSET(0x2066);
4832 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4833 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4834
4835 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4836 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4837
4838 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4839 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4840
4841 JUMPHERE(jump);
4842 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4843 break;
4844
4845 case PT_PXPRINT:
4846 /* C and Z groups are the farthest two groups. */
4847 SET_TYPE_OFFSET(ucp_Ll);
4848 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4849 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4850
4851 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4852 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4853
4854 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4855
4856 /* In case of ucp_Cf, we overwrite the result. */
4857 SET_CHAR_OFFSET(0x2066);
4858 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4859 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4860
4861 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4862 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4863
4864 JUMPHERE(jump);
4865 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4866 break;
4867
4868 case PT_PXPUNCT:
4869 SET_TYPE_OFFSET(ucp_Sc);
4870 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4871 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4872
4873 SET_CHAR_OFFSET(0);
4874 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4875 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4876
4877 SET_TYPE_OFFSET(ucp_Pc);
4878 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4879 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4880 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4881 break;
4882 }
4883 cc += 2;
4884 }
4885 #endif
4886
4887 if (jump != NULL)
4888 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4889 }
4890
4891 if (found != NULL)
4892 set_jumps(found, LABEL());
4893 }
4894
4895 #undef SET_TYPE_OFFSET
4896 #undef SET_CHAR_OFFSET
4897
4898 #endif
4899
4900 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4901 {
4902 DEFINE_COMPILER;
4903 int length;
4904 unsigned int c, oc, bit;
4905 compare_context context;
4906 struct sljit_jump *jump[4];
4907 jump_list *end_list;
4908 #ifdef SUPPORT_UTF
4909 struct sljit_label *label;
4910 #ifdef SUPPORT_UCP
4911 pcre_uchar propdata[5];
4912 #endif
4913 #endif /* SUPPORT_UTF */
4914
4915 switch(type)
4916 {
4917 case OP_SOD:
4918 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4919 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4920 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4921 return cc;
4922
4923 case OP_SOM:
4924 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4925 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4926 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4927 return cc;
4928
4929 case OP_NOT_WORD_BOUNDARY:
4930 case OP_WORD_BOUNDARY:
4931 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4932 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4933 return cc;
4934
4935 case OP_NOT_DIGIT:
4936 case OP_DIGIT:
4937 /* Digits are usually 0-9, so it is worth to optimize them. */
4938 detect_partial_match(common, backtracks);
4939 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4940 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
4941 read_char7_type(common, type == OP_NOT_DIGIT);
4942 else
4943 #endif
4944 read_char8_type(common, type == OP_NOT_DIGIT);
4945 /* Flip the starting bit in the negative case. */
4946 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4947 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4948 return cc;
4949
4950 case OP_NOT_WHITESPACE:
4951 case OP_WHITESPACE:
4952 detect_partial_match(common, backtracks);
4953 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4954 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
4955 read_char7_type(common, type == OP_NOT_WHITESPACE);
4956 else
4957 #endif
4958 read_char8_type(common, type == OP_NOT_WHITESPACE);
4959 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4960 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4961 return cc;
4962
4963 case OP_NOT_WORDCHAR:
4964 case OP_WORDCHAR:
4965 detect_partial_match(common, backtracks);
4966 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4967 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
4968 read_char7_type(common, type == OP_NOT_WORDCHAR);
4969 else
4970 #endif
4971 read_char8_type(common, type == OP_NOT_WORDCHAR);
4972 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4973 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4974 return cc;
4975
4976 case OP_ANY:
4977 detect_partial_match(common, backtracks);
4978 read_char(common);
4979 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4980 {
4981 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4982 end_list = NULL;
4983 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4984 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4985 else
4986 check_str_end(common, &end_list);
4987
4988 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4989 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4990 set_jumps(end_list, LABEL());
4991 JUMPHERE(jump[0]);
4992 }
4993 else
4994 check_newlinechar(common, common->nltype, backtracks, TRUE);
4995 return cc;
4996
4997 case OP_ALLANY:
4998 detect_partial_match(common, backtracks);
4999 #ifdef SUPPORT_UTF
5000 if (common->utf)
5001 {
5002 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5003 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5004 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
5005 #if defined COMPILE_PCRE8
5006 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5007 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5008 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5009 #elif defined COMPILE_PCRE16
5010 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
5011 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
5012 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5013 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5014 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5015 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5016 #endif
5017 JUMPHERE(jump[0]);
5018 #endif /* COMPILE_PCRE[8|16] */
5019 return cc;
5020 }
5021 #endif
5022 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5023 return cc;
5024
5025 case OP_ANYBYTE:
5026 detect_partial_match(common, backtracks);
5027 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5028 return cc;
5029
5030 #ifdef SUPPORT_UTF
5031 #ifdef SUPPORT_UCP
5032 case OP_NOTPROP:
5033 case OP_PROP:
5034 propdata[0] = XCL_HASPROP;
5035 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5036 propdata[2] = cc[0];
5037 propdata[3] = cc[1];
5038 propdata[4] = XCL_END;
5039 compile_xclass_matchingpath(common, propdata, backtracks);
5040 return cc + 2;
5041 #endif
5042 #endif
5043
5044 case OP_ANYNL:
5045 detect_partial_match(common, backtracks);
5046 read_char(common);
5047 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5048 /* We don't need to handle soft partial matching case. */
5049 end_list = NULL;
5050 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5051 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5052 else
5053 check_str_end(common, &end_list);
5054 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5055 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5056 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5057 jump[2] = JUMP(SLJIT_JUMP);
5058 JUMPHERE(jump[0]);
5059 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5060 set_jumps(end_list, LABEL());
5061 JUMPHERE(jump[1]);
5062 JUMPHERE(jump[2]);
5063 return cc;
5064
5065 case OP_NOT_HSPACE:
5066 case OP_HSPACE:
5067 detect_partial_match(common, backtracks);
5068 read_char(common);
5069 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5070 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5071 return cc;
5072
5073 case OP_NOT_VSPACE:
5074 case OP_VSPACE:
5075 detect_partial_match(common, backtracks);
5076 read_char(common);
5077 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5078 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5079 return cc;
5080
5081 #ifdef SUPPORT_UCP
5082 case OP_EXTUNI:
5083 detect_partial_match(common, backtracks);
5084 read_char(common);
5085 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5086 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5087 /* Optimize register allocation: use a real register. */
5088 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5089 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5090
5091 label = LABEL();
5092 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5093 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5094 read_char(common);
5095 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5096 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5097 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5098
5099 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5100 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5101 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5102 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5103 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5104 JUMPTO(SLJIT_C_NOT_ZERO, label);
5105
5106 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5107 JUMPHERE(jump[0]);
5108 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5109
5110 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5111 {
5112 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5113 /* Since we successfully read a char above, partial matching must occur. */
5114 check_partial(common, TRUE);
5115 JUMPHERE(jump[0]);
5116 }
5117 return cc;
5118 #endif
5119
5120 case OP_EODN:
5121 /* Requires rather complex checks. */
5122 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5123 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5124 {
5125 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5126 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5127 if (common->mode == JIT_COMPILE)
5128 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5129 else
5130 {
5131 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5132 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5133 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5134 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5135 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5136 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5137 check_partial(common, TRUE);
5138 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5139 JUMPHERE(jump[1]);
5140 }
5141 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5142 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5143 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5144 }
5145 else if (common->nltype == NLTYPE_FIXED)
5146 {
5147 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5148 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5149 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5150 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5151 }
5152 else
5153 {
5154 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5155 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5156 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5157 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5158 jump[2] = JUMP(SLJIT_C_GREATER);
5159 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5160 /* Equal. */
5161 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5162 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5163 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5164
5165 JUMPHERE(jump[1]);
5166 if (common->nltype == NLTYPE_ANYCRLF)
5167 {
5168 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5169 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5170 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5171 }
5172 else
5173 {
5174 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5175 read_char(common);
5176 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5177 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5178 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5179 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
5180 }
5181 JUMPHERE(jump[2]);
5182 JUMPHERE(jump[3]);
5183 }
5184 JUMPHERE(jump[0]);
5185 check_partial(common, FALSE);
5186 return cc;
5187
5188 case OP_EOD:
5189 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5190 check_partial(common, FALSE);
5191 return cc;
5192
5193 case OP_CIRC:
5194 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5195 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5196 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5197 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5198 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5199 return cc;
5200
5201 case OP_CIRCM:
5202 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5203 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5204 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5205 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5206 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5207 jump[0] = JUMP(SLJIT_JUMP);
5208 JUMPHERE(jump[1]);
5209
5210 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5211 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5212 {
5213 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5214 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5215 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5216 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5217 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5218 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5219 }
5220 else
5221 {
5222 skip_char_back(common);
5223 read_char(common);
5224 check_newlinechar(common, common->nltype, backtracks, FALSE);
5225 }
5226 JUMPHERE(jump[0]);
5227 return cc;
5228
5229 case OP_DOLL:
5230 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5231 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5232 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5233
5234 if (!common->endonly)
5235 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5236 else
5237 {
5238 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5239 check_partial(common, FALSE);
5240 }
5241 return cc;
5242
5243 case OP_DOLLM:
5244 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5245 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5246 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5247 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5248 check_partial(common, FALSE);
5249 jump[0] = JUMP(SLJIT_JUMP);
5250 JUMPHERE(jump[1]);
5251
5252 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5253 {
5254 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5255 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5256 if (common->mode == JIT_COMPILE)
5257 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5258 else
5259 {
5260 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5261 /* STR_PTR = STR_END - IN_UCHARS(1) */
5262 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5263 check_partial(common, TRUE);
5264 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5265 JUMPHERE(jump[1]);
5266 }
5267
5268 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5269 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5270 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5271 }
5272 else
5273 {
5274 peek_char(common);
5275 check_newlinechar(common, common->nltype, backtracks, FALSE);
5276 }
5277 JUMPHERE(jump[0]);
5278 return cc;
5279
5280 case OP_CHAR:
5281 case OP_CHARI:
5282 length = 1;
5283 #ifdef SUPPORT_UTF
5284 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5285 #endif
5286 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5287 {
5288 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5289 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5290
5291 context.length = IN_UCHARS(length);
5292 context.sourcereg = -1;
5293 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5294 context.ucharptr = 0;
5295 #endif
5296 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5297 }
5298 detect_partial_match(common, backtracks);
5299 read_char(common);
5300 #ifdef SUPPORT_UTF
5301 if (common->utf)
5302 {
5303 GETCHAR(c, cc);
5304 }
5305 else
5306 #endif
5307 c = *cc;
5308 if (type == OP_CHAR || !char_has_othercase(common, cc))
5309 {
5310 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5311 return cc + length;
5312 }
5313 oc = char_othercase(common, c);
5314 bit = c ^ oc;
5315 if (is_powerof2(bit))
5316 {
5317 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5318 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5319 return cc + length;
5320 }
5321 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
5322 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5323 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
5324 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5325 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5326 return cc + length;
5327
5328 case OP_NOT:
5329 case OP_NOTI:
5330 detect_partial_match(common, backtracks);
5331 length = 1;
5332 #ifdef SUPPORT_UTF
5333 if (common->utf)
5334 {
5335 #ifdef COMPILE_PCRE8
5336 c = *cc;
5337 if (c < 128)
5338 {
5339 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5340 if (type == OP_NOT || !char_has_othercase(common, cc))
5341 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5342 else
5343 {
5344 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5345 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5346 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5347 }
5348 /* Skip the variable-length character. */
5349 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5350 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5351 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5352 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5353 JUMPHERE(jump[0]);
5354 return cc + 1;
5355 }
5356 else
5357 #endif /* COMPILE_PCRE8 */
5358 {
5359 GETCHARLEN(c, cc, length);
5360 read_char(common);
5361 }
5362 }
5363 else
5364 #endif /* SUPPORT_UTF */
5365 {
5366 read_char(common);
5367 c = *cc;
5368 }
5369
5370 if (type == OP_NOT || !char_has_othercase(common, cc))
5371 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5372 else
5373 {
5374 oc = char_othercase(common, c);
5375 bit = c ^ oc;
5376 if (is_powerof2(bit))
5377 {
5378 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5379 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5380 }
5381 else
5382 {
5383 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5384 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5385 }
5386 }
5387 return cc + length;
5388
5389 case OP_CLASS:
5390 case OP_NCLASS:
5391 detect_partial_match(common, backtracks);
5392
5393 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5394 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5395 read_char_max(common, bit, type == OP_NCLASS);
5396 #else
5397 read_char_max(common, 255, type == OP_NCLASS);
5398 #endif
5399
5400 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5401 return cc + 32 / sizeof(pcre_uchar);
5402
5403 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5404 jump[0] = NULL;
5405 if (common->utf)
5406 {
5407 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5408 if (type == OP_CLASS)
5409 {
5410 add_jump(compiler, backtracks, jump[0]);
5411 jump[0] = NULL;
5412 }
5413 }
5414 #elif !defined COMPILE_PCRE8
5415 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5416 if (type == OP_CLASS)
5417 {
5418 add_jump(compiler, backtracks, jump[0]);
5419 jump[0] = NULL;
5420 }
5421 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5422
5423 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5424 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5425 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5426 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5427 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5428 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5429
5430 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5431 if (jump[0] != NULL)
5432 JUMPHERE(jump[0]);
5433 #endif
5434
5435 return cc + 32 / sizeof(pcre_uchar);
5436
5437 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5438 case OP_XCLASS:
5439 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5440 return cc + GET(cc, 0) - 1;
5441 #endif
5442
5443 case OP_REVERSE:
5444 length = GET(cc, 0);
5445 if (length == 0)
5446 return cc + LINK_SIZE;
5447 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5448 #ifdef SUPPORT_UTF
5449 if (common->utf)
5450 {
5451 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5452 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5453 label = LABEL();
5454 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5455 skip_char_back(common);
5456 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5457 JUMPTO(SLJIT_C_NOT_ZERO, label);
5458 }
5459 else
5460 #endif
5461 {
5462 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5463 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5464 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5465 }
5466 check_start_used_ptr(common);
5467 return cc + LINK_SIZE;
5468 }
5469 SLJIT_ASSERT_STOP();
5470 return cc;
5471 }
5472
5473 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5474 {
5475 /* This function consumes at least one input character. */
5476 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5477 DEFINE_COMPILER;
5478 pcre_uchar *ccbegin = cc;
5479 compare_context context;
5480 int size;
5481
5482 context.length = 0;
5483 do
5484 {
5485 if (cc >= ccend)
5486 break;
5487
5488 if (*cc == OP_CHAR)
5489 {
5490 size = 1;
5491 #ifdef SUPPORT_UTF
5492 if (common->utf && HAS_EXTRALEN(cc[1]))
5493 size += GET_EXTRALEN(cc[1]);
5494 #endif
5495 }
5496 else if (*cc == OP_CHARI)
5497 {
5498 size = 1;
5499 #ifdef SUPPORT_UTF
5500 if (common->utf)
5501 {
5502 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5503 size = 0;
5504 else if (HAS_EXTRALEN(cc[1]))
5505 size += GET_EXTRALEN(cc[1]);
5506 }
5507 else
5508 #endif
5509 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5510 size = 0;
5511 }
5512 else
5513 size = 0;
5514
5515 cc += 1 + size;
5516 context.length += IN_UCHARS(size);
5517 }
5518 while (size > 0 && context.length <= 128);
5519
5520 cc = ccbegin;
5521 if (context.length > 0)
5522 {
5523 /* We have a fixed-length byte sequence. */
5524 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5525 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5526
5527 context.sourcereg = -1;
5528 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5529 context.ucharptr = 0;
5530 #endif
5531 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5532 return cc;
5533 }
5534
5535 /* A non-fixed length character will be checked if length == 0. */
5536 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5537 }
5538
5539 /* Forward definitions. */
5540 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5541 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5542
/* PUSH_BACKTRACK(size, ccstart, error)

Obtains a block of `size` bytes with sljit_alloc_memory(), clears it, and
pushes it onto the backtrack list of `parent`: the new record's prev field
takes the old parent->top, its cc field is set to `ccstart`, and parent->top
is updated to the new record.

The expansion relies on three names being visible where it is used:
`compiler`, `parent` and `backtrack` (the latter receives the new record).
If the compiler reports an error after the allocation, the enclosing function
returns `error` immediately. `size` is evaluated twice, so it must not have
side effects. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return (error); \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for use inside functions returning void: on a
compiler error the enclosing function simply returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the in-scope `backtrack` pointer as a pointer to the given, more
specific backtrack structure type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5570
5571 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5572 {
5573 /* The OVECTOR offset goes to TMP2. */
5574 DEFINE_COMPILER;
5575 int count = GET2(cc, 1 + IMM2_SIZE);
5576 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5577 unsigned int offset;
5578 jump_list *found = NULL;
5579
5580 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5581
5582 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5583
5584 count--;
5585 while (count-- > 0)
5586 {
5587 offset = GET2(slot, 0) << 1;
5588 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5589 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5590 slot += common->name_entry_size;
5591 }
5592
5593 offset = GET2(slot, 0) << 1;
5594 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5595 if (backtracks != NULL && !common->jscript_compat)
5596 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5597
5598 set_jumps(found, LABEL());
5599 }
5600
5601 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5602 {
5603 DEFINE_COMPILER;
5604 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5605 int offset = 0;
5606 struct sljit_jump *jump = NULL;
5607 struct sljit_jump *partial;
5608 struct sljit_jump *nopartial;
5609
5610 if (ref)
5611 {
5612 offset = GET2(cc, 1) << 1;
5613 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5614 /* OVECTOR(1) contains the "string begin - 1" constant. */
5615 if (withchecks && !common->jscript_compat)
5616 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5617 }
5618 else
5619 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5620
5621 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5622 if (common->utf && *cc == OP_REFI)
5623 {
5624 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5625 if (ref)
5626 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5627 else
5628 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5629
5630 if (withchecks)
5631 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5632
5633 /* Needed to save important temporary registers. */
5634 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5635 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5636 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5637 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5638 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5639 if (common->mode == JIT_COMPILE)
5640 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5641 else
5642 {
5643 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5644 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5645 check_partial(common, FALSE);
5646 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5647 JUMPHERE(nopartial);
5648 }
5649 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5650 }
5651 else
5652 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5653 {
5654 if (ref)
5655 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5656 else
5657 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5658
5659 if (withchecks)
5660 jump = JUMP(SLJIT_C_ZERO);
5661
5662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5663 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5664 if (common->mode == JIT_COMPILE)
5665 add_jump(compiler, backtracks, partial);
5666
5667 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5668 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5669
5670 if (common->mode != JIT_COMPILE)
5671 {
5672 nopartial = JUMP(SLJIT_JUMP);
5673 JUMPHERE(partial);
5674 /* TMP2 -= STR_END - STR_PTR */
5675 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5676 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5677 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5678 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5679 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5680 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5681 JUMPHERE(partial);
5682 check_partial(common, FALSE);
5683 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5684 JUMPHERE(nopartial);
5685 }
5686 }
5687
5688 if (jump != NULL)
5689 {
5690 if (emptyfail)
5691 add_jump(compiler, backtracks, jump);
5692 else
5693 JUMPHERE(jump);
5694 }
5695 }
5696
5697 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5698 {
5699 DEFINE_COMPILER;
5700 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5701 backtrack_common *backtrack;
5702 pcre_uchar type;
5703 int offset = 0;
5704 struct sljit_label *label;
5705 struct sljit_jump *zerolength;
5706 struct sljit_jump *jump = NULL;
5707 pcre_uchar *ccbegin = cc;
5708 int min = 0, max = 0;
5709 BOOL minimize;
5710
5711 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5712
5713 if (ref)
5714 offset = GET2(cc, 1) << 1;
5715 else
5716 cc += IMM2_SIZE;
5717 type = cc[1 + IMM2_SIZE];
5718
5719 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5720 minimize = (type & 0x1) != 0;
5721 switch(type)
5722 {
5723 case OP_CRSTAR:
5724 case OP_CRMINSTAR:
5725 min = 0;
5726 max = 0;
5727 cc += 1 + IMM2_SIZE + 1;
5728 break;
5729 case OP_CRPLUS:
5730 case OP_CRMINPLUS:
5731 min = 1;
5732 max = 0;
5733 cc += 1 + IMM2_SIZE + 1;
5734 break;
5735 case OP_CRQUERY:
5736 case OP_CRMINQUERY:
5737 min = 0;
5738 max = 1;
5739 cc += 1 + IMM2_SIZE + 1;
5740 break;
5741 case OP_CRRANGE:
5742 case OP_CRMINRANGE:
5743 min = GET2(cc, 1 + IMM2_SIZE + 1);
5744 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5745 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5746 break;
5747 default:
5748 SLJIT_ASSERT_STOP();
5749 break;
5750 }
5751
5752 if (!minimize)
5753 {
5754 if (min == 0)
5755 {
5756 allocate_stack(common, 2);
5757 if (ref)
5758 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5759 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5760 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5761 /* Temporary release of STR_PTR. */
5762 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5763 /* Handles both invalid and empty cases. Since the minimum repeat,
5764 is zero the invalid case is basically the same as an empty case. */
5765 if (ref)
5766 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5767 else
5768 {
5769 compile_dnref_search(common, ccbegin, NULL);
5770 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5772 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5773 }
5774 /* Restore if not zero length. */
5775 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5776 }
5777 else
5778 {
5779 allocate_stack(common, 1);
5780 if (ref)
5781 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5782 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5783 if (ref)
5784 {
5785 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5786 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5787 }
5788 else
5789 {
5790 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5791 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5792 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5793 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5794 }
5795 }
5796
5797 if (min > 1 || max > 1)
5798 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5799
5800 label = LABEL();
5801 if (!ref)
5802 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5803 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5804
5805 if (min > 1 || max > 1)
5806 {
5807 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5808 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5809 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5810 if (min > 1)
5811 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5812 if (max > 1)
5813 {
5814 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5815 allocate_stack(common, 1);
5816 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5817 JUMPTO(SLJIT_JUMP, label);
5818 JUMPHERE(jump);
5819 }
5820 }
5821
5822 if (max == 0)
5823 {
5824 /* Includes min > 1 case as well. */
5825 allocate_stack(common, 1);
5826 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5827 JUMPTO(SLJIT_JUMP, label);
5828 }
5829
5830 JUMPHERE(zerolength);
5831 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5832
5833 count_match(common);
5834 return cc;
5835 }
5836
5837 allocate_stack(common, ref ? 2 : 3);
5838 if (ref)
5839 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5840 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5841 if (type != OP_CRMINSTAR)
5842 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5843
5844 if (min == 0)
5845 {
5846 /* Handles both invalid and empty cases. Since the minimum repeat,
5847 is zero the invalid case is basically the same as an empty case. */
5848 if (ref)
5849 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5850 else
5851 {
5852 compile_dnref_search(common, ccbegin, NULL);
5853 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5854 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5855 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5856 }
5857 /* Length is non-zero, we can match real repeats. */
5858 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5859 jump = JUMP(SLJIT_JUMP);
5860 }
5861 else
5862 {
5863 if (ref)
5864 {
5865 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5866 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5867 }
5868 else
5869 {
5870 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5871 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5872 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5873 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5874 }
5875 }
5876
5877 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5878 if (max > 0)
5879 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5880
5881 if (!ref)
5882 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5883 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5884 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5885
5886 if (min > 1)
5887 {
5888 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5889 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5890 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5891 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5892 }
5893 else if (max > 0)
5894 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5895
5896 if (jump != NULL)
5897 JUMPHERE(jump);
5898 JUMPHERE(zerolength);
5899
5900 count_match(common);
5901 return cc;
5902 }
5903
/* Generates the matching path of a recursion item located at cc. The start
offset of the recursion target is read with GET(cc, 1).

If get_framesize() reports no_stack for the target, the target is compiled
inline at this position. Otherwise a recurse_entry for the target is looked up
in common->entries (and appended when missing) and a fast call to it is emitted.

Returns cc + 1 + LINK_SIZE, or NULL when the compiler reports an error after
allocating a new recurse_entry. */
5904 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5905 {
5906 DEFINE_COMPILER;
5907 backtrack_common *backtrack;
5908 recurse_entry *entry = common->entries;
5909 recurse_entry *prev = NULL;
5910 sljit_sw start = GET(cc, 1);
5911 pcre_uchar *start_cc;
5912 BOOL needs_control_head;
5913
5914 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5915
5916 /* Inlining simple patterns. */
5917 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5918 {
5919 start_cc = common->start + start;
5920 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5921 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5922 return cc + 1 + LINK_SIZE;
5923 }
5924
/* Search for an existing entry with the same start offset. prev follows the
last visited entry so that a new one can be appended to the list. */
5925 while (entry != NULL)
5926 {
5927 if (entry->start == start)
5928 break;
5929 prev = entry;
5930 entry = entry->next;
5931 }
5932
/* Not found: allocate a new entry and link it to the end of the list. */
5933 if (entry == NULL)
5934 {
5935 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5936 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5937 return NULL;
5938 entry->next = NULL;
5939 entry->entry = NULL;
5940 entry->calls = NULL;
5941 entry->start = start;
5942
5943 if (prev != NULL)
5944 prev->next = entry;
5945 else
5946 common->entries = entry;
5947 }
5948
/* Push OVECTOR(0) and / or the mark value onto the stack before the call,
depending on which of them is used (has_set_som, mark_ptr). */
5949 if (common->has_set_som && common->mark_ptr != 0)
5950 {
5951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5952 allocate_stack(common, 2);
5953 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5954 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5955 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5956 }
5957 else if (common->has_set_som || common->mark_ptr != 0)
5958 {
5959 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5960 allocate_stack(common, 1);
5961 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5962 }
5963
/* When the entry label is not known yet, the call is recorded in
entry->calls (presumably resolved once the entry code is generated - confirm
at the place where the entries are compiled). */
5964 if (entry->entry == NULL)
5965 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5966 else
5967 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5968 /* Leave if the match is failed. */
5969 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5970 return cc + 1 + LINK_SIZE;
5971 }
5972
/* Helper called from the generated code (see the SLJIT_CALL3 call emitted by
compile_callout_matchingpath). It completes the callout block, converts the
JIT offset vector (pointers) into offsets relative to the start of the subject,
and invokes the user supplied callout function.

arguments      the jit_arguments of the current match
callout_block  partially filled callout block
jit_ovector    offset vector of the generated code, which stores pointers

Returns 0 when no callout function is set, otherwise the value returned by the
callout function. */
5973 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5974 {
5975 const pcre_uchar *begin = arguments->begin;
5976 int *offset_vector = arguments->offsets;
5977 int offset_count = arguments->offset_count;
5978 int i;
5979
5980 if (PUBL(callout) == NULL)
5981 return 0;
5982
5983 callout_block->version = 2;
5984 callout_block->callout_data = arguments->callout_data;
5985
5986 /* Offsets in subject. */
5987 callout_block->subject_length = arguments->end - arguments->begin;
/* On entry the subject and offset_vector fields carry internal pointer values
(stored by compile_callout_matchingpath), so they must be read here, before
they are overwritten with their final values below. */
5988 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5989 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5990 #if defined COMPILE_PCRE8
5991 callout_block->subject = (PCRE_SPTR)begin;
5992 #elif defined COMPILE_PCRE16
5993 callout_block->subject = (PCRE_SPTR16)begin;
5994 #elif defined COMPILE_PCRE32
5995 callout_block->subject = (PCRE_SPTR32)begin;
5996 #endif
5997
5998 /* Convert and copy the JIT offset vector to the offset_vector array. */
5999 callout_block->capture_top = 0;
6000 callout_block->offset_vector = offset_vector;
/* The first pair (index 0 and 1) is skipped by the loop and set to -1 later. */
6001 for (i = 2; i < offset_count; i += 2)
6002 {
6003 offset_vector[i] = jit_ovector[i] - begin;
6004 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
/* Remember the index of the last pair whose start pointer is not below begin. */
6005 if (jit_ovector[i] >= begin)
6006 callout_block->capture_top = i;
6007 }
6008
/* Convert the remembered vector index to a pair number, plus one. */
6009 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
6010 if (offset_count > 0)
6011 offset_vector[0] = -1;
6012 if (offset_count > 1)
6013 offset_vector[1] = -1;
6014 return (*PUBL(callout))(callout_block);
6015 }
6016
6017 /* Size of the callout block in bytes, rounded up to a multiple of 8. */
6018 #define CALLOUT_ARG_SIZE \
6019 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
6020
/* Offset of the callout block member arg, relative to a base address which is
CALLOUT_ARG_SIZE bytes above the start of the block (hence the negative
displacement). It is used with STACK_TOP as the base in
compile_callout_matchingpath. */
6021 #define CALLOUT_ARG_OFFSET(arg) \
6022 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
6023
/* Generates the matching path of a callout item located at cc. Space for a
callout block is allocated on the stack, the fields known by the generated
code are filled in, and do_callout is called. The reserved stack space is
released after the call.

cc[1] supplies the callout number, GET(cc, 2) the pattern position and
GET(cc, 2 + LINK_SIZE) the length of the next item.

A positive return value of do_callout jumps to the backtrack path, a negative
value jumps to the forced quit target, and zero continues matching.

Returns cc + 2 + 2 * LINK_SIZE. */
6024 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6025 {
6026 DEFINE_COMPILER;
6027 backtrack_common *backtrack;
6028
6029 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6030
6031 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6032
6033 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6034 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6035 SLJIT_ASSERT(common->capture_last_ptr != 0);
6036 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6037 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6038
6039 /* These pointer sized fields temporarily store internal variables. */
/* offset_vector receives STR_PTR and subject receives OVECTOR(0); do_callout
reads both before replacing them with their final values. */
6040 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6041 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6042 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6043
/* The mark field is loaded from the mark_ptr member of jit_arguments when
marks are in use, otherwise an immediate 0 is stored. */
6044 if (common->mark_ptr != 0)
6045 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6046 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6047 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6048 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6049
6050 /* Needed to save important temporary registers. */
6051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* Second argument: start of the callout block. Third argument: address of
OVECTOR_START in the local area.
NOTE(review): the first argument is presumably passed in the register aliased
by TMP1, which was loaded from ARGUMENTS above - confirm against the register
definitions. */
6052 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6053 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
6054 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* NOTE(review): presumably extends the int return value to the full register
width before it is compared - confirm SLJIT_MOV_SI semantics. */
6055 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6056 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6057 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6058
6059 /* Check return value. */
6060 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6061 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
6062 if (common->forced_quit_label == NULL)
6063 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
6064 else
6065 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
6066 return cc + 2 + 2 * LINK_SIZE;
6067 }
6068
/* The callout block layout macros are not used past this point. */
6069 #undef CALLOUT_ARG_SIZE
6070 #undef CALLOUT_ARG_OFFSET
6071
/* Generates the matching path of an assertion (opcodes OP_ASSERT to
OP_ASSERTBACK_NOT), which may be preceded by OP_BRAZERO or OP_BRAMINZERO
when it is not a condition.

common       compiler state
cc           points to the assertion (or to the preceding OP_BRA[MIN]ZERO)
backtrack    assert backtrack structure which receives framesize,
             private_data_ptr and the jump lists / labels
conditional  when TRUE, failures are collected in backtrack->condfailed
             instead of backtrack->common.topbacktracks

Every alternative is compiled together with its own backtracking path. Several
members of common (local_exit, positive_assert, then_trap, the quit and accept
labels and jump lists) are saved on entry and restored on every return path.

Returns cc + 1 + LINK_SIZE, where cc is the end of the last alternative, or
NULL when the sljit compiler reports an error. */
6072 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6073 {
6074 DEFINE_COMPILER;
6075 int framesize;
6076 int extrasize;
6077 BOOL needs_control_head;
6078 int private_data_ptr;
6079 backtrack_common altbacktrack;
6080 pcre_uchar *ccbegin;
6081 pcre_uchar opcode;
6082 pcre_uchar bra = OP_BRA;
6083 jump_list *tmp = NULL;
6084 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6085 jump_list **found;
6086 /* Saving previous accept variables. */
6087 BOOL save_local_exit = common->local_exit;
6088 BOOL save_positive_assert = common->positive_assert;
6089 then_trap_backtrack *save_then_trap = common->then_trap;
6090 struct sljit_label *save_quit_label = common->quit_label;
6091 struct sljit_label *save_accept_label = common->accept_label;
6092 jump_list *save_quit = common->quit;
6093 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6094 jump_list *save_accept = common->accept;
6095 struct sljit_jump *jump;
6096 struct sljit_jump *brajump = NULL;
6097
6098 /* Assert captures then. */
6099 common->then_trap = NULL;
6100
6101 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6102 {
6103 SLJIT_ASSERT(!conditional);
6104 bra = *cc;
6105 cc++;
6106 }
6107 private_data_ptr = PRIVATE_DATA(cc);
6108 SLJIT_ASSERT(private_data_ptr != 0);
6109 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6110 backtrack->framesize = framesize;
6111 backtrack->private_data_ptr = private_data_ptr;
6112 opcode = *cc;
6113 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A matching alternative of a positive assert jumps to the local tmp list,
which is resolved at the "Assert is successful" code below. For a negative
assert a matching alternative goes directly to the failure target. */
6114 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6115 ccbegin = cc;
6116 cc += GET(cc, 1);
6117
6118 if (bra == OP_BRAMINZERO)
6119 {
6120 /* This is a braminzero backtrack path. */
6121 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6122 free_stack(common, 1);
6123 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6124 }
6125
6126 if (framesize < 0)
6127 {
/* No frame is initialized: only STR_PTR and, when needed, the previous
control head are pushed (extrasize slots). */
6128 extrasize = needs_control_head ? 2 : 1;
6129 if (framesize == no_frame)
6130 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6131 allocate_stack(common, extrasize);
6132 if (needs_control_head)
6133 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6134 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6135 if (needs_control_head)
6136 {
6137 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6138 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6139 }
6140 }
6141 else
6142 {
/* A frame is needed: STR_PTR, the previous private_data_ptr value and, when
needed, the previous control head are pushed, followed by the frame itself. */
6143 extrasize = needs_control_head ? 3 : 2;
6144 allocate_stack(common, framesize + extrasize);
6145 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6146 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6148 if (needs_control_head)
6149 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6150 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6151 if (needs_control_head)
6152 {
6153 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6154 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6155 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6156 }
6157 else
6158 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6159 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6160 }
6161
6162 memset(&altbacktrack, 0, sizeof(backtrack_common));
6163 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6164 {
6165 /* Negative assert is stronger than positive assert. */
6166 common->local_exit = TRUE;
6167 common->quit_label = NULL;
6168 common->quit = NULL;
6169 common->positive_assert = FALSE;
6170 }
6171 else
6172 common->positive_assert = TRUE;
6173 common->positive_assert_quit = NULL;
6174
/* One iteration per alternative. The loop is left when the alternative is
not followed by OP_ALT. */
6175 while (1)
6176 {
6177 common->accept_label = NULL;
6178 common->accept = NULL;
6179 altbacktrack.top = NULL;
6180 altbacktrack.topbacktracks = NULL;
6181
/* Later alternatives start from the STR_PTR saved on the top of the stack. */
6182 if (*ccbegin == OP_ALT)
6183 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6184
6185 altbacktrack.cc = ccbegin;
6186 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6187 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6188 {
/* Restore the saved state before reporting the error. */
6189 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6190 {
6191 common->local_exit = save_local_exit;
6192 common->quit_label = save_quit_label;
6193 common->quit = save_quit;
6194 }
6195 common->positive_assert = save_positive_assert;
6196 common->then_trap = save_then_trap;
6197 common->accept_label = save_accept_label;
6198 common->positive_assert_quit = save_positive_assert_quit;
6199 common->accept = save_accept;
6200 return NULL;
6201 }
6202 common->accept_label = LABEL();
6203 if (common->accept != NULL)
6204 set_jumps(common->accept, common->accept_label);
6205
6206 /* Reset stack. */
6207 if (framesize < 0)
6208 {
6209 if (framesize == no_frame)
6210 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6211 else
6212 free_stack(common, extrasize);
6213 if (needs_control_head)
6214 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6215 }
6216 else
6217 {
6218 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6219 {
6220 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6221 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6222 if (needs_control_head)
6223 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6224 }
6225 else
6226 {
6227 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6228 if (needs_control_head)
6229 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6230 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6231 }
6232 }
6233
6234 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6235 {
6236 /* We know that STR_PTR was stored on the top of the stack. */
6237 if (conditional)
6238 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6239 else if (bra == OP_BRAZERO)
6240 {
6241 if (framesize < 0)
6242 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6243 else
6244 {
6245 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6246 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6247 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6248 }
6249 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6250 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6251 }
6252 else if (framesize >= 0)
6253 {
6254 /* For OP_BRA and OP_BRAMINZERO. */
6255 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6256 }
6257 }
/* The alternative matched: continue at the list selected by found. */
6258 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6259
6260 compile_backtrackingpath(common, altbacktrack.top);
6261 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6262 {
/* Restore the saved state before reporting the error. */
6263 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6264 {
6265 common->local_exit = save_local_exit;
6266 common->quit_label = save_quit_label;
6267 common->quit = save_quit;
6268 }
6269 common->positive_assert = save_positive_assert;
6270 common->then_trap = save_then_trap;
6271 common->accept_label = save_accept_label;
6272 common->positive_assert_quit = save_positive_assert_quit;
6273 common->accept = save_accept;
6274 return NULL;
6275 }
6276 set_jumps(altbacktrack.topbacktracks, LABEL());
6277
6278 if (*cc != OP_ALT)
6279 break;
6280
/* Advance to the next alternative. */
6281 ccbegin = cc;
6282 cc += GET(cc, 1);
6283 }
6284
6285 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6286 {
6287 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6288 /* Makes the check less complicated below. */
6289 common->positive_assert_quit = common->quit;
6290 }
6291
6292 /* None of them matched. */
6293 if (common->positive_assert_quit != NULL)
6294 {
/* The normal flow jumps over this landing code, which restores STACK_TOP
for the jumps collected in positive_assert_quit. */
6295 jump = JUMP(SLJIT_JUMP);
6296 set_jumps(common->positive_assert_quit, LABEL());
6297 SLJIT_ASSERT(framesize != no_stack);
6298 if (framesize < 0)
6299 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6300 else
6301 {
6302 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6303 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6304 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6305 }
6306 JUMPHERE(jump);
6307 }
6308
/* Reload the control head which was saved at STACK(1). */
6309 if (needs_control_head)
6310 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6311
6312 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6313 {
6314 /* Assert is failed. */
6315 if (conditional || bra == OP_BRAZERO)
6316 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6317
6318 if (framesize < 0)
6319 {
6320 /* The topmost item should be 0. */
6321 if (bra == OP_BRAZERO)
6322 {
6323 if (extrasize == 2)
6324 free_stack(common, 1);
6325 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6326 }
6327 else
6328 free_stack(common, extrasize);
6329 }
6330 else
6331 {
6332 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6333 /* The topmost item should be 0. */
6334 if (bra == OP_BRAZERO)
6335 {
6336 free_stack(common, framesize + extrasize - 1);
6337 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6338 }
6339 else
6340 free_stack(common, framesize + extrasize);
6341 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6342 }
/* For OP_BRAZERO this jump is bound to matchingpath below, otherwise it is
added to the failure target. */
6343 jump = JUMP(SLJIT_JUMP);
6344 if (bra != OP_BRAZERO)
6345 add_jump(compiler, target, jump);
6346
6347 /* Assert is successful. */
6348 set_jumps(tmp, LABEL());
6349 if (framesize < 0)
6350 {
6351 /* We know that STR_PTR was stored on the top of the stack. */
6352 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6353 /* Keep the STR_PTR on the top of the stack. */
6354 if (bra == OP_BRAZERO)
6355 {
6356 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6357 if (extrasize == 2)
6358 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6359 }
6360 else if (bra == OP_BRAMINZERO)
6361 {
6362 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6363 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6364 }
6365 }
6366 else
6367 {
6368 if (bra == OP_BRA)
6369 {
6370 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6371 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6372 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6373 }
6374 else
6375 {
6376 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6377 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6378 if (extrasize == 2)
6379 {
6380 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6381 if (bra == OP_BRAMINZERO)
6382 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6383 }
6384 else
6385 {
6386 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6387 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6388 }
6389 }
6390 }
6391
6392 if (bra == OP_BRAZERO)
6393 {
6394 backtrack->matchingpath = LABEL();
6395 SET_LABEL(jump, backtrack->matchingpath);
6396 }
6397 else if (bra == OP_BRAMINZERO)
6398 {
6399 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6400 JUMPHERE(brajump);
6401 if (framesize >= 0)
6402 {
6403 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6404 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6405 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6406 }
6407 set_jumps(backtrack->common.topbacktracks, LABEL());
6408 }
6409 }
6410 else
6411 {
6412 /* AssertNot is successful. */
6413 if (framesize < 0)
6414 {
6415 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6416 if (bra != OP_BRA)
6417 {
6418 if (extrasize == 2)
6419 free_stack(common, 1);
6420 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6421 }
6422 else
6423 free_stack(common, extrasize);
6424 }
6425 else
6426 {
6427 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6428 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6429 /* The topmost item should be 0. */
6430 if (bra != OP_BRA)
6431 {
6432 free_stack(common, framesize + extrasize - 1);
6433 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6434 }
6435 else
6436 free_stack(common, framesize + extrasize);
6437 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6438 }
6439
6440 if (bra == OP_BRAZERO)
6441 backtrack->matchingpath = LABEL();
6442 else if (bra == OP_BRAMINZERO)
6443 {
6444 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6445 JUMPHERE(brajump);
6446 }
6447
6448 if (bra != OP_BRA)
6449 {
6450 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6451 set_jumps(backtrack->common.topbacktracks, LABEL());
6452 backtrack->common.topbacktracks = NULL;
6453 }
6454 }
6455
/* Restore the variables saved on entry. */
6456 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6457 {
6458 common->local_exit = save_local_exit;
6459 common->quit_label = save_quit_label;
6460 common->quit = save_quit;
6461 }
6462 common->positive_assert = save_positive_assert;
6463 common->then_trap = save_then_trap;
6464 common->accept_label = save_accept_label;
6465 common->positive_assert_quit = save_positive_assert_quit;
6466 common->accept = save_accept;
6467 return cc + 1 + LINK_SIZE;
6468 }
6469
/* Generates the code which resets STACK_TOP after the content of a bracket
has matched, and restores the control head when needs_control_head is set.
NOTE(review): judging by the name this serves once (atomic) brackets - confirm
at the call sites.

ket                 closing opcode of the bracket (OP_KET, OP_KETRMAX or
                    OP_KETRMIN are distinguished here)
framesize           frame size of the bracket, negative if there is no frame
private_data_ptr    offset of the private data slot of the bracket
has_alternatives    TRUE if the bracket has alternatives
needs_control_head  TRUE if the saved control head must be written back to
                    common->control_head_ptr

For OP_KETRMAX the value on the top of the stack is also loaded into TMP2. */
6470 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6471 {
6472 DEFINE_COMPILER;
6473 int stacksize;
6474
6475 if (framesize < 0)
6476 {
6477 if (framesize == no_frame)
6478 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6479 else
6480 {
/* One slot for the control head (if any), and one more unless this is a
plain OP_KET bracket without alternatives. */
6481 stacksize = needs_control_head ? 1 : 0;
6482 if (ket != OP_KET || has_alternatives)
6483 stacksize++;
6484 free_stack(common, stacksize);
6485 }
6486
/* The saved control head is read from the second slot when the extra slot
described above is present, otherwise from the first one. */
6487 if (needs_control_head)
6488 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6489
6490 /* TMP2 which is set here used by OP_KETRMAX below. */
6491 if (ket == OP_KETRMAX)
6492 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6493 else if (ket == OP_KETRMIN)
6494 {
6495 /* Move the STR_PTR to the private_data_ptr. */
6496 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6497 }
6498 }
6499 else
6500 {
/* STACK_TOP is recomputed from the value stored at private_data_ptr. */
6501 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6502 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6503 if (needs_control_head)
6504 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6505
6506 if (ket == OP_KETRMAX)
6507 {
6508 /* TMP2 which is set here used by OP_KETRMAX below. */
6509 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6510 }
6511 }
/* TMP1 holds the saved control head loaded by one of the branches above. */
6512 if (needs_control_head)
6513 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6514 }
6515
/* Generates the code which saves the previous capture state on the stack and
stores the new one.

stacksize         index of the first stack slot which can be used
offset            index of the capture start in the offset vector (the capture
                  number is offset >> 1)
private_data_ptr  offset of the private data slot of the bracket

When common->capture_last_ptr is in use, its previous value is saved and it is
set to offset >> 1. When the bracket is not an optimized one, the previous
OVECTOR(offset) and OVECTOR(offset + 1) are saved; then OVECTOR(offset) is
loaded from private_data_ptr and OVECTOR(offset + 1) is set to STR_PTR.

Returns stacksize increased by the number of slots written. */
6516 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6517 {
6518 DEFINE_COMPILER;
6519
6520 if (common->capture_last_ptr != 0)
6521 {
6522 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6523 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6525 stacksize++;
6526 }
6527 if (common->optimized_cbracket[offset >> 1] == 0)
6528 {
/* TMP1 is reused: first it carries the old OVECTOR(offset), then the value
read from private_data_ptr. */
6529 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6530 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6531 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6532 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6534 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6535 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6536 stacksize += 2;
6537 }
6538 return stacksize;
6539 }
6540
6541 /*
6542 Handling bracketed expressions is probably the most complex part.
6543
6544 Stack layout naming characters:
6545 S - Push the current STR_PTR
6546 0 - Push a 0 (NULL)
6547 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6548 before the next alternative. Not pushed if there are no alternatives.
6549 M - Any values pushed by the current alternative. Can be empty, or anything.
6550 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6551 L - Push the previous local (pointed by localptr) to the stack
6552 () - optional values stored on the stack
6553 ()* - optional, can be stored multiple times
6554
6555 The following list shows the regular expression templates, their PCRE byte codes
6556 and stack layout supported by pcre-sljit.
6557
6558 (?:) OP_BRA | OP_KET A M
6559 () OP_CBRA | OP_KET C M
6560 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6561 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6562 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6563 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6564 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6565 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6566 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6567 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6568 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6569 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6570 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6571 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6572 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6573 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6574 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6575 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6576 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6577 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6578 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6579 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6580
6581
6582 Stack layout naming characters:
6583 A - Push the alternative index (starting from 0) on the stack.
6584 Not pushed if there are no alternatives.
6585 M - Any values pushed by the current alternative. Can be empty, or anything.
6586
6587 The next list shows the possible content of a bracket:
6588 (|) OP_*BRA | OP_ALT ... M A
6589 (?()|) OP_*COND | OP_ALT M A
6590 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6591 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6592 Or nothing, if trace is unnecessary
6593 */
6594
6595 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6596 {
6597 DEFINE_COMPILER;
6598 backtrack_common *backtrack;
6599 pcre_uchar opcode;
6600 int private_data_ptr = 0;
6601 int offset = 0;
6602 int i, stacksize;
6603 int repeat_ptr = 0, repeat_length = 0;
6604 int repeat_type = 0, repeat_count = 0;
6605 pcre_uchar *ccbegin;
6606 pcre_uchar *matchingpath;
6607 pcre_uchar *slot;
6608 pcre_uchar bra = OP_BRA;
6609 pcre_uchar ket;
6610 assert_backtrack *assert;
6611 BOOL has_alternatives;
6612 BOOL needs_control_head = FALSE;
6613 struct sljit_jump *jump;
6614 struct sljit_jump *skip;
6615 struct sljit_label *rmax_label = NULL;
6616 struct sljit_jump *braminzero = NULL;
6617
6618 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6619
6620 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6621 {
6622 bra = *cc;
6623 cc++;
6624 opcode = *cc;
6625 }
6626
6627 opcode = *cc;
6628 ccbegin = cc;
6629 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6630 ket = *matchingpath;
6631 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6632 {
6633 repeat_ptr = PRIVATE_DATA(matchingpath);
6634 repeat_length = PRIVATE_DATA(matchingpath + 1);
6635 repeat_type = PRIVATE_DATA(matchingpath + 2);
6636 repeat_count = PRIVATE_DATA(matchingpath + 3);
6637 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6638 if (repeat_type == OP_UPTO)
6639 ket = OP_KETRMAX;
6640 if (repeat_type == OP_MINUPTO)
6641 ket = OP_KETRMIN;
6642 }
6643
6644 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6645 {
6646 /* Drop this bracket_backtrack. */
6647 parent->top = backtrack->prev;
6648 return matchingpath + 1 + LINK_SIZE + repeat_length;
6649 }
6650
6651 matchingpath = ccbegin + 1 + LINK_SIZE;
6652 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6653 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6654 cc += GET(cc, 1);
6655
6656 has_alternatives = *cc == OP_ALT;
6657 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6658 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
6659
6660 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6661 opcode = OP_SCOND;
6662 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6663 opcode = OP_ONCE;
6664
6665 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6666 {
6667 /* Capturing brackets has a pre-allocated space. */
6668 offset = GET2(ccbegin, 1 + LINK_SIZE);
6669 if (common->optimized_cbracket[offset] == 0)
6670 {
6671 private_data_ptr = OVECTOR_PRIV(offset);
6672 offset <<= 1;
6673 }
6674 else
6675 {
6676 offset <<= 1;
6677 private_data_ptr = OVECTOR(offset);
6678 }
6679 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6680 matchingpath += IMM2_SIZE;
6681 }
6682 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6683 {
6684 /* Other brackets simply allocate the next entry. */
6685 private_data_ptr = PRIVATE_DATA(ccbegin);
6686 SLJIT_ASSERT(private_data_ptr != 0);
6687 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6688 if (opcode == OP_ONCE)
6689 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6690 }
6691
6692 /* Instructions before the first alternative. */
6693 stacksize = 0;
6694 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6695 stacksize++;
6696 if (bra == OP_BRAZERO)
6697 stacksize++;
6698
6699 if (stacksize > 0)
6700 allocate_stack(common, stacksize);
6701
6702 stacksize = 0;
6703 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6704 {
6705 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6706 stacksize++;
6707 }
6708
6709 if (bra == OP_BRAZERO)
6710 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6711
6712 if (bra == OP_BRAMINZERO)
6713 {
6714 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6715 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6716 if (ket != OP_KETRMIN)
6717 {
6718 free_stack(common, 1);
6719 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6720 }
6721 else
6722 {
6723 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6724 {
6725 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6726 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6727 /* Nothing stored during the first run. */
6728 skip = JUMP(SLJIT_JUMP);
6729 JUMPHERE(jump);
6730 /* Checking zero-length iteration. */
6731 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6732 {
6733 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6734 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6735 }
6736 else
6737 {
6738 /* Except when the whole stack frame must be saved. */
6739 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6740 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6741 }
6742 JUMPHERE(skip);
6743 }
6744 else
6745 {
6746 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6747 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6748 JUMPHERE(jump