/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1420 - (show annotations)
Sun Dec 29 11:43:10 2013 UTC (5 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 319308 byte(s)
Improve character range checks in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for the stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary of the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 pcre_uint32 limit_match;
186 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187 } executable_functions;
188
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
193
/* Singly linked list node pairing a jump (start) with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
199
/* Negative sentinel values. NOTE(review): presumably stored where a frame
   size is expected (the framesize members are documented as "less than 0
   if not needed") - confirm against the users of these constants. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
204
/* Tags for control entries: a mark or a then-trap. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
209
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type for the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the arguments for compile_backtrackingpath. Must be the first member
215 of its descendants. */
216 typedef struct backtrack_common {
217 /* Concatenation stack. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
227 typedef struct assert_backtrack {
228 backtrack_common common;
229 jump_list *condfailed;
230 /* Less than 0 if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
238 typedef struct bracket_backtrack {
239 backtrack_common common;
240 /* Where to coninue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. Less than 0 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
258 typedef struct bracketpos_backtrack {
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
268 typedef struct braminzero_backtrack {
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
273 typedef struct iterator_backtrack {
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls until the function is not created. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 sljit_sw start;
287 } recurse_entry;
288
289 typedef struct recurse_backtrack {
290 backtrack_common common;
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
294 #define OP_THEN_TRAP OP_TABLE_LENGTH
295
296 typedef struct then_trap_backtrack {
297 backtrack_common common;
298 /* If then_trap is not NULL, this structure contains the real
299 then_trap for the backtracking path. */
300 struct then_trap_backtrack *then_trap;
301 /* Points to the starting opcode. */
302 sljit_sw start;
303 /* Exit point for the then opcodes of this alternative. */
304 jump_list *quit;
305 /* Frame size of the current alternative. */
306 int framesize;
307 } then_trap_backtrack;
308
309 #define MAX_RANGE_SIZE 4
310
311 typedef struct compiler_common {
312 /* The sljit ceneric compiler. */
313 struct sljit_compiler *compiler;
314 /* First byte code. */
315 pcre_uchar *start;
316 /* Maps private data offset to each opcode. */
317 sljit_si *private_data_ptrs;
318 /* Tells whether the capturing bracket is optimized. */
319 pcre_uint8 *optimized_cbracket;
320 /* Tells whether the starting offset is a target of then. */
321 pcre_uint8 *then_offsets;
322 /* Current position where a THEN must jump. */
323 then_trap_backtrack *then_trap;
324 /* Starting offset of private data for capturing brackets. */
325 int cbra_ptr;
326 /* Output vector starting point. Must be divisible by 2. */
327 int ovector_start;
328 /* Last known position of the requested byte. */
329 int req_char_ptr;
330 /* Head of the last recursion. */
331 int recursive_head_ptr;
332 /* First inspected character for partial matching. */
333 int start_used_ptr;
334 /* Starting pointer for partial soft matches. */
335 int hit_start;
336 /* End pointer of the first line. */
337 int first_line_end;
338 /* Points to the marked string. */
339 int mark_ptr;
340 /* Recursive control verb management chain. */
341 int control_head_ptr;
342 /* Points to the last matched capture block index. */
343 int capture_last_ptr;
344 /* Points to the starting position of the current match. */
345 int start_ptr;
346
347 /* Flipped and lower case tables. */
348 const pcre_uint8 *fcc;
349 sljit_sw lcc;
350 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351 int mode;
352 /* \K is found in the pattern. */
353 BOOL has_set_som;
354 /* (*SKIP:arg) is found in the pattern. */
355 BOOL has_skip_arg;
356 /* (*THEN) is found in the pattern. */
357 BOOL has_then;
358 /* Needs to know the start position anytime. */
359 BOOL needs_start_ptr;
360 /* Currently in recurse or negative assert. */
361 BOOL local_exit;
362 /* Currently in a positive assert. */
363 BOOL positive_assert;
364 /* Newline control. */
365 int nltype;
366 int newline;
367 int bsr_nltype;
368 /* Dollar endonly. */
369 int endonly;
370 /* Tables. */
371 sljit_sw ctypes;
372 /* Named capturing brackets. */
373 pcre_uchar *name_table;
374 sljit_sw name_count;
375 sljit_sw name_entry_size;
376
377 /* Labels and jump lists. */
378 struct sljit_label *partialmatchlabel;
379 struct sljit_label *quit_label;
380 struct sljit_label *forced_quit_label;
381 struct sljit_label *accept_label;
382 stub_list *stubs;
383 recurse_entry *entries;
384 recurse_entry *currententry;
385 jump_list *partialmatch;
386 jump_list *quit;
387 jump_list *positive_assert_quit;
388 jump_list *forced_quit;
389 jump_list *accept;
390 jump_list *calllimit;
391 jump_list *stackalloc;
392 jump_list *revertframes;
393 jump_list *wordboundary;
394 jump_list *anynewline;
395 jump_list *hspace;
396 jump_list *vspace;
397 jump_list *casefulcmp;
398 jump_list *caselesscmp;
399 jump_list *reset_match;
400 BOOL jscript_compat;
401 #ifdef SUPPORT_UTF
402 BOOL utf;
403 #ifdef SUPPORT_UCP
404 BOOL use_ucp;
405 #endif
406 #ifndef COMPILE_PCRE32
407 jump_list *utfreadchar;
408 #endif
409 #ifdef COMPILE_PCRE8
410 jump_list *utfreadchar8;
411 jump_list *utfreadtype8;
412 #endif
413 #endif /* SUPPORT_UTF */
414 #ifdef SUPPORT_UCP
415 jump_list *getucd;
416 #endif
417 } compiler_common;
418
/* Context for byte_sequence_compare. When SLJIT_UNALIGNED is enabled, the
two unions (c and oc) give integer, 16-bit and per-character views of the
same storage; the character array always spans sizeof(sljit_si) bytes. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
452
453 /* Undefine sljit macros. */
454 #undef CMP
455
/* Byte offset of the i-th stack element: element 0 lives at
   -sizeof(sljit_sw), and each further element is one word lower. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))
458
/* Symbolic names for the sljit registers used by the code generator. */
#define TMP1          SLJIT_SCRATCH_REG1
#define TMP2          SLJIT_SCRATCH_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_SCRATCH_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define COUNT_MATCH   SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
469
/* Local space layout, expressed as byte offsets in units of sljit_sw. */
/* Words 0 and 1: free for use by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Words 2 and 3: possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Word 4: max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second holds
the start pointers while the end of the capturing group has not yet been
reached. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
487
488 #if defined COMPILE_PCRE8
489 #define MOV_UCHAR SLJIT_MOV_UB
490 #define MOVU_UCHAR SLJIT_MOVU_UB
491 #elif defined COMPILE_PCRE16
492 #define MOV_UCHAR SLJIT_MOV_UH
493 #define MOVU_UCHAR SLJIT_MOVU_UH
494 #elif defined COMPILE_PCRE32
495 #define MOV_UCHAR SLJIT_MOV_UI
496 #define MOVU_UCHAR SLJIT_MOVU_UI
497 #else
498 #error Unsupported compiling mode
499 #endif
500
/* Shortcuts for the sljit emitter calls. Every macro except DEFINE_COMPILER
   and SET_LABEL expects a local variable named "compiler" to be in scope;
   DEFINE_COMPILER declares it from common->compiler. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler

/* Instruction emitters. */
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))

/* Labels and jumps. */
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))

/* Compare-and-jump. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
526
527 static pcre_uchar* bracketend(pcre_uchar* cc)
528 {
529 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
530 do cc += GET(cc, 1); while (*cc == OP_ALT);
531 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
532 cc += 1 + LINK_SIZE;
533 return cc;
534 }
535
/* Number of set bits in each 4-bit value: entry i is the population count
of i. The table is a pure lookup table, so it is declared const to keep it
in read-only storage and to reject accidental writes at compile time. */
static const int ones_in_half_byte[16] = {
  /* 0 */ 0, 1, 1, 2, /* 4 */ 1, 2, 2, 3,
  /* 8 */ 1, 2, 2, 3, /* 12 */ 2, 3, 3, 4
};
540
541 /* Functions which might need modification for all new supported opcodes:
542 next_opcode
543 check_opcode_types
544 set_private_data_ptrs
545 get_framesize
546 init_frame
547 get_private_data_copy_length
548 copy_private_data
549 compile_matchingpath
550 compile_backtrackingpath
551 */
552
553 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
554 {
555 SLJIT_UNUSED_ARG(common);
556 switch(*cc)
557 {
558 case OP_SOD:
559 case OP_SOM:
560 case OP_SET_SOM:
561 case OP_NOT_WORD_BOUNDARY:
562 case OP_WORD_BOUNDARY:
563 case OP_NOT_DIGIT:
564 case OP_DIGIT:
565 case OP_NOT_WHITESPACE:
566 case OP_WHITESPACE:
567 case OP_NOT_WORDCHAR:
568 case OP_WORDCHAR:
569 case OP_ANY:
570 case OP_ALLANY:
571 case OP_NOTPROP:
572 case OP_PROP:
573 case OP_ANYNL:
574 case OP_NOT_HSPACE:
575 case OP_HSPACE:
576 case OP_NOT_VSPACE:
577 case OP_VSPACE:
578 case OP_EXTUNI:
579 case OP_EODN:
580 case OP_EOD:
581 case OP_CIRC:
582 case OP_CIRCM:
583 case OP_DOLL:
584 case OP_DOLLM:
585 case OP_CRSTAR:
586 case OP_CRMINSTAR:
587 case OP_CRPLUS:
588 case OP_CRMINPLUS:
589 case OP_CRQUERY:
590 case OP_CRMINQUERY:
591 case OP_CRRANGE:
592 case OP_CRMINRANGE:
593 case OP_CRPOSSTAR:
594 case OP_CRPOSPLUS:
595 case OP_CRPOSQUERY:
596 case OP_CRPOSRANGE:
597 case OP_CLASS:
598 case OP_NCLASS:
599 case OP_REF:
600 case OP_REFI:
601 case OP_DNREF:
602 case OP_DNREFI:
603 case OP_RECURSE:
604 case OP_CALLOUT:
605 case OP_ALT:
606 case OP_KET:
607 case OP_KETRMAX:
608 case OP_KETRMIN:
609 case OP_KETRPOS:
610 case OP_REVERSE:
611 case OP_ASSERT:
612 case OP_ASSERT_NOT:
613 case OP_ASSERTBACK:
614 case OP_ASSERTBACK_NOT:
615 case OP_ONCE:
616 case OP_ONCE_NC:
617 case OP_BRA:
618 case OP_BRAPOS:
619 case OP_CBRA:
620 case OP_CBRAPOS:
621 case OP_COND:
622 case OP_SBRA:
623 case OP_SBRAPOS:
624 case OP_SCBRA:
625 case OP_SCBRAPOS:
626 case OP_SCOND:
627 case OP_CREF:
628 case OP_DNCREF:
629 case OP_RREF:
630 case OP_DNRREF:
631 case OP_DEF:
632 case OP_BRAZERO:
633 case OP_BRAMINZERO:
634 case OP_BRAPOSZERO:
635 case OP_PRUNE:
636 case OP_SKIP:
637 case OP_THEN:
638 case OP_COMMIT:
639 case OP_FAIL:
640 case OP_ACCEPT:
641 case OP_ASSERT_ACCEPT:
642 case OP_CLOSE:
643 case OP_SKIPZERO:
644 return cc + PRIV(OP_lengths)[*cc];
645
646 case OP_CHAR:
647 case OP_CHARI:
648 case OP_NOT:
649 case OP_NOTI:
650 case OP_STAR:
651 case OP_MINSTAR:
652 case OP_PLUS:
653 case OP_MINPLUS:
654 case OP_QUERY:
655 case OP_MINQUERY:
656 case OP_UPTO:
657 case OP_MINUPTO:
658 case OP_EXACT:
659 case OP_POSSTAR:
660 case OP_POSPLUS:
661 case OP_POSQUERY:
662 case OP_POSUPTO:
663 case OP_STARI:
664 case OP_MINSTARI:
665 case OP_PLUSI:
666 case OP_MINPLUSI:
667 case OP_QUERYI:
668 case OP_MINQUERYI:
669 case OP_UPTOI:
670 case OP_MINUPTOI:
671 case OP_EXACTI:
672 case OP_POSSTARI:
673 case OP_POSPLUSI:
674 case OP_POSQUERYI:
675 case OP_POSUPTOI:
676 case OP_NOTSTAR:
677 case OP_NOTMINSTAR:
678 case OP_NOTPLUS:
679 case OP_NOTMINPLUS:
680 case OP_NOTQUERY:
681 case OP_NOTMINQUERY:
682 case OP_NOTUPTO:
683 case OP_NOTMINUPTO:
684 case OP_NOTEXACT:
685 case OP_NOTPOSSTAR:
686 case OP_NOTPOSPLUS:
687 case OP_NOTPOSQUERY:
688 case OP_NOTPOSUPTO:
689 case OP_NOTSTARI:
690 case OP_NOTMINSTARI:
691 case OP_NOTPLUSI:
692 case OP_NOTMINPLUSI:
693 case OP_NOTQUERYI:
694 case OP_NOTMINQUERYI:
695 case OP_NOTUPTOI:
696 case OP_NOTMINUPTOI:
697 case OP_NOTEXACTI:
698 case OP_NOTPOSSTARI:
699 case OP_NOTPOSPLUSI:
700 case OP_NOTPOSQUERYI:
701 case OP_NOTPOSUPTOI:
702 cc += PRIV(OP_lengths)[*cc];
703 #ifdef SUPPORT_UTF
704 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
705 #endif
706 return cc;
707
708 /* Special cases. */
709 case OP_TYPESTAR:
710 case OP_TYPEMINSTAR:
711 case OP_TYPEPLUS:
712 case OP_TYPEMINPLUS:
713 case OP_TYPEQUERY:
714 case OP_TYPEMINQUERY:
715 case OP_TYPEUPTO:
716 case OP_TYPEMINUPTO:
717 case OP_TYPEEXACT:
718 case OP_TYPEPOSSTAR:
719 case OP_TYPEPOSPLUS:
720 case OP_TYPEPOSQUERY:
721 case OP_TYPEPOSUPTO:
722 return cc + PRIV(OP_lengths)[*cc] - 1;
723
724 case OP_ANYBYTE:
725 #ifdef SUPPORT_UTF
726 if (common->utf) return NULL;
727 #endif
728 return cc + 1;
729
730 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
731 case OP_XCLASS:
732 return cc + GET(cc, 1);
733 #endif
734
735 case OP_MARK:
736 case OP_PRUNE_ARG:
737 case OP_SKIP_ARG:
738 case OP_THEN_ARG:
739 return cc + 1 + 2 + cc[1];
740
741 default:
742 /* All opcodes are supported now! */
743 SLJIT_ASSERT_STOP();
744 return NULL;
745 }
746 }
747
748 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
749 {
750 int count;
751 pcre_uchar *slot;
752
753 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
754 while (cc < ccend)
755 {
756 switch(*cc)
757 {
758 case OP_SET_SOM:
759 common->has_set_som = TRUE;
760 cc += 1;
761 break;
762
763 case OP_REF:
764 case OP_REFI:
765 common->optimized_cbracket[GET2(cc, 1)] = 0;
766 cc += 1 + IMM2_SIZE;
767 break;
768
769 case OP_CBRAPOS:
770 case OP_SCBRAPOS:
771 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
772 cc += 1 + LINK_SIZE + IMM2_SIZE;
773 break;
774
775 case OP_COND:
776 case OP_SCOND:
777 /* Only AUTO_CALLOUT can insert this opcode. We do
778 not intend to support this case. */
779 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
780 return FALSE;
781 cc += 1 + LINK_SIZE;
782 break;
783
784 case OP_CREF:
785 common->optimized_cbracket[GET2(cc, 1)] = 0;
786 cc += 1 + IMM2_SIZE;
787 break;
788
789 case OP_DNREF:
790 case OP_DNREFI:
791 case OP_DNCREF:
792 count = GET2(cc, 1 + IMM2_SIZE);
793 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
794 while (count-- > 0)
795 {
796 common->optimized_cbracket[GET2(slot, 0)] = 0;
797 slot += common->name_entry_size;
798 }
799 cc += 1 + 2 * IMM2_SIZE;
800 break;
801
802 case OP_RECURSE:
803 /* Set its value only once. */
804 if (common->recursive_head_ptr == 0)
805 {
806 common->recursive_head_ptr = common->ovector_start;
807 common->ovector_start += sizeof(sljit_sw);
808 }
809 cc += 1 + LINK_SIZE;
810 break;
811
812 case OP_CALLOUT:
813 if (common->capture_last_ptr == 0)
814 {
815 common->capture_last_ptr = common->ovector_start;
816 common->ovector_start += sizeof(sljit_sw);
817 }
818 cc += 2 + 2 * LINK_SIZE;
819 break;
820
821 case OP_THEN_ARG:
822 common->has_then = TRUE;
823 common->control_head_ptr = 1;
824 /* Fall through. */
825
826 case OP_PRUNE_ARG:
827 common->needs_start_ptr = TRUE;
828 /* Fall through. */
829
830 case OP_MARK:
831 if (common->mark_ptr == 0)
832 {
833 common->mark_ptr = common->ovector_start;
834 common->ovector_start += sizeof(sljit_sw);
835 }
836 cc += 1 + 2 + cc[1];
837 break;
838
839 case OP_THEN:
840 common->has_then = TRUE;
841 common->control_head_ptr = 1;
842 /* Fall through. */
843
844 case OP_PRUNE:
845 case OP_SKIP:
846 common->needs_start_ptr = TRUE;
847 cc += 1;
848 break;
849
850 case OP_SKIP_ARG:
851 common->control_head_ptr = 1;
852 common->has_skip_arg = TRUE;
853 cc += 1 + 2 + cc[1];
854 break;
855
856 default:
857 cc = next_opcode(common, cc);
858 if (cc == NULL)
859 return FALSE;
860 break;
861 }
862 }
863 return TRUE;
864 }
865
866 static int get_class_iterator_size(pcre_uchar *cc)
867 {
868 switch(*cc)
869 {
870 case OP_CRSTAR:
871 case OP_CRPLUS:
872 return 2;
873
874 case OP_CRMINSTAR:
875 case OP_CRMINPLUS:
876 case OP_CRQUERY:
877 case OP_CRMINQUERY:
878 return 1;
879
880 case OP_CRRANGE:
881 case OP_CRMINRANGE:
882 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
883 return 0;
884 return 2;
885
886 default:
887 return 0;
888 }
889 }
890
891 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
892 {
893 pcre_uchar *end = bracketend(begin);
894 pcre_uchar *next;
895 pcre_uchar *next_end;
896 pcre_uchar *max_end;
897 pcre_uchar type;
898 sljit_sw length = end - begin;
899 int min, max, i;
900
901 /* Detect fixed iterations first. */
902 if (end[-(1 + LINK_SIZE)] != OP_KET)
903 return FALSE;
904
905 /* Already detected repeat. */
906 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
907 return TRUE;
908
909 next = end;
910 min = 1;
911 while (1)
912 {
913 if (*next != *begin)
914 break;
915 next_end = bracketend(next);
916 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
917 break;
918 next = next_end;
919 min++;
920 }
921
922 if (min == 2)
923 return FALSE;
924
925 max = 0;
926 max_end = next;
927 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
928 {
929 type = *next;
930 while (1)
931 {
932 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
933 break;
934 next_end = bracketend(next + 2 + LINK_SIZE);
935 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
936 break;
937 next = next_end;
938 max++;
939 }
940
941 if (next[0] == type && next[1] == *begin && max >= 1)
942 {
943 next_end = bracketend(next + 1);
944 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
945 {
946 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
947 if (*next_end != OP_KET)
948 break;
949
950 if (i == max)
951 {
952 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
953 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
954 /* +2 the original and the last. */
955 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
956 if (min == 1)
957 return TRUE;
958 min--;
959 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
960 }
961 }
962 }
963 }
964
965 if (min >= 3)
966 {
967 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
968 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
969 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
970 return TRUE;
971 }
972
973 return FALSE;
974 }
975
/* Case label groups for switch statements over iterator opcodes. Each
   macro expands to a run of "case X:" labels and is followed by the
   statements of the group at the point of use. */

/* Single character iterators: minimizing star / plus and both queries. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: \
    case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: \
    case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
    case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Single character iterators: greedy star / plus. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Single character iterators: upto forms. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Character type iterators: minimizing star / plus and both queries. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Character type iterators: greedy star / plus. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Character type iterators: upto forms. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1027
1028 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1029 {
1030 pcre_uchar *cc = common->start;
1031 pcre_uchar *alternative;
1032 pcre_uchar *end = NULL;
1033 int private_data_ptr = *private_data_start;
1034 int space, size, bracketlen;
1035
1036 while (cc < ccend)
1037 {
1038 space = 0;
1039 size = 0;
1040 bracketlen = 0;
1041 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1042 return;
1043
1044 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1045 if (detect_repeat(common, cc))
1046 {
1047 /* These brackets are converted to repeats, so no global
1048 based single character repeat is allowed. */
1049 if (cc >= end)
1050 end = bracketend(cc);
1051 }
1052
1053 switch(*cc)
1054 {
1055 case OP_KET:
1056 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1057 {
1058 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1059 private_data_ptr += sizeof(sljit_sw);
1060 cc += common->private_data_ptrs[cc + 1 - common->start];
1061 }
1062 cc += 1 + LINK_SIZE;
1063 break;
1064
1065 case OP_ASSERT:
1066 case OP_ASSERT_NOT:
1067 case OP_ASSERTBACK:
1068 case OP_ASSERTBACK_NOT:
1069 case OP_ONCE:
1070 case OP_ONCE_NC:
1071 case OP_BRAPOS:
1072 case OP_SBRA:
1073 case OP_SBRAPOS:
1074 case OP_SCOND:
1075 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1076 private_data_ptr += sizeof(sljit_sw);
1077 bracketlen = 1 + LINK_SIZE;
1078 break;
1079
1080 case OP_CBRAPOS:
1081 case OP_SCBRAPOS:
1082 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1083 private_data_ptr += sizeof(sljit_sw);
1084 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1085 break;
1086
1087 case OP_COND:
1088 /* Might be a hidden SCOND. */
1089 alternative = cc + GET(cc, 1);
1090 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1091 {
1092 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1093 private_data_ptr += sizeof(sljit_sw);
1094 }
1095 bracketlen = 1 + LINK_SIZE;
1096 break;
1097
1098 case OP_BRA:
1099 bracketlen = 1 + LINK_SIZE;
1100 break;
1101
1102 case OP_CBRA:
1103 case OP_SCBRA:
1104 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1105 break;
1106
1107 CASE_ITERATOR_PRIVATE_DATA_1
1108 space = 1;
1109 size = -2;
1110 break;
1111
1112 CASE_ITERATOR_PRIVATE_DATA_2A
1113 space = 2;
1114 size = -2;
1115 break;
1116
1117 CASE_ITERATOR_PRIVATE_DATA_2B
1118 space = 2;
1119 size = -(2 + IMM2_SIZE);
1120 break;
1121
1122 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1123 space = 1;
1124 size = 1;
1125 break;
1126
1127 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1128 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1129 space = 2;
1130 size = 1;
1131 break;
1132
1133 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1134 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1135 space = 2;
1136 size = 1 + IMM2_SIZE;
1137 break;
1138
1139 case OP_CLASS:
1140 case OP_NCLASS:
1141 size += 1 + 32 / sizeof(pcre_uchar);
1142 space = get_class_iterator_size(cc + size);
1143 break;
1144
1145 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1146 case OP_XCLASS:
1147 size = GET(cc, 1);
1148 space = get_class_iterator_size(cc + size);
1149 break;
1150 #endif
1151
1152 default:
1153 cc = next_opcode(common, cc);
1154 SLJIT_ASSERT(cc != NULL);
1155 break;
1156 }
1157
1158 /* Character iterators, which are not inside a repeated bracket,
1159 gets a private slot instead of allocating it on the stack. */
1160 if (space > 0 && cc >= end)
1161 {
1162 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1163 private_data_ptr += sizeof(sljit_sw) * space;
1164 }
1165
1166 if (size != 0)
1167 {
1168 if (size < 0)
1169 {
1170 cc += -size;
1171 #ifdef SUPPORT_UTF
1172 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1173 #endif
1174 }
1175 else
1176 cc += size;
1177 }
1178
1179 if (bracketlen > 0)
1180 {
1181 if (cc >= end)
1182 {
1183 end = bracketend(cc);
1184 if (end[-1 - LINK_SIZE] == OP_KET)
1185 end = NULL;
1186 }
1187 cc += bracketlen;
1188 }
1189 }
1190 *private_data_start = private_data_ptr;
1191 }
1192
1193 /* Returns with a frame_types (always < 0) if no need for frame. */
1194 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1195 {
1196 int length = 0;
1197 int possessive = 0;
1198 BOOL stack_restore = FALSE;
1199 BOOL setsom_found = recursive;
1200 BOOL setmark_found = recursive;
1201 /* The last capture is a local variable even for recursions. */
1202 BOOL capture_last_found = FALSE;
1203
1204 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1205 SLJIT_ASSERT(common->control_head_ptr != 0);
1206 *needs_control_head = TRUE;
1207 #else
1208 *needs_control_head = FALSE;
1209 #endif
1210
1211 if (ccend == NULL)
1212 {
1213 ccend = bracketend(cc) - (1 + LINK_SIZE);
1214 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1215 {
1216 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1217 /* This is correct regardless of common->capture_last_ptr. */
1218 capture_last_found = TRUE;
1219 }
1220 cc = next_opcode(common, cc);
1221 }
1222
1223 SLJIT_ASSERT(cc != NULL);
1224 while (cc < ccend)
1225 switch(*cc)
1226 {
1227 case OP_SET_SOM:
1228 SLJIT_ASSERT(common->has_set_som);
1229 stack_restore = TRUE;
1230 if (!setsom_found)
1231 {
1232 length += 2;
1233 setsom_found = TRUE;
1234 }
1235 cc += 1;
1236 break;
1237
1238 case OP_MARK:
1239 case OP_PRUNE_ARG:
1240 case OP_THEN_ARG:
1241 SLJIT_ASSERT(common->mark_ptr != 0);
1242 stack_restore = TRUE;
1243 if (!setmark_found)
1244 {
1245 length += 2;
1246 setmark_found = TRUE;
1247 }
1248 if (common->control_head_ptr != 0)
1249 *needs_control_head = TRUE;
1250 cc += 1 + 2 + cc[1];
1251 break;
1252
1253 case OP_RECURSE:
1254 stack_restore = TRUE;
1255 if (common->has_set_som && !setsom_found)
1256 {
1257 length += 2;
1258 setsom_found = TRUE;
1259 }
1260 if (common->mark_ptr != 0 && !setmark_found)
1261 {
1262 length += 2;
1263 setmark_found = TRUE;
1264 }
1265 if (common->capture_last_ptr != 0 && !capture_last_found)
1266 {
1267 length += 2;
1268 capture_last_found = TRUE;
1269 }
1270 cc += 1 + LINK_SIZE;
1271 break;
1272
1273 case OP_CBRA:
1274 case OP_CBRAPOS:
1275 case OP_SCBRA:
1276 case OP_SCBRAPOS:
1277 stack_restore = TRUE;
1278 if (common->capture_last_ptr != 0 && !capture_last_found)
1279 {
1280 length += 2;
1281 capture_last_found = TRUE;
1282 }
1283 length += 3;
1284 cc += 1 + LINK_SIZE + IMM2_SIZE;
1285 break;
1286
1287 default:
1288 stack_restore = TRUE;
1289 /* Fall through. */
1290
1291 case OP_NOT_WORD_BOUNDARY:
1292 case OP_WORD_BOUNDARY:
1293 case OP_NOT_DIGIT:
1294 case OP_DIGIT:
1295 case OP_NOT_WHITESPACE:
1296 case OP_WHITESPACE:
1297 case OP_NOT_WORDCHAR:
1298 case OP_WORDCHAR:
1299 case OP_ANY:
1300 case OP_ALLANY:
1301 case OP_ANYBYTE:
1302 case OP_NOTPROP:
1303 case OP_PROP:
1304 case OP_ANYNL:
1305 case OP_NOT_HSPACE:
1306 case OP_HSPACE:
1307 case OP_NOT_VSPACE:
1308 case OP_VSPACE:
1309 case OP_EXTUNI:
1310 case OP_EODN:
1311 case OP_EOD:
1312 case OP_CIRC:
1313 case OP_CIRCM:
1314 case OP_DOLL:
1315 case OP_DOLLM:
1316 case OP_CHAR:
1317 case OP_CHARI:
1318 case OP_NOT:
1319 case OP_NOTI:
1320
1321 case OP_EXACT:
1322 case OP_POSSTAR:
1323 case OP_POSPLUS:
1324 case OP_POSQUERY:
1325 case OP_POSUPTO:
1326
1327 case OP_EXACTI:
1328 case OP_POSSTARI:
1329 case OP_POSPLUSI:
1330 case OP_POSQUERYI:
1331 case OP_POSUPTOI:
1332
1333 case OP_NOTEXACT:
1334 case OP_NOTPOSSTAR:
1335 case OP_NOTPOSPLUS:
1336 case OP_NOTPOSQUERY:
1337 case OP_NOTPOSUPTO:
1338
1339 case OP_NOTEXACTI:
1340 case OP_NOTPOSSTARI:
1341 case OP_NOTPOSPLUSI:
1342 case OP_NOTPOSQUERYI:
1343 case OP_NOTPOSUPTOI:
1344
1345 case OP_TYPEEXACT:
1346 case OP_TYPEPOSSTAR:
1347 case OP_TYPEPOSPLUS:
1348 case OP_TYPEPOSQUERY:
1349 case OP_TYPEPOSUPTO:
1350
1351 case OP_CLASS:
1352 case OP_NCLASS:
1353 case OP_XCLASS:
1354
1355 cc = next_opcode(common, cc);
1356 SLJIT_ASSERT(cc != NULL);
1357 break;
1358 }
1359
1360 /* Possessive quantifiers can use a special case. */
1361 if (SLJIT_UNLIKELY(possessive == length))
1362 return stack_restore ? no_frame : no_stack;
1363
1364 if (length > 0)
1365 return length + 1;
1366 return stack_restore ? no_frame : no_stack;
1367 }
1368
1369 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1370 {
1371 DEFINE_COMPILER;
1372 BOOL setsom_found = recursive;
1373 BOOL setmark_found = recursive;
1374 /* The last capture is a local variable even for recursions. */
1375 BOOL capture_last_found = FALSE;
1376 int offset;
1377
1378 /* >= 1 + shortest item size (2) */
1379 SLJIT_UNUSED_ARG(stacktop);
1380 SLJIT_ASSERT(stackpos >= stacktop + 2);
1381
1382 stackpos = STACK(stackpos);
1383 if (ccend == NULL)
1384 {
1385 ccend = bracketend(cc) - (1 + LINK_SIZE);
1386 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1387 cc = next_opcode(common, cc);
1388 }
1389
1390 SLJIT_ASSERT(cc != NULL);
1391 while (cc < ccend)
1392 switch(*cc)
1393 {
1394 case OP_SET_SOM:
1395 SLJIT_ASSERT(common->has_set_som);
1396 if (!setsom_found)
1397 {
1398 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1399 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1400 stackpos += (int)sizeof(sljit_sw);
1401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1402 stackpos += (int)sizeof(sljit_sw);
1403 setsom_found = TRUE;
1404 }
1405 cc += 1;
1406 break;
1407
1408 case OP_MARK:
1409 case OP_PRUNE_ARG:
1410 case OP_THEN_ARG:
1411 SLJIT_ASSERT(common->mark_ptr != 0);
1412 if (!setmark_found)
1413 {
1414 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1416 stackpos += (int)sizeof(sljit_sw);
1417 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1418 stackpos += (int)sizeof(sljit_sw);
1419 setmark_found = TRUE;
1420 }
1421 cc += 1 + 2 + cc[1];
1422 break;
1423
1424 case OP_RECURSE:
1425 if (common->has_set_som && !setsom_found)
1426 {
1427 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1429 stackpos += (int)sizeof(sljit_sw);
1430 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1431 stackpos += (int)sizeof(sljit_sw);
1432 setsom_found = TRUE;
1433 }
1434 if (common->mark_ptr != 0 && !setmark_found)
1435 {
1436 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1437 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1438 stackpos += (int)sizeof(sljit_sw);
1439 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1440 stackpos += (int)sizeof(sljit_sw);
1441 setmark_found = TRUE;
1442 }
1443 if (common->capture_last_ptr != 0 && !capture_last_found)
1444 {
1445 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1446 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1447 stackpos += (int)sizeof(sljit_sw);
1448 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1449 stackpos += (int)sizeof(sljit_sw);
1450 capture_last_found = TRUE;
1451 }
1452 cc += 1 + LINK_SIZE;
1453 break;
1454
1455 case OP_CBRA:
1456 case OP_CBRAPOS:
1457 case OP_SCBRA:
1458 case OP_SCBRAPOS:
1459 if (common->capture_last_ptr != 0 && !capture_last_found)
1460 {
1461 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1463 stackpos += (int)sizeof(sljit_sw);
1464 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1465 stackpos += (int)sizeof(sljit_sw);
1466 capture_last_found = TRUE;
1467 }
1468 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1469 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1470 stackpos += (int)sizeof(sljit_sw);
1471 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1472 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1474 stackpos += (int)sizeof(sljit_sw);
1475 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1476 stackpos += (int)sizeof(sljit_sw);
1477
1478 cc += 1 + LINK_SIZE + IMM2_SIZE;
1479 break;
1480
1481 default:
1482 cc = next_opcode(common, cc);
1483 SLJIT_ASSERT(cc != NULL);
1484 break;
1485 }
1486
1487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1488 SLJIT_ASSERT(stackpos == STACK(stacktop));
1489 }
1490
1491 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1492 {
1493 int private_data_length = needs_control_head ? 3 : 2;
1494 int size;
1495 pcre_uchar *alternative;
1496 /* Calculate the sum of the private machine words. */
1497 while (cc < ccend)
1498 {
1499 size = 0;
1500 switch(*cc)
1501 {
1502 case OP_KET:
1503 if (PRIVATE_DATA(cc) != 0)
1504 private_data_length++;
1505 cc += 1 + LINK_SIZE;
1506 break;
1507
1508 case OP_ASSERT:
1509 case OP_ASSERT_NOT:
1510 case OP_ASSERTBACK:
1511 case OP_ASSERTBACK_NOT:
1512 case OP_ONCE:
1513 case OP_ONCE_NC:
1514 case OP_BRAPOS:
1515 case OP_SBRA:
1516 case OP_SBRAPOS:
1517 case OP_SCOND:
1518 private_data_length++;
1519 cc += 1 + LINK_SIZE;
1520 break;
1521
1522 case OP_CBRA:
1523 case OP_SCBRA:
1524 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1525 private_data_length++;
1526 cc += 1 + LINK_SIZE + IMM2_SIZE;
1527 break;
1528
1529 case OP_CBRAPOS:
1530 case OP_SCBRAPOS:
1531 private_data_length += 2;
1532 cc += 1 + LINK_SIZE + IMM2_SIZE;
1533 break;
1534
1535 case OP_COND:
1536 /* Might be a hidden SCOND. */
1537 alternative = cc + GET(cc, 1);
1538 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1539 private_data_length++;
1540 cc += 1 + LINK_SIZE;
1541 break;
1542
1543 CASE_ITERATOR_PRIVATE_DATA_1
1544 if (PRIVATE_DATA(cc))
1545 private_data_length++;
1546 cc += 2;
1547 #ifdef SUPPORT_UTF
1548 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1549 #endif
1550 break;
1551
1552 CASE_ITERATOR_PRIVATE_DATA_2A
1553 if (PRIVATE_DATA(cc))
1554 private_data_length += 2;
1555 cc += 2;
1556 #ifdef SUPPORT_UTF
1557 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1558 #endif
1559 break;
1560
1561 CASE_ITERATOR_PRIVATE_DATA_2B
1562 if (PRIVATE_DATA(cc))
1563 private_data_length += 2;
1564 cc += 2 + IMM2_SIZE;
1565 #ifdef SUPPORT_UTF
1566 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1567 #endif
1568 break;
1569
1570 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1571 if (PRIVATE_DATA(cc))
1572 private_data_length++;
1573 cc += 1;
1574 break;
1575
1576 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1577 if (PRIVATE_DATA(cc))
1578 private_data_length += 2;
1579 cc += 1;
1580 break;
1581
1582 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1583 if (PRIVATE_DATA(cc))
1584 private_data_length += 2;
1585 cc += 1 + IMM2_SIZE;
1586 break;
1587
1588 case OP_CLASS:
1589 case OP_NCLASS:
1590 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1591 case OP_XCLASS:
1592 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1593 #else
1594 size = 1 + 32 / (int)sizeof(pcre_uchar);
1595 #endif
1596 if (PRIVATE_DATA(cc))
1597 private_data_length += get_class_iterator_size(cc + size);
1598 cc += size;
1599 break;
1600
1601 default:
1602 cc = next_opcode(common, cc);
1603 SLJIT_ASSERT(cc != NULL);
1604 break;
1605 }
1606 }
1607 SLJIT_ASSERT(cc == ccend);
1608 return private_data_length;
1609 }
1610
/* Emits the code which transfers the private data locals of the byte code
range [cc, ccend) between the local area and the stack.

Arguments:
  common              compiler state
  cc, ccend           the scanned byte code range
  save                TRUE: locals are stored onto the stack
                      FALSE: locals are reloaded from the stack
  stackptr, stacktop  stack slot indices, converted to offsets by STACK()
  needs_control_head  when set, common->control_head_ptr is saved as well

The values travel through TMP1 and TMP2 alternately (tmp1next selects the
register used next), so a register is only written to its destination when
the following value needs that register. In restore mode the first one or
two stack words (the saved heads) are skipped, they are not reloaded here. */
1611 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1612 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1613 {
1614 DEFINE_COMPILER;
/* Offsets of the locals collected from the current opcode (at most two). */
1615 int srcw[2];
1616 int count, size;
1617 BOOL tmp1next = TRUE;
1618 BOOL tmp1empty = TRUE;
1619 BOOL tmp2empty = TRUE;
1620 pcre_uchar *alternative;
/* start: queue the head pointers (save mode only), loop: scan the byte
code, end: everything has been processed. */
1621 enum {
1622 start,
1623 loop,
1624 end
1625 } status;
1626
1627 status = save ? start : loop;
1628 stackptr = STACK(stackptr - 2);
1629 stacktop = STACK(stacktop - 1);
1630
1631 if (!save)
1632 {
/* Step over the saved head words, then preload both registers. */
1633 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1634 if (stackptr < stacktop)
1635 {
1636 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1637 stackptr += sizeof(sljit_sw);
1638 tmp1empty = FALSE;
1639 }
1640 if (stackptr < stacktop)
1641 {
1642 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1643 stackptr += sizeof(sljit_sw);
1644 tmp2empty = FALSE;
1645 }
1646 /* The tmp1next must be TRUE in either way. */
1647 }
1648
1649 do
1650 {
/* Each iteration collects 'count' local offsets into srcw, which are
transferred by the inner while loop below. */
1651 count = 0;
1652 switch(status)
1653 {
1654 case start:
1655 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1656 count = 1;
1657 srcw[0] = common->recursive_head_ptr;
1658 if (needs_control_head)
1659 {
1660 SLJIT_ASSERT(common->control_head_ptr != 0);
1661 count = 2;
1662 srcw[1] = common->control_head_ptr;
1663 }
1664 status = loop;
1665 break;
1666
1667 case loop:
1668 if (cc >= ccend)
1669 {
1670 status = end;
1671 break;
1672 }
1673
1674 switch(*cc)
1675 {
1676 case OP_KET:
1677 if (PRIVATE_DATA(cc) != 0)
1678 {
1679 count = 1;
1680 srcw[0] = PRIVATE_DATA(cc);
1681 }
1682 cc += 1 + LINK_SIZE;
1683 break;
1684
1685 case OP_ASSERT:
1686 case OP_ASSERT_NOT:
1687 case OP_ASSERTBACK:
1688 case OP_ASSERTBACK_NOT:
1689 case OP_ONCE:
1690 case OP_ONCE_NC:
1691 case OP_BRAPOS:
1692 case OP_SBRA:
1693 case OP_SBRAPOS:
1694 case OP_SCOND:
1695 count = 1;
1696 srcw[0] = PRIVATE_DATA(cc);
1697 SLJIT_ASSERT(srcw[0] != 0);
1698 cc += 1 + LINK_SIZE;
1699 break;
1700
1701 case OP_CBRA:
1702 case OP_SCBRA:
1703 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1704 {
1705 count = 1;
1706 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1707 }
1708 cc += 1 + LINK_SIZE + IMM2_SIZE;
1709 break;
1710
1711 case OP_CBRAPOS:
1712 case OP_SCBRAPOS:
1713 count = 2;
1714 srcw[0] = PRIVATE_DATA(cc);
1715 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1716 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1717 cc += 1 + LINK_SIZE + IMM2_SIZE;
1718 break;
1719
1720 case OP_COND:
1721 /* Might be a hidden SCOND. */
1722 alternative = cc + GET(cc, 1);
1723 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1724 {
1725 count = 1;
1726 srcw[0] = PRIVATE_DATA(cc);
1727 SLJIT_ASSERT(srcw[0] != 0);
1728 }
1729 cc += 1 + LINK_SIZE;
1730 break;
1731
1732 CASE_ITERATOR_PRIVATE_DATA_1
1733 if (PRIVATE_DATA(cc))
1734 {
1735 count = 1;
1736 srcw[0] = PRIVATE_DATA(cc);
1737 }
1738 cc += 2;
1739 #ifdef SUPPORT_UTF
1740 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1741 #endif
1742 break;
1743
1744 CASE_ITERATOR_PRIVATE_DATA_2A
1745 if (PRIVATE_DATA(cc))
1746 {
1747 count = 2;
1748 srcw[0] = PRIVATE_DATA(cc);
1749 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1750 }
1751 cc += 2;
1752 #ifdef SUPPORT_UTF
1753 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1754 #endif
1755 break;
1756
1757 CASE_ITERATOR_PRIVATE_DATA_2B
1758 if (PRIVATE_DATA(cc))
1759 {
1760 count = 2;
1761 srcw[0] = PRIVATE_DATA(cc);
1762 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1763 }
1764 cc += 2 + IMM2_SIZE;
1765 #ifdef SUPPORT_UTF
1766 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1767 #endif
1768 break;
1769
1770 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1771 if (PRIVATE_DATA(cc))
1772 {
1773 count = 1;
1774 srcw[0] = PRIVATE_DATA(cc);
1775 }
1776 cc += 1;
1777 break;
1778
1779 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1780 if (PRIVATE_DATA(cc))
1781 {
1782 count = 2;
1783 srcw[0] = PRIVATE_DATA(cc);
1784 srcw[1] = srcw[0] + sizeof(sljit_sw);
1785 }
1786 cc += 1;
1787 break;
1788
1789 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1790 if (PRIVATE_DATA(cc))
1791 {
1792 count = 2;
1793 srcw[0] = PRIVATE_DATA(cc);
1794 srcw[1] = srcw[0] + sizeof(sljit_sw);
1795 }
1796 cc += 1 + IMM2_SIZE;
1797 break;
1798
1799 case OP_CLASS:
1800 case OP_NCLASS:
1801 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1802 case OP_XCLASS:
1803 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1804 #else
1805 size = 1 + 32 / (int)sizeof(pcre_uchar);
1806 #endif
1807 if (PRIVATE_DATA(cc))
1808 switch(get_class_iterator_size(cc + size))
1809 {
1810 case 1:
1811 count = 1;
1812 srcw[0] = PRIVATE_DATA(cc);
1813 break;
1814
1815 case 2:
1816 count = 2;
1817 srcw[0] = PRIVATE_DATA(cc);
1818 srcw[1] = srcw[0] + sizeof(sljit_sw);
1819 break;
1820
1821 default:
1822 SLJIT_ASSERT_STOP();
1823 break;
1824 }
1825 cc += size;
1826 break;
1827
1828 default:
1829 cc = next_opcode(common, cc);
1830 SLJIT_ASSERT(cc != NULL);
1831 break;
1832 }
1833 break;
1834
1835 case end:
1836 SLJIT_ASSERT_STOP();
1837 break;
1838 }
1839
/* Transfer the collected locals, the highest srcw index first. */
1840 while (count > 0)
1841 {
1842 count--;
1843 if (save)
1844 {
/* Save: flush the previous content of the selected register to the
stack, then load the next local into it. */
1845 if (tmp1next)
1846 {
1847 if (!tmp1empty)
1848 {
1849 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1850 stackptr += sizeof(sljit_sw);
1851 }
1852 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1853 tmp1empty = FALSE;
1854 tmp1next = FALSE;
1855 }
1856 else
1857 {
1858 if (!tmp2empty)
1859 {
1860 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1861 stackptr += sizeof(sljit_sw);
1862 }
1863 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1864 tmp2empty = FALSE;
1865 tmp1next = TRUE;
1866 }
1867 }
1868 else
1869 {
/* Restore: store the selected register into the local, then refill
the register from the stack if any word is left. */
1870 if (tmp1next)
1871 {
1872 SLJIT_ASSERT(!tmp1empty);
1873 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1874 tmp1empty = stackptr >= stacktop;
1875 if (!tmp1empty)
1876 {
1877 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1878 stackptr += sizeof(sljit_sw);
1879 }
1880 tmp1next = FALSE;
1881 }
1882 else
1883 {
1884 SLJIT_ASSERT(!tmp2empty);
1885 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1886 tmp2empty = stackptr >= stacktop;
1887 if (!tmp2empty)
1888 {
1889 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1890 stackptr += sizeof(sljit_sw);
1891 }
1892 tmp1next = TRUE;
1893 }
1894 }
1895 }
1896 }
1897 while (status != end);
1898
1899 if (save)
1900 {
/* Flush the values still held in the registers. The register which would
be used next holds the older value, so it is written first. */
1901 if (tmp1next)
1902 {
1903 if (!tmp1empty)
1904 {
1905 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1906 stackptr += sizeof(sljit_sw);
1907 }
1908 if (!tmp2empty)
1909 {
1910 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1911 stackptr += sizeof(sljit_sw);
1912 }
1913 }
1914 else
1915 {
1916 if (!tmp2empty)
1917 {
1918 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1919 stackptr += sizeof(sljit_sw);
1920 }
1921 if (!tmp1empty)
1922 {
1923 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1924 stackptr += sizeof(sljit_sw);
1925 }
1926 }
1927 }
1928 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1929 }
1930
1931 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1932 {
1933 pcre_uchar *end = bracketend(cc);
1934 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1935
1936 /* Assert captures then. */
1937 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1938 current_offset = NULL;
1939 /* Conditional block does not. */
1940 if (*cc == OP_COND || *cc == OP_SCOND)
1941 has_alternatives = FALSE;
1942
1943 cc = next_opcode(common, cc);
1944 if (has_alternatives)
1945 current_offset = common->then_offsets + (cc - common->start);
1946
1947 while (cc < end)
1948 {
1949 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1950 cc = set_then_offsets(common, cc, current_offset);
1951 else
1952 {
1953 if (*cc == OP_ALT && has_alternatives)
1954 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1955 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1956 *current_offset = 1;
1957 cc = next_opcode(common, cc);
1958 }
1959 }
1960
1961 return end;
1962 }
1963
/* These case label helper macros are removed here, so the code below this
point cannot use them. */
1964 #undef CASE_ITERATOR_PRIVATE_DATA_1
1965 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1966 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1967 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1968 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1969 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1970
1971 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1972 {
1973 return (value & (value - 1)) == 0;
1974 }
1975
1976 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1977 {
1978 while (list)
1979 {
1980 /* sljit_set_label is clever enough to do nothing
1981 if either the jump or the label is NULL. */
1982 SET_LABEL(list->jump, label);
1983 list = list->next;
1984 }
1985 }
1986
1987 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1988 {
1989 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1990 if (list_item)
1991 {
1992 list_item->next = *list;
1993 list_item->jump = jump;
1994 *list = list_item;
1995 }
1996 }
1997
1998 static void add_stub(compiler_common *common, struct sljit_jump *start)
1999 {
2000 DEFINE_COMPILER;
2001 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2002
2003 if (list_item)
2004 {
2005 list_item->start = start;
2006 list_item->quit = LABEL();
2007 list_item->next = common->stubs;
2008 common->stubs = list_item;
2009 }
2010 }
2011
2012 static void flush_stubs(compiler_common *common)
2013 {
2014 DEFINE_COMPILER;
2015 stub_list* list_item = common->stubs;
2016
2017 while (list_item)
2018 {
2019 JUMPHERE(list_item->start);
2020 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2021 JUMPTO(SLJIT_JUMP, list_item->quit);
2022 list_item = list_item->next;
2023 }
2024 common->stubs = NULL;
2025 }
2026
2027 static SLJIT_INLINE void count_match(compiler_common *common)
2028 {
2029 DEFINE_COMPILER;
2030
2031 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2032 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2033 }
2034
2035 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2036 {
2037 /* May destroy all locals and registers except TMP2. */
2038 DEFINE_COMPILER;
2039
2040 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2041 #ifdef DESTROY_REGISTERS
2042 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2043 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2044 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2047 #endif
2048 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2049 }
2050
2051 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2052 {
2053 DEFINE_COMPILER;
2054 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2055 }
2056
2057 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2058 {
2059 DEFINE_COMPILER;
2060 struct sljit_label *loop;
2061 int i;
2062
2063 /* At this point we can freely use all temporary registers. */
2064 SLJIT_ASSERT(length > 1);
2065 /* TMP1 returns with begin - 1. */
2066 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2067 if (length < 8)
2068 {
2069 for (i = 1; i < length; i++)
2070 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2071 }
2072 else
2073 {
2074 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2075 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2076 loop = LABEL();
2077 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2078 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2079 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2080 }
2081 }
2082
2083 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2084 {
2085 DEFINE_COMPILER;
2086 struct sljit_label *loop;
2087 int i;
2088
2089 SLJIT_ASSERT(length > 1);
2090 /* OVECTOR(1) contains the "string begin - 1" constant. */
2091 if (length > 2)
2092 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2093 if (length < 8)
2094 {
2095 for (i = 2; i < length; i++)
2096 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2097 }
2098 else
2099 {
2100 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2101 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2102 loop = LABEL();
2103 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2104 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2105 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2106 }
2107
2108 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2109 if (common->mark_ptr != 0)
2110 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2111 if (common->control_head_ptr != 0)
2112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2113 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2114 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2115 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2116 }
2117
2118 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2119 {
2120 while (current != NULL)
2121 {
2122 switch (current[-2])
2123 {
2124 case type_then_trap:
2125 break;
2126
2127 case type_mark:
2128 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2129 return current[-4];
2130 break;
2131
2132 default:
2133 SLJIT_ASSERT_STOP();
2134 break;
2135 }
2136 current = (sljit_sw*)current[-1];
2137 }
2138 return -1;
2139 }
2140
/* Emits the code which finishes a successful match: the current STR_PTR is
stored into OVECTOR(1), the local ovector entries are converted to offsets
relative to the subject begin and copied into the offsets array of the
arguments (offset_count entries), the mark pointer is passed back when marks
are used, and the return value is computed from topbracket. */
2141 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2142 {
2143 DEFINE_COMPILER;
2144 struct sljit_label *loop;
2145 struct sljit_jump *early_quit;
2146
2147 /* At this point we can freely use all registers. */
/* Keep the previous OVECTOR(1) value in SLJIT_SAVED_REG3; the return value
scan below compares the entries against it. */
2148 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2149 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2150
2151 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2152 if (common->mark_ptr != 0)
2153 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2154 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2155 if (common->mark_ptr != 0)
2156 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* The destination starts one int before the offsets array, since the
store in the loop uses a sizeof(int) displacement. */
2157 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2158 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2159 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2160 /* Unlikely, but possible */
2161 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2162 loop = LABEL();
/* One entry per iteration: local value minus the subject begin. */
2163 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2164 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2165 /* Copy the integer value to the output buffer */
2166 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2167 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2168 #endif
2169 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2170 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2171 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2172 JUMPHERE(early_quit);
2173
2174 /* Calculate the return value, which is the maximum ovector value. */
2175 if (topbracket > 1)
2176 {
/* Scan the ovector pairs downwards from topbracket, decrementing the
candidate return value for every entry which still equals the value
saved in SLJIT_SAVED_REG3. */
2177 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2178 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2179
2180 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2181 loop = LABEL();
2182 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2183 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2184 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2185 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2186 }
2187 else
2188 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2189 }
2190
/* Emits the code which returns PCRE_ERROR_PARTIAL and then jumps to quit.
When real_offset_count is at least 2, offsets[0] and offsets[1] of the
arguments are filled in (relative to the subject begin); offsets[2] is also
written when at least 3 entries are available. The local which provides each
value depends on the partial matching mode (hard or soft). */
2191 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2192 {
2193 DEFINE_COMPILER;
2194 struct sljit_jump *jump;
2195
2196 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2197 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2198 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2199
2200 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2201 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2202 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Fewer than two output entries: nothing can be stored. */
2203 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2204
2205 /* Store match begin and end. */
2206 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2207 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2208
/* offsets[2] is only written when a third entry exists. */
2209 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2210 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2211 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2212 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2213 #endif
2214 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2215 JUMPHERE(jump);
2216
/* offsets[1] = STR_END - begin. The result is computed in SLJIT_SAVED_REG2,
which is the STR_END register itself (see the compile assert above). */
2217 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2218 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2219 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2220 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2221 #endif
2222 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2223
/* offsets[0] = (start_used_ptr or hit_start local) - begin. */
2224 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2225 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2226 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2227 #endif
2228 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2229
2230 JUMPTO(SLJIT_JUMP, quit);
2231 }
2232
2233 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2234 {
2235 /* May destroy TMP1. */
2236 DEFINE_COMPILER;
2237 struct sljit_jump *jump;
2238
2239 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2240 {
2241 /* The value of -1 must be kept for start_used_ptr! */
2242 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2243 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2244 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2245 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2246 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2247 JUMPHERE(jump);
2248 }
2249 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2250 {
2251 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2252 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2253 JUMPHERE(jump);
2254 }
2255 }
2256
2257 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2258 {
2259 /* Detects if the character has an othercase. */
2260 unsigned int c;
2261
2262 #ifdef SUPPORT_UTF
2263 if (common->utf)
2264 {
2265 GETCHAR(c, cc);
2266 if (c > 127)
2267 {
2268 #ifdef SUPPORT_UCP
2269 return c != UCD_OTHERCASE(c);
2270 #else
2271 return FALSE;
2272 #endif
2273 }
2274 #ifndef COMPILE_PCRE8
2275 return common->fcc[c] != c;
2276 #endif
2277 }
2278 else
2279 #endif
2280 c = *cc;
2281 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2282 }
2283
2284 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2285 {
2286 /* Returns with the othercase. */
2287 #ifdef SUPPORT_UTF
2288 if (common->utf && c > 127)
2289 {
2290 #ifdef SUPPORT_UCP
2291 return UCD_OTHERCASE(c);
2292 #else
2293 return c;
2294 #endif
2295 }
2296 #endif
2297 return TABLE_GET(c, common->fcc, c);
2298 }
2299
2300 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2301 {
2302 /* Detects if the character and its othercase has only 1 bit difference. */
2303 unsigned int c, oc, bit;
2304 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2305 int n;
2306 #endif
2307
2308 #ifdef SUPPORT_UTF
2309 if (common->utf)
2310 {
2311 GETCHAR(c, cc);
2312 if (c <= 127)
2313 oc = common->fcc[c];
2314 else
2315 {
2316 #ifdef SUPPORT_UCP
2317 oc = UCD_OTHERCASE(c);
2318 #else
2319 oc = c;
2320 #endif
2321 }
2322 }
2323 else
2324 {
2325 c = *cc;
2326 oc = TABLE_GET(c, common->fcc, c);
2327 }
2328 #else
2329 c = *cc;
2330 oc = TABLE_GET(c, common->fcc, c);
2331 #endif
2332
2333 SLJIT_ASSERT(c != oc);
2334
2335 bit = c ^ oc;
2336 /* Optimized for English alphabet. */
2337 if (c <= 127 && bit == 0x20)
2338 return (0 << 8) | 0x20;
2339
2340 /* Since c != oc, they must have at least 1 bit difference. */
2341 if (!is_powerof2(bit))
2342 return 0;
2343
2344 #if defined COMPILE_PCRE8
2345
2346 #ifdef SUPPORT_UTF
2347 if (common->utf && c > 127)
2348 {
2349 n = GET_EXTRALEN(*cc);
2350 while ((bit & 0x3f) == 0)
2351 {
2352 n--;
2353 bit >>= 6;
2354 }
2355 return (n << 8) | bit;
2356 }
2357 #endif /* SUPPORT_UTF */
2358 return (0 << 8) | bit;
2359
2360 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2361
2362 #ifdef SUPPORT_UTF
2363 if (common->utf && c > 65535)
2364 {
2365 if (bit >= (1 << 10))
2366 bit >>= 10;
2367 else
2368 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2369 }
2370 #endif /* SUPPORT_UTF */
2371 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2372
2373 #endif /* COMPILE_PCRE[8|16|32] */
2374 }
2375
2376 static void check_partial(compiler_common *common, BOOL force)
2377 {
2378 /* Checks whether a partial matching is occurred. Does not modify registers. */
2379 DEFINE_COMPILER;
2380 struct sljit_jump *jump = NULL;
2381
2382 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2383
2384 if (common->mode == JIT_COMPILE)
2385 return;
2386
2387 if (!force)
2388 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2389 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2390 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2391
2392 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2393 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2394 else
2395 {
2396 if (common->partialmatchlabel != NULL)
2397 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2398 else
2399 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2400 }
2401
2402 if (jump != NULL)
2403 JUMPHERE(jump);
2404 }
2405
2406 static void check_str_end(compiler_common *common, jump_list **end_reached)
2407 {
2408 /* Does not affect registers. Usually used in a tight spot. */
2409 DEFINE_COMPILER;
2410 struct sljit_jump *jump;
2411
2412 if (common->mode == JIT_COMPILE)
2413 {
2414 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2415 return;
2416 }
2417
2418 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2419 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2420 {
2421 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2422 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2423 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2424 }
2425 else
2426 {
2427 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2428 if (common->partialmatchlabel != NULL)
2429 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2430 else
2431 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2432 }
2433 JUMPHERE(jump);
2434 }
2435
2436 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2437 {
2438 DEFINE_COMPILER;
2439 struct sljit_jump *jump;
2440
2441 if (common->mode == JIT_COMPILE)
2442 {
2443 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2444 return;
2445 }
2446
2447 /* Partial matching mode. */
2448 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2449 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2450 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2451 {
2452 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2453 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2454 }
2455 else
2456 {
2457 if (common->partialmatchlabel != NULL)
2458 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2459 else
2460 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2461 }
2462 JUMPHERE(jump);
2463 }
2464
2465 static void read_char(compiler_common *common)
2466 {
2467 /* Reads the character into TMP1, updates STR_PTR.
2468 Does not check STR_END. TMP2 Destroyed. */
2469 DEFINE_COMPILER;
2470 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2471 struct sljit_jump *jump;
2472 #endif
2473
2474 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2475 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2476 if (common->utf)
2477 {
2478 #if defined COMPILE_PCRE8
2479 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2480 #elif defined COMPILE_PCRE16
2481 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2482 #endif /* COMPILE_PCRE[8|16] */
2483 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2484 JUMPHERE(jump);
2485 }
2486 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2487 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2488 }
2489
2490 static void peek_char(compiler_common *common)
2491 {
2492 /* Reads the character into TMP1, keeps STR_PTR.
2493 Does not check STR_END. TMP2 Destroyed. */
2494 DEFINE_COMPILER;
2495 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2496 struct sljit_jump *jump;
2497 #endif
2498
2499 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2500 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2501 if (common->utf)
2502 {
2503 #if defined COMPILE_PCRE8
2504 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2505 #elif defined COMPILE_PCRE16
2506 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2507 #endif /* COMPILE_PCRE[8|16] */
2508 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2509 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2510 JUMPHERE(jump);
2511 }
2512 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2513 }
2514
2515 static void read_char8(compiler_common *common)
2516 {
2517 /* Reads the precise value of a character into TMP1, if the character is
2518 less than 256. Otherwise it returns with a value greater or equal than 256. */
2519 DEFINE_COMPILER;
2520 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2521 struct sljit_jump *jump;
2522 #endif
2523
2524 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2525 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2526
2527 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2528 if (common->utf)
2529 {
2530 #if defined COMPILE_PCRE8
2531 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2532 add_jump(compiler, &common->utfreadchar8, JUMP(SLJIT_FAST_CALL));
2533 JUMPHERE(jump);
2534 #elif defined COMPILE_PCRE16
2535 /* Skip low surrogate if necessary. */
2536 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2537 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2538 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2539 JUMPHERE(jump);
2540 #endif /* COMPILE_PCRE[8|16] */
2541 }
2542 #endif
2543 }
2544
2545 static void read_char8_type(compiler_common *common)
2546 {
2547 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2548 DEFINE_COMPILER;
2549 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2550 struct sljit_jump *jump;
2551 #endif
2552
2553 #ifdef SUPPORT_UTF
2554 if (common->utf)
2555 {
2556 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2557 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2558 #if defined COMPILE_PCRE8
2559 /* This can be an extra read in some situations, but hopefully
2560 it is needed in most cases. */
2561 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2562 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2563 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2564 JUMPHERE(jump);
2565 #elif defined COMPILE_PCRE16
2566 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2567 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2568 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2569 JUMPHERE(jump);
2570 /* Skip low surrogate if necessary. */
2571 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2572 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2573 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2574 JUMPHERE(jump);
2575 #elif defined COMPILE_PCRE32
2576 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2577 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2578 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2579 JUMPHERE(jump);
2580 #endif /* COMPILE_PCRE[8|16|32] */
2581 return;
2582 }
2583 #endif /* SUPPORT_UTF */
2584 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2585 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2586 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2587 /* The ctypes array contains only 256 values. */
2588 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2589 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2590 #endif
2591 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2592 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2593 JUMPHERE(jump);
2594 #endif
2595 }
2596
2597 static void skip_char_back(compiler_common *common)
2598 {
2599 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2600 DEFINE_COMPILER;
2601 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2602 #if defined COMPILE_PCRE8
2603 struct sljit_label *label;
2604
2605 if (common->utf)
2606 {
2607 label = LABEL();
2608 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2609 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2610 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2611 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2612 return;
2613 }
2614 #elif defined COMPILE_PCRE16
2615 if (common->utf)
2616 {
2617 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2618 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2619 /* Skip low surrogate if necessary. */
2620 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2621 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2622 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2623 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2624 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2625 return;
2626 }
2627 #endif /* COMPILE_PCRE[8|16] */
2628 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2629 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2630 }
2631
2632 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2633 {
2634 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2635 DEFINE_COMPILER;
2636
2637 if (nltype == NLTYPE_ANY)
2638 {
2639 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2640 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2641 }
2642 else if (nltype == NLTYPE_ANYCRLF)
2643 {
2644 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2645 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2646 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2647 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2648 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2649 }
2650 else
2651 {
2652 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2653 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2654 }
2655 }
2656
2657 #ifdef SUPPORT_UTF
2658
2659 #if defined COMPILE_PCRE8
2660 static void do_utfreadchar(compiler_common *common)
2661 {
2662 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2663 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2664 DEFINE_COMPILER;
2665 struct sljit_jump *jump;
2666
2667 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2668 /* Searching for the first zero. */
2669 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2670 jump = JUMP(SLJIT_C_NOT_ZERO);
2671 /* Two byte sequence. */
2672 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2673 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2674 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2675 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2676 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2677 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2678 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2679 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2680 JUMPHERE(jump);
2681
2682 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2683 jump = JUMP(SLJIT_C_NOT_ZERO);
2684 /* Three byte sequence. */
2685 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2686 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2687 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2688 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2689 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2690 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2691 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2692 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2693 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2694 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2695 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2696 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2697 JUMPHERE(jump);
2698
2699 /* Four byte sequence. */
2700 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2701 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2702 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2703 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2704 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2705 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2706 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2707 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2708 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2709 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2710 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2711 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2712 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2713 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2714 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2715 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2716 }
2717
2718 static void do_utfreadchar8(compiler_common *common)
2719 {
2720 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2721 of the character (>= 0xc0). Return value in TMP1. */
2722 DEFINE_COMPILER;
2723 struct sljit_jump *jump;
2724
2725 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2726
2727 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2728 jump = JUMP(SLJIT_C_NOT_ZERO);
2729 /* Two byte sequence. */
2730 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2731 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2732 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2733 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2734 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2735 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2736 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2737
2738 JUMPHERE(jump);
2739 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2740 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x800);
2741 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2742 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2743 }
2744
2745 static void do_utfreadtype8(compiler_common *common)
2746 {
2747 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2748 of the character (>= 0xc0). Return value in TMP1. */
2749 DEFINE_COMPILER;
2750 struct sljit_jump *jump;
2751 struct sljit_jump *compare;
2752
2753 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2754
2755 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2756 jump = JUMP(SLJIT_C_NOT_ZERO);
2757 /* Two byte sequence. */
2758 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2759 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2760 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2761 /* The upper 5 bits are known at this point. */
2762 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2763 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2764 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2765 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2766 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2767 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2768
2769 JUMPHERE(compare);
2770 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2771 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2772
2773 /* We only have types for characters less than 256. */
2774 JUMPHERE(jump);
2775 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2776 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2777 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2778 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2779 }
2780
2781 #elif defined COMPILE_PCRE16
2782
2783 static void do_utfreadchar(compiler_common *common)
2784 {
2785 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2786 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2787 DEFINE_COMPILER;
2788 struct sljit_jump *jump;
2789
2790 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2791 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2792 /* Do nothing, only return. */
2793 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2794
2795 JUMPHERE(jump);
2796 /* Combine two 16 bit characters. */
2797 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2798 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2799 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2800 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2801 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2802 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2803 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2804 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2805 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2806 }
2807
2808 #endif /* COMPILE_PCRE[8|16] */
2809
2810 #endif /* SUPPORT_UTF */
2811
2812 #ifdef SUPPORT_UCP
2813
2814 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2815 #define UCD_BLOCK_MASK 127
2816 #define UCD_BLOCK_SHIFT 7
2817
2818 static void do_getucd(compiler_common *common)
2819 {
2820 /* Search the UCD record for the character comes in TMP1.
2821 Returns chartype in TMP1 and UCD offset in TMP2. */
2822 DEFINE_COMPILER;
2823
2824 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2825
2826 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2827 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2828 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2829 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2830 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2831 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2832 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2833 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2834 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2835 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2836 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2837 }
2838 #endif
2839
2840 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2841 {
2842 DEFINE_COMPILER;
2843 struct sljit_label *mainloop;
2844 struct sljit_label *newlinelabel = NULL;
2845 struct sljit_jump *start;
2846 struct sljit_jump *end = NULL;
2847 struct sljit_jump *nl = NULL;
2848 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2849 struct sljit_jump *singlechar;
2850 #endif
2851 jump_list *newline = NULL;
2852 BOOL newlinecheck = FALSE;
2853 BOOL readuchar = FALSE;
2854
2855 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2856 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2857 newlinecheck = TRUE;
2858
2859 if (firstline)
2860 {
2861 /* Search for the end of the first line. */
2862 SLJIT_ASSERT(common->first_line_end != 0);
2863 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2864
2865 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2866 {
2867 mainloop = LABEL();
2868 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2869 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2870 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2871 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2872 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2873 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2874 JUMPHERE(end);
2875 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2876 }
2877 else
2878 {
2879 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2880 mainloop = LABEL();
2881 /* Continual stores does not cause data dependency. */
2882 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2883 read_char(common);
2884 check_newlinechar(common, common->nltype, &newline, TRUE);
2885 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2886 JUMPHERE(end);
2887 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2888 set_jumps(newline, LABEL());
2889 }
2890
2891 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2892 }
2893
2894 start = JUMP(SLJIT_JUMP);
2895
2896 if (newlinecheck)
2897 {
2898 newlinelabel = LABEL();
2899 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2900 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2901 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2902 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2903 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2904 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2905 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2906 #endif
2907 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2908 nl = JUMP(SLJIT_JUMP);
2909 }
2910
2911 mainloop = LABEL();
2912
2913 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2914 #ifdef SUPPORT_UTF
2915 if (common->utf) readuchar = TRUE;
2916 #endif
2917 if (newlinecheck) readuchar = TRUE;
2918
2919 if (readuchar)
2920 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2921
2922 if (newlinecheck)
2923 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2924
2925 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2926 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2927 #if defined COMPILE_PCRE8
2928 if (common->utf)
2929 {
2930 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2931 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2932 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2933 JUMPHERE(singlechar);
2934 }
2935 #elif defined COMPILE_PCRE16
2936 if (common->utf)
2937 {
2938 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2939 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2940 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2941 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2942 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2943 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2944 JUMPHERE(singlechar);
2945 }
2946 #endif /* COMPILE_PCRE[8|16] */
2947 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2948 JUMPHERE(start);
2949
2950 if (newlinecheck)
2951 {
2952 JUMPHERE(end);
2953 JUMPHERE(nl);
2954 }
2955
2956 return mainloop;
2957 }
2958
2959 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
2960 {
2961 /* Recursive function, which scans prefix literals. */
2962 int len, repeat, len_save, consumed = 0;
2963 pcre_int32 caseless, chr, mask;
2964 pcre_uchar *alternative, *cc_save;
2965 BOOL last, any;
2966
2967 repeat = 1;
2968 while (TRUE)
2969 {
2970 last = TRUE;
2971 any = FALSE;
2972 caseless = 0;
2973 switch (*cc)
2974 {
2975 case OP_CHARI:
2976 caseless = 1;
2977 case OP_CHAR:
2978 last = FALSE;
2979 cc++;
2980 break;
2981
2982 case OP_SOD:
2983 case OP_SOM:
2984 case OP_SET_SOM:
2985 case OP_NOT_WORD_BOUNDARY:
2986 case OP_WORD_BOUNDARY:
2987 case OP_EODN:
2988 case OP_EOD:
2989 case OP_CIRC:
2990 case OP_CIRCM:
2991 case OP_DOLL:
2992 case OP_DOLLM:
2993 /* Zero width assertions. */
2994 cc++;
2995 continue;
2996
2997 case OP_PLUS:
2998 case OP_MINPLUS:
2999 case OP_POSPLUS:
3000 cc++;
3001 break;
3002
3003 case OP_EXACTI:
3004 caseless = 1;
3005 case OP_EXACT:
3006 repeat = GET2(cc, 1);
3007 last = FALSE;
3008 cc += 1 + IMM2_SIZE;
3009 break;
3010
3011 case OP_PLUSI:
3012 case OP_MINPLUSI:
3013 case OP_POSPLUSI:
3014 caseless = 1;
3015 cc++;
3016 break;
3017
3018 case OP_KET:
3019 cc += 1 + LINK_SIZE;
3020 continue;
3021
3022 case OP_ALT:
3023 cc += GET(cc, 1);
3024 continue;
3025
3026 case OP_ONCE:
3027 case OP_ONCE_NC:
3028 case OP_BRA:
3029 case OP_BRAPOS:
3030 case OP_CBRA:
3031 case OP_CBRAPOS:
3032 alternative = cc + GET(cc, 1);
3033 while (*alternative == OP_ALT)
3034 {
3035 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3036 if (max_chars == 0)
3037 return consumed;
3038 alternative += GET(alternative, 1);
3039 }
3040
3041 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3042 cc += IMM2_SIZE;
3043 cc += 1 + LINK_SIZE;
3044 continue;
3045
3046 case OP_CLASS:
3047 case OP_NCLASS:
3048 any = TRUE;
3049 cc += 1 + 32 / sizeof(pcre_uchar);
3050 break;
3051
3052 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3053 case OP_XCLASS:
3054 any = TRUE;
3055 cc += GET(cc, 1);
3056 break;
3057 #endif
3058
3059 case OP_NOT_DIGIT:
3060 case OP_DIGIT:
3061 case OP_NOT_WHITESPACE:
3062 case OP_WHITESPACE:
3063 case OP_NOT_WORDCHAR:
3064 case OP_WORDCHAR:
3065 case OP_ANY:
3066 case OP_ALLANY:
3067 any = TRUE;
3068 cc++;
3069 break;
3070
3071 #ifdef SUPPORT_UCP
3072 case OP_NOTPROP:
3073 case OP_PROP:
3074 any = TRUE;
3075 cc += 1 + 2;
3076 break;
3077 #endif
3078
3079 case OP_TYPEEXACT:
3080 repeat = GET2(cc, 1);
3081 cc += 1 + IMM2_SIZE;
3082 continue;
3083
3084 default:
3085 return consumed;
3086 }
3087
3088 if (any)
3089 {
3090 #ifdef SUPPORT_UTF
3091 if (common->utf) return consumed;
3092 #endif
3093 #if defined COMPILE_PCRE8
3094 mask = 0xff;
3095 #elif defined COMPILE_PCRE16
3096 mask = 0xffff;
3097 #elif defined COMPILE_PCRE32
3098 mask = 0xffffffff;
3099 #else
3100 SLJIT_ASSERT_STOP();
3101 #endif
3102
3103 do
3104 {
3105 chars[0] = mask;
3106 chars[1] = mask;
3107
3108 if (--max_chars == 0)
3109 return consumed;
3110 consumed++;
3111 chars += 2;
3112 }
3113 while (--repeat > 0);
3114
3115 repeat = 1;
3116 continue;
3117 }
3118
3119 len = 1;
3120 #ifdef SUPPORT_UTF
3121 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3122 #endif
3123
3124 if (caseless != 0 && char_has_othercase(common, cc))
3125 {
3126 caseless = char_get_othercase_bit(common, cc);
3127 if (caseless == 0)
3128 return consumed;
3129 #ifdef COMPILE_PCRE8
3130 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3131 #else
3132 if ((caseless & 0x100) != 0)
3133 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
3134 else
3135 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
3136 #endif
3137 }
3138 else
3139 caseless = 0;
3140
3141 len_save = len;
3142 cc_save = cc;
3143 while (TRUE)
3144 {
3145 do
3146 {
3147 chr = *cc;
3148 #ifdef COMPILE_PCRE32
3149 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3150 return consumed;
3151 #endif
3152 mask = 0;
3153 if (len == (caseless & 0xff))
3154 {
3155 mask = caseless >> 8;
3156 chr |= mask;
3157 }
3158
3159 if (chars[0] == NOTACHAR)
3160 {
3161 chars[0] = chr;
3162 chars[1] = mask;
3163 }
3164 else
3165 {
3166 mask |= chars[0] ^ chr;
3167 chr |= mask;
3168 chars[0] = chr;
3169 chars[1] |= mask;
3170 }
3171
3172 len--;
3173 if (--max_chars == 0)
3174 return consumed;
3175 consumed++;
3176 chars += 2;
3177 cc++;
3178 }
3179 while (len > 0);
3180
3181 if (--repeat == 0)
3182 break;
3183
3184 len = len_save;
3185 cc = cc_save;
3186 }
3187
3188 repeat = 1;
3189 if (last)
3190 return consumed;
3191 }
3192 }
3193
3194 #define MAX_N_CHARS 16
3195
3196 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3197 {
3198 DEFINE_COMPILER;
3199 struct sljit_label *start;
3200 struct sljit_jump *quit;
3201 pcre_uint32 chars[MAX_N_CHARS * 2];
3202 pcre_uint8 ones[MAX_N_CHARS];
3203 pcre_uint32 mask;
3204 int i, max;
3205 int offsets[3];
3206
3207 for (i = 0; i < MAX_N_CHARS; i++)
3208 {
3209 chars[i << 1] = NOTACHAR;
3210 chars[(i << 1) + 1] = 0;
3211 }
3212
3213 max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3214
3215 if (max <= 1)
3216 return FALSE;
3217
3218 for (i = 0; i < max; i++)
3219 {
3220 mask = chars[(i << 1) + 1];
3221 ones[i] = ones_in_half_byte[mask & 0xf];
3222 mask >>= 4;
3223 while (mask != 0)
3224 {
3225 ones[i] += ones_in_half_byte[mask & 0xf];
3226 mask >>= 4;
3227 }
3228 }
3229
3230 offsets[0] = -1;
3231 /* Scan forward. */
3232 for (i = 0; i < max; i++)
3233 if (ones[i] <= 2) {
3234 offsets[0] = i;
3235 break;
3236 }
3237
3238 if (offsets[0] == -1)
3239 return FALSE;
3240
3241 /* Scan backward. */
3242 offsets[1] = -1;
3243 for (i = max - 1; i > offsets[0]; i--)
3244 if (ones[i] <= 2) {
3245 offsets[1] = i;
3246 break;
3247 }
3248
3249 offsets[2] = -1;
3250 if (offsets[1] >= 0)
3251 {
3252 /* Scan from middle. */
3253 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3254 if (ones[i] <= 2)
3255 {
3256 offsets[2] = i;
3257 break;
3258 }
3259
3260 if (offsets[2] == -1)
3261 {
3262 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3263 if (ones[i] <= 2)
3264 {
3265 offsets[2] = i;
3266 break;
3267 }
3268 }
3269 }
3270
3271 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3272 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3273
3274 chars[0] = chars[offsets[0] << 1];
3275 chars[1] = chars[(offsets[0] << 1) + 1];
3276 if (offsets[2] >= 0)
3277 {
3278 chars[2] = chars[offsets[2] << 1];
3279 chars[3] = chars[(offsets[2] << 1) + 1];
3280 }
3281 if (offsets[1] >= 0)
3282 {
3283 chars[4] = chars[offsets[1] << 1];
3284 chars[5] = chars[(offsets[1] << 1) + 1];
3285 }
3286
3287 max -= 1;
3288 if (firstline)
3289 {
3290 SLJIT_ASSERT(common->first_line_end != 0);
3291 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3292 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3293 }
3294 else
3295 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3296
3297 start = LABEL();
3298 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3299
3300 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3301 if (offsets[1] >= 0)
3302 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3303 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3304
3305 if (chars[1] != 0)
3306 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3307 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3308 if (offsets[2] >= 0)
3309 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3310
3311 if (offsets[1] >= 0)
3312 {
3313 if (chars[5] != 0)
3314 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3315 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3316 }
3317
3318 if (offsets[2] >= 0)
3319 {
3320 if (chars[3] != 0)
3321 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3322 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3323 }
3324 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3325
3326 JUMPHERE(quit);
3327
3328 if (firstline)
3329 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3330 else
3331 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3332 return TRUE;
3333 }
3334
3335 #undef MAX_N_CHARS
3336
3337 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3338 {
3339 DEFINE_COMPILER;
3340 struct sljit_label *start;
3341 struct sljit_jump *quit;
3342 struct sljit_jump *found;
3343 pcre_uchar oc, bit;
3344
3345 if (firstline)
3346 {
3347 SLJIT_ASSERT(common->first_line_end != 0);
3348 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3349 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3350 }
3351
3352 start = LABEL();
3353 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3354 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3355
3356 oc = first_char;
3357 if (caseless)
3358 {
3359 oc = TABLE_GET(first_char, common->fcc, first_char);
3360 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3361 if (first_char > 127 && common->utf)
3362 oc = UCD_OTHERCASE(first_char);
3363 #endif
3364 }
3365 if (first_char == oc)
3366 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3367 else
3368 {
3369 bit = first_char ^ oc;
3370 if (is_powerof2(bit))
3371 {
3372 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3373 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3374 }
3375 else
3376 {
3377 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3378 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3379 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3380 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3381 found = JUMP(SLJIT_C_NOT_ZERO);
3382 }
3383 }
3384
3385 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3386 JUMPTO(SLJIT_JUMP, start);
3387 JUMPHERE(found);
3388 JUMPHERE(quit);
3389
3390 if (firstline)
3391 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3392 }
3393
3394 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3395 {
3396 DEFINE_COMPILER;
3397 struct sljit_label *loop;
3398 struct sljit_jump *lastchar;
3399 struct sljit_jump *firstchar;
3400 struct sljit_jump *quit;
3401 struct sljit_jump *foundcr = NULL;
3402 struct sljit_jump *notfoundnl;
3403 jump_list *newline = NULL;
3404
3405 if (firstline)
3406 {
3407 SLJIT_ASSERT(common->first_line_end != 0);
3408 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3409 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3410 }
3411
3412 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3413 {
3414 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3415 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3416 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3417 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3418 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3419
3420 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3421 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3422 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3423 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3424 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3425 #endif
3426 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3427
3428 loop = LABEL();
3429 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3430 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3431 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3432 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3433 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3434 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3435
3436 JUMPHERE(quit);
3437 JUMPHERE(firstchar);
3438 JUMPHERE(lastchar);
3439
3440 if (firstline)
3441 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3442 return;
3443 }
3444
3445 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3446 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3447 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3448 skip_char_back(common);
3449
3450 loop = LABEL();
3451 read_char(common);
3452 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3453 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3454 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3455 check_newlinechar(common, common->nltype, &newline, FALSE);
3456 set_jumps(newline, loop);
3457
3458 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3459 {
3460 quit = JUMP(SLJIT_JUMP);
3461 JUMPHERE(foundcr);
3462 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3463 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3464 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3465 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3466 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3467 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3468 #endif
3469 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3470 JUMPHERE(notfoundnl);
3471 JUMPHERE(quit);
3472 }
3473 JUMPHERE(lastchar);
3474 JUMPHERE(firstchar);
3475
3476 if (firstline)
3477 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3478 }
3479
3480 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3481
3482 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3483 {
3484 DEFINE_COMPILER;
3485 struct sljit_label *start;
3486 struct sljit_jump *quit;
3487 struct sljit_jump *found = NULL;
3488 jump_list *matches = NULL;
3489 #ifndef COMPILE_PCRE8
3490 struct sljit_jump *jump;
3491 #endif
3492
3493 if (firstline)
3494 {
3495 SLJIT_ASSERT(common->first_line_end != 0);
3496 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3497 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3498 }
3499
3500 start = LABEL();
3501 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3502 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3503 #ifdef SUPPORT_UTF
3504 if (common->utf)
3505 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3506 #endif
3507
3508 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3509 {
3510 #ifndef COMPILE_PCRE8
3511 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3512 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3513 JUMPHERE(jump);
3514 #endif
3515 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3516 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3517 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3518 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3519 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3520 found = JUMP(SLJIT_C_NOT_ZERO);
3521 }
3522
3523 #ifdef SUPPORT_UTF
3524 if (common->utf)
3525 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3526 #endif
3527 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3528 #ifdef SUPPORT_UTF
3529 #if defined COMPILE_PCRE8
3530 if (common->utf)
3531 {
3532 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3533 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3534 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3535 }
3536 #elif defined COMPILE_PCRE16
3537 if (common->utf)
3538 {
3539 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3540 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3541 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3542 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3543 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3544 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3545 }
3546 #endif /* COMPILE_PCRE[8|16] */
3547 #endif /* SUPPORT_UTF */
3548 JUMPTO(SLJIT_JUMP, start);
3549 if (found != NULL)
3550 JUMPHERE(found);
3551 if (matches != NULL)
3552 set_jumps(matches, LABEL());
3553 JUMPHERE(quit);
3554
3555 if (firstline)
3556 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3557 }
3558
3559 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3560 {
3561 DEFINE_COMPILER;
3562 struct sljit_label *loop;
3563 struct sljit_jump *toolong;
3564 struct sljit_jump *alreadyfound;
3565 struct sljit_jump *found;
3566 struct sljit_jump *foundoc = NULL;
3567 struct sljit_jump *notfound;
3568 pcre_uint32 oc, bit;
3569
3570 SLJIT_ASSERT(common->req_char_ptr != 0);
3571 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3572 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3573 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3574 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3575
3576 if (has_firstchar)
3577 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3578 else
3579 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3580
3581 loop = LABEL();
3582 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3583
3584 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3585 oc = req_char;
3586 if (caseless)
3587 {
3588 oc = TABLE_GET(req_char, common->fcc, req_char);
3589 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3590 if (req_char > 127 && common->utf)
3591 oc = UCD_OTHERCASE(req_char);
3592 #endif
3593 }
3594 if (req_char == oc)
3595 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3596 else
3597 {
3598 bit = req_char ^ oc;
3599 if (is_powerof2(bit))
3600 {
3601 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3602 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3603 }
3604 else
3605 {
3606 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3607 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3608 }
3609 }
3610 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3611 JUMPTO(SLJIT_JUMP, loop);
3612
3613 JUMPHERE(found);
3614 if (foundoc)
3615 JUMPHERE(foundoc);
3616 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3617 JUMPHERE(alreadyfound);
3618 JUMPHERE(toolong);
3619 return notfound;
3620 }
3621
3622 static void do_revertframes(compiler_common *common)
3623 {
3624 DEFINE_COMPILER;
3625 struct sljit_jump *jump;
3626 struct sljit_label *mainloop;
3627
3628 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3629 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3630 GET_LOCAL_BASE(TMP3, 0, 0);
3631
3632 /* Drop frames until we reach STACK_TOP. */
3633 mainloop = LABEL();
3634 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3635 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3636 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3637
3638 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3639 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3640 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3641 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3642 JUMPTO(SLJIT_JUMP, mainloop);
3643
3644 JUMPHERE(jump);
3645 jump = JUMP(SLJIT_C_SIG_LESS);
3646 /* End of dropping frames. */
3647 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3648
3649 JUMPHERE(jump);
3650 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3651 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3652 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3653 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3654 JUMPTO(SLJIT_JUMP, mainloop);
3655 }
3656
3657 static void check_wordboundary(compiler_common *common)
3658 {
3659 DEFINE_COMPILER;
3660 struct sljit_jump *skipread;
3661 jump_list *skipread_list = NULL;
3662 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3663 struct sljit_jump *jump;
3664 #endif
3665
3666 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3667
3668 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3669 /* Get type of the previous char, and put it to LOCALS1. */
3670 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3671 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3672 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3673 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3674 skip_char_back(common);
3675 check_start_used_ptr(common);
3676 read_char(common);
3677
3678 /* Testing char type. */
3679 #ifdef SUPPORT_UCP
3680 if (common->use_ucp)
3681 {
3682 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3683 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3684 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3685 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3686 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3687 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3688 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3689 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3690 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3691 JUMPHERE(jump);
3692 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3693 }
3694 else
3695 #endif
3696 {
3697 #ifndef COMPILE_PCRE8
3698 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3699 #elif defined SUPPORT_UTF
3700 /* Here LOCALS1 has already been zeroed. */
3701 jump = NULL;
3702 if (common->utf)
3703 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3704 #endif /* COMPILE_PCRE8 */
3705 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3706 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3707 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3709 #ifndef COMPILE_PCRE8
3710 JUMPHERE(jump);
3711 #elif defined SUPPORT_UTF
3712 if (jump != NULL)
3713 JUMPHERE(jump);
3714 #endif /* COMPILE_PCRE8 */
3715 }
3716 JUMPHERE(skipread);
3717
3718 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3719 check_str_end(common, &skipread_list);
3720 peek_char(common);
3721
3722 /* Testing char type. This is a code duplication. */
3723 #ifdef SUPPORT_UCP
3724 if (common->use_ucp)
3725 {
3726 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3727 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3728 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3729 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3730 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3731 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3732 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3733 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3734 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3735 JUMPHERE(jump);
3736 }
3737 else
3738 #endif
3739 {
3740 #ifndef COMPILE_PCRE8
3741 /* TMP2 may be destroyed by peek_char. */
3742 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3743 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3744 #elif defined SUPPORT_UTF
3745 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3746 jump = NULL;
3747 if (common->utf)
3748 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3749 #endif
3750 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3751 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3752 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3753 #ifndef COMPILE_PCRE8
3754 JUMPHERE(jump);
3755 #elif defined SUPPORT_UTF
3756 if (jump != NULL)
3757 JUMPHERE(jump);
3758 #endif /* COMPILE_PCRE8 */
3759 }
3760 set_jumps(skipread_list, LABEL());
3761
3762 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3763 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3764 }
3765
3766 /*
3767 range format:
3768
3769 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3770 ranges[1] = first bit (0 or 1)
3771 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3772 */
3773
3774 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3775 {
3776 DEFINE_COMPILER;
3777 int offset;
3778
3779 if (ranges[0] < 0 || ranges[0] > 4)
3780 return FALSE;
3781
3782 /* No character is accepted. */
3783 if (ranges[0] == 0 && ranges[1] == 0)
3784 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3785
3786 if (readch)
3787 read_char8(common);
3788
3789 switch(ranges[0])
3790 {
3791 case 0:
3792 /* When ranges[1] != 0, all characters are accepted. */
3793 return TRUE;
3794
3795 case 1:
3796 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3797 return TRUE;
3798
3799 case 2:
3800 if (ranges[2] + 1 != ranges[3])
3801 {
3802 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3803 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3804 }
3805 else
3806 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3807 return TRUE;
3808
3809 case 3:
3810 if (ranges[1] != 0)
3811 {
3812 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3813 if (ranges[2] + 1 != ranges[3])
3814 {
3815 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3816 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3817 }
3818 else
3819 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3820 return TRUE;
3821 }
3822
3823 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2]));
3824 if (ranges[3] + 1 != ranges[4])
3825 {
3826 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[3]);
3827 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[3]));
3828 }
3829 else
3830 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3]));
3831 return TRUE;
3832
3833 case 4:
3834 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4])
3835 && (ranges[2] | (ranges[4] - ranges[2])) == ranges[4]
3836 && is_powerof2(ranges[4] - ranges[2]))
3837 {
3838 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3839 if (ranges[4] + 1 != ranges[5])
3840 {
3841 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3842 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3843 }
3844 else
3845 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3846 return TRUE;
3847 }
3848
3849 if (ranges[1] != 0)
3850 {
3851 offset = 0;
3852 if (ranges[2] + 1 != ranges[3])
3853 {
3854 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3855 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3856 offset = ranges[2];
3857 }
3858 else
3859 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3860
3861 if (ranges[4] + 1 != ranges[5])
3862 {
3863 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - offset);
3864 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3865 }
3866 else
3867 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4] - offset));
3868 return TRUE;
3869 }
3870
3871 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3872 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[2]));
3873 if (ranges[3] + 1 != ranges[4])
3874 {
3875 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]);
3876 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[3]));
3877 }
3878 else
3879 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3880 return TRUE;
3881
3882 default:
3883 SLJIT_ASSERT_STOP();
3884 return FALSE;
3885 }
3886 }
3887
3888 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
3889 {
3890 int ranges[2 + MAX_RANGE_SIZE];
3891 pcre_uint8 bit, cbit, all;
3892 int i, byte, length = 0;
3893
3894 bit = bits[0] & 0x1;
3895 ranges[1] = !invert ? bit : (bit ^ 0x1);
3896 /* All bits will be zero or one (since bit is zero or one). */
3897 all = -bit;
3898
3899 for (i = 0; i < 256; )
3900 {
3901 byte = i >> 3;
3902 if ((i & 0x7) == 0 && bits[byte] == all)
3903 i += 8;
3904 else
3905 {
3906 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3907 if (cbit != bit)
3908 {
3909 if (length >= MAX_RANGE_SIZE)
3910 return FALSE;
3911 ranges[2 + length] = i;
3912 length++;
3913 bit = cbit;
3914 all = -cbit;
3915 }
3916 i++;
3917 }
3918 }
3919
3920 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3921 {
3922 if (length >= MAX_RANGE_SIZE)
3923 return FALSE;
3924 ranges[2 + length] = 256;
3925 length++;
3926 }
3927 ranges[0] = length;
3928
3929 return check_ranges(common, ranges, backtracks, FALSE);
3930 }
3931
3932 static void check_anynewline(compiler_common *common)
3933 {
3934 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3935 DEFINE_COMPILER;
3936
3937 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3938
3939 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3940 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3941 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3942 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3943 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3944 #ifdef COMPILE_PCRE8
3945 if (common->utf)
3946 {
3947 #endif
3948 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3949 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3950 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3951 #ifdef COMPILE_PCRE8
3952 }
3953 #endif
3954 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3955 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3956 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3957 }
3958
3959 static void check_hspace(compiler_common *common)
3960 {
3961 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3962 DEFINE_COMPILER;
3963
3964 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3965
3966 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3967 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3968 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3969 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3970 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3971 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3972 #ifdef COMPILE_PCRE8
3973 if (common->utf)
3974 {
3975 #endif
3976 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3977 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3978 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3979 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3980 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3981 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3982 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3983 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3984 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3985 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3986 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3987 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3988 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3989 #ifdef COMPILE_PCRE8
3990 }
3991 #endif
3992 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3993 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3994
3995 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3996 }
3997
3998 static void check_vspace(compiler_common *common)
3999 {
4000 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
4001 DEFINE_COMPILER;
4002
4003 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4004
4005 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4006 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4007 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4008 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4009 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4010 #ifdef COMPILE_PCRE8
4011 if (common->utf)
4012 {
4013 #endif
4014 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4015 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4016 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4017 #ifdef COMPILE_PCRE8
4018 }
4019 #endif
4020 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
4021 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4022
4023 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4024 }
4025
4026 #define CHAR1 STR_END
4027 #define CHAR2 STACK_TOP
4028
4029 static void do_casefulcmp(compiler_common *common)
4030 {
4031 DEFINE_COMPILER;
4032 struct sljit_jump *jump;
4033 struct sljit_label *label;
4034
4035 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4036 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4037 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4038 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
4039 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4040 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4041
4042 label = LABEL();
4043 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4044 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4045 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4046 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4047 JUMPTO(SLJIT_C_NOT_ZERO, label);
4048
4049 JUMPHERE(jump);
4050 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4051 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4052 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4053 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4054 }
4055
4056 #define LCC_TABLE STACK_LIMIT
4057
4058 static void do_caselesscmp(compiler_common *common)
4059 {
4060 DEFINE_COMPILER;
4061 struct sljit_jump *jump;
4062 struct sljit_label *label;
4063
4064 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4065 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4066
4067 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4068 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
4069 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
4070 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
4071 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4072 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4073
4074 label = LABEL();
4075 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4076 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4077 #ifndef COMPILE_PCRE8
4078 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4079 #endif
4080 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4081 #ifndef COMPILE_PCRE8
4082 JUMPHERE(jump);
4083 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4084 #endif
4085 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4086 #ifndef COMPILE_PCRE8
4087 JUMPHERE(jump);
4088 #endif
4089 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4090 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4091 JUMPTO(SLJIT_C_NOT_ZERO, label);
4092
4093 JUMPHERE(jump);
4094 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4095 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4096 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4097 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4098 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4099 }
4100
4101 #undef LCC_TABLE
4102 #undef CHAR1
4103 #undef CHAR2
4104
4105 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4106
4107 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
4108 {
4109 /* This function would be ineffective to do in JIT level. */
4110 pcre_uint32 c1, c2;
4111 const pcre_uchar *src2 = args->uchar_ptr;
4112 const pcre_uchar *end2 = args->end;
4113 const ucd_record *ur;
4114 const pcre_uint32 *pp;
4115
4116 while (src1 < end1)
4117 {
4118 if (src2 >= end2)
4119 return (pcre_uchar*)1;
4120 GETCHARINC(c1, src1);
4121 GETCHARINC(c2, src2);
4122 ur = GET_UCD(c2);
4123 if (c1 != c2 && c1 != c2 + ur->other_case)
4124 {
4125 pp = PRIV(ucd_caseless_sets) + ur->caseset;
4126 for (;;)
4127 {
4128 if (c1 < *pp) return NULL;
4129 if (c1 == *pp++) break;
4130 }
4131 }
4132 }
4133 return src2;
4134 }
4135
4136 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4137
4138 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4139 compare_context* context, jump_list **backtracks)
4140 {
4141 DEFINE_COMPILER;
4142 unsigned int othercasebit = 0;
4143 pcre_uchar *othercasechar = NULL;
4144 #ifdef SUPPORT_UTF
4145 int utflength;
4146 #endif
4147
4148 if (caseless && char_has_othercase(common, cc))
4149 {
4150 othercasebit = char_get_othercase_bit(common, cc);
4151 SLJIT_ASSERT(othercasebit);
4152 /* Extracting bit difference info. */
4153 #if defined COMPILE_PCRE8
4154 othercasechar = cc + (othercasebit >> 8);
4155 othercasebit &= 0xff;
4156 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4157 /* Note that this code only handles characters in the BMP. If there
4158 ever are characters outside the BMP whose othercase differs in only one
4159 bit from itself (there currently are none), this code will need to be
4160 revised for COMPILE_PCRE32. */
4161 othercasechar = cc + (othercasebit >> 9);
4162 if ((othercasebit & 0x100) != 0)
4163 othercasebit = (othercasebit & 0xff) << 8;
4164 else
4165 othercasebit &= 0xff;
4166 #endif /* COMPILE_PCRE[8|16|32] */
4167 }
4168
4169 if (context->sourcereg == -1)
4170 {
4171 #if defined COMPILE_PCRE8
4172 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4173 if (context->length >= 4)
4174 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4175 else if (context->length >= 2)
4176 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4177 else
4178 #endif
4179 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4180 #elif defined COMPILE_PCRE16
4181 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4182 if (context->length >= 4)
4183 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4184 else
4185 #endif
4186 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4187 #elif defined COMPILE_PCRE32
4188 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4189 #endif /* COMPILE_PCRE[8|16|32] */
4190 context->sourcereg = TMP2;
4191 }
4192
4193 #ifdef SUPPORT_UTF
4194 utflength = 1;
4195 if (common->utf && HAS_EXTRALEN(*cc))
4196 utflength += GET_EXTRALEN(*cc);
4197
4198 do
4199 {
4200 #endif
4201
4202 context->length -= IN_UCHARS(1);
4203 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4204
4205 /* Unaligned read is supported. */
4206 if (othercasebit != 0 && othercasechar == cc)
4207 {
4208 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4209 context->oc.asuchars[context->ucharptr] = othercasebit;
4210 }
4211 else
4212 {
4213 context->c.asuchars[context->ucharptr] = *cc;
4214 context->oc.asuchars[context->ucharptr] = 0;
4215 }
4216 context->ucharptr++;
4217
4218 #if defined COMPILE_PCRE8
4219 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4220 #else
4221 if (context->ucharptr >= 2 || context->length == 0)
4222 #endif
4223 {
4224 if (context->length >= 4)
4225 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4226 else if (context->length >= 2)
4227 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4228 #if defined COMPILE_PCRE8
4229 else if (context->length >= 1)
4230 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4231 #endif /* COMPILE_PCRE8 */
4232 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4233
4234 switch(context->ucharptr)
4235 {
4236 case 4 / sizeof(pcre_uchar):
4237 if (context->oc.asint != 0)
4238 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4239 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4240 break;
4241
4242 case 2 / sizeof(pcre_uchar):
4243 if (context->oc.asushort != 0)
4244 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4245 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4246 break;
4247
4248 #ifdef COMPILE_PCRE8
4249 case 1:
4250 if (context->oc.asbyte != 0)
4251 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4252 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4253 break;
4254 #endif
4255
4256 default:
4257 SLJIT_ASSERT_STOP();
4258 break;
4259 }
4260 context->ucharptr = 0;
4261 }
4262
4263 #else
4264
4265 /* Unaligned read is unsupported or in 32 bit mode. */
4266 if (context->length >= 1)
4267 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4268
4269 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4270
4271 if (othercasebit != 0 && othercasechar == cc)
4272 {
4273 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4274 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4275 }
4276 else
4277 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4278
4279 #endif
4280
4281 cc++;
4282 #ifdef SUPPORT_UTF
4283 utflength--;
4284 }
4285 while (utflength > 0);
4286 #endif
4287
4288 return cc;
4289 }
4290
4291 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4292
/* Helpers for compile_xclass_matchingpath(). The generated code keeps the
character type (typereg) and the character itself (TMP1) biased by a compile
time offset, so that a range test needs only one unsigned comparison. These
macros emit the ADD/SUB which moves the register from the current bias to
(value), emit nothing when the bias is already right, and record the new
bias in typeoffset / charoffset.

Both are wrapped in do { } while (0) so that each expands to exactly one
statement and stays correct under an unbraced if / else. Note that (value)
is evaluated more than once. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4312
4313 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4314 {
4315 DEFINE_COMPILER;
4316 jump_list *found = NULL;
4317 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4318 pcre_int32 c, charoffset;
4319 struct sljit_jump *jump = NULL;
4320 pcre_uchar *ccbegin;
4321 int compares, invertcmp, numberofcmps;
4322
4323 #ifdef SUPPORT_UCP
4324 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4325 BOOL charsaved = FALSE;
4326 int typereg = TMP1, scriptreg = TMP1;
4327 const pcre_uint32 *other_cases;
4328 pcre_int32 typeoffset;
4329 #endif
4330
4331 /* Although SUPPORT_UTF must be defined, we are
4332 not necessary in utf mode even in 8 bit mode. */
4333 detect_partial_match(common, backtracks);
4334 read_char(common);
4335
4336 cc++;
4337 if ((cc[-1] & XCL_HASPROP) == 0)
4338 {
4339 if ((cc[-1] & XCL_MAP) != 0)
4340 {
4341 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4342 #ifdef SUPPORT_UCP
4343 charsaved = TRUE;
4344 #endif
4345 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, FALSE, backtracks))
4346 {
4347 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4348
4349 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4350 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4351 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4352 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4353 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4354 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4355 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4356
4357 JUMPHERE(jump);
4358 }
4359 else
4360 add_jump(compiler, &found, CMP(SLJIT_C_LESS_EQUAL, TMP3, 0, SLJIT_IMM, 0xff));
4361
4362 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4363 cc += 32 / sizeof(pcre_uchar);
4364 }
4365 else
4366 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));
4367 }
4368 else if ((cc[-1] & XCL_MAP) != 0)
4369 {
4370 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4371 #ifdef SUPPORT_UCP
4372 charsaved = TRUE;
4373 #endif
4374 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4375 {
4376 #ifdef COMPILE_PCRE8
4377 SLJIT_ASSERT(common->utf);
4378 #endif
4379 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4380
4381 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4382 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4383 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4384 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4385 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4386 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4387
4388 JUMPHERE(jump);
4389 }
4390
4391 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4392 cc += 32 / sizeof(pcre_uchar);
4393 }
4394
4395 /* Scanning the necessary info. */
4396 ccbegin = cc;
4397 compares = 0;
4398 while (*cc != XCL_END)
4399 {
4400 compares++;
4401 if (*cc == XCL_SINGLE)
4402 {
4403 cc += 2;
4404 #ifdef SUPPORT_UTF
4405 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4406 #endif
4407 #ifdef SUPPORT_UCP
4408 needschar = TRUE;
4409 #endif
4410 }
4411 else if (*cc == XCL_RANGE)
4412 {
4413 cc += 2;
4414 #ifdef SUPPORT_UTF
4415 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4416 #endif
4417 cc++;
4418 #ifdef SUPPORT_UTF
4419 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4420 #endif
4421 #ifdef SUPPORT_UCP
4422 needschar = TRUE;
4423 #endif
4424 }
4425 #ifdef SUPPORT_UCP
4426 else
4427 {
4428 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4429 cc++;
4430 switch(*cc)
4431 {
4432 case PT_ANY:
4433 break;
4434
4435 case PT_LAMP:
4436 case PT_GC:
4437 case PT_PC:
4438 case PT_ALNUM:
4439 needstype = TRUE;
4440 break;
4441
4442 case PT_SC:
4443 needsscript = TRUE;
4444 break;
4445
4446 case PT_SPACE:
4447 case PT_PXSPACE:
4448 case PT_WORD:
4449 case PT_PXGRAPH:
4450 case PT_PXPRINT:
4451 case PT_PXPUNCT:
4452 needstype = TRUE;
4453 needschar = TRUE;
4454 break;
4455
4456 case PT_CLIST:
4457 case PT_UCNC:
4458 needschar = TRUE;
4459 break;
4460
4461 default:
4462 SLJIT_ASSERT_STOP();
4463 break;
4464 }
4465 cc += 2;
4466 }
4467 #endif
4468 }
4469
4470 #ifdef SUPPORT_UCP
4471 /* Simple register allocation. TMP1 is preferred if possible. */
4472 if (needstype || needsscript)
4473 {
4474 if (needschar && !charsaved)
4475 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4476 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4477 if (needschar)
4478 {
4479 if (needstype)
4480 {
4481 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4482 typereg = RETURN_ADDR;
4483 }
4484
4485 if (needsscript)
4486 scriptreg = TMP3;
4487 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4488 }
4489 else if (needstype && needsscript)
4490 scriptreg = TMP3;
4491 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4492
4493 if (needsscript)
4494 {
4495 if (scriptreg == TMP1)
4496 {
4497 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4498 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4499 }
4500 else
4501 {
4502 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4503 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4504 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4505 }
4506 }
4507 }
4508 #endif
4509
4510 /* Generating code. */
4511 cc = ccbegin;
4512 charoffset = 0;
4513 numberofcmps = 0;
4514 #ifdef SUPPORT_UCP
4515 typeoffset = 0;
4516 #endif
4517
4518 while (*cc != XCL_END)
4519 {
4520 compares--;
4521 invertcmp = (compares == 0 && list != backtracks);
4522 jump = NULL;
4523
4524 if (*cc == XCL_SINGLE)
4525 {
4526 cc ++;
4527 #ifdef SUPPORT_UTF
4528 if (common->utf)
4529 {
4530 GETCHARINC(c, cc);
4531 }
4532 else
4533 #endif
4534 c = *cc++;
4535
4536 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4537 {
4538 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4539 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4540 numberofcmps++;
4541 }
4542 else if (numberofcmps > 0)
4543 {
4544 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4545 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4546 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4547 numberofcmps = 0;
4548 }
4549 else
4550 {
4551 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4552 numberofcmps = 0;
4553 }
4554 }
4555 else if (*cc == XCL_RANGE)
4556 {
4557 cc ++;
4558 #ifdef SUPPORT_UTF
4559 if (common->utf)
4560 {
4561 GETCHARINC(c, cc);
4562 }
4563 else
4564 #endif
4565 c = *cc++;
4566 SET_CHAR_OFFSET(c);
4567 #ifdef SUPPORT_UTF
4568 if (common->utf)
4569 {
4570 GETCHARINC(c, cc);
4571 }
4572 else
4573 #endif
4574 c = *cc++;
4575 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4576 {
4577 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4578 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4579 numberofcmps++;
4580 }
4581 else if (numberofcmps > 0)
4582 {
4583 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4584 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4585 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4586 numberofcmps = 0;
4587 }
4588 else
4589 {
4590 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4591 numberofcmps = 0;
4592 }
4593 }
4594 #ifdef SUPPORT_UCP
4595 else
4596 {
4597 if (*cc == XCL_NOTPROP)
4598 invertcmp ^= 0x1;
4599 cc++;
4600 switch(*cc)
4601 {
4602 case PT_ANY:
4603 if (list != backtracks)
4604 {
4605 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4606 continue;
4607 }
4608 else if (cc[-1] == XCL_NOTPROP)
4609 continue;
4610 jump = JUMP(SLJIT_JUMP);
4611 break;
4612
4613 case PT_LAMP:
4614 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4615 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4616 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4617 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4618 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4619 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4620 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4621 break;
4622
4623 case PT_GC:
4624 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4625 SET_TYPE_OFFSET(c);
4626 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4627 break;
4628
4629 case PT_PC:
4630 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4631 break;
4632
4633 case PT_SC:
4634 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4635 break;
4636
4637 case PT_SPACE:
4638 case PT_PXSPACE:
4639 SET_CHAR_OFFSET(9);
4640 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4641 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4642
4643 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4644 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4645
4646 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4647 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4648
4649 SET_TYPE_OFFSET(ucp_Zl);
4650 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4651 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4652 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4653 break;
4654
4655 case PT_WORD:
4656 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4657 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4658 /* Fall through. */
4659
4660 case PT_ALNUM:
4661 SET_TYPE_OFFSET(ucp_Ll);
4662 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4663 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4664 SET_TYPE_OFFSET(ucp_Nd);
4665 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4666 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4667 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4668 break;
4669
4670 case PT_CLIST:
4671 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4672
4673 /* At least three characters are required.
4674 Otherwise this case would be handled by the normal code path. */
4675 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4676 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4677
4678 /* Optimizing character pairs, if their difference is power of 2. */
4679 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4680 {
4681 if (charoffset == 0)
4682 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4683 else
4684 {
4685 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4686 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4687 }
4688 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4689 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4690 other_cases += 2;
4691 }
4692 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4693 {
4694 if (charoffset == 0)
4695 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4696 else
4697 {
4698 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4699 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4700 }
4701 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4702 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4703
4704 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4705 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4706
4707 other_cases += 3;
4708 }
4709 else
4710 {
4711 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4712 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4713 }
4714
4715 while (*other_cases != NOTACHAR)
4716 {
4717 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4718 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4719 }
4720 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4721 break;
4722
4723 case PT_UCNC:
4724 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4725 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4726 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4727 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4728 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4729 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4730
4731 SET_CHAR_OFFSET(0xa0);
4732 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4733 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4734 SET_CHAR_OFFSET(0);
4735 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4736 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4737 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4738 break;
4739
4740 case PT_PXGRAPH:
4741 /* C and Z groups are the farthest two groups. */
4742 SET_TYPE_OFFSET(ucp_Ll);
4743 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4744 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4745
4746 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4747
4748 /* In case of ucp_Cf, we overwrite the result. */
4749 SET_CHAR_OFFSET(0x2066);
4750 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4751 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4752
4753 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4754 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4755
4756 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4757 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4758
4759 JUMPHERE(jump);
4760 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4761 break;
4762
4763 case PT_PXPRINT:
4764 /* C and Z groups are the farthest two groups. */
4765 SET_TYPE_OFFSET(ucp_Ll);
4766 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4767 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4768
4769 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4770 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4771
4772 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4773
4774 /* In case of ucp_Cf, we overwrite the result. */
4775 SET_CHAR_OFFSET(0x2066);
4776 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4777 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4778
4779 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4780 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4781
4782 JUMPHERE(jump);
4783 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4784 break;
4785
4786 case PT_PXPUNCT:
4787 SET_TYPE_OFFSET(ucp_Sc);
4788 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4789 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4790
4791 SET_CHAR_OFFSET(0);
4792 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4793 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4794
4795 SET_TYPE_OFFSET(ucp_Pc);
4796 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4797 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4798 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4799 break;
4800 }
4801 cc += 2;
4802 }
4803 #endif
4804
4805 if (jump != NULL)
4806 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4807 }
4808
4809 if (found != NULL)
4810 set_jumps(found, LABEL());
4811 }
4812
4813 #undef SET_TYPE_OFFSET
4814 #undef SET_CHAR_OFFSET
4815
4816 #endif
4817
4818 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4819 {
4820 DEFINE_COMPILER;
4821 int length;
4822 unsigned int c, oc, bit;
4823 compare_context context;
4824 struct sljit_jump *jump[4];
4825 jump_list *end_list;
4826 #ifdef SUPPORT_UTF
4827 struct sljit_label *label;
4828 #ifdef SUPPORT_UCP
4829 pcre_uchar propdata[5];
4830 #endif
4831 #endif
4832
4833 switch(type)
4834 {
4835 case OP_SOD:
4836 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4837 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4838 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4839 return cc;
4840
4841 case OP_SOM:
4842 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4843 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4844 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4845 return cc;
4846
4847 case OP_NOT_WORD_BOUNDARY:
4848 case OP_WORD_BOUNDARY:
4849 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4850 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4851 return cc;
4852
4853 case OP_NOT_DIGIT:
4854 case OP_DIGIT:
4855 /* Digits are usually 0-9, so it is worth to optimize them. */
4856 detect_partial_match(common, backtracks);
4857 /* Flip the starting bit in the negative case. */
4858 read_char8_type(common);
4859 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4860 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4861 return cc;
4862
4863 case OP_NOT_WHITESPACE:
4864 case OP_WHITESPACE:
4865 detect_partial_match(common, backtracks);
4866 read_char8_type(common);
4867 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4868 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4869 return cc;
4870
4871 case OP_NOT_WORDCHAR:
4872 case OP_WORDCHAR:
4873 detect_partial_match(common, backtracks);
4874 read_char8_type(common);
4875 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4876 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4877 return cc;
4878
4879 case OP_ANY:
4880 detect_partial_match(common, backtracks);
4881 read_char(common);
4882 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4883 {
4884 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4885 end_list = NULL;
4886 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4887 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4888 else
4889 check_str_end(common, &end_list);
4890
4891 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4892 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4893 set_jumps(end_list, LABEL());
4894 JUMPHERE(jump[0]);
4895 }
4896 else
4897 check_newlinechar(common, common->nltype, backtracks, TRUE);
4898 return cc;
4899
4900 case OP_ALLANY:
4901 detect_partial_match(common, backtracks);
4902 #ifdef SUPPORT_UTF
4903 if (common->utf)
4904 {
4905 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4906 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4907 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4908 #if defined COMPILE_PCRE8
4909 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4910 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4911 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4912 #elif defined COMPILE_PCRE16
4913 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4914 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4915 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4916 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4917 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4918 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4919 #endif
4920 JUMPHERE(jump[0]);
4921 #endif /* COMPILE_PCRE[8|16] */
4922 return cc;
4923 }
4924 #endif
4925 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4926 return cc;
4927
4928 case OP_ANYBYTE:
4929 detect_partial_match(common, backtracks);
4930 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4931 return cc;
4932
4933 #ifdef SUPPORT_UTF
4934 #ifdef SUPPORT_UCP
4935 case OP_NOTPROP:
4936 case OP_PROP:
4937 propdata[0] = XCL_HASPROP;
4938 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4939 propdata[2] = cc[0];
4940 propdata[3] = cc[1];
4941 propdata[4] = XCL_END;
4942 compile_xclass_matchingpath(common, propdata, backtracks);
4943 return cc + 2;
4944 #endif
4945 #endif
4946
4947 case OP_ANYNL:
4948 detect_partial_match(common, backtracks);
4949 read_char(common);
4950 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4951 /* We don't need to handle soft partial matching case. */
4952 end_list = NULL;
4953 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4954 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4955 else
4956 check_str_end(common, &end_list);
4957 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4958 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4959 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4960 jump[2] = JUMP(SLJIT_JUMP);
4961 JUMPHERE(jump[0]);
4962 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4963 set_jumps(end_list, LABEL());
4964 JUMPHERE(jump[1]);
4965 JUMPHERE(jump[2]);
4966 return cc;
4967
4968 case OP_NOT_HSPACE:
4969 case OP_HSPACE:
4970 detect_partial_match(common, backtracks);
4971 read_char(common);
4972 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4973 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4974 return cc;
4975
4976 case OP_NOT_VSPACE:
4977 case OP_VSPACE:
4978 detect_partial_match(common, backtracks);
4979 read_char(common);
4980 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4981 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4982 return cc;
4983
4984 #ifdef SUPPORT_UCP
4985 case OP_EXTUNI:
4986 detect_partial_match(common, backtracks);
4987 read_char(common);
4988 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4989 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4990 /* Optimize register allocation: use a real register. */
4991 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4992 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4993
4994 label = LABEL();
4995 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4996 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4997 read_char(common);
4998 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4999 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5000 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5001
5002 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5003 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5004 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5005 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5006 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5007 JUMPTO(SLJIT_C_NOT_ZERO, label);
5008
5009 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5010 JUMPHERE(jump[0]);
5011 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5012
5013 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5014 {
5015 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5016 /* Since we successfully read a char above, partial matching must occure. */
5017 check_partial(common, TRUE);
5018 JUMPHERE(jump[0]);
5019 }
5020 return cc;
5021 #endif
5022
5023 case OP_EODN:
5024 /* Requires rather complex checks. */
5025 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5026 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5027 {
5028 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5029 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5030 if (common->mode == JIT_COMPILE)
5031 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5032 else
5033 {
5034 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5035 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5036 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5037 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5038 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5039 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5040 check_partial(common, TRUE);
5041 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5042 JUMPHERE(jump[1]);
5043 }
5044 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5045 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5046 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5047 }
5048 else if (common->nltype == NLTYPE_FIXED)
5049 {
5050 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5051 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5052 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5053 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5054 }
5055 else
5056 {
5057 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5058 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5059 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5060 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5061 jump[2] = JUMP(SLJIT_C_GREATER);
5062 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5063 /* Equal. */
5064 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5065 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5066 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5067
5068 JUMPHERE(jump[1]);
5069 if (common->nltype == NLTYPE_ANYCRLF)
5070 {
5071 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5072 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5073 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5074 }
5075 else
5076 {
5077 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5078 read_char(common);
5079 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5080 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5081 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5082 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
5083 }
5084 JUMPHERE(jump[2]);
5085 JUMPHERE(jump[3]);
5086 }
5087 JUMPHERE(jump[0]);
5088 check_partial(common, FALSE);
5089 return cc;
5090
5091 case OP_EOD:
5092 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5093 check_partial(common, FALSE);
5094 return cc;
5095
5096 case OP_CIRC:
5097 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5098 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5099 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5100 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5101 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5102 return cc;
5103
5104 case OP_CIRCM:
5105 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5106 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5107 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5108 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5109 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5110 jump[0] = JUMP(SLJIT_JUMP);
5111 JUMPHERE(jump[1]);
5112
5113 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5114 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5115 {
5116 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5117 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5118 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5119 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5120 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5121 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5122 }
5123 else
5124 {
5125 skip_char_back(common);
5126 read_char(common);
5127 check_newlinechar(common, common->nltype, backtracks, FALSE);
5128 }
5129 JUMPHERE(jump[0]);
5130 return cc;
5131
5132 case OP_DOLL:
5133 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5134 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5135 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5136
5137 if (!common->endonly)
5138 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5139 else
5140 {
5141 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5142 check_partial(common, FALSE);
5143 }
5144 return cc;
5145
5146 case OP_DOLLM:
5147 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5148 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5149 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5150 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5151 check_partial(common, FALSE);
5152 jump[0] = JUMP(SLJIT_JUMP);
5153 JUMPHERE(jump[1]);
5154
5155 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5156 {
5157 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5158 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5159 if (common->mode == JIT_COMPILE)
5160 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5161 else
5162 {
5163 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5164 /* STR_PTR = STR_END - IN_UCHARS(1) */
5165 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5166 check_partial(common, TRUE);
5167 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5168 JUMPHERE(jump[1]);
5169 }
5170
5171 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5172 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5173 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5174 }
5175 else
5176 {
5177 peek_char(common);
5178 check_newlinechar(common, common->nltype, backtracks, FALSE);
5179 }
5180 JUMPHERE(jump[0]);
5181 return cc;
5182
5183 case OP_CHAR:
5184 case OP_CHARI:
5185 length = 1;
5186 #ifdef SUPPORT_UTF
5187 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5188 #endif
5189 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5190 {
5191 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5192 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5193
5194 context.length = IN_UCHARS(length);
5195 context.sourcereg = -1;
5196 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5197 context.ucharptr = 0;
5198 #endif
5199 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5200 }
5201 detect_partial_match(common, backtracks);
5202 read_char(common);
5203 #ifdef SUPPORT_UTF
5204 if (common->utf)
5205 {
5206 GETCHAR(c, cc);
5207 }
5208 else
5209 #endif
5210 c = *cc;
5211 if (type == OP_CHAR || !char_has_othercase(common, cc))
5212 {
5213 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5214 return cc + length;
5215 }
5216 oc = char_othercase(common, c);
5217 bit = c ^ oc;
5218 if (is_powerof2(bit))
5219 {
5220 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5221 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5222 return cc + length;
5223 }
5224 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
5225 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5226 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
5227 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5228 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5229 return cc + length;
5230
5231 case OP_NOT:
5232 case OP_NOTI:
5233 detect_partial_match(common, backtracks);
5234 length = 1;
5235 #ifdef SUPPORT_UTF
5236 if (common->utf)
5237 {
5238 #ifdef COMPILE_PCRE8
5239 c = *cc;
5240 if (c < 128)
5241 {
5242 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5243 if (type == OP_NOT || !char_has_othercase(common, cc))
5244 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5245 else
5246 {
5247 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5248 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5249 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5250 }
5251 /* Skip the variable-length character. */
5252 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5253 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5254 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5255 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5256 JUMPHERE(jump[0]);
5257 return cc + 1;
5258 }
5259 else
5260 #endif /* COMPILE_PCRE8 */
5261 {
5262 GETCHARLEN(c, cc, length);
5263 read_char(common);
5264 }
5265 }
5266 else
5267 #endif /* SUPPORT_UTF */
5268 {
5269 read_char(common);
5270 c = *cc;
5271 }
5272
5273 if (type == OP_NOT || !char_has_othercase(common, cc))
5274 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5275 else
5276 {
5277 oc = char_othercase(common, c);
5278 bit = c ^ oc;
5279 if (is_powerof2(bit))
5280 {
5281 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5282 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5283 }
5284 else
5285 {
5286 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5287 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5288 }
5289 }
5290 return cc + length;
5291
5292 case OP_CLASS:
5293 case OP_NCLASS:
5294 detect_partial_match(common, backtracks);
5295 read_char8(common);
5296 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5297 return cc + 32 / sizeof(pcre_uchar);
5298
5299 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5300 jump[0] = NULL;
5301 #ifdef COMPILE_PCRE8
5302 /* This check only affects 8 bit mode. In other modes, we
5303 always need to compare the value with 255. */
5304 if (common->utf)
5305 #endif /* COMPILE_PCRE8 */
5306 {
5307 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5308 if (type == OP_CLASS)
5309 {
5310 add_jump(compiler, backtracks, jump[0]);
5311 jump[0] = NULL;
5312 }
5313 }
5314 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
5315 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5316 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5317 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5318 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5319 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5320 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5321 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5322 if (jump[0] != NULL)
5323 JUMPHERE(jump[0]);
5324 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
5325 return cc + 32 / sizeof(pcre_uchar);
5326
5327 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5328 case OP_XCLASS:
5329 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5330 return cc + GET(cc, 0) - 1;
5331 #endif
5332
5333 case OP_REVERSE:
5334 length = GET(cc, 0);
5335 if (length == 0)
5336 return cc + LINK_SIZE;
5337 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5338 #ifdef SUPPORT_UTF
5339 if (common->utf)
5340 {
5341 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5342 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5343 label = LABEL();
5344 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5345 skip_char_back(common);
5346 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5347 JUMPTO(SLJIT_C_NOT_ZERO, label);
5348 }
5349 else
5350 #endif
5351 {
5352 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5353 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5354 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5355 }
5356 check_start_used_ptr(common);
5357 return cc + LINK_SIZE;
5358 }
5359 SLJIT_ASSERT_STOP();
5360 return cc;
5361 }
5362
5363 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5364 {
5365 /* This function consumes at least one input character. */
5366 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5367 DEFINE_COMPILER;
5368 pcre_uchar *ccbegin = cc;
5369 compare_context context;
5370 int size;
5371
5372 context.length = 0;
5373 do
5374 {
5375 if (cc >= ccend)
5376 break;
5377
5378 if (*cc == OP_CHAR)
5379 {
5380 size = 1;
5381 #ifdef SUPPORT_UTF
5382 if (common->utf && HAS_EXTRALEN(cc[1]))
5383 size += GET_EXTRALEN(cc[1]);
5384 #endif
5385 }
5386 else if (*cc == OP_CHARI)
5387 {
5388 size = 1;
5389 #ifdef SUPPORT_UTF
5390 if (common->utf)
5391 {
5392 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5393 size = 0;
5394 else if (HAS_EXTRALEN(cc[1]))
5395 size += GET_EXTRALEN(cc[1]);
5396 }
5397 else
5398 #endif
5399 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5400 size = 0;
5401 }
5402 else
5403 size = 0;
5404
5405 cc += 1 + size;
5406 context.length += IN_UCHARS(size);
5407 }
5408 while (size > 0 && context.length <= 128);
5409
5410 cc = ccbegin;
5411 if (context.length > 0)
5412 {
5413 /* We have a fixed-length byte sequence. */
5414 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5415 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5416
5417 context.sourcereg = -1;
5418 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5419 context.ucharptr = 0;
5420 #endif
5421 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5422 return cc;
5423 }
5424
5425 /* A non-fixed length character will be checked if length == 0. */
5426 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5427 }
5428
5429 /* Forward definitions. */
5430 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5431 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5432
/* PUSH_BACKTRACK(size, ccstart, error)
   Obtains `size` bytes from sljit_alloc_memory(), zero-fills them, records
   `ccstart` in the new backtrack record and links the record in front of
   parent->top. The macro relies on `compiler`, `parent` and `backtrack`
   being in scope at the point of use. If the compiler reports an error after
   the allocation, the enclosing function returns `error`. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
  { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
  } \
  while (0)

/* Same as PUSH_BACKTRACK, for use inside functions returning void: on a
   compiler error the enclosing function returns without a value. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
  { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
  } \
  while (0)

/* Views the in-scope `backtrack` pointer as a pointer to `type`. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5460
5461 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5462 {
5463 /* The OVECTOR offset goes to TMP2. */
5464 DEFINE_COMPILER;
5465 int count = GET2(cc, 1 + IMM2_SIZE);
5466 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5467 unsigned int offset;
5468 jump_list *found = NULL;
5469
5470 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5471
5472 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5473
5474 count--;
5475 while (count-- > 0)
5476 {
5477 offset = GET2(slot, 0) << 1;
5478 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5479 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5480 slot += common->name_entry_size;
5481 }
5482
5483 offset = GET2(slot, 0) << 1;
5484 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5485 if (backtracks != NULL && !common->jscript_compat)
5486 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5487
5488 set_jumps(found, LABEL());
5489 }
5490
5491 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5492 {
5493 DEFINE_COMPILER;
5494 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5495 int offset = 0;
5496 struct sljit_jump *jump = NULL;
5497 struct sljit_jump *partial;
5498 struct sljit_jump *nopartial;
5499
5500 if (ref)
5501 {
5502 offset = GET2(cc, 1) << 1;
5503 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5504 /* OVECTOR(1) contains the "string begin - 1" constant. */
5505 if (withchecks && !common->jscript_compat)
5506 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5507 }
5508 else
5509 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5510
5511 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5512 if (common->utf && *cc == OP_REFI)
5513 {
5514 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5515 if (ref)
5516 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5517 else
5518 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5519
5520 if (withchecks)
5521 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5522
5523 /* Needed to save important temporary registers. */
5524 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5525 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5526 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5527 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5528 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5529 if (common->mode == JIT_COMPILE)
5530 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5531 else
5532 {
5533 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5534 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5535 check_partial(common, FALSE);
5536 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5537 JUMPHERE(nopartial);
5538 }
5539 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5540 }
5541 else
5542 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5543 {
5544 if (ref)
5545 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5546 else
5547 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5548
5549 if (withchecks)
5550 jump = JUMP(SLJIT_C_ZERO);
5551
5552 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5553 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5554 if (common->mode == JIT_COMPILE)
5555 add_jump(compiler, backtracks, partial);
5556
5557 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5558 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5559
5560 if (common->mode != JIT_COMPILE)
5561 {
5562 nopartial = JUMP(SLJIT_JUMP);
5563 JUMPHERE(partial);
5564 /* TMP2 -= STR_END - STR_PTR */
5565 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5566 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5567 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5568 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5569 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5570 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5571 JUMPHERE(partial);
5572 check_partial(common, FALSE);
5573 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5574 JUMPHERE(nopartial);
5575 }
5576 }
5577
5578 if (jump != NULL)
5579 {
5580 if (emptyfail)
5581 add_jump(compiler, backtracks, jump);
5582 else
5583 JUMPHERE(jump);
5584 }
5585 }
5586
5587 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5588 {
5589 DEFINE_COMPILER;
5590 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5591 backtrack_common *backtrack;
5592 pcre_uchar type;
5593 int offset = 0;
5594 struct sljit_label *label;
5595 struct sljit_jump *zerolength;
5596 struct sljit_jump *jump = NULL;
5597 pcre_uchar *ccbegin = cc;
5598 int min = 0, max = 0;
5599 BOOL minimize;
5600
5601 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5602
5603 if (ref)
5604 offset = GET2(cc, 1) << 1;
5605 else
5606 cc += IMM2_SIZE;
5607 type = cc[1 + IMM2_SIZE];
5608
5609 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5610 minimize = (type & 0x1) != 0;
5611 switch(type)
5612 {
5613 case OP_CRSTAR:
5614 case OP_CRMINSTAR:
5615 min = 0;
5616 max = 0;
5617 cc += 1 + IMM2_SIZE + 1;
5618 break;
5619 case OP_CRPLUS:
5620 case OP_CRMINPLUS:
5621 min = 1;
5622 max = 0;
5623 cc += 1 + IMM2_SIZE + 1;
5624 break;
5625 case OP_CRQUERY:
5626 case OP_CRMINQUERY:
5627 min = 0;
5628 max = 1;
5629 cc += 1 + IMM2_SIZE + 1;
5630 break;
5631 case OP_CRRANGE:
5632 case OP_CRMINRANGE:
5633 min = GET2(cc, 1 + IMM2_SIZE + 1);
5634 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5635 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5636 break;
5637 default:
5638 SLJIT_ASSERT_STOP();
5639 break;
5640 }
5641
5642 if (!minimize)
5643 {
5644 if (min == 0)
5645 {
5646 allocate_stack(common, 2);
5647 if (ref)
5648 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5649 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5650 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5651 /* Temporary release of STR_PTR. */
5652 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5653 /* Handles both invalid and empty cases. Since the minimum repeat,
5654 is zero the invalid case is basically the same as an empty case. */
5655 if (ref)
5656 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5657 else
5658 {
5659 compile_dnref_search(common, ccbegin, NULL);
5660 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5661 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5662 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5663 }
5664 /* Restore if not zero length. */
5665 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5666 }
5667 else
5668 {
5669 allocate_stack(common, 1);
5670 if (ref)
5671 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5672 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5673 if (ref)
5674 {
5675 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5676 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5677 }
5678 else
5679 {
5680 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5681 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5682 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5683 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5684 }
5685 }
5686
5687 if (min > 1 || max > 1)
5688 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5689
5690 label = LABEL();
5691 if (!ref)
5692 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5693 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5694
5695 if (min > 1 || max > 1)
5696 {
5697 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5698 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5699 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5700 if (min > 1)
5701 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5702 if (max > 1)
5703 {
5704 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5705 allocate_stack(common, 1);
5706 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5707 JUMPTO(SLJIT_JUMP, label);
5708 JUMPHERE(jump);
5709 }
5710 }
5711
5712 if (max == 0)
5713 {
5714 /* Includes min > 1 case as well. */
5715 allocate_stack(common, 1);
5716 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5717 JUMPTO(SLJIT_JUMP, label);
5718 }
5719
5720 JUMPHERE(zerolength);
5721 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5722
5723 count_match(common);
5724 return cc;
5725 }
5726
5727 allocate_stack(common, ref ? 2 : 3);
5728 if (ref)
5729 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5730 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5731 if (type != OP_CRMINSTAR)
5732 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5733
5734 if (min == 0)
5735 {
5736 /* Handles both invalid and empty cases. Since the minimum repeat,
5737 is zero the invalid case is basically the same as an empty case. */
5738 if (ref)
5739 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5740 else
5741 {
5742 compile_dnref_search(common, ccbegin, NULL);
5743 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5744 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5745 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5746 }
5747 /* Length is non-zero, we can match real repeats. */
5748 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5749 jump = JUMP(SLJIT_JUMP);
5750 }
5751 else
5752 {
5753 if (ref)
5754 {
5755 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5756 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5757 }
5758 else
5759 {
5760 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5761 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5762 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5763 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5764 }
5765 }
5766
5767 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5768 if (max > 0)
5769 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5770
5771 if (!ref)
5772 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5773 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5774 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5775
5776 if (min > 1)
5777 {
5778 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5779 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5780 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5781 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5782 }
5783 else if (max > 0)
5784 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5785
5786 if (jump != NULL)
5787 JUMPHERE(jump);
5788 JUMPHERE(zerolength);
5789
5790 count_match(common);
5791 return cc;
5792 }
5793
5794 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5795 {
5796 DEFINE_COMPILER;
5797 backtrack_common *backtrack;
5798 recurse_entry *entry = common->entries;
5799 recurse_entry *prev = NULL;
5800 sljit_sw start = GET(cc, 1);
5801 pcre_uchar *start_cc;
5802 BOOL needs_control_head;
5803
5804 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5805
5806 /* Inlining simple patterns. */
5807 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5808 {
5809 start_cc = common->start + start;
5810 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5811 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5812 return cc + 1 + LINK_SIZE;
5813 }
5814
5815 while (entry != NULL)
5816 {
5817 if (entry->start == start)
5818 break;
5819 prev = entry;
5820 entry = entry->next;
5821 }
5822
5823 if (entry == NULL)
5824 {
5825 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5826 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5827 return NULL;
5828 entry->next = NULL;
5829 entry->entry = NULL;
5830 entry->calls = NULL;
5831 entry->start = start;
5832
5833 if (prev != NULL)
5834 prev->next = entry;
5835 else
5836 common->entries = entry;
5837 }
5838
5839 if (common->has_set_som && common->mark_ptr != 0)
5840 {
5841 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5842 allocate_stack(common, 2);
5843 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5844 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5845 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5846 }
5847 else if (common->has_set_som || common->mark_ptr != 0)
5848 {
5849 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5850 allocate_stack(common, 1);
5851 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5852 }
5853
5854 if (entry->entry == NULL)
5855 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5856 else
5857 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5858 /* Leave if the match is failed. */
5859 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5860 return cc + 1 + LINK_SIZE;
5861 }
5862
5863 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5864 {
5865 const pcre_uchar *begin = arguments->begin;
5866 int *offset_vector = arguments->offsets;
5867 int offset_count = arguments->offset_count;
5868 int i;
5869
5870 if (PUBL(callout) == NULL)
5871 return 0;
5872
5873 callout_block->version = 2;
5874 callout_block->callout_data = arguments->callout_data;
5875
5876 /* Offsets in subject. */
5877 callout_block->subject_length = arguments->end - arguments->begin;
5878 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5879 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5880 #if defined COMPILE_PCRE8
5881 callout_block->subject = (PCRE_SPTR)begin;
5882 #elif defined COMPILE_PCRE16
5883 callout_block->subject = (PCRE_SPTR16)begin;
5884 #elif defined COMPILE_PCRE32
5885 callout_block->subject = (PCRE_SPTR32)begin;
5886 #endif
5887
5888 /* Convert and copy the JIT offset vector to the offset_vector array. */
5889 callout_block->capture_top = 0;
5890 callout_block->offset_vector = offset_vector;
5891 for (i = 2; i < offset_count; i += 2)
5892 {
5893 offset_vector[i] = jit_ovector[i] - begin;
5894 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5895 if (jit_ovector[i] >= begin)
5896 callout_block->capture_top = i;
5897 }
5898
5899 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5900 if (offset_count > 0)
5901 offset_vector[0] = -1;
5902 if (offset_count > 1)
5903 offset_vector[1] = -1;
5904 return (*PUBL(callout))(callout_block);
5905 }
5906
5907 /* Size of the callout block, rounded up to a multiple of 8 bytes. */
5908 #define CALLOUT_ARG_SIZE \
5909 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5910
/* Offset of the callout block field 'arg' relative to STACK_TOP. The offset is
negative: the block starts CALLOUT_ARG_SIZE bytes before STACK_TOP. */
5911 #define CALLOUT_ARG_OFFSET(arg) \
5912 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5913
/* Emits the code for a callout item.

The generated code reserves room for a callout block on the JIT stack, fills
in the fields known at compile time (callout_number from cc[1],
pattern_position from GET(cc, 2), next_item_length from GET(cc, 2 + LINK_SIZE))
together with capture_last and mark, and then calls do_callout(). A positive
return value makes the generated code backtrack, a negative one leaves through
the forced quit path, and zero continues with the next item.

Arguments:
  common   the compiler state
  cc       points to the callout opcode
  parent   parent backtrack (used by PUSH_BACKTRACK)

Returns:   pointer to the item following the callout (cc + 2 + 2 * LINK_SIZE)
*/
5914 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5915 {
5916 DEFINE_COMPILER;
5917 backtrack_common *backtrack;
5918
5919 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5920
/* Reserve room for the callout block. */
5921 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5922
/* TMP1 receives ARGUMENTS; presumably TMP1 is also the first argument register
of the call below - TODO confirm against the register definitions. */
5923 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5924 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5925 SLJIT_ASSERT(common->capture_last_ptr != 0);
5926 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5927 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5928
5929 /* These pointer sized fields temporarily store internal variables:
offset_vector holds STR_PTR and subject holds OVECTOR(0). do_callout()
converts them to current_position and start_match. */
5930 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5931 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5932 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5933
/* The mark field is loaded from the jit_arguments when marks are in use,
otherwise an immediate 0 is stored. */
5934 if (common->mark_ptr != 0)
5935 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5936 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5937 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5938 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5939
5940 /* Needed to save important temporary registers: STACK_TOP is kept in LOCALS0
across the call and reloaded afterwards. */
5941 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* Second argument: address of the callout block. Third argument: address of
the JIT offset vector. */
5942 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5943 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5944 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* NOTE(review): presumably widens the int sized return value to a full
register before it is compared - confirm against the sljit documentation. */
5945 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5946 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5947 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5948
5949 /* Check return value: greater than zero backtracks, less than zero takes the
forced quit path, zero falls through. */
5950 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5951 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5952 if (common->forced_quit_label == NULL)
5953 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5954 else
5955 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5956 return cc + 2 + 2 * LINK_SIZE;
5957 }
5958
5959 #undef CALLOUT_ARG_SIZE
5960 #undef CALLOUT_ARG_OFFSET
5961
/* Emits the matching path of an assertion. The opcode is asserted to be in
the range OP_ASSERT .. OP_ASSERTBACK_NOT, and it may be preceded by
OP_BRAZERO or OP_BRAMINZERO (only when 'conditional' is FALSE).

Every alternative of the assertion is compiled in turn, each followed by its
own backtracking path. While the body is compiled the accept / quit /
then_trap related fields of 'common' are redirected; their previous values
are saved on entry and restored on every exit path.

Arguments:
  common       the compiler state
  cc           points to the assertion opcode, or to the OP_BRAZERO /
               OP_BRAMINZERO in front of it
  backtrack    assert backtrack record; framesize and private_data_ptr are
               stored into it, and matchingpath is set for OP_BRAZERO
  conditional  TRUE: failure jumps are collected in backtrack->condfailed,
               FALSE: in backtrack->common.topbacktracks

Returns:       pointer after the end of the assertion (cc + 1 + LINK_SIZE,
               with cc at the item closing the last alternative), or NULL if
               the sljit compiler reported an error
*/
5962 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5963 {
5964 DEFINE_COMPILER;
5965 int framesize;
5966 int extrasize;
5967 BOOL needs_control_head;
5968 int private_data_ptr;
5969 backtrack_common altbacktrack;
5970 pcre_uchar *ccbegin;
5971 pcre_uchar opcode;
5972 pcre_uchar bra = OP_BRA;
5973 jump_list *tmp = NULL;
5974 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5975 jump_list **found;
5976 /* Saving previous accept variables. */
5977 BOOL save_local_exit = common->local_exit;
5978 BOOL save_positive_assert = common->positive_assert;
5979 then_trap_backtrack *save_then_trap = common->then_trap;
5980 struct sljit_label *save_quit_label = common->quit_label;
5981 struct sljit_label *save_accept_label = common->accept_label;
5982 jump_list *save_quit = common->quit;
5983 jump_list *save_positive_assert_quit = common->positive_assert_quit;
5984 jump_list *save_accept = common->accept;
5985 struct sljit_jump *jump;
5986 struct sljit_jump *brajump = NULL;
5987
5988 /* Assert captures then: the enclosing then_trap is hidden while the
assertion is compiled. */
5989 common->then_trap = NULL;
5990
5991 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5992 {
5993 SLJIT_ASSERT(!conditional);
5994 bra = *cc;
5995 cc++;
5996 }
5997 private_data_ptr = PRIVATE_DATA(cc);
5998 SLJIT_ASSERT(private_data_ptr != 0);
5999 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6000 backtrack->framesize = framesize;
6001 backtrack->private_data_ptr = private_data_ptr;
6002 opcode = *cc;
6003 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* 'found' is the list that receives the jump taken when an alternative
matches. For a positive assert this is the local list 'tmp' (handled at
"Assert is successful" below); for a negative assert a matching alternative
means failure, so the jump goes directly to 'target'. */
6004 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin marks the start of the current alternative, cc its end. */
6005 ccbegin = cc;
6006 cc += GET(cc, 1);
6007
6008 if (bra == OP_BRAMINZERO)
6009 {
6010 /* This is a braminzero backtrack path. */
6011 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6012 free_stack(common, 1);
6013 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6014 }
6015
/* No frame is built: only STR_PTR and, when needed, the control head are
pushed (extrasize slots). */
6016 if (framesize < 0)
6017 {
6018 extrasize = needs_control_head ? 2 : 1;
6019 if (framesize == no_frame)
6020 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6021 allocate_stack(common, extrasize);
6022 if (needs_control_head)
6023 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6024 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6025 if (needs_control_head)
6026 {
6027 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6028 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6029 }
6030 }
6031 else
6032 {
/* A frame is required: STR_PTR, the control head (when needed) and the
previous value of private_data_ptr are pushed, followed by the frame itself.
private_data_ptr is set to STACK_TOP minus the whole allocation. */
6033 extrasize = needs_control_head ? 3 : 2;
6034 allocate_stack(common, framesize + extrasize);
6035 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6036 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6037 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6038 if (needs_control_head)
6039 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6040 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6041 if (needs_control_head)
6042 {
6043 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6044 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6045 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6046 }
6047 else
6048 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6049 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6050 }
6051
6052 memset(&altbacktrack, 0, sizeof(backtrack_common));
6053 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6054 {
6055 /* Negative assert is stronger than positive assert. */
6056 common->local_exit = TRUE;
6057 common->quit_label = NULL;
6058 common->quit = NULL;
6059 common->positive_assert = FALSE;
6060 }
6061 else
6062 common->positive_assert = TRUE;
6063 common->positive_assert_quit = NULL;
6064
/* One iteration per alternative. */
6065 while (1)
6066 {
6067 common->accept_label = NULL;
6068 common->accept = NULL;
6069 altbacktrack.top = NULL;
6070 altbacktrack.topbacktracks = NULL;
6071
/* The second and later alternatives restart from the STR_PTR saved on the
stack. */
6072 if (*ccbegin == OP_ALT)
6073 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6074
6075 altbacktrack.cc = ccbegin;
6076 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6077 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6078 {
/* Compiler error: restore the saved state and give up. */
6079 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6080 {
6081 common->local_exit = save_local_exit;
6082 common->quit_label = save_quit_label;
6083 common->quit = save_quit;
6084 }
6085 common->positive_assert = save_positive_assert;
6086 common->then_trap = save_then_trap;
6087 common->accept_label = save_accept_label;
6088 common->positive_assert_quit = save_positive_assert_quit;
6089 common->accept = save_accept;
6090 return NULL;
6091 }
/* The alternative matched; accept jumps collected while compiling it also
land here. */
6092 common->accept_label = LABEL();
6093 if (common->accept != NULL)
6094 set_jumps(common->accept, common->accept_label);
6095
6096 /* Reset stack. */
6097 if (framesize < 0)
6098 {
6099 if (framesize == no_frame)
6100 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6101 else
6102 free_stack(common, extrasize);
6103 if (needs_control_head)
6104 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6105 }
6106 else
6107 {
6108 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6109 {
6110 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6111 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6112 if (needs_control_head)
6113 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6114 }
6115 else
6116 {
6117 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6118 if (needs_control_head)
6119 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6120 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6121 }
6122 }
6123
6124 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6125 {
6126 /* We know that STR_PTR was stored on the top of the stack. */
6127 if (conditional)
6128 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6129 else if (bra == OP_BRAZERO)
6130 {
6131 if (framesize < 0)
6132 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6133 else
6134 {
6135 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6136 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6137 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6138 }
6139 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6140 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6141 }
6142 else if (framesize >= 0)
6143 {
6144 /* For OP_BRA and OP_BRAMINZERO. */
6145 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6146 }
6147 }
/* Leave through the "alternative matched" list selected above. */
6148 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6149
6150 compile_backtrackingpath(common, altbacktrack.top);
6151 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6152 {
/* Compiler error: restore the saved state and give up. */
6153 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6154 {
6155 common->local_exit = save_local_exit;
6156 common->quit_label = save_quit_label;
6157 common->quit = save_quit;
6158 }
6159 common->positive_assert = save_positive_assert;
6160 common->then_trap = save_then_trap;
6161 common->accept_label = save_accept_label;
6162 common->positive_assert_quit = save_positive_assert_quit;
6163 common->accept = save_accept;
6164 return NULL;
6165 }
/* Failures of this alternative continue here, i.e. with the next alternative
or with the code after the loop. */
6166 set_jumps(altbacktrack.topbacktracks, LABEL());
6167
6168 if (*cc != OP_ALT)
6169 break;
6170
/* Advance to the next alternative. */
6171 ccbegin = cc;
6172 cc += GET(cc, 1);
6173 }
6174
6175 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6176 {
6177 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6178 /* Makes the check less complicated below. */
6179 common->positive_assert_quit = common->quit;
6180 }
6181
6182 /* None of them matched. */
6183 if (common->positive_assert_quit != NULL)
6184 {
/* Quit jumps collected inside the assertion land here and restore the stack;
the normal path jumps over this code. */
6185 jump = JUMP(SLJIT_JUMP);
6186 set_jumps(common->positive_assert_quit, LABEL());
6187 SLJIT_ASSERT(framesize != no_stack);
6188 if (framesize < 0)
6189 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6190 else
6191 {
6192 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6193 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6194 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6195 }
6196 JUMPHERE(jump);
6197 }
6198
/* Restore the control head that was saved in STACK(1). */
6199 if (needs_control_head)
6200 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6201
6202 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6203 {
6204 /* The assert has failed. */
6205 if (conditional || bra == OP_BRAZERO)
6206 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6207
6208 if (framesize < 0)
6209 {
6210 /* The topmost item should be 0. */
6211 if (bra == OP_BRAZERO)
6212 {
6213 if (extrasize == 2)
6214 free_stack(common, 1);
6215 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6216 }
6217 else
6218 free_stack(common, extrasize);
6219 }
6220 else
6221 {
6222 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6223 /* The topmost item should be 0. */
6224 if (bra == OP_BRAZERO)
6225 {
6226 free_stack(common, framesize + extrasize - 1);
6227 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6228 }
6229 else
6230 free_stack(common, framesize + extrasize);
6231 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6232 }
/* For OP_BRAZERO the jump is bound to the matching path label below;
otherwise it is added to the failure list. */
6233 jump = JUMP(SLJIT_JUMP);
6234 if (bra != OP_BRAZERO)
6235 add_jump(compiler, target, jump);
6236
6237 /* Assert is successful. */
6238 set_jumps(tmp, LABEL());
6239 if (framesize < 0)
6240 {
6241 /* We know that STR_PTR was stored on the top of the stack. */
6242 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6243 /* Keep the STR_PTR on the top of the stack. */
6244 if (bra == OP_BRAZERO)
6245 {
6246 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6247 if (extrasize == 2)
6248 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6249 }
6250 else if (bra == OP_BRAMINZERO)
6251 {
6252 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6253 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6254 }
6255 }
6256 else
6257 {
6258 if (bra == OP_BRA)
6259 {
6260 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6261 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6262 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6263 }
6264 else
6265 {
6266 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6267 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6268 if (extrasize == 2)
6269 {
6270 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6271 if (bra == OP_BRAMINZERO)
6272 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6273 }
6274 else
6275 {
6276 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6277 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6278 }
6279 }
6280 }
6281
6282 if (bra == OP_BRAZERO)
6283 {
/* Both the failed and the successful assert continue on the matching path. */
6284 backtrack->matchingpath = LABEL();
6285 SET_LABEL(jump, backtrack->matchingpath);
6286 }
6287 else if (bra == OP_BRAMINZERO)
6288 {
6289 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6290 JUMPHERE(brajump);
6291 if (framesize >= 0)
6292 {
6293 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6294 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6295 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6296 }
6297 set_jumps(backtrack->common.topbacktracks, LABEL());
6298 }
6299 }
6300 else
6301 {
6302 /* AssertNot is successful. */
6303 if (framesize < 0)
6304 {
6305 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6306 if (bra != OP_BRA)
6307 {
6308 if (extrasize == 2)
6309 free_stack(common, 1);
6310 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6311 }
6312 else
6313 free_stack(common, extrasize);
6314 }
6315 else
6316 {
6317 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6318 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6319 /* The topmost item should be 0. */
6320 if (bra != OP_BRA)
6321 {
6322 free_stack(common, framesize + extrasize - 1);
6323 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6324 }
6325 else
6326 free_stack(common, framesize + extrasize);
6327 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6328 }
6329
6330 if (bra == OP_BRAZERO)
6331 backtrack->matchingpath = LABEL();
6332 else if (bra == OP_BRAMINZERO)
6333 {
6334 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6335 JUMPHERE(brajump);
6336 }
6337
6338 if (bra != OP_BRA)
6339 {
/* With a zero / minzero prefix the jumps of matching alternatives are bound
here and the list is cleared. */
6340 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6341 set_jumps(backtrack->common.topbacktracks, LABEL());
6342 backtrack->common.topbacktracks = NULL;
6343 }
6344 }
6345
/* Restore the state saved on entry. */
6346 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6347 {
6348 common->local_exit = save_local_exit;
6349 common->quit_label = save_quit_label;
6350 common->quit = save_quit;
6351 }
6352 common->positive_assert = save_positive_assert;
6353 common->then_trap = save_then_trap;
6354 common->accept_label = save_accept_label;
6355 common->positive_assert_quit = save_positive_assert_quit;
6356 common->accept = save_accept;
6357 return cc + 1 + LINK_SIZE;
6358 }
6359
6360 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6361 {
6362 DEFINE_COMPILER;
6363 int stacksize;
6364
6365 if (framesize < 0)
6366 {
6367 if (framesize == no_frame)
6368 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6369 else
6370 {
6371 stacksize = needs_control_head ? 1 : 0;
6372 if (ket != OP_KET || has_alternatives)
6373 stacksize++;
6374 free_stack(common, stacksize);
6375 }
6376
6377 if (needs_control_head)
6378 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6379
6380 /* TMP2 which is set here used by OP_KETRMAX below. */
6381 if (ket == OP_KETRMAX)
6382 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6383 else if (ket == OP_KETRMIN)
6384 {
6385 /* Move the STR_PTR to the private_data_ptr. */
6386 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6387 }
6388 }
6389 else
6390 {
6391 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6392 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6393 if (needs_control_head)
6394 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6395
6396 if (ket == OP_KETRMAX)
6397 {
6398 /* TMP2 which is set here used by OP_KETRMAX below. */
6399 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6400 }
6401 }
6402 if (needs_control_head)
6403 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6404 }
6405
6406 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6407 {
6408 DEFINE_COMPILER;
6409
6410 if (common->capture_last_ptr != 0)
6411 {
6412 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6413 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6414 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6415 stacksize++;
6416 }
6417 if (common->optimized_cbracket[offset >> 1] == 0)
6418 {
6419 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6420 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6421 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6422 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6423 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6424 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6425 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6426 stacksize += 2;
6427 }
6428 return stacksize;
6429 }
6430
6431 /*
6432 Handling bracketed expressions is probably the most complex part.
6433
6434 Stack layout naming characters:
6435 S - Push the current STR_PTR
6436 0 - Push a 0 (NULL)
6437 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6438 before the next alternative. Not pushed if there are no alternatives.
6439 M - Any values pushed by the current alternative. Can be empty, or anything.
6440 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6441 L - Push the previous local (pointed by localptr) to the stack
6442 () - optional values stored on the stack
6443 ()* - optional, can be stored multiple times
6444
6445 The following list shows the regular expression templates, their PCRE byte codes
6446 and stack layout supported by pcre-sljit.
6447
6448 (?:) OP_BRA | OP_KET A M
6449 () OP_CBRA | OP_KET C M
6450 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6451 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6452 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6453 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6454 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6455 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6456 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6457 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6458 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6459 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6460 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6461 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6462 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6463 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6464 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6465 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6466 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6467 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6468 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6469 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6470
6471
6472 Stack layout naming characters:
6473 A - Push the alternative index (starting from 0) on the stack.
6474 Not pushed if there are no alternatives.
6475 M - Any values pushed by the current alternative. Can be empty, or anything.
6476
6477 The next list shows the possible content of a bracket:
6478 (|) OP_*BRA | OP_ALT ... M A
6479 (?()|) OP_*COND | OP_ALT M A
6480 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6481 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6482 Or nothing, if trace is unnecessary
6483 */
6484
6485 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6486 {
6487 DEFINE_COMPILER;
6488 backtrack_common *backtrack;
6489 pcre_uchar opcode;
6490 int private_data_ptr = 0;
6491 int offset = 0;
6492 int i, stacksize;
6493 int repeat_ptr = 0, repeat_length = 0;
6494 int repeat_type = 0, repeat_count = 0;
6495 pcre_uchar *ccbegin;
6496 pcre_uchar *matchingpath;
6497 pcre_uchar *slot;
6498 pcre_uchar bra = OP_BRA;
6499 pcre_uchar ket;
6500 assert_backtrack *assert;
6501 BOOL has_alternatives;
6502 BOOL needs_control_head = FALSE;
6503 struct sljit_jump *jump;
6504 struct sljit_jump *skip;
6505 struct sljit_label *rmax_label = NULL;
6506 struct sljit_jump *braminzero = NULL;
6507
6508 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6509
6510 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6511 {
6512 bra = *cc;
6513 cc++;
6514 opcode = *cc;
6515 }
6516
6517 opcode = *cc;
6518 ccbegin = cc;
6519 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6520 ket = *matchingpath;
6521 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6522 {
6523 repeat_ptr = PRIVATE_DATA(matchingpath);
6524 repeat_length = PRIVATE_DATA(matchingpath + 1);
6525 repeat_type = PRIVATE_DATA(matchingpath + 2);
6526 repeat_count = PRIVATE_DATA(matchingpath + 3);
6527 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6528 if (repeat_type == OP_UPTO)
6529 ket = OP_KETRMAX;
6530 if (repeat_type == OP_MINUPTO)
6531 ket = OP_KETRMIN;
6532 }
6533
6534 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6535 {
6536 /* Drop this bracket_backtrack. */
6537 parent->top = backtrack->prev;
6538 return matchingpath + 1 + LINK_SIZE + repeat_length;
6539 }
6540
6541 matchingpath = ccbegin + 1 + LINK_SIZE;
6542 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6543 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6544 cc += GET(cc, 1);
6545
6546 has_alternatives = *cc == OP_ALT;
6547 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6548 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
6549
6550 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6551 opcode = OP_SCOND;
6552 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6553 opcode = OP_ONCE;
6554
6555 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6556 {
6557 /* Capturing brackets has a pre-allocated space. */
6558 offset = GET2(ccbegin, 1 + LINK_SIZE);
6559 if (common->optimized_cbracket[offset] == 0)
6560 {
6561 private_data_ptr = OVECTOR_PRIV(offset);
6562 offset <<= 1;
6563 }
6564 else
6565 {
6566 offset <<= 1;
6567 private_data_ptr = OVECTOR(offset);
6568 }
6569 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6570 matchingpath += IMM2_SIZE;
6571 }
6572 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6573 {
6574 /* Other brackets simply allocate the next entry. */
6575 private_data_ptr = PRIVATE_DATA(ccbegin);
6576 SLJIT_ASSERT(private_data_ptr != 0);
6577 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6578 if (opcode == OP_ONCE)
6579 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6580 }
6581
6582 /* Instructions before the first alternative. */
6583 stacksize = 0;
6584 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6585 stacksize++;
6586 if (bra == OP_BRAZERO)
6587 stacksize++;
6588
6589 if (stacksize > 0)
6590 allocate_stack(common, stacksize);
6591
6592 stacksize = 0;
6593 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6594 {
6595 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6596 stacksize++;
6597 }
6598
6599 if (bra == OP_BRAZERO)
6600 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6601
6602 if (bra == OP_BRAMINZERO)
6603 {
6604 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6605 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6606 if (ket != OP_KETRMIN)
6607 {
6608 free_stack(common, 1);
6609 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6610 }
6611 else
6612 {
6613 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6614 {
6615 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6616 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6617 /* Nothing stored during the first run. */
6618 skip = JUMP(SLJIT_JUMP);
6619 JUMPHERE(jump);
6620 /* Checking zero-length iteration. */
6621 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6622 {
6623 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6624 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6625 }
6626 else
6627 {
6628 /* Except when the whole stack frame must be saved. */
6629 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6630 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6631 }
6632 JUMPHERE(skip);
6633 }
6634 else
6635 {
6636 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6637 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6638 JUMPHERE(jump);
6639 }
6640 }
6641 }
6642
6643 if (repeat_type != 0)
6644 {
6645 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6646 if (repeat_type == OP_EXACT)
6647 rmax_label = LABEL();
6648 }
6649
6650 if (ket == OP_KETRMIN)
6651 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6652
6653 if (ket == OP_KETRMAX)
6654 {
6655 rmax_label = LABEL();
6656 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6657 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
6658 }
6659
6660 /* Handling capturing brackets and alternatives. */
6661 if (opcode == OP_ONCE)
6662 {
6663 stacksize = 0;
6664 if (needs_control_head)
6665 {
6666 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6667 stacksize++;
6668 }
6669
6670 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6671 {
6672 /* Neither capturing brackets nor recursions are found in the block. */
6673 if (ket == OP_KETRMIN)
6674 {
6675 stacksize += 2;
6676 if (!needs_control_head)
6677 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6678 }
6679 else
6680 {
6681 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6682 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6683 if (ket == OP_KETRMAX || has_alternatives)
6684 stacksize++;
6685 }
6686
6687 if (stacksize > 0)
6688 allocate_stack(common, stacksize);
6689
6690 stacksize = 0;
6691 if (needs_control_head)
6692 {
6693 stacksize++;
6694 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6695 }
6696
6697 if (ket == OP_KETRMIN)
6698 {
6699 if (needs_control_head)
6700 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6701 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6702 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6703 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6704 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6705 }
6706 else if (ket == OP_KETRMAX || has_alternatives)
6707 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6708 }
6709 else
6710 {
6711 if (ket != OP_KET || has_alternatives)
6712 stacksize++;
6713
6714 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6715 allocate_stack(common, stacksize);
6716
6717 if (needs_control_head)
6718 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6719
6720 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6721 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6722
6723 stacksize = needs_control_head ? 1 : 0;
6724 if (ket != OP_KET || has_alternatives)
6725 {
6726 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6727 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6728 stacksize++;
6729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6730 }
6731 else
6732 {
6733 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6734 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6735 }
6736 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6737 }
6738 }
6739 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6740 {
6741 /* Saving the previous values. */
6742 if (common->optimized_cbracket[offset >> 1] != 0)
6743 {
6744 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6745 allocate_stack(common, 2);
6746