/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1422 - (show annotations)
Mon Dec 30 19:05:36 2013 UTC (5 years, 11 months ago) by zherczeg
File MIME type: text/plain
File size: 321387 byte(s)
Error occurred while calculating annotation data.
JIT: Improved character read in UTF mode.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. The value is a size in bytes. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined to a non-zero value. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block handed to a compiled matcher; a jit_function receives a
pointer to one of these. Pointer members are grouped first and the smaller
scalar members follow. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
/* NOTE(review): presumably the backtracking stack used by the generated
code - confirm against the caller that fills this structure. */
162 struct sljit_stack *stack;
/* Three positions inside the subject string. NOTE(review): the names suggest
current start, subject begin and subject end - confirm at the call site. */
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 pcre_uint32 limit_match;
172 int real_offset_count;
173 int offset_count;
/* Byte-sized option flags. */
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Result of JIT compilation: one generated-code pointer and one code size per
compile mode (both arrays have JIT_NUMBER_OF_COMPILE_MODES entries), plus
settings shared by all modes. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
/* User supplied callback and the opaque pointer that accompanies it. */
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 pcre_uint32 limit_match;
186 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
187 } executable_functions;
188
/* Singly linked list of sljit jumps. */
189 typedef struct jump_list {
190 struct sljit_jump *jump;
191 struct jump_list *next;
192 } jump_list;
193
/* Singly linked list of stubs; each entry pairs a jump (start) with a label
(quit). NOTE(review): presumably the jump enters the stub and the label is
where execution resumes afterwards - confirm where stubs are emitted. */
194 typedef struct stub_list {
195 struct sljit_jump *start;
196 struct sljit_label *quit;
197 struct stub_list *next;
198 } stub_list;
199
/* Negative sentinel values. NOTE(review): presumably used in place of a real
(non-negative) frame size - confirm against get_framesize. */
200 enum frame_types {
201 no_frame = -1,
202 no_stack = -2
203 };
204
/* Tags for the two kinds of control entries: marks and then-traps. */
205 enum control_types {
206 type_mark = 0,
207 type_then_trap = 1
208 };
209
/* Signature of a generated matcher: takes the argument block and returns an
int, using the SLJIT_CALL calling convention. */
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type for the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the arguments for compile_backtrackingpath. Must be the first member
215 of its descendants, so that a pointer to a descendant can also be used
as a pointer to backtrack_common. */
216 typedef struct backtrack_common {
217 /* Concatenation stack: link to the previous entry and the jumps
collected for the next backtrack. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
/* Backtrack record for assertions. */
227 typedef struct assert_backtrack {
/* Must stay the first member (see backtrack_common). */
228 backtrack_common common;
229 jump_list *condfailed;
230 /* Less than 0 if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
/* Backtrack record for brackets. */
238 typedef struct bracket_backtrack {
/* Must stay the first member (see backtrack_common). */
239 backtrack_common common;
240 /* Where to continue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. Only one member of the
union is meaningful at a time; which one depends on the bracket opcode. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. Less than 0 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
/* Backtrack record for possessive brackets. NOTE(review): inferred from the
"pos" in the name - confirm against the OP_*BRAPOS handling. */
258 typedef struct bracketpos_backtrack {
/* Must stay the first member (see backtrack_common). */
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
/* Backtrack record holding a single matching-path label. NOTE(review):
presumably used for OP_BRAMINZERO - confirm at the point of allocation. */
268 typedef struct braminzero_backtrack {
/* Must stay the first member (see backtrack_common). */
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
/* Backtrack record for iterators. */
273 typedef struct iterator_backtrack {
/* Must stay the first member (see backtrack_common). */
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
/* One entry of a singly linked list describing a recursion target. */
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls until the function is created. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 sljit_sw start;
287 } recurse_entry;
288
/* Backtrack record for recursion. */
289 typedef struct recurse_backtrack {
/* Must stay the first member (see backtrack_common). */
290 backtrack_common common;
/* NOTE(review): presumably TRUE when the recursed pattern was compiled in
place rather than called - confirm where this flag is set. */
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
/* Extra opcode value local to this file, set equal to OP_TABLE_LENGTH.
NOTE(review): presumably chosen so that it cannot collide with a real
opcode - confirm against the definition of OP_TABLE_LENGTH. */
294 #define OP_THEN_TRAP OP_TABLE_LENGTH
295
/* Backtrack record for a then-trap. */
296 typedef struct then_trap_backtrack {
/* Must stay the first member (see backtrack_common). */
297 backtrack_common common;
298 /* If then_trap is not NULL, this structure contains the real
299 then_trap for the backtracking path. */
300 struct then_trap_backtrack *then_trap;
301 /* Points to the starting opcode. */
302 sljit_sw start;
303 /* Exit point for the then opcodes of this alternative. */
304 jump_list *quit;
305 /* Frame size of the current alternative. */
306 int framesize;
307 } then_trap_backtrack;
308
/* NOTE(review): upper bound for a "range" used elsewhere in this file; its
exact meaning is not visible here - confirm at the point of use. */
309 #define MAX_RANGE_SIZE 4
310
/* State shared by every stage of the code generator: the sljit compiler
handle, the per-opcode tables, the offsets of reserved local words, the
pattern options, and the labels / jump lists collected during generation. */
311 typedef struct compiler_common {
312 /* The sljit generic compiler. */
313 struct sljit_compiler *compiler;
314 /* First byte code. */
315 pcre_uchar *start;
316 /* Maps private data offset to each opcode. Indexed by the opcode's
offset from start (see PRIVATE_DATA). */
317 sljit_si *private_data_ptrs;
318 /* Tells whether the capturing bracket is optimized. */
319 pcre_uint8 *optimized_cbracket;
320 /* Tells whether the starting offset is a target of then. */
321 pcre_uint8 *then_offsets;
322 /* Current position where a THEN must jump. */
323 then_trap_backtrack *then_trap;
324 /* Starting offset of private data for capturing brackets. */
325 int cbra_ptr;
326 /* Output vector starting point. Must be divisible by 2. */
327 int ovector_start;
328 /* Last known position of the requested byte. */
329 int req_char_ptr;
330 /* Head of the last recursion. */
331 int recursive_head_ptr;
332 /* First inspected character for partial matching. */
333 int start_used_ptr;
334 /* Starting pointer for partial soft matches. */
335 int hit_start;
336 /* End pointer of the first line. */
337 int first_line_end;
338 /* Points to the marked string. */
339 int mark_ptr;
340 /* Recursive control verb management chain. */
341 int control_head_ptr;
342 /* Points to the last matched capture block index. */
343 int capture_last_ptr;
344 /* Points to the starting position of the current match. */
345 int start_ptr;
346
347 /* Flipped and lower case tables. */
348 const pcre_uint8 *fcc;
349 sljit_sw lcc;
350 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
351 int mode;
352 /* \K is found in the pattern. */
353 BOOL has_set_som;
354 /* (*SKIP:arg) is found in the pattern. */
355 BOOL has_skip_arg;
356 /* (*THEN) is found in the pattern. */
357 BOOL has_then;
358 /* Needs to know the start position anytime. */
359 BOOL needs_start_ptr;
360 /* Currently in recurse or negative assert. */
361 BOOL local_exit;
362 /* Currently in a positive assert. */
363 BOOL positive_assert;
364 /* Newline control. */
365 int nltype;
366 int newline;
367 int bsr_nltype;
368 /* Dollar endonly. */
369 int endonly;
370 /* Tables. */
371 sljit_sw ctypes;
372 /* Named capturing brackets: the name table, its number of entries and
the size of one entry. */
373 pcre_uchar *name_table;
374 sljit_sw name_count;
375 sljit_sw name_entry_size;
376
377 /* Labels and jump lists. */
378 struct sljit_label *partialmatchlabel;
379 struct sljit_label *quit_label;
380 struct sljit_label *forced_quit_label;
381 struct sljit_label *accept_label;
382 stub_list *stubs;
383 recurse_entry *entries;
384 recurse_entry *currententry;
385 jump_list *partialmatch;
386 jump_list *quit;
387 jump_list *positive_assert_quit;
388 jump_list *forced_quit;
389 jump_list *accept;
390 jump_list *calllimit;
391 jump_list *stackalloc;
392 jump_list *revertframes;
393 jump_list *wordboundary;
394 jump_list *anynewline;
395 jump_list *hspace;
396 jump_list *vspace;
397 jump_list *casefulcmp;
398 jump_list *caselesscmp;
399 jump_list *reset_match;
400 BOOL jscript_compat;
/* The members below exist only in builds with the matching feature macro. */
401 #ifdef SUPPORT_UTF
402 BOOL utf;
403 #ifdef SUPPORT_UCP
404 BOOL use_ucp;
405 #endif
406 #ifdef COMPILE_PCRE8
407 jump_list *utfreadchar;
408 jump_list *utfreadchar16;
409 jump_list *utfreadtype8;
410 #endif
411 #endif /* SUPPORT_UTF */
412 #ifdef SUPPORT_UCP
413 jump_list *getucd;
414 #endif
415 } compiler_common;
416
417 /* For byte_sequence_compare. */
418
/* Only length and sourcereg are always present. The remaining members exist
when SLJIT_UNALIGNED is non-zero; the two unions expose the same storage as
an int, a 16-bit value, or an array of character units whose element type
depends on the PCRE character width being compiled. */
419 typedef struct compare_context {
420 int length;
421 int sourcereg;
422 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
423 int ucharptr;
/* NOTE(review): "c" and "oc" presumably hold a character sequence and its
other-case counterpart - confirm in byte_sequence_compare. */
424 union {
425 sljit_si asint;
426 sljit_uh asushort;
427 #if defined COMPILE_PCRE8
428 sljit_ub asbyte;
429 sljit_ub asuchars[4];
430 #elif defined COMPILE_PCRE16
431 sljit_uh asuchars[2];
432 #elif defined COMPILE_PCRE32
433 sljit_ui asuchars[1];
434 #endif
435 } c;
436 union {
437 sljit_si asint;
438 sljit_uh asushort;
439 #if defined COMPILE_PCRE8
440 sljit_ub asbyte;
441 sljit_ub asuchars[4];
442 #elif defined COMPILE_PCRE16
443 sljit_uh asuchars[2];
444 #elif defined COMPILE_PCRE32
445 sljit_ui asuchars[1];
446 #endif
447 } oc;
448 #endif
449 } compare_context;
450
451 /* Undefine sljit macros. CMP is defined again further down in this file
as a shortcut for sljit_emit_cmp. */
452 #undef CMP
453
454 /* Used for accessing the elements of the stack. Yields a negative byte
offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw),
and so on. */
455 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
456
/* Symbolic names for the sljit registers used by the code generator. */
457 #define TMP1 SLJIT_SCRATCH_REG1
458 #define TMP2 SLJIT_SCRATCH_REG3
459 #define TMP3 SLJIT_TEMPORARY_EREG2
460 #define STR_PTR SLJIT_SAVED_REG1
461 #define STR_END SLJIT_SAVED_REG2
462 #define STACK_TOP SLJIT_SCRATCH_REG2
463 #define STACK_LIMIT SLJIT_SAVED_REG3
464 #define ARGUMENTS SLJIT_SAVED_EREG1
465 #define COUNT_MATCH SLJIT_SAVED_EREG2
466 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
467
468 /* Local space layout. All values below are byte offsets expressed in
multiples of sizeof(sljit_sw). */
469 /* These two locals can be used by the current opcode. */
470 #define LOCALS0 (0 * sizeof(sljit_sw))
471 #define LOCALS1 (1 * sizeof(sljit_sw))
472 /* Two local variables for possessive quantifiers (char1 cannot use them). */
473 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
474 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
475 /* Max limit of recursions. */
476 #define LIMIT_MATCH (4 * sizeof(sljit_sw))
477 /* The output vector is stored on the stack, and contains pointers
478 to characters. The vector data is divided into two groups: the first
479 group contains the start / end character pointers, and the second is
480 the start pointers when the end of the capturing group has not yet been
reached. These macros require a variable named "common" in scope. */
481 #define OVECTOR_START (common->ovector_start)
482 #define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
483 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data entry of the opcode at cc, indexed by its offset from
common->start. */
484 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
485
/* Select the sljit move opcodes that match the character unit width of the
library being built (8, 16 or 32 bit). The build fails if none of the
COMPILE_PCRE* macros is defined. */
486 #if defined COMPILE_PCRE8
487 #define MOV_UCHAR SLJIT_MOV_UB
488 #define MOVU_UCHAR SLJIT_MOVU_UB
489 #elif defined COMPILE_PCRE16
490 #define MOV_UCHAR SLJIT_MOV_UH
491 #define MOVU_UCHAR SLJIT_MOVU_UH
492 #elif defined COMPILE_PCRE32
493 #define MOV_UCHAR SLJIT_MOV_UI
494 #define MOVU_UCHAR SLJIT_MOVU_UI
495 #else
496 #error Unsupported compiling mode
497 #endif
498
499 /* Shortcuts. Every macro except DEFINE_COMPILER expects a local variable
named "compiler" to be in scope; DEFINE_COMPILER declares it from
common->compiler, so it must appear among the declarations of any function
that uses the other shortcuts. */
500 #define DEFINE_COMPILER \
501 struct sljit_compiler *compiler = common->compiler
/* Emit a one-operand / two-operand instruction. */
502 #define OP1(op, dst, dstw, src, srcw) \
503 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
504 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
505 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
/* Emit a label at the current position. */
506 #define LABEL() \
507 sljit_emit_label(compiler)
/* Emit a jump whose target is set later. */
508 #define JUMP(type) \
509 sljit_emit_jump(compiler, (type))
/* Emit a jump to an already known label. */
510 #define JUMPTO(type, label) \
511 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Make a previously emitted jump land at the current position. */
512 #define JUMPHERE(jump) \
513 sljit_set_label((jump), sljit_emit_label(compiler))
514 #define SET_LABEL(jump, label) \
515 sljit_set_label((jump), (label))
/* Emit a compare-and-jump; CMPTO also binds the jump to a known label. */
516 #define CMP(type, src1, src1w, src2, src2w) \
517 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
518 #define CMPTO(type, src1, src1w, src2, src2w, label) \
519 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
520 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
521 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
522 #define GET_LOCAL_BASE(dst, dstw, offset) \
523 sljit_get_local_base(compiler, (dst), (dstw), (offset))
524
525 static pcre_uchar* bracketend(pcre_uchar* cc)
526 {
527 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
528 do cc += GET(cc, 1); while (*cc == OP_ALT);
529 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
530 cc += 1 + LINK_SIZE;
531 return cc;
532 }
533
/* Number of set bits in each 4-bit value; the index is the value itself. */
static int ones_in_half_byte[16] = {
  0, /* 0000 */
  1, /* 0001 */
  1, /* 0010 */
  2, /* 0011 */
  1, /* 0100 */
  2, /* 0101 */
  2, /* 0110 */
  3, /* 0111 */
  1, /* 1000 */
  2, /* 1001 */
  2, /* 1010 */
  3, /* 1011 */
  2, /* 1100 */
  3, /* 1101 */
  3, /* 1110 */
  4  /* 1111 */
};
538
539 /* Functions which might need modification for all new supported opcodes:
540 next_opcode
541 check_opcode_types
542 set_private_data_ptrs
543 get_framesize
544 init_frame
545 get_private_data_copy_length
546 copy_private_data
547 compile_matchingpath
548 compile_backtrackingpath
549 */
550
551 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
552 {
553 SLJIT_UNUSED_ARG(common);
554 switch(*cc)
555 {
556 case OP_SOD:
557 case OP_SOM:
558 case OP_SET_SOM:
559 case OP_NOT_WORD_BOUNDARY:
560 case OP_WORD_BOUNDARY:
561 case OP_NOT_DIGIT:
562 case OP_DIGIT:
563 case OP_NOT_WHITESPACE:
564 case OP_WHITESPACE:
565 case OP_NOT_WORDCHAR:
566 case OP_WORDCHAR:
567 case OP_ANY:
568 case OP_ALLANY:
569 case OP_NOTPROP:
570 case OP_PROP:
571 case OP_ANYNL:
572 case OP_NOT_HSPACE:
573 case OP_HSPACE:
574 case OP_NOT_VSPACE:
575 case OP_VSPACE:
576 case OP_EXTUNI:
577 case OP_EODN:
578 case OP_EOD:
579 case OP_CIRC:
580 case OP_CIRCM:
581 case OP_DOLL:
582 case OP_DOLLM:
583 case OP_CRSTAR:
584 case OP_CRMINSTAR:
585 case OP_CRPLUS:
586 case OP_CRMINPLUS:
587 case OP_CRQUERY:
588 case OP_CRMINQUERY:
589 case OP_CRRANGE:
590 case OP_CRMINRANGE:
591 case OP_CRPOSSTAR:
592 case OP_CRPOSPLUS:
593 case OP_CRPOSQUERY:
594 case OP_CRPOSRANGE:
595 case OP_CLASS:
596 case OP_NCLASS:
597 case OP_REF:
598 case OP_REFI:
599 case OP_DNREF:
600 case OP_DNREFI:
601 case OP_RECURSE:
602 case OP_CALLOUT:
603 case OP_ALT:
604 case OP_KET:
605 case OP_KETRMAX:
606 case OP_KETRMIN:
607 case OP_KETRPOS:
608 case OP_REVERSE:
609 case OP_ASSERT:
610 case OP_ASSERT_NOT:
611 case OP_ASSERTBACK:
612 case OP_ASSERTBACK_NOT:
613 case OP_ONCE:
614 case OP_ONCE_NC:
615 case OP_BRA:
616 case OP_BRAPOS:
617 case OP_CBRA:
618 case OP_CBRAPOS:
619 case OP_COND:
620 case OP_SBRA:
621 case OP_SBRAPOS:
622 case OP_SCBRA:
623 case OP_SCBRAPOS:
624 case OP_SCOND:
625 case OP_CREF:
626 case OP_DNCREF:
627 case OP_RREF:
628 case OP_DNRREF:
629 case OP_DEF:
630 case OP_BRAZERO:
631 case OP_BRAMINZERO:
632 case OP_BRAPOSZERO:
633 case OP_PRUNE:
634 case OP_SKIP:
635 case OP_THEN:
636 case OP_COMMIT:
637 case OP_FAIL:
638 case OP_ACCEPT:
639 case OP_ASSERT_ACCEPT:
640 case OP_CLOSE:
641 case OP_SKIPZERO:
642 return cc + PRIV(OP_lengths)[*cc];
643
644 case OP_CHAR:
645 case OP_CHARI:
646 case OP_NOT:
647 case OP_NOTI:
648 case OP_STAR:
649 case OP_MINSTAR:
650 case OP_PLUS:
651 case OP_MINPLUS:
652 case OP_QUERY:
653 case OP_MINQUERY:
654 case OP_UPTO:
655 case OP_MINUPTO:
656 case OP_EXACT:
657 case OP_POSSTAR:
658 case OP_POSPLUS:
659 case OP_POSQUERY:
660 case OP_POSUPTO:
661 case OP_STARI:
662 case OP_MINSTARI:
663 case OP_PLUSI:
664 case OP_MINPLUSI:
665 case OP_QUERYI:
666 case OP_MINQUERYI:
667 case OP_UPTOI:
668 case OP_MINUPTOI:
669 case OP_EXACTI:
670 case OP_POSSTARI:
671 case OP_POSPLUSI:
672 case OP_POSQUERYI:
673 case OP_POSUPTOI:
674 case OP_NOTSTAR:
675 case OP_NOTMINSTAR:
676 case OP_NOTPLUS:
677 case OP_NOTMINPLUS:
678 case OP_NOTQUERY:
679 case OP_NOTMINQUERY:
680 case OP_NOTUPTO:
681 case OP_NOTMINUPTO:
682 case OP_NOTEXACT:
683 case OP_NOTPOSSTAR:
684 case OP_NOTPOSPLUS:
685 case OP_NOTPOSQUERY:
686 case OP_NOTPOSUPTO:
687 case OP_NOTSTARI:
688 case OP_NOTMINSTARI:
689 case OP_NOTPLUSI:
690 case OP_NOTMINPLUSI:
691 case OP_NOTQUERYI:
692 case OP_NOTMINQUERYI:
693 case OP_NOTUPTOI:
694 case OP_NOTMINUPTOI:
695 case OP_NOTEXACTI:
696 case OP_NOTPOSSTARI:
697 case OP_NOTPOSPLUSI:
698 case OP_NOTPOSQUERYI:
699 case OP_NOTPOSUPTOI:
700 cc += PRIV(OP_lengths)[*cc];
701 #ifdef SUPPORT_UTF
702 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
703 #endif
704 return cc;
705
706 /* Special cases. */
707 case OP_TYPESTAR:
708 case OP_TYPEMINSTAR:
709 case OP_TYPEPLUS:
710 case OP_TYPEMINPLUS:
711 case OP_TYPEQUERY:
712 case OP_TYPEMINQUERY:
713 case OP_TYPEUPTO:
714 case OP_TYPEMINUPTO:
715 case OP_TYPEEXACT:
716 case OP_TYPEPOSSTAR:
717 case OP_TYPEPOSPLUS:
718 case OP_TYPEPOSQUERY:
719 case OP_TYPEPOSUPTO:
720 return cc + PRIV(OP_lengths)[*cc] - 1;
721
722 case OP_ANYBYTE:
723 #ifdef SUPPORT_UTF
724 if (common->utf) return NULL;
725 #endif
726 return cc + 1;
727
728 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
729 case OP_XCLASS:
730 return cc + GET(cc, 1);
731 #endif
732
733 case OP_MARK:
734 case OP_PRUNE_ARG:
735 case OP_SKIP_ARG:
736 case OP_THEN_ARG:
737 return cc + 1 + 2 + cc[1];
738
739 default:
740 /* All opcodes are supported now! */
741 SLJIT_ASSERT_STOP();
742 return NULL;
743 }
744 }
745
746 static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
747 {
748 int count;
749 pcre_uchar *slot;
750
751 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
752 while (cc < ccend)
753 {
754 switch(*cc)
755 {
756 case OP_SET_SOM:
757 common->has_set_som = TRUE;
758 cc += 1;
759 break;
760
761 case OP_REF:
762 case OP_REFI:
763 common->optimized_cbracket[GET2(cc, 1)] = 0;
764 cc += 1 + IMM2_SIZE;
765 break;
766
767 case OP_CBRAPOS:
768 case OP_SCBRAPOS:
769 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
770 cc += 1 + LINK_SIZE + IMM2_SIZE;
771 break;
772
773 case OP_COND:
774 case OP_SCOND:
775 /* Only AUTO_CALLOUT can insert this opcode. We do
776 not intend to support this case. */
777 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
778 return FALSE;
779 cc += 1 + LINK_SIZE;
780 break;
781
782 case OP_CREF:
783 common->optimized_cbracket[GET2(cc, 1)] = 0;
784 cc += 1 + IMM2_SIZE;
785 break;
786
787 case OP_DNREF:
788 case OP_DNREFI:
789 case OP_DNCREF:
790 count = GET2(cc, 1 + IMM2_SIZE);
791 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
792 while (count-- > 0)
793 {
794 common->optimized_cbracket[GET2(slot, 0)] = 0;
795 slot += common->name_entry_size;
796 }
797 cc += 1 + 2 * IMM2_SIZE;
798 break;
799
800 case OP_RECURSE:
801 /* Set its value only once. */
802 if (common->recursive_head_ptr == 0)
803 {
804 common->recursive_head_ptr = common->ovector_start;
805 common->ovector_start += sizeof(sljit_sw);
806 }
807 cc += 1 + LINK_SIZE;
808 break;
809
810 case OP_CALLOUT:
811 if (common->capture_last_ptr == 0)
812 {
813 common->capture_last_ptr = common->ovector_start;
814 common->ovector_start += sizeof(sljit_sw);
815 }
816 cc += 2 + 2 * LINK_SIZE;
817 break;
818
819 case OP_THEN_ARG:
820 common->has_then = TRUE;
821 common->control_head_ptr = 1;
822 /* Fall through. */
823
824 case OP_PRUNE_ARG:
825 common->needs_start_ptr = TRUE;
826 /* Fall through. */
827
828 case OP_MARK:
829 if (common->mark_ptr == 0)
830 {
831 common->mark_ptr = common->ovector_start;
832 common->ovector_start += sizeof(sljit_sw);
833 }
834 cc += 1 + 2 + cc[1];
835 break;
836
837 case OP_THEN:
838 common->has_then = TRUE;
839 common->control_head_ptr = 1;
840 /* Fall through. */
841
842 case OP_PRUNE:
843 case OP_SKIP:
844 common->needs_start_ptr = TRUE;
845 cc += 1;
846 break;
847
848 case OP_SKIP_ARG:
849 common->control_head_ptr = 1;
850 common->has_skip_arg = TRUE;
851 cc += 1 + 2 + cc[1];
852 break;
853
854 default:
855 cc = next_opcode(common, cc);
856 if (cc == NULL)
857 return FALSE;
858 break;
859 }
860 }
861 return TRUE;
862 }
863
864 static int get_class_iterator_size(pcre_uchar *cc)
865 {
866 switch(*cc)
867 {
868 case OP_CRSTAR:
869 case OP_CRPLUS:
870 return 2;
871
872 case OP_CRMINSTAR:
873 case OP_CRMINPLUS:
874 case OP_CRQUERY:
875 case OP_CRMINQUERY:
876 return 1;
877
878 case OP_CRRANGE:
879 case OP_CRMINRANGE:
880 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
881 return 0;
882 return 2;
883
884 default:
885 return 0;
886 }
887 }
888
/* Checks whether the bracket starting at begin is followed by exact copies
of itself and, if so, records the repetition in common->private_data_ptrs.
NOTE(review): such copies are presumably how the pattern compiler expands
counted repeats like {n} and {n,m} - confirm against the pattern compiler.

A repeat is recorded as three consecutive entries, starting at the index one
past a closing OP_KET opcode:
  [0] distance in code units to the end of the copies covered by the repeat,
  [1] the repeat kind (OP_EXACT, OP_UPTO or OP_MINUPTO),
  [2] the repeat count.

Returns TRUE when the bracket's own closing ket carries (or already carried)
such a record, or when an optional tail was recorded directly after a single
mandatory copy; FALSE otherwise. */
889 static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
890 {
891 pcre_uchar *end = bracketend(begin);
892 pcre_uchar *next;
893 pcre_uchar *next_end;
894 pcre_uchar *max_end;
895 pcre_uchar type;
/* Size of the whole bracket, closing ket included. */
896 sljit_sw length = end - begin;
897 int min, max, i;
898
899 /* Detect fixed iterations first. Only brackets closed by a plain OP_KET
are considered. */
900 if (end[-(1 + LINK_SIZE)] != OP_KET)
901 return FALSE;
902
903 /* Already detected repeat. */
904 if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
905 return TRUE;
906
/* Count the mandatory copies: min starts at 1 for the bracket itself and
grows for each following bracket that is identical unit for unit. */
907 next = end;
908 min = 1;
909 while (1)
910 {
911 if (*next != *begin)
912 break;
913 next_end = bracketend(next);
914 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
915 break;
916 next = next_end;
917 min++;
918 }
919
/* Exactly two copies are not converted. */
920 if (min == 2)
921 return FALSE;
922
/* Look for an optional tail: a chain of BRAZERO / BRAMINZERO prefixed
OP_BRA wrappers, each opening with another copy of the bracket. max counts
the wrappers; max_end remembers where the mandatory copies ended. */
923 max = 0;
924 max_end = next;
925 if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
926 {
927 type = *next;
928 while (1)
929 {
930 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
931 break;
932 next_end = bracketend(next + 2 + LINK_SIZE);
933 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
934 break;
935 next = next_end;
936 max++;
937 }
938
/* The innermost optional copy has no OP_BRA wrapper: the zero opcode is
followed directly by the bracket. */
939 if (next[0] == type && next[1] == *begin && max >= 1)
940 {
941 next_end = bracketend(next + 1);
942 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
943 {
/* Each wrapper opened above must be closed by its own OP_KET. */
944 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
945 if (*next_end != OP_KET)
946 break;
947
948 if (i == max)
949 {
/* Record the optional tail on the closing ket of the last mandatory copy. */
950 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
951 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
952 /* +2 the original and the last. */
953 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
954 if (min == 1)
955 return TRUE;
/* The last mandatory copy now belongs to the optional repeat, so take it out
of the fixed count and move max_end back over it. NOTE(review): this relies
on the link stored in the preceding ket giving the distance back to the
start of that copy - confirm for brackets with several alternatives. */
956 min--;
957 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
958 }
959 }
960 }
961 }
962
/* Three or more remaining mandatory copies become one exact repeat,
recorded on the closing ket of the first copy. */
963 if (min >= 3)
964 {
965 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
966 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
967 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
968 return TRUE;
969 }
970
971 return FALSE;
972 }
973
/* Groups of case labels for single character and character type iterators.
Each macro expands to a run of "case X:" labels and is meant to be placed
directly in a switch body. In set_private_data_ptrs the "_1" groups are
given one private data word and the "_2A" / "_2B" groups two; the "_2B"
groups are the ones whose opcode carries an extra IMM2_SIZE count. */

/* Character iterators handled with one word. */
974 #define CASE_ITERATOR_PRIVATE_DATA_1 \
975 case OP_MINSTAR: \
976 case OP_MINPLUS: \
977 case OP_QUERY: \
978 case OP_MINQUERY: \
979 case OP_MINSTARI: \
980 case OP_MINPLUSI: \
981 case OP_QUERYI: \
982 case OP_MINQUERYI: \
983 case OP_NOTMINSTAR: \
984 case OP_NOTMINPLUS: \
985 case OP_NOTQUERY: \
986 case OP_NOTMINQUERY: \
987 case OP_NOTMINSTARI: \
988 case OP_NOTMINPLUSI: \
989 case OP_NOTQUERYI: \
990 case OP_NOTMINQUERYI:
991
/* Greedy star / plus character iterators, handled with two words. */
992 #define CASE_ITERATOR_PRIVATE_DATA_2A \
993 case OP_STAR: \
994 case OP_PLUS: \
995 case OP_STARI: \
996 case OP_PLUSI: \
997 case OP_NOTSTAR: \
998 case OP_NOTPLUS: \
999 case OP_NOTSTARI: \
1000 case OP_NOTPLUSI:
1001
/* Counted (upto) character iterators, handled with two words. */
1002 #define CASE_ITERATOR_PRIVATE_DATA_2B \
1003 case OP_UPTO: \
1004 case OP_MINUPTO: \
1005 case OP_UPTOI: \
1006 case OP_MINUPTOI: \
1007 case OP_NOTUPTO: \
1008 case OP_NOTMINUPTO: \
1009 case OP_NOTUPTOI: \
1010 case OP_NOTMINUPTOI:
1011
/* Character type iterators handled with one word. */
1012 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1013 case OP_TYPEMINSTAR: \
1014 case OP_TYPEMINPLUS: \
1015 case OP_TYPEQUERY: \
1016 case OP_TYPEMINQUERY:
1017
/* Greedy star / plus character type iterators. */
1018 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1019 case OP_TYPESTAR: \
1020 case OP_TYPEPLUS:
1021
/* Counted (upto) character type iterators. */
1022 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1023 case OP_TYPEUPTO: \
1024 case OP_TYPEMINUPTO:
1025
/* Assigns private data slots to the opcodes of the compiled pattern.

Scans the byte code from common->start up to ccend. For every opcode that is
given a slot, the slot offset is stored in common->private_data_ptrs, indexed
by the position of the opcode relative to common->start. Offsets are handed
out sequentially, starting at *private_data_start, in multiples of
sizeof(sljit_sw).

Arguments:
  common              compiler state (start, utf and private_data_ptrs are used)
  private_data_start  in: first free offset; out: next free offset
  ccend               end of the byte code range to scan

The updated offset is written back only when the scan runs to completion. The
early return, taken when the running offset exceeds SLJIT_MAX_LOCAL_SIZE,
leaves *private_data_start unchanged. */
1026 static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
1027 {
1028 pcre_uchar *cc = common->start;
1029 pcre_uchar *alternative;
/* While cc < end, character iterators do not receive a slot. */
1030 pcre_uchar *end = NULL;
1031 int private_data_ptr = *private_data_start;
1032 int space, size, bracketlen;
1033
1034 while (cc < ccend)
1035 {
/* space: number of sljit_sw slots requested by an iterator or class.
size: distance to advance; a negative value means that a character follows
the opcode (see the UTF handling below).
bracketlen: length of a bracket opcode header, skipped at the loop end. */
1036 space = 0;
1037 size = 0;
1038 bracketlen = 0;
1039 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1040 return;
1041
1042 if (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)
1043 if (detect_repeat(common, cc))
1044 {
1045 /* These brackets are converted to repeats, so no global
1046 based single character repeat is allowed. */
1047 if (cc >= end)
1048 end = bracketend(cc);
1049 }
1050
1051 switch(*cc)
1052 {
1053 case OP_KET:
/* A non-zero entry at cc + 1 acts both as a flag (this OP_KET gets a slot)
and as an extra distance to skip. Presumably it is stored by
detect_repeat() - TODO confirm. */
1054 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1055 {
1056 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1057 private_data_ptr += sizeof(sljit_sw);
1058 cc += common->private_data_ptrs[cc + 1 - common->start];
1059 }
1060 cc += 1 + LINK_SIZE;
1061 break;
1062
/* These brackets always get one slot. */
1063 case OP_ASSERT:
1064 case OP_ASSERT_NOT:
1065 case OP_ASSERTBACK:
1066 case OP_ASSERTBACK_NOT:
1067 case OP_ONCE:
1068 case OP_ONCE_NC:
1069 case OP_BRAPOS:
1070 case OP_SBRA:
1071 case OP_SBRAPOS:
1072 case OP_SCOND:
1073 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1074 private_data_ptr += sizeof(sljit_sw);
1075 bracketlen = 1 + LINK_SIZE;
1076 break;
1077
/* Same as above, but the bracket header also holds an IMM2_SIZE field. */
1078 case OP_CBRAPOS:
1079 case OP_SCBRAPOS:
1080 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1081 private_data_ptr += sizeof(sljit_sw);
1082 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1083 break;
1084
1085 case OP_COND:
1086 /* Might be a hidden SCOND. */
1087 alternative = cc + GET(cc, 1);
1088 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1089 {
1090 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1091 private_data_ptr += sizeof(sljit_sw);
1092 }
1093 bracketlen = 1 + LINK_SIZE;
1094 break;
1095
/* Plain brackets get no slot here; only their header is skipped. */
1096 case OP_BRA:
1097 bracketlen = 1 + LINK_SIZE;
1098 break;
1099
1100 case OP_CBRA:
1101 case OP_SCBRA:
1102 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1103 break;
1104
1105 CASE_ITERATOR_PRIVATE_DATA_1
1106 space = 1;
1107 size = -2;
1108 break;
1109
1110 CASE_ITERATOR_PRIVATE_DATA_2A
1111 space = 2;
1112 size = -2;
1113 break;
1114
1115 CASE_ITERATOR_PRIVATE_DATA_2B
1116 space = 2;
1117 size = -(2 + IMM2_SIZE);
1118 break;
1119
1120 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1121 space = 1;
1122 size = 1;
1123 break;
1124
/* No slot is requested when the repeated type is OP_ANYNL or OP_EXTUNI. */
1125 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1126 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1127 space = 2;
1128 size = 1;
1129 break;
1130
1131 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1132 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1133 space = 2;
1134 size = 1 + IMM2_SIZE;
1135 break;
1136
/* For classes the slot count depends on what follows the class data. */
1137 case OP_CLASS:
1138 case OP_NCLASS:
1139 size += 1 + 32 / sizeof(pcre_uchar);
1140 space = get_class_iterator_size(cc + size);
1141 break;
1142
1143 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1144 case OP_XCLASS:
1145 size = GET(cc, 1);
1146 space = get_class_iterator_size(cc + size);
1147 break;
1148 #endif
1149
/* Everything else has no private data; step to the next opcode. */
1150 default:
1151 cc = next_opcode(common, cc);
1152 SLJIT_ASSERT(cc != NULL);
1153 break;
1154 }
1155
1156 /* Character iterators, which are not inside a repeated bracket,
1157 get a private slot instead of allocating it on the stack. */
1158 if (space > 0 && cc >= end)
1159 {
1160 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1161 private_data_ptr += sizeof(sljit_sw) * space;
1162 }
1163
1164 if (size != 0)
1165 {
1166 if (size < 0)
1167 {
/* Skip the opcode and the first unit of its character, then any extra
units that the character occupies in UTF mode. */
1168 cc += -size;
1169 #ifdef SUPPORT_UTF
1170 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1171 #endif
1172 }
1173 else
1174 cc += size;
1175 }
1176
1177 if (bracketlen > 0)
1178 {
/* When no restricted region is active, start one that covers this bracket,
unless the bracket is closed by a plain OP_KET, in which case the
restriction is dropped by resetting end to NULL. */
1179 if (cc >= end)
1180 {
1181 end = bracketend(cc);
1182 if (end[-1 - LINK_SIZE] == OP_KET)
1183 end = NULL;
1184 }
1185 cc += bracketlen;
1186 }
1187 }
1188 *private_data_start = private_data_ptr;
1189 }
1190
1191 /* Computes the size of the stack frame needed by a byte code range.

Arguments:
  common              compiler state
  cc                  start of the range, or a bracket when ccend is NULL
  ccend               end of the range; when NULL, the range is the inside of
                      the bracket at cc (it ends 1 + LINK_SIZE units before
                      bracketend(cc) and starts at the opcode following cc)
  recursive           when TRUE, the start-of-match and mark values are
                      treated as already accounted for and are never counted
  needs_control_head  output: set to TRUE when OP_MARK, OP_PRUNE_ARG or
                      OP_THEN_ARG is found while common->control_head_ptr is
                      non-zero (or unconditionally when
                      DEBUG_FORCE_CONTROL_HEAD is enabled), FALSE otherwise

Counting: 2 words for the start-of-match value, 2 for the mark and 2 for the
last capture index (each at most once), plus 3 words per capturing bracket.

Returns length + 1 when a frame is needed. Returns with a frame_types (always
< 0) if no need for frame: no_frame when some opcode in the range requires a
stack restore, no_stack otherwise. */
1192 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1193 {
1194 int length = 0;
/* Length contributed by a leading OP_CBRAPOS / OP_SCBRAPOS alone. */
1195 int possessive = 0;
1196 BOOL stack_restore = FALSE;
1197 BOOL setsom_found = recursive;
1198 BOOL setmark_found = recursive;
1199 /* The last capture is a local variable even for recursions. */
1200 BOOL capture_last_found = FALSE;
1201
1202 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1203 SLJIT_ASSERT(common->control_head_ptr != 0);
1204 *needs_control_head = TRUE;
1205 #else
1206 *needs_control_head = FALSE;
1207 #endif
1208
1209 if (ccend == NULL)
1210 {
1211 ccend = bracketend(cc) - (1 + LINK_SIZE);
1212 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1213 {
/* 3 words for the bracket itself, plus 2 when the last capture is tracked. */
1214 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1215 /* This is correct regardless of common->capture_last_ptr. */
1216 capture_last_found = TRUE;
1217 }
1218 cc = next_opcode(common, cc);
1219 }
1220
1221 SLJIT_ASSERT(cc != NULL);
1222 while (cc < ccend)
1223 switch(*cc)
1224 {
1225 case OP_SET_SOM:
1226 SLJIT_ASSERT(common->has_set_som);
1227 stack_restore = TRUE;
1228 if (!setsom_found)
1229 {
1230 length += 2;
1231 setsom_found = TRUE;
1232 }
1233 cc += 1;
1234 break;
1235
1236 case OP_MARK:
1237 case OP_PRUNE_ARG:
1238 case OP_THEN_ARG:
1239 SLJIT_ASSERT(common->mark_ptr != 0);
1240 stack_restore = TRUE;
1241 if (!setmark_found)
1242 {
1243 length += 2;
1244 setmark_found = TRUE;
1245 }
1246 if (common->control_head_ptr != 0)
1247 *needs_control_head = TRUE;
/* Skip the opcode, the length unit in cc[1], the cc[1] argument units and
one more unit. */
1248 cc += 1 + 2 + cc[1];
1249 break;
1250
/* A recursion requires every value that is in use and not yet counted. */
1251 case OP_RECURSE:
1252 stack_restore = TRUE;
1253 if (common->has_set_som && !setsom_found)
1254 {
1255 length += 2;
1256 setsom_found = TRUE;
1257 }
1258 if (common->mark_ptr != 0 && !setmark_found)
1259 {
1260 length += 2;
1261 setmark_found = TRUE;
1262 }
1263 if (common->capture_last_ptr != 0 && !capture_last_found)
1264 {
1265 length += 2;
1266 capture_last_found = TRUE;
1267 }
1268 cc += 1 + LINK_SIZE;
1269 break;
1270
1271 case OP_CBRA:
1272 case OP_CBRAPOS:
1273 case OP_SCBRA:
1274 case OP_SCBRAPOS:
1275 stack_restore = TRUE;
1276 if (common->capture_last_ptr != 0 && !capture_last_found)
1277 {
1278 length += 2;
1279 capture_last_found = TRUE;
1280 }
1281 length += 3;
1282 cc += 1 + LINK_SIZE + IMM2_SIZE;
1283 break;
1284
/* Any opcode that has no case label in this switch sets stack_restore and
then shares the skipping code below. The opcodes listed after the default
label are skipped without setting stack_restore. */
1285 default:
1286 stack_restore = TRUE;
1287 /* Fall through. */
1288
1289 case OP_NOT_WORD_BOUNDARY:
1290 case OP_WORD_BOUNDARY:
1291 case OP_NOT_DIGIT:
1292 case OP_DIGIT:
1293 case OP_NOT_WHITESPACE:
1294 case OP_WHITESPACE:
1295 case OP_NOT_WORDCHAR:
1296 case OP_WORDCHAR:
1297 case OP_ANY:
1298 case OP_ALLANY:
1299 case OP_ANYBYTE:
1300 case OP_NOTPROP:
1301 case OP_PROP:
1302 case OP_ANYNL:
1303 case OP_NOT_HSPACE:
1304 case OP_HSPACE:
1305 case OP_NOT_VSPACE:
1306 case OP_VSPACE:
1307 case OP_EXTUNI:
1308 case OP_EODN:
1309 case OP_EOD:
1310 case OP_CIRC:
1311 case OP_CIRCM:
1312 case OP_DOLL:
1313 case OP_DOLLM:
1314 case OP_CHAR:
1315 case OP_CHARI:
1316 case OP_NOT:
1317 case OP_NOTI:
1318
1319 case OP_EXACT:
1320 case OP_POSSTAR:
1321 case OP_POSPLUS:
1322 case OP_POSQUERY:
1323 case OP_POSUPTO:
1324
1325 case OP_EXACTI:
1326 case OP_POSSTARI:
1327 case OP_POSPLUSI:
1328 case OP_POSQUERYI:
1329 case OP_POSUPTOI:
1330
1331 case OP_NOTEXACT:
1332 case OP_NOTPOSSTAR:
1333 case OP_NOTPOSPLUS:
1334 case OP_NOTPOSQUERY:
1335 case OP_NOTPOSUPTO:
1336
1337 case OP_NOTEXACTI:
1338 case OP_NOTPOSSTARI:
1339 case OP_NOTPOSPLUSI:
1340 case OP_NOTPOSQUERYI:
1341 case OP_NOTPOSUPTOI:
1342
1343 case OP_TYPEEXACT:
1344 case OP_TYPEPOSSTAR:
1345 case OP_TYPEPOSPLUS:
1346 case OP_TYPEPOSQUERY:
1347 case OP_TYPEPOSUPTO:
1348
1349 case OP_CLASS:
1350 case OP_NCLASS:
1351 case OP_XCLASS:
1352
1353 cc = next_opcode(common, cc);
1354 SLJIT_ASSERT(cc != NULL);
1355 break;
1356 }
1357
1358 /* Possessive quantifiers can use a special case. */
/* That is: nothing was counted beyond the leading bracket's own words (or
nothing at all when possessive is 0), so no frame is reported. */
1359 if (SLJIT_UNLIKELY(possessive == length))
1360 return stack_restore ? no_frame : no_stack;
1361
1362 if (length > 0)
1363 return length + 1;
1364 return stack_restore ? no_frame : no_stack;
1365 }
1366
1367 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1368 {
1369 DEFINE_COMPILER;
1370 BOOL setsom_found = recursive;
1371 BOOL setmark_found = recursive;
1372 /* The last capture is a local variable even for recursions. */
1373 BOOL capture_last_found = FALSE;
1374 int offset;
1375
1376 /* >= 1 + shortest item size (2) */
1377 SLJIT_UNUSED_ARG(stacktop);
1378 SLJIT_ASSERT(stackpos >= stacktop + 2);
1379
1380 stackpos = STACK(stackpos);
1381 if (ccend == NULL)
1382 {
1383 ccend = bracketend(cc) - (1 + LINK_SIZE);
1384 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1385 cc = next_opcode(common, cc);
1386 }
1387
1388 SLJIT_ASSERT(cc != NULL);
1389 while (cc < ccend)
1390 switch(*cc)
1391 {
1392 case OP_SET_SOM:
1393 SLJIT_ASSERT(common->has_set_som);
1394 if (!setsom_found)
1395 {
1396 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1397 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1398 stackpos += (int)sizeof(sljit_sw);
1399 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1400 stackpos += (int)sizeof(sljit_sw);
1401 setsom_found = TRUE;
1402 }
1403 cc += 1;
1404 break;
1405
1406 case OP_MARK:
1407 case OP_PRUNE_ARG:
1408 case OP_THEN_ARG:
1409 SLJIT_ASSERT(common->mark_ptr != 0);
1410 if (!setmark_found)
1411 {
1412 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1413 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1414 stackpos += (int)sizeof(sljit_sw);
1415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1416 stackpos += (int)sizeof(sljit_sw);
1417 setmark_found = TRUE;
1418 }
1419 cc += 1 + 2 + cc[1];
1420 break;
1421
1422 case OP_RECURSE:
1423 if (common->has_set_som && !setsom_found)
1424 {
1425 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1426 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1427 stackpos += (int)sizeof(sljit_sw);
1428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1429 stackpos += (int)sizeof(sljit_sw);
1430 setsom_found = TRUE;
1431 }
1432 if (common->mark_ptr != 0 && !setmark_found)
1433 {
1434 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1435 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1436 stackpos += (int)sizeof(sljit_sw);
1437 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1438 stackpos += (int)sizeof(sljit_sw);
1439 setmark_found = TRUE;
1440 }
1441 if (common->capture_last_ptr != 0 && !capture_last_found)
1442 {
1443 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1444 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1445 stackpos += (int)sizeof(sljit_sw);
1446 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1447 stackpos += (int)sizeof(sljit_sw);
1448 capture_last_found = TRUE;
1449 }
1450 cc += 1 + LINK_SIZE;
1451 break;
1452
1453 case OP_CBRA:
1454 case OP_CBRAPOS:
1455 case OP_SCBRA:
1456 case OP_SCBRAPOS:
1457 if (common->capture_last_ptr != 0 && !capture_last_found)
1458 {
1459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1461 stackpos += (int)sizeof(sljit_sw);
1462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1463 stackpos += (int)sizeof(sljit_sw);
1464 capture_last_found = TRUE;
1465 }
1466 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1468 stackpos += (int)sizeof(sljit_sw);
1469 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1470 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1471 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1472 stackpos += (int)sizeof(sljit_sw);
1473 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1474 stackpos += (int)sizeof(sljit_sw);
1475
1476 cc += 1 + LINK_SIZE + IMM2_SIZE;
1477 break;
1478
1479 default:
1480 cc = next_opcode(common, cc);
1481 SLJIT_ASSERT(cc != NULL);
1482 break;
1483 }
1484
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1486 SLJIT_ASSERT(stackpos == STACK(stacktop));
1487 }
1488
1489 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1490 {
1491 int private_data_length = needs_control_head ? 3 : 2;
1492 int size;
1493 pcre_uchar *alternative;
1494 /* Calculate the sum of the private machine words. */
1495 while (cc < ccend)
1496 {
1497 size = 0;
1498 switch(*cc)
1499 {
1500 case OP_KET:
1501 if (PRIVATE_DATA(cc) != 0)
1502 private_data_length++;
1503 cc += 1 + LINK_SIZE;
1504 break;
1505
1506 case OP_ASSERT:
1507 case OP_ASSERT_NOT:
1508 case OP_ASSERTBACK:
1509 case OP_ASSERTBACK_NOT:
1510 case OP_ONCE:
1511 case OP_ONCE_NC:
1512 case OP_BRAPOS:
1513 case OP_SBRA:
1514 case OP_SBRAPOS:
1515 case OP_SCOND:
1516 private_data_length++;
1517 cc += 1 + LINK_SIZE;
1518 break;
1519
1520 case OP_CBRA:
1521 case OP_SCBRA:
1522 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1523 private_data_length++;
1524 cc += 1 + LINK_SIZE + IMM2_SIZE;
1525 break;
1526
1527 case OP_CBRAPOS:
1528 case OP_SCBRAPOS:
1529 private_data_length += 2;
1530 cc += 1 + LINK_SIZE + IMM2_SIZE;
1531 break;
1532
1533 case OP_COND:
1534 /* Might be a hidden SCOND. */
1535 alternative = cc + GET(cc, 1);
1536 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1537 private_data_length++;
1538 cc += 1 + LINK_SIZE;
1539 break;
1540
1541 CASE_ITERATOR_PRIVATE_DATA_1
1542 if (PRIVATE_DATA(cc))
1543 private_data_length++;
1544 cc += 2;
1545 #ifdef SUPPORT_UTF
1546 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1547 #endif
1548 break;
1549
1550 CASE_ITERATOR_PRIVATE_DATA_2A
1551 if (PRIVATE_DATA(cc))
1552 private_data_length += 2;
1553 cc += 2;
1554 #ifdef SUPPORT_UTF
1555 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1556 #endif
1557 break;
1558
1559 CASE_ITERATOR_PRIVATE_DATA_2B
1560 if (PRIVATE_DATA(cc))
1561 private_data_length += 2;
1562 cc += 2 + IMM2_SIZE;
1563 #ifdef SUPPORT_UTF
1564 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1565 #endif
1566 break;
1567
1568 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1569 if (PRIVATE_DATA(cc))
1570 private_data_length++;
1571 cc += 1;
1572 break;
1573
1574 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1575 if (PRIVATE_DATA(cc))
1576 private_data_length += 2;
1577 cc += 1;
1578 break;
1579
1580 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1581 if (PRIVATE_DATA(cc))
1582 private_data_length += 2;
1583 cc += 1 + IMM2_SIZE;
1584 break;
1585
1586 case OP_CLASS:
1587 case OP_NCLASS:
1588 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1589 case OP_XCLASS:
1590 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1591 #else
1592 size = 1 + 32 / (int)sizeof(pcre_uchar);
1593 #endif
1594 if (PRIVATE_DATA(cc))
1595 private_data_length += get_class_iterator_size(cc + size);
1596 cc += size;
1597 break;
1598
1599 default:
1600 cc = next_opcode(common, cc);
1601 SLJIT_ASSERT(cc != NULL);
1602 break;
1603 }
1604 }
1605 SLJIT_ASSERT(cc == ccend);
1606 return private_data_length;
1607 }
1608
/* Emits the code which saves (save == TRUE) or restores (save == FALSE) the
private data locals used by the byte code range [cc, ccend).

When saving, the first items are common->recursive_head_ptr and, if
needs_control_head is set, common->control_head_ptr. When restoring, the
stack words of these items are stepped over instead. After that the locals of
every opcode in the range follow; the opcode switch mirrors the one in
get_private_data_copy_length().

Every value is moved through TMP1 or TMP2, which are used alternately:
tmp1next selects the register for the next item, tmp1empty / tmp2empty tell
whether the register currently holds a value which still has to be stored.

stackptr and stacktop are converted with STACK() on entry; the final
assertion checks that the copy ended exactly at stacktop.
NOTE(review): the exact layout produced by STACK() is defined elsewhere -
confirm before changing the "- 2" / "- 1" adjustments. */
1609 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1610 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1611 {
1612 DEFINE_COMPILER;
/* Offsets (relative to SLJIT_LOCALS_REG) of the locals of the current item. */
1613 int srcw[2];
1614 int count, size;
1615 BOOL tmp1next = TRUE;
1616 BOOL tmp1empty = TRUE;
1617 BOOL tmp2empty = TRUE;
1618 pcre_uchar *alternative;
1619 enum {
1620 start,
1621 loop,
1622 end
1623 } status;
1624
/* The "start" state, which queues the head pointers, is only used when
saving. */
1625 status = save ? start : loop;
1626 stackptr = STACK(stackptr - 2);
1627 stacktop = STACK(stacktop - 1);
1628
1629 if (!save)
1630 {
/* Step over the words which belong to the head pointers, then preload up
to two values into TMP1 and TMP2. */
1631 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1632 if (stackptr < stacktop)
1633 {
1634 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1635 stackptr += sizeof(sljit_sw);
1636 tmp1empty = FALSE;
1637 }
1638 if (stackptr < stacktop)
1639 {
1640 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1641 stackptr += sizeof(sljit_sw);
1642 tmp2empty = FALSE;
1643 }
1644 /* The tmp1next must be TRUE in either way. */
1645 }
1646
/* Each iteration selects at most two locals (count, srcw[]) and then emits
the moves for them. */
1647 do
1648 {
1649 count = 0;
1650 switch(status)
1651 {
1652 case start:
1653 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1654 count = 1;
1655 srcw[0] = common->recursive_head_ptr;
1656 if (needs_control_head)
1657 {
1658 SLJIT_ASSERT(common->control_head_ptr != 0);
1659 count = 2;
1660 srcw[1] = common->control_head_ptr;
1661 }
1662 status = loop;
1663 break;
1664
1665 case loop:
1666 if (cc >= ccend)
1667 {
1668 status = end;
1669 break;
1670 }
1671
1672 switch(*cc)
1673 {
1674 case OP_KET:
1675 if (PRIVATE_DATA(cc) != 0)
1676 {
1677 count = 1;
1678 srcw[0] = PRIVATE_DATA(cc);
1679 }
1680 cc += 1 + LINK_SIZE;
1681 break;
1682
/* These brackets always own one private word. */
1683 case OP_ASSERT:
1684 case OP_ASSERT_NOT:
1685 case OP_ASSERTBACK:
1686 case OP_ASSERTBACK_NOT:
1687 case OP_ONCE:
1688 case OP_ONCE_NC:
1689 case OP_BRAPOS:
1690 case OP_SBRA:
1691 case OP_SBRAPOS:
1692 case OP_SCOND:
1693 count = 1;
1694 srcw[0] = PRIVATE_DATA(cc);
1695 SLJIT_ASSERT(srcw[0] != 0);
1696 cc += 1 + LINK_SIZE;
1697 break;
1698
/* Only brackets with a zero optimized_cbracket entry have a word to copy. */
1699 case OP_CBRA:
1700 case OP_SCBRA:
1701 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1702 {
1703 count = 1;
1704 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1705 }
1706 cc += 1 + LINK_SIZE + IMM2_SIZE;
1707 break;
1708
1709 case OP_CBRAPOS:
1710 case OP_SCBRAPOS:
1711 count = 2;
1712 srcw[0] = PRIVATE_DATA(cc);
1713 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1714 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1715 cc += 1 + LINK_SIZE + IMM2_SIZE;
1716 break;
1717
1718 case OP_COND:
1719 /* Might be a hidden SCOND. */
1720 alternative = cc + GET(cc, 1);
1721 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1722 {
1723 count = 1;
1724 srcw[0] = PRIVATE_DATA(cc);
1725 SLJIT_ASSERT(srcw[0] != 0);
1726 }
1727 cc += 1 + LINK_SIZE;
1728 break;
1729
/* Character iterators: copied only when they have a private slot. The
character after the opcode may occupy extra units in UTF mode. */
1730 CASE_ITERATOR_PRIVATE_DATA_1
1731 if (PRIVATE_DATA(cc))
1732 {
1733 count = 1;
1734 srcw[0] = PRIVATE_DATA(cc);
1735 }
1736 cc += 2;
1737 #ifdef SUPPORT_UTF
1738 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1739 #endif
1740 break;
1741
1742 CASE_ITERATOR_PRIVATE_DATA_2A
1743 if (PRIVATE_DATA(cc))
1744 {
1745 count = 2;
1746 srcw[0] = PRIVATE_DATA(cc);
1747 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1748 }
1749 cc += 2;
1750 #ifdef SUPPORT_UTF
1751 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1752 #endif
1753 break;
1754
1755 CASE_ITERATOR_PRIVATE_DATA_2B
1756 if (PRIVATE_DATA(cc))
1757 {
1758 count = 2;
1759 srcw[0] = PRIVATE_DATA(cc);
1760 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1761 }
1762 cc += 2 + IMM2_SIZE;
1763 #ifdef SUPPORT_UTF
1764 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1765 #endif
1766 break;
1767
/* Type iterators: same scheme, but no character follows the opcode. */
1768 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1769 if (PRIVATE_DATA(cc))
1770 {
1771 count = 1;
1772 srcw[0] = PRIVATE_DATA(cc);
1773 }
1774 cc += 1;
1775 break;
1776
1777 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1778 if (PRIVATE_DATA(cc))
1779 {
1780 count = 2;
1781 srcw[0] = PRIVATE_DATA(cc);
1782 srcw[1] = srcw[0] + sizeof(sljit_sw);
1783 }
1784 cc += 1;
1785 break;
1786
1787 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1788 if (PRIVATE_DATA(cc))
1789 {
1790 count = 2;
1791 srcw[0] = PRIVATE_DATA(cc);
1792 srcw[1] = srcw[0] + sizeof(sljit_sw);
1793 }
1794 cc += 1 + IMM2_SIZE;
1795 break;
1796
/* Classes: the number of words depends on what follows the class data. */
1797 case OP_CLASS:
1798 case OP_NCLASS:
1799 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1800 case OP_XCLASS:
1801 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1802 #else
1803 size = 1 + 32 / (int)sizeof(pcre_uchar);
1804 #endif
1805 if (PRIVATE_DATA(cc))
1806 switch(get_class_iterator_size(cc + size))
1807 {
1808 case 1:
1809 count = 1;
1810 srcw[0] = PRIVATE_DATA(cc);
1811 break;
1812
1813 case 2:
1814 count = 2;
1815 srcw[0] = PRIVATE_DATA(cc);
1816 srcw[1] = srcw[0] + sizeof(sljit_sw);
1817 break;
1818
1819 default:
1820 SLJIT_ASSERT_STOP();
1821 break;
1822 }
1823 cc += size;
1824 break;
1825
1826 default:
1827 cc = next_opcode(common, cc);
1828 SLJIT_ASSERT(cc != NULL);
1829 break;
1830 }
1831 break;
1832
/* The loop condition stops before this state can be dispatched. */
1833 case end:
1834 SLJIT_ASSERT_STOP();
1835 break;
1836 }
1837
/* Emit the moves for the selected locals; srcw[] is consumed from its
last entry down to srcw[0]. */
1838 while (count > 0)
1839 {
1840 count--;
1841 if (save)
1842 {
/* Saving: store the value left in the chosen register by an earlier
item (if any), then load the next local into that register. */
1843 if (tmp1next)
1844 {
1845 if (!tmp1empty)
1846 {
1847 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1848 stackptr += sizeof(sljit_sw);
1849 }
1850 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1851 tmp1empty = FALSE;
1852 tmp1next = FALSE;
1853 }
1854 else
1855 {
1856 if (!tmp2empty)
1857 {
1858 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1859 stackptr += sizeof(sljit_sw);
1860 }
1861 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1862 tmp2empty = FALSE;
1863 tmp1next = TRUE;
1864 }
1865 }
1866 else
1867 {
/* Restoring: write the preloaded register into the local, then refill
the register from the stack unless the end of the area was reached. */
1868 if (tmp1next)
1869 {
1870 SLJIT_ASSERT(!tmp1empty);
1871 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1872 tmp1empty = stackptr >= stacktop;
1873 if (!tmp1empty)
1874 {
1875 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1876 stackptr += sizeof(sljit_sw);
1877 }
1878 tmp1next = FALSE;
1879 }
1880 else
1881 {
1882 SLJIT_ASSERT(!tmp2empty);
1883 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1884 tmp2empty = stackptr >= stacktop;
1885 if (!tmp2empty)
1886 {
1887 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1888 stackptr += sizeof(sljit_sw);
1889 }
1890 tmp1next = TRUE;
1891 }
1892 }
1893 }
1894 }
1895 while (status != end);
1896
1897 if (save)
1898 {
/* Flush the values still held in the registers. The register which
would have been used next holds the older value, so it is stored first. */
1899 if (tmp1next)
1900 {
1901 if (!tmp1empty)
1902 {
1903 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1904 stackptr += sizeof(sljit_sw);
1905 }
1906 if (!tmp2empty)
1907 {
1908 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1909 stackptr += sizeof(sljit_sw);
1910 }
1911 }
1912 else
1913 {
1914 if (!tmp2empty)
1915 {
1916 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1917 stackptr += sizeof(sljit_sw);
1918 }
1919 if (!tmp1empty)
1920 {
1921 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1922 stackptr += sizeof(sljit_sw);
1923 }
1924 }
1925 }
1926 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1927 }
1928
1929 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1930 {
1931 pcre_uchar *end = bracketend(cc);
1932 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1933
1934 /* Assert captures then. */
1935 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1936 current_offset = NULL;
1937 /* Conditional block does not. */
1938 if (*cc == OP_COND || *cc == OP_SCOND)
1939 has_alternatives = FALSE;
1940
1941 cc = next_opcode(common, cc);
1942 if (has_alternatives)
1943 current_offset = common->then_offsets + (cc - common->start);
1944
1945 while (cc < end)
1946 {
1947 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1948 cc = set_then_offsets(common, cc, current_offset);
1949 else
1950 {
1951 if (*cc == OP_ALT && has_alternatives)
1952 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1953 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1954 *current_offset = 1;
1955 cc = next_opcode(common, cc);
1956 }
1957 }
1958
1959 return end;
1960 }
1961
/* Remove the case-label helper macros; they are not available past this
point. */
1962 #undef CASE_ITERATOR_PRIVATE_DATA_1
1963 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1964 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1965 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1966 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1967 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1968
1969 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1970 {
1971 return (value & (value - 1)) == 0;
1972 }
1973
1974 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1975 {
1976 while (list)
1977 {
1978 /* sljit_set_label is clever enough to do nothing
1979 if either the jump or the label is NULL. */
1980 SET_LABEL(list->jump, label);
1981 list = list->next;
1982 }
1983 }
1984
1985 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1986 {
1987 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1988 if (list_item)
1989 {
1990 list_item->next = *list;
1991 list_item->jump = jump;
1992 *list = list_item;
1993 }
1994 }
1995
1996 static void add_stub(compiler_common *common, struct sljit_jump *start)
1997 {
1998 DEFINE_COMPILER;
1999 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
2000
2001 if (list_item)
2002 {
2003 list_item->start = start;
2004 list_item->quit = LABEL();
2005 list_item->next = common->stubs;
2006 common->stubs = list_item;
2007 }
2008 }
2009
2010 static void flush_stubs(compiler_common *common)
2011 {
2012 DEFINE_COMPILER;
2013 stub_list* list_item = common->stubs;
2014
2015 while (list_item)
2016 {
2017 JUMPHERE(list_item->start);
2018 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2019 JUMPTO(SLJIT_JUMP, list_item->quit);
2020 list_item = list_item->next;
2021 }
2022 common->stubs = NULL;
2023 }
2024
2025 static SLJIT_INLINE void count_match(compiler_common *common)
2026 {
2027 DEFINE_COMPILER;
2028
2029 OP2(SLJIT_SUB | SLJIT_SET_E, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
2030 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2031 }
2032
2033 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2034 {
2035 /* May destroy all locals and registers except TMP2. */
2036 DEFINE_COMPILER;
2037
2038 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2039 #ifdef DESTROY_REGISTERS
2040 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2041 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2042 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2043 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2044 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2045 #endif
2046 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2047 }
2048
2049 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2050 {
2051 DEFINE_COMPILER;
2052 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2053 }
2054
2055 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2056 {
2057 DEFINE_COMPILER;
2058 struct sljit_label *loop;
2059 int i;
2060
2061 /* At this point we can freely use all temporary registers. */
2062 SLJIT_ASSERT(length > 1);
2063 /* TMP1 returns with begin - 1. */
2064 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2065 if (length < 8)
2066 {
2067 for (i = 1; i < length; i++)
2068 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2069 }
2070 else
2071 {
2072 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2073 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2074 loop = LABEL();
2075 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2076 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2077 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2078 }
2079 }
2080
2081 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2082 {
2083 DEFINE_COMPILER;
2084 struct sljit_label *loop;
2085 int i;
2086
2087 SLJIT_ASSERT(length > 1);
2088 /* OVECTOR(1) contains the "string begin - 1" constant. */
2089 if (length > 2)
2090 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2091 if (length < 8)
2092 {
2093 for (i = 2; i < length; i++)
2094 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2095 }
2096 else
2097 {
2098 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2099 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2100 loop = LABEL();
2101 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2102 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2103 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2104 }
2105
2106 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2107 if (common->mark_ptr != 0)
2108 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2109 if (common->control_head_ptr != 0)
2110 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2111 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2112 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2113 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2114 }
2115
2116 static sljit_sw SLJIT_CALL do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
2117 {
2118 while (current != NULL)
2119 {
2120 switch (current[-2])
2121 {
2122 case type_then_trap:
2123 break;
2124
2125 case type_mark:
2126 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2127 return current[-4];
2128 break;
2129
2130 default:
2131 SLJIT_ASSERT_STOP();
2132 break;
2133 }
2134 current = (sljit_sw*)current[-1];
2135 }
2136 return -1;
2137 }
2138
2139 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2140 {
2141 DEFINE_COMPILER;
2142 struct sljit_label *loop;
2143 struct sljit_jump *early_quit;
2144
2145 /* At this point we can freely use all registers. */
2146 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2148
2149 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2150 if (common->mark_ptr != 0)
2151 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2152 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2153 if (common->mark_ptr != 0)
2154 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2155 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2156 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2157 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2158 /* Unlikely, but possible */
2159 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2160 loop = LABEL();
2161 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2162 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2163 /* Copy the integer value to the output buffer */
2164 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2165 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2166 #endif
2167 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2168 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2169 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2170 JUMPHERE(early_quit);
2171
2172 /* Calculate the return value, which is the maximum ovector value. */
2173 if (topbracket > 1)
2174 {
2175 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2176 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2177
2178 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2179 loop = LABEL();
2180 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2181 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2182 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2183 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2184 }
2185 else
2186 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2187 }
2188
2189 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2190 {
2191 DEFINE_COMPILER;
2192 struct sljit_jump *jump;
2193
2194 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2195 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2196 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2197
2198 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2199 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2200 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2201 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2202
2203 /* Store match begin and end. */
2204 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2205 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2206
2207 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2208 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2209 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2210 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2211 #endif
2212 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2213 JUMPHERE(jump);
2214
2215 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2216 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2217 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2218 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2219 #endif
2220 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2221
2222 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2223 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2224 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2225 #endif
2226 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2227
2228 JUMPTO(SLJIT_JUMP, quit);
2229 }
2230
2231 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2232 {
2233 /* May destroy TMP1. */
2234 DEFINE_COMPILER;
2235 struct sljit_jump *jump;
2236
2237 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2238 {
2239 /* The value of -1 must be kept for start_used_ptr! */
2240 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2241 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2242 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2243 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2244 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2245 JUMPHERE(jump);
2246 }
2247 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2248 {
2249 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2250 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2251 JUMPHERE(jump);
2252 }
2253 }
2254
2255 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2256 {
2257 /* Detects if the character has an othercase. */
2258 unsigned int c;
2259
2260 #ifdef SUPPORT_UTF
2261 if (common->utf)
2262 {
2263 GETCHAR(c, cc);
2264 if (c > 127)
2265 {
2266 #ifdef SUPPORT_UCP
2267 return c != UCD_OTHERCASE(c);
2268 #else
2269 return FALSE;
2270 #endif
2271 }
2272 #ifndef COMPILE_PCRE8
2273 return common->fcc[c] != c;
2274 #endif
2275 }
2276 else
2277 #endif
2278 c = *cc;
2279 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2280 }
2281
2282 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2283 {
2284 /* Returns with the othercase. */
2285 #ifdef SUPPORT_UTF
2286 if (common->utf && c > 127)
2287 {
2288 #ifdef SUPPORT_UCP
2289 return UCD_OTHERCASE(c);
2290 #else
2291 return c;
2292 #endif
2293 }
2294 #endif
2295 return TABLE_GET(c, common->fcc, c);
2296 }
2297
2298 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2299 {
2300 /* Detects if the character and its othercase has only 1 bit difference. */
2301 unsigned int c, oc, bit;
2302 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2303 int n;
2304 #endif
2305
2306 #ifdef SUPPORT_UTF
2307 if (common->utf)
2308 {
2309 GETCHAR(c, cc);
2310 if (c <= 127)
2311 oc = common->fcc[c];
2312 else
2313 {
2314 #ifdef SUPPORT_UCP
2315 oc = UCD_OTHERCASE(c);
2316 #else
2317 oc = c;
2318 #endif
2319 }
2320 }
2321 else
2322 {
2323 c = *cc;
2324 oc = TABLE_GET(c, common->fcc, c);
2325 }
2326 #else
2327 c = *cc;
2328 oc = TABLE_GET(c, common->fcc, c);
2329 #endif
2330
2331 SLJIT_ASSERT(c != oc);
2332
2333 bit = c ^ oc;
2334 /* Optimized for English alphabet. */
2335 if (c <= 127 && bit == 0x20)
2336 return (0 << 8) | 0x20;
2337
2338 /* Since c != oc, they must have at least 1 bit difference. */
2339 if (!is_powerof2(bit))
2340 return 0;
2341
2342 #if defined COMPILE_PCRE8
2343
2344 #ifdef SUPPORT_UTF
2345 if (common->utf && c > 127)
2346 {
2347 n = GET_EXTRALEN(*cc);
2348 while ((bit & 0x3f) == 0)
2349 {
2350 n--;
2351 bit >>= 6;
2352 }
2353 return (n << 8) | bit;
2354 }
2355 #endif /* SUPPORT_UTF */
2356 return (0 << 8) | bit;
2357
2358 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2359
2360 #ifdef SUPPORT_UTF
2361 if (common->utf && c > 65535)
2362 {
2363 if (bit >= (1 << 10))
2364 bit >>= 10;
2365 else
2366 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2367 }
2368 #endif /* SUPPORT_UTF */
2369 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2370
2371 #endif /* COMPILE_PCRE[8|16|32] */
2372 }
2373
2374 static void check_partial(compiler_common *common, BOOL force)
2375 {
2376 /* Checks whether a partial matching is occurred. Does not modify registers. */
2377 DEFINE_COMPILER;
2378 struct sljit_jump *jump = NULL;
2379
2380 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2381
2382 if (common->mode == JIT_COMPILE)
2383 return;
2384
2385 if (!force)
2386 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2387 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2388 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2389
2390 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2391 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2392 else
2393 {
2394 if (common->partialmatchlabel != NULL)
2395 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2396 else
2397 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2398 }
2399
2400 if (jump != NULL)
2401 JUMPHERE(jump);
2402 }
2403
2404 static void check_str_end(compiler_common *common, jump_list **end_reached)
2405 {
2406 /* Does not affect registers. Usually used in a tight spot. */
2407 DEFINE_COMPILER;
2408 struct sljit_jump *jump;
2409
2410 if (common->mode == JIT_COMPILE)
2411 {
2412 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2413 return;
2414 }
2415
2416 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2417 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2418 {
2419 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2420 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2421 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2422 }
2423 else
2424 {
2425 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2426 if (common->partialmatchlabel != NULL)
2427 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2428 else
2429 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2430 }
2431 JUMPHERE(jump);
2432 }
2433
2434 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2435 {
2436 DEFINE_COMPILER;
2437 struct sljit_jump *jump;
2438
2439 if (common->mode == JIT_COMPILE)
2440 {
2441 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2442 return;
2443 }
2444
2445 /* Partial matching mode. */
2446 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2447 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2448 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2449 {
2450 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2451 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2452 }
2453 else
2454 {
2455 if (common->partialmatchlabel != NULL)
2456 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2457 else
2458 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2459 }
2460 JUMPHERE(jump);
2461 }
2462
2463 static void peek_char(compiler_common *common)
2464 {
2465 /* Reads the character into TMP1, keeps STR_PTR.
2466 Does not check STR_END. TMP2 Destroyed. */
2467 DEFINE_COMPILER;
2468 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2469 struct sljit_jump *jump;
2470 #endif
2471
2472 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2473 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2474 if (common->utf)
2475 {
2476 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2477 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2478 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2479 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2480 JUMPHERE(jump);
2481 }
2482 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2483
2484 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2485 if (common->utf)
2486 {
2487 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2488 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2489 /* TMP2 contains the high surrogate. */
2490 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2491 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2492 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2493 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2494 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2495 JUMPHERE(jump);
2496 }
2497 #endif
2498 }
2499
2500 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2501
2502 static BOOL is_char7_bitset(const pcre_uint8* bitset, BOOL nclass)
2503 {
2504 /* Tells whether the character codes below 128 are enough
2505 to determine a match. */
2506 const pcre_uint8 value = nclass ? 0xff : 0;
2507 const pcre_uint8* end = bitset + 32;
2508
2509 bitset += 16;
2510 do
2511 {
2512 if (*bitset++ != value)
2513 return FALSE;
2514 }
2515 while (bitset < end);
2516 return TRUE;
2517 }
2518
2519 static void read_char7_type(compiler_common *common, BOOL full_read)
2520 {
2521 /* Reads the precise character type of a character into TMP1, if the character
2522 is less than 128. Otherwise it returns with zero. Does not check STR_END. The
2523 full_read argument tells whether characters above max are accepted or not. */
2524 DEFINE_COMPILER;
2525 struct sljit_jump *jump;
2526
2527 SLJIT_ASSERT(common->utf);
2528
2529 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2530 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2531
2532 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2533
2534 if (full_read)
2535 {
2536 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2537 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2538 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2539 JUMPHERE(jump);
2540 }
2541 }
2542
2543 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2544
2545 static void read_char_max(compiler_common *common, pcre_uint32 max, BOOL full_read)
2546 {
2547 /* Reads the precise value of a character into TMP1, if the character is
2548 less than or equal to max. Otherwise it returns with a value greater than max.
2549 Does not check STR_END. The full_read argument tells whether characters above
2550 max are accepted or not. */
2551 DEFINE_COMPILER;
2552 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2553 struct sljit_jump *jump;
2554 #endif
2555
2556 SLJIT_UNUSED_ARG(full_read);
2557 SLJIT_UNUSED_ARG(max);
2558
2559 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2560 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2561
2562 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2563 if (common->utf)
2564 {
2565 if (max < 128 && !full_read)
2566 return;
2567
2568 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2569 if (max >= 0x800)
2570 add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2571 else if (max < 128)
2572 {
2573 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2574 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2575 }
2576 else
2577 {
2578 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2579 if (!full_read)
2580 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2581 else
2582 OP1(SLJIT_MOV_UB, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2583 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2584 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2585 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2586 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2587 if (full_read)
2588 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
2589 }
2590 JUMPHERE(jump);
2591 }
2592 #endif
2593
2594 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2595 if (common->utf)
2596 {
2597 if (max >= 0x10000)
2598 {
2599 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2600 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2601 /* TMP2 contains the high surrogate. */
2602 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2603 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
2604 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
2605 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2606 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2607 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2608 JUMPHERE(jump);
2609 return;
2610 }
2611
2612 if (max < 0xd800 && !full_read)
2613 return;
2614
2615 /* Skip low surrogate if necessary. */
2616 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2617 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2618 if (full_read)
2619 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2620 if (max >= 0xd800)
2621 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
2622 JUMPHERE(jump);
2623 }
2624 #endif
2625 }
2626
2627 static SLJIT_INLINE void read_char(compiler_common *common)
2628 {
2629 read_char_max(common, 0x7fffffff, TRUE);
2630 }
2631
2632 static void read_char8_type(compiler_common *common, BOOL full_read)
2633 {
2634 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
2635 The full_read argument tells whether characters above max are accepted or not. */
2636 DEFINE_COMPILER;
2637 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2638 struct sljit_jump *jump;
2639 #endif
2640 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2641 struct sljit_jump *jump2;
2642 #endif
2643
2644 SLJIT_UNUSED_ARG(full_read);
2645
2646 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2647 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2648
2649 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2650 if (common->utf)
2651 {
2652 /* This can be an extra read in some situations, but hopefully
2653 it is needed in most cases. */
2654 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2655 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2656 if (!full_read)
2657 {
2658 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2659 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2660 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2661 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2662 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2663 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2664 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2665 jump2 = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2666 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2667 JUMPHERE(jump2);
2668 }
2669 else
2670 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2671 JUMPHERE(jump);
2672 return;
2673 }
2674 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
2675
2676 #if !defined COMPILE_PCRE8
2677 /* The ctypes array contains only 256 values. */
2678 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2679 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2680 #endif
2681 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2682 #if !defined COMPILE_PCRE8
2683 JUMPHERE(jump);
2684 #endif
2685
2686 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2687 if (common->utf && full_read)
2688 {
2689 /* Skip low surrogate if necessary. */
2690 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2691 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
2692 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2693 JUMPHERE(jump);
2694 }
2695 #endif /* SUPPORT_UTF && COMPILE_PCRE16 */
2696 }
2697
2698 static void skip_char_back(compiler_common *common)
2699 {
2700 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2701 DEFINE_COMPILER;
2702 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2703 #if defined COMPILE_PCRE8
2704 struct sljit_label *label;
2705
2706 if (common->utf)
2707 {
2708 label = LABEL();
2709 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2710 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2711 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2712 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2713 return;
2714 }
2715 #elif defined COMPILE_PCRE16
2716 if (common->utf)
2717 {
2718 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2719 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2720 /* Skip low surrogate if necessary. */
2721 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2722 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2723 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2724 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2725 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2726 return;
2727 }
2728 #endif /* COMPILE_PCRE[8|16] */
2729 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2730 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2731 }
2732
2733 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2734 {
2735 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2736 DEFINE_COMPILER;
2737
2738 if (nltype == NLTYPE_ANY)
2739 {
2740 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2741 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2742 }
2743 else if (nltype == NLTYPE_ANYCRLF)
2744 {
2745 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2746 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2747 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2748 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2749 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2750 }
2751 else
2752 {
2753 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2754 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2755 }
2756 }
2757
2758 #ifdef SUPPORT_UTF
2759
2760 #if defined COMPILE_PCRE8
2761 static void do_utfreadchar(compiler_common *common)
2762 {
2763 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2764 of the character (>= 0xc0). Return char value in TMP1, length in TMP2. */
2765 DEFINE_COMPILER;
2766 struct sljit_jump *jump;
2767
2768 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2769 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2770 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2771 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2772 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2773 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2774
2775 /* Searching for the first zero. */
2776 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2777 jump = JUMP(SLJIT_C_NOT_ZERO);
2778 /* Two byte sequence. */
2779 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2780 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2781 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2782
2783 JUMPHERE(jump);
2784 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2785 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2786 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2787 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2788 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2789
2790 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2791 jump = JUMP(SLJIT_C_NOT_ZERO);
2792 /* Three byte sequence. */
2793 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2794 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2795 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2796
2797 /* Four byte sequence. */
2798 JUMPHERE(jump);
2799 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2800 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2801 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2802 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2803 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2804 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2805 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
2806 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2807 }
2808
2809 static void do_utfreadchar16(compiler_common *common)
2810 {
2811 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2812 of the character (>= 0xc0). Return value in TMP1. */
2813 DEFINE_COMPILER;
2814 struct sljit_jump *jump;
2815
2816 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2817 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2818 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2819 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2820 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2821 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2822
2823 /* Searching for the first zero. */
2824 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
2825 jump = JUMP(SLJIT_C_NOT_ZERO);
2826 /* Two byte sequence. */
2827 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2828 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2829
2830 JUMPHERE(jump);
2831 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2832 OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
2833 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2834 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2835 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2836 /* Three byte sequence. */
2837 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2838 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2839 }
2840
2841 static void do_utfreadtype8(compiler_common *common)
2842 {
2843 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2844 of the character (>= 0xc0). Return value in TMP1. */
2845 DEFINE_COMPILER;
2846 struct sljit_jump *jump;
2847 struct sljit_jump *compare;
2848
2849 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2850
2851 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2852 jump = JUMP(SLJIT_C_NOT_ZERO);
2853 /* Two byte sequence. */
2854 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2855 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2856 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2857 /* The upper 5 bits are known at this point. */
2858 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
2859 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2860 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2861 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2862 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2863 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2864
2865 JUMPHERE(compare);
2866 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2867 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2868
2869 /* We only have types for characters less than 256. */
2870 JUMPHERE(jump);
2871 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2872 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2873 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2874 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2875 }
2876
2877 #endif /* COMPILE_PCRE8 */
2878
2879 #endif /* SUPPORT_UTF */
2880
2881 #ifdef SUPPORT_UCP
2882
2883 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2884 #define UCD_BLOCK_MASK 127
2885 #define UCD_BLOCK_SHIFT 7
2886
2887 static void do_getucd(compiler_common *common)
2888 {
2889 /* Search the UCD record for the character comes in TMP1.
2890 Returns chartype in TMP1 and UCD offset in TMP2. */
2891 DEFINE_COMPILER;
2892
2893 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2894
2895 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2896 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2897 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2898 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2899 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2900 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2901 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2902 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2903 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2904 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2905 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2906 }
2907 #endif
2908
2909 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2910 {
2911 DEFINE_COMPILER;
2912 struct sljit_label *mainloop;
2913 struct sljit_label *newlinelabel = NULL;
2914 struct sljit_jump *start;
2915 struct sljit_jump *end = NULL;
2916 struct sljit_jump *nl = NULL;
2917 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2918 struct sljit_jump *singlechar;
2919 #endif
2920 jump_list *newline = NULL;
2921 BOOL newlinecheck = FALSE;
2922 BOOL readuchar = FALSE;
2923
2924 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2925 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2926 newlinecheck = TRUE;
2927
2928 if (firstline)
2929 {
2930 /* Search for the end of the first line. */
2931 SLJIT_ASSERT(common->first_line_end != 0);
2932 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2933
2934 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2935 {
2936 mainloop = LABEL();
2937 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2938 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2939 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2940 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2941 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2942 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2943 JUMPHERE(end);
2944 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2945 }
2946 else
2947 {
2948 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2949 mainloop = LABEL();
2950 /* Continual stores does not cause data dependency. */
2951 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2952 read_char(common);
2953 check_newlinechar(common, common->nltype, &newline, TRUE);
2954 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2955 JUMPHERE(end);
2956 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2957 set_jumps(newline, LABEL());
2958 }
2959
2960 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2961 }
2962
2963 start = JUMP(SLJIT_JUMP);
2964
2965 if (newlinecheck)
2966 {
2967 newlinelabel = LABEL();
2968 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2969 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2970 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2971 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2972 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2973 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2974 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2975 #endif
2976 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2977 nl = JUMP(SLJIT_JUMP);
2978 }
2979
2980 mainloop = LABEL();
2981
2982 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2983 #ifdef SUPPORT_UTF
2984 if (common->utf) readuchar = TRUE;
2985 #endif
2986 if (newlinecheck) readuchar = TRUE;
2987
2988 if (readuchar)
2989 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2990
2991 if (newlinecheck)
2992 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2993
2994 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2995 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2996 #if defined COMPILE_PCRE8
2997 if (common->utf)
2998 {
2999 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3000 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3001 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3002 JUMPHERE(singlechar);
3003 }
3004 #elif defined COMPILE_PCRE16
3005 if (common->utf)
3006 {
3007 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3008 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3009 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3010 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3011 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3012 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3013 JUMPHERE(singlechar);
3014 }
3015 #endif /* COMPILE_PCRE[8|16] */
3016 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
3017 JUMPHERE(start);
3018
3019 if (newlinecheck)
3020 {
3021 JUMPHERE(end);
3022 JUMPHERE(nl);
3023 }
3024
3025 return mainloop;
3026 }
3027
3028 static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uint32 *chars, int max_chars)
3029 {
3030 /* Recursive function, which scans prefix literals. */
3031 int len, repeat, len_save, consumed = 0;
3032 pcre_uint32 caseless, chr, mask;
3033 pcre_uchar *alternative, *cc_save;
3034 BOOL last, any;
3035
3036 repeat = 1;
3037 while (TRUE)
3038 {
3039 last = TRUE;
3040 any = FALSE;
3041 caseless = 0;
3042 switch (*cc)
3043 {
3044 case OP_CHARI:
3045 caseless = 1;
3046 case OP_CHAR:
3047 last = FALSE;
3048 cc++;
3049 break;
3050
3051 case OP_SOD:
3052 case OP_SOM:
3053 case OP_SET_SOM:
3054 case OP_NOT_WORD_BOUNDARY:
3055 case OP_WORD_BOUNDARY:
3056 case OP_EODN:
3057 case OP_EOD:
3058 case OP_CIRC:
3059 case OP_CIRCM:
3060 case OP_DOLL:
3061 case OP_DOLLM:
3062 /* Zero width assertions. */
3063 cc++;
3064 continue;
3065
3066 case OP_PLUS:
3067 case OP_MINPLUS:
3068 case OP_POSPLUS:
3069 cc++;
3070 break;
3071
3072 case OP_EXACTI:
3073 caseless = 1;
3074 case OP_EXACT:
3075 repeat = GET2(cc, 1);
3076 last = FALSE;
3077 cc += 1 + IMM2_SIZE;
3078 break;
3079
3080 case OP_PLUSI:
3081 case OP_MINPLUSI:
3082 case OP_POSPLUSI:
3083 caseless = 1;
3084 cc++;
3085 break;
3086
3087 case OP_KET:
3088 cc += 1 + LINK_SIZE;
3089 continue;
3090
3091 case OP_ALT:
3092 cc += GET(cc, 1);
3093 continue;
3094
3095 case OP_ONCE:
3096 case OP_ONCE_NC:
3097 case OP_BRA:
3098 case OP_BRAPOS:
3099 case OP_CBRA:
3100 case OP_CBRAPOS:
3101 alternative = cc + GET(cc, 1);
3102 while (*alternative == OP_ALT)
3103 {
3104 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars);
3105 if (max_chars == 0)
3106 return consumed;
3107 alternative += GET(alternative, 1);
3108 }
3109
3110 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
3111 cc += IMM2_SIZE;
3112 cc += 1 + LINK_SIZE;
3113 continue;
3114
3115 case OP_CLASS:
3116 case OP_NCLASS:
3117 any = TRUE;
3118 cc += 1 + 32 / sizeof(pcre_uchar);
3119 break;
3120
3121 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3122 case OP_XCLASS:
3123 any = TRUE;
3124 cc += GET(cc, 1);
3125 break;
3126 #endif
3127
3128 case OP_NOT_DIGIT:
3129 case OP_DIGIT:
3130 case OP_NOT_WHITESPACE:
3131 case OP_WHITESPACE:
3132 case OP_NOT_WORDCHAR:
3133 case OP_WORDCHAR:
3134 case OP_ANY:
3135 case OP_ALLANY:
3136 any = TRUE;
3137 cc++;
3138 break;
3139
3140 #ifdef SUPPORT_UCP
3141 case OP_NOTPROP:
3142 case OP_PROP:
3143 any = TRUE;
3144 cc += 1 + 2;
3145 break;
3146 #endif
3147
3148 case OP_TYPEEXACT:
3149 repeat = GET2(cc, 1);
3150 cc += 1 + IMM2_SIZE;
3151 continue;
3152
3153 default:
3154 return consumed;
3155 }
3156
3157 if (any)
3158 {
3159 #ifdef SUPPORT_UTF
3160 if (common->utf) return consumed;
3161 #endif
3162 #if defined COMPILE_PCRE8
3163 mask = 0xff;
3164 #elif defined COMPILE_PCRE16
3165 mask = 0xffff;
3166 #elif defined COMPILE_PCRE32
3167 mask = 0xffffffff;
3168 #else
3169 SLJIT_ASSERT_STOP();
3170 #endif
3171
3172 do
3173 {
3174 chars[0] = mask;
3175 chars[1] = mask;
3176
3177 if (--max_chars == 0)
3178 return consumed;
3179 consumed++;
3180 chars += 2;
3181 }
3182 while (--repeat > 0);
3183
3184 repeat = 1;
3185 continue;
3186 }
3187
3188 len = 1;
3189 #ifdef SUPPORT_UTF
3190 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
3191 #endif
3192
3193 if (caseless != 0 && char_has_othercase(common, cc))
3194 {
3195 caseless = char_get_othercase_bit(common, cc);
3196 if (caseless == 0)
3197 return consumed;
3198 #ifdef COMPILE_PCRE8
3199 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3200 #else
3201 if ((caseless & 0x100) != 0)
3202 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
3203 else
3204 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
3205 #endif
3206 }
3207 else
3208 caseless = 0;
3209
3210 len_save = len;
3211 cc_save = cc;
3212 while (TRUE)
3213 {
3214 do
3215 {
3216 chr = *cc;
3217 #ifdef COMPILE_PCRE32
3218 if (SLJIT_UNLIKELY(chr == NOTACHAR))
3219 return consumed;
3220 #endif
3221 mask = 0;
3222 if (len == (caseless & 0xff))
3223 {
3224 mask = caseless >> 8;
3225 chr |= mask;
3226 }
3227
3228 if (chars[0] == NOTACHAR)
3229 {
3230 chars[0] = chr;
3231 chars[1] = mask;
3232 }
3233 else
3234 {
3235 mask |= chars[0] ^ chr;
3236 chr |= mask;
3237 chars[0] = chr;
3238 chars[1] |= mask;
3239 }
3240
3241 len--;
3242 if (--max_chars == 0)
3243 return consumed;
3244 consumed++;
3245 chars += 2;
3246 cc++;
3247 }
3248 while (len > 0);
3249
3250 if (--repeat == 0)
3251 break;
3252
3253 len = len_save;
3254 cc = cc_save;
3255 }
3256
3257 repeat = 1;
3258 if (last)
3259 return consumed;
3260 }
3261 }
3262
/* Upper bound on the number of prefix positions examined by
fast_forward_first_n_chars(): it sizes the chars[] / ones[] work arrays
and is the limit handed to scan_prefix(). */
3263 #define MAX_N_CHARS 16
3264
/* Emits a fast-forward loop that advances STR_PTR until up to three
selected positions of the pattern prefix match the subject.

scan_prefix() fills chars[] with one (value, mask) pair per prefix
position: as used below, chars[2*i] is the value compared against and
chars[2*i+1] holds the bits that are OR-ed into the subject character
before the comparison (bits whose value does not matter).

Arguments:
  common     compiler state (pattern start, first_line_end slot)
  firstline  when TRUE the search limit is the value stored at
             common->first_line_end instead of STR_END

Returns FALSE, without emitting the loop, when fewer than two positions
were collected or when no position has at most two mask bits set;
returns TRUE after the loop has been emitted. */
3265 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
3266 {
3267 DEFINE_COMPILER;
3268 struct sljit_label *start;
3269 struct sljit_jump *quit;
3270 pcre_uint32 chars[MAX_N_CHARS * 2];
3271 pcre_uint8 ones[MAX_N_CHARS];
3272 pcre_uint32 mask;
3273 int i, max;
3274 int offsets[3];
3275
/* Mark every position as unset: value NOTACHAR, empty mask. */
3276 for (i = 0; i < MAX_N_CHARS; i++)
3277 {
3278 chars[i << 1] = NOTACHAR;
3279 chars[(i << 1) + 1] = 0;
3280 }
3281
3282 max = scan_prefix(common, common->start, chars, MAX_N_CHARS);
3283
3284 if (max <= 1)
3285 return FALSE;
3286
/* ones[i] = number of set bits in the mask of position i, summed four
bits at a time through the ones_in_half_byte table (defined elsewhere;
presumably the population count of a 4-bit value). */
3287 for (i = 0; i < max; i++)
3288 {
3289 mask = chars[(i << 1) + 1];
3290 ones[i] = ones_in_half_byte[mask & 0xf];
3291 mask >>= 4;
3292 while (mask != 0)
3293 {
3294 ones[i] += ones_in_half_byte[mask & 0xf];
3295 mask >>= 4;
3296 }
3297 }
3298
/* offsets[0]: first position whose mask has at most two bits set. */
3299 offsets[0] = -1;
3300 /* Scan forward. */
3301 for (i = 0; i < max; i++)
3302 if (ones[i] <= 2) {
3303 offsets[0] = i;
3304 break;
3305 }
3306
3307 if (offsets[0] == -1)
3308 return FALSE;
3309
/* offsets[1]: last such position after offsets[0], or -1 if none. */
3310 /* Scan backward. */
3311 offsets[1] = -1;
3312 for (i = max - 1; i > offsets[0]; i--)
3313 if (ones[i] <= 2) {
3314 offsets[1] = i;
3315 break;
3316 }
3317
/* offsets[2]: a third position strictly between the other two, searched
upward from just above the midpoint first and then downward from the
midpoint; -1 if none qualifies. */
3318 offsets[2] = -1;
3319 if (offsets[1] >= 0)
3320 {
3321 /* Scan from middle. */
3322 for (i = (offsets[0] + offsets[1]) / 2 + 1; i < offsets[1]; i++)
3323 if (ones[i] <= 2)
3324 {
3325 offsets[2] = i;
3326 break;
3327 }
3328
3329 if (offsets[2] == -1)
3330 {
3331 for (i = (offsets[0] + offsets[1]) / 2; i > offsets[0]; i--)
3332 if (ones[i] <= 2)
3333 {
3334 offsets[2] = i;
3335 break;
3336 }
3337 }
3338 }
3339
3340 SLJIT_ASSERT(offsets[1] == -1 || (offsets[0] < offsets[1]));
3341 SLJIT_ASSERT(offsets[2] == -1 || (offsets[0] < offsets[2] && offsets[1] > offsets[2]));
3342
/* Compact the selected pairs into the front of chars[]:
[0,1] = first, [2,3] = middle, [4,5] = last. Given the ordering asserted
above, every source index is at or above its destination, so no pair is
overwritten before it has been read. */
3343 chars[0] = chars[offsets[0] << 1];
3344 chars[1] = chars[(offsets[0] << 1) + 1];
3345 if (offsets[2] >= 0)
3346 {
3347 chars[2] = chars[offsets[2] << 1];
3348 chars[3] = chars[(offsets[2] << 1) + 1];
3349 }
3350 if (offsets[1] >= 0)
3351 {
3352 chars[4] = chars[offsets[1] << 1];
3353 chars[5] = chars[(offsets[1] << 1) + 1];
3354 }
3355
/* Lower the end limit by (max - 1) characters so that, while the loop
below runs, the loads at STR_PTR + offsets[...] stay below the original
limit. With firstline the original STR_END is parked in TMP3. */
3356 max -= 1;
3357 if (firstline)
3358 {
3359 SLJIT_ASSERT(common->first_line_end != 0);
3360 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3361 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(max));
3362 }
3363 else
3364 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3365
3366 start = LABEL();
3367 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3368
/* Load the characters at the first and last selected offsets, then step
STR_PTR forward by one character. Loads emitted after this point are
relative to the advanced pointer, hence "offsets[2] - 1" below. */
3369 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[0]));
3370 if (offsets[1] >= 0)
3371 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[1]));
3372 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3373
/* First position: OR in the don't-care bits, retry on mismatch. */
3374 if (chars[1] != 0)
3375 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3376 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* TMP1 is free again: start loading the middle character early. */
3377 if (offsets[2] >= 0)
3378 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offsets[2] - 1));
3379
/* Last position (already in TMP2). */
3380 if (offsets[1] >= 0)
3381 {
3382 if (chars[5] != 0)
3383 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[5]);
3384 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[4], start);
3385 }
3386
/* Middle position. */
3387 if (offsets[2] >= 0)
3388 {
3389 if (chars[3] != 0)
3390 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[3]);
3391 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[2], start);
3392 }
/* Every selected position matched: undo the early increment so STR_PTR
points at the candidate start again. */
3393 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3394
3395 JUMPHERE(quit);
3396
/* Put STR_END back: from TMP3 in firstline mode, otherwise by re-adding
the amount subtracted above. */
3397 if (firstline)
3398 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3399 else
3400 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
3401 return TRUE;
3402 }
3403
3404 #undef MAX_N_CHARS
3405
/* Emits a loop that advances STR_PTR, one character at a time, until the
character at STR_PTR equals first_char (or its other case when caseless
is set) or STR_PTR reaches STR_END.

Arguments:
  common      compiler state
  first_char  the character every match must start with
  caseless    TRUE if the other case of first_char is accepted as well
  firstline   when TRUE, STR_END is temporarily replaced by the value
              stored at common->first_line_end (the original is kept in
              TMP3 and restored at the end)

The emitted code uses TMP1 for the loaded character and TMP2 as scratch. */
3406 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3407 {
3408 DEFINE_COMPILER;
3409 struct sljit_label *start;
3410 struct sljit_jump *quit;
3411 struct sljit_jump *found;
3412 pcre_uchar oc, bit;
3413
3414 if (firstline)
3415 {
3416 SLJIT_ASSERT(common->first_line_end != 0);
3417 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3418 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3419 }
3420
3421 start = LABEL();
3422 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3423 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3424
/* Work out the other-case character at compile time. TABLE_GET is
defined elsewhere; presumably it reads common->fcc[first_char] with
first_char as the fallback - TODO confirm. */
3425 oc = first_char;
3426 if (caseless)
3427 {
3428 oc = TABLE_GET(first_char, common->fcc, first_char);
3429 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3430 if (first_char > 127 && common->utf)
3431 oc = UCD_OTHERCASE(first_char);
3432 #endif
3433 }
/* No distinct other case: a single equality test is enough. */
3434 if (first_char == oc)
3435 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3436 else
3437 {
3438 bit = first_char ^ oc;
3439 if (is_powerof2(bit))
3440 {
/* The two cases differ in exactly one bit: force that bit on in the
subject character and compare once. */
3441 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3442 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3443 }
3444 else
3445 {
/* General case: TMP2 = (c == first_char) | (c == oc), leave when it is
non-zero. */
3446 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3447 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3448 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3449 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3450 found = JUMP(SLJIT_C_NOT_ZERO);
3451 }
3452 }
3453
/* Not a candidate: step one character forward and try again. */
3454 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3455 JUMPTO(SLJIT_JUMP, start);
3456 JUMPHERE(found);
3457 JUMPHERE(quit);
3458
3459 if (firstline)
3460 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3461 }
3462
/* Emits code that moves STR_PTR forward to the position following the
next newline, so that matching can resume at the start of a line.
Nothing is skipped when STR_PTR is not beyond the "str" field of the
jit_arguments block (the position matching started from).

Arguments:
  common     compiler state (newline type and characters)
  firstline  when TRUE, STR_END is temporarily replaced by the value
             stored at common->first_line_end; the original STR_END is
             parked in TMP3 and restored on BOTH exit paths.

Two code shapes are generated:
  - fixed two-character newline (common->newline > 255): a plain loop
    comparing the two characters before STR_PTR;
  - every other newline type: a read_char() / check_newlinechar() loop,
    with extra handling of a CR followed by NL for the ANY / ANYCRLF
    types.

The emitted code uses TMP1 and TMP2 as scratch registers. */
3463 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3464 {
3465 DEFINE_COMPILER;
3466 struct sljit_label *loop;
3467 struct sljit_jump *lastchar;
3468 struct sljit_jump *firstchar;
3469 struct sljit_jump *quit;
3470 struct sljit_jump *foundcr = NULL;
3471 struct sljit_jump *notfoundnl;
3472 jump_list *newline = NULL;
3473
3474 if (firstline)
3475 {
3476 SLJIT_ASSERT(common->first_line_end != 0);
3477 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3478 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3479 }
3480
3481 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3482 {
/* Already at the end, or not past the starting position: nothing to do. */
3483 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3484 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3485 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3486 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3487 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3488
/* Step back one character when at least two characters lie between
"begin" and STR_PTR, so the loop's first iteration (which re-adds one
character) can test the two characters that end at the current
position. */
3489 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3490 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3491 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3492 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3493 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3494 #endif
3495 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3496
/* Advance until the two characters just before STR_PTR are the newline
pair (high byte of common->newline first, low byte second). */
3497 loop = LABEL();
3498 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3499 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3500 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3501 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3502 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3503 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3504
3505 JUMPHERE(quit);
3506 JUMPHERE(firstchar);
3507 JUMPHERE(lastchar);
3508
/* STR_END was parked in TMP3 on entry and TMP3 is not touched by the
code above, so restore it from there, exactly as the general exit path
at the end of this function does. (This path used to reload STR_END from
the POSSESSIVE0 stack slot, which this function never writes.) */
3509 if (firstline)
3510 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3511 return;
3512 }
3513
/* General case. Skip everything when STR_PTR is not past the starting
position; otherwise go back one character so the character before the
current position is the first one examined. */
3514 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3515 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3516 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3517 skip_char_back(common);
3518
3519 loop = LABEL();
3520 read_char(common);
3521 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* CR needs special treatment because an NL may follow it. */
3522 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3523 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* The jumps collected in "newline" are bound to the loop head, i.e. they
keep the scan going (presumably taken for non-newline characters -
check_newlinechar() is defined elsewhere). */
3524 check_newlinechar(common, common->nltype, &newline, FALSE);
3525 set_jumps(newline, loop);
3526
3527 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3528 {
3529 quit = JUMP(SLJIT_JUMP);
3530 JUMPHERE(foundcr);
/* After a CR: if the next character exists and is NL, step over it too
(TMP1 becomes 0 or 1, scaled to the character size, and is added). */
3531 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3532 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3533 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3534 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3535 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3536 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3537 #endif
3538 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3539 JUMPHERE(notfoundnl);
3540 JUMPHERE(quit);
3541 }
3542 JUMPHERE(lastchar);
3543 JUMPHERE(firstchar);
3544
3545 if (firstline)
3546 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3547 }
3548
/* Forward declaration: fast_forward_start_bits() below uses it before
its definition. */
3549 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
3550
/* Emits a loop that advances STR_PTR until the character at STR_PTR has
its bit set in start_bits, or STR_PTR reaches STR_END.

Arguments:
  common      compiler state
  start_bits  bitmap indexed by character value: byte (c >> 3), bit
              (c & 7); the code reads up to start_bits[31], i.e. 256 bits
  firstline   when TRUE, STR_END is temporarily replaced by the value
              stored at common->first_line_end; the original is parked
              in RETURN_ADDR (TMP3 is needed for the UTF copy below)

The emitted code uses TMP1 and TMP2 as scratch, plus TMP3 in UTF mode. */
3551 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, pcre_uint8 *start_bits, BOOL firstline)
3552 {
3553 DEFINE_COMPILER;
3554 struct sljit_label *start;
3555 struct sljit_jump *quit;
3556 struct sljit_jump *found = NULL;
3557 jump_list *matches = NULL;
3558 #ifndef COMPILE_PCRE8
3559 struct sljit_jump *jump;
3560 #endif
3561
3562 if (firstline)
3563 {
3564 SLJIT_ASSERT(common->first_line_end != 0);
3565 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3566 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3567 }
3568
3569 start = LABEL();
3570 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3571 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* The tests below modify TMP1; in UTF mode keep a copy of the code unit
for the length computation that follows. */
3572 #ifdef SUPPORT_UTF
3573 if (common->utf)
3574 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3575 #endif
3576
/* First try to express the bitmap as a few range compares. With
invert == TRUE the jumps collected in "matches" are the ones taken for
characters that ARE in the set; they are bound after the loop. Bit 255 of
the map is passed as the "nclass" argument. */
3577 if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &matches))
3578 {
/* Fallback: explicit bitmap test. In the 16/32-bit libraries every
character >= 255 is tested as character 255. */
3579 #ifndef COMPILE_PCRE8
3580 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3581 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3582 JUMPHERE(jump);
3583 #endif
/* TMP2 = 1 << (c & 7), TMP1 = start_bits[c >> 3]; found if they share a
bit. */
3584 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3585 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3586 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
3587 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3588 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3589 found = JUMP(SLJIT_C_NOT_ZERO);
3590 }
3591
/* No match at this position: restore the code unit and move on. */
3592 #ifdef SUPPORT_UTF
3593 if (common->utf)
3594 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3595 #endif
3596 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3597 #ifdef SUPPORT_UTF
3598 #if defined COMPILE_PCRE8
3599 if (common->utf)
3600 {
/* UTF-8: a byte below 0xc0 needs no further skipping; otherwise add the
value found in PRIV(utf8_table4) for this lead byte (presumably the
number of additional bytes of the character). */
3601 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3602 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3603 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3604 }
3605 #elif defined COMPILE_PCRE16
3606 if (common->utf)
3607 {
/* UTF-16: when (unit & 0xfc00) == 0xd800 (a high surrogate) add 2 more
to STR_PTR, skipping the second unit of the pair. */
3608 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3609 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3610 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3611 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3612 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3613 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3614 }
3615 #endif /* COMPILE_PCRE[8|16] */
3616 #endif /* SUPPORT_UTF */
3617 JUMPTO(SLJIT_JUMP, start);
/* Exit points: bitmap hit, range hit, or end of subject. */
3618 if (found != NULL)
3619 JUMPHERE(found);
3620 if (matches != NULL)
3621 set_jumps(matches, LABEL());
3622 JUMPHERE(quit);
3623
3624 if (firstline)
3625 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3626 }
3627
/* Emits code that checks that req_char (or its other case when caseless
is set) occurs in the subject at or after the current position.

Arguments:
  common        compiler state; common->req_char_ptr is the local slot
                that caches the address where the character was last
                found
  req_char      the required character
  caseless      TRUE if the other case is accepted as well
  has_firstchar TRUE to start the scan one character after STR_PTR

Returns the jump that is taken when the scan reaches STR_END without
finding the character; the caller has to bind it.

The scan is skipped altogether when STR_PTR + REQ_BYTE_MAX is still below
STR_END, or when STR_PTR is below the cached address. The emitted code
uses TMP1 (scan pointer) and TMP2 (loaded character). */
3628 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3629 {
3630 DEFINE_COMPILER;
3631 struct sljit_label *loop;
3632 struct sljit_jump *toolong;
3633 struct sljit_jump *alreadyfound;
3634 struct sljit_jump *found;
3635 struct sljit_jump *foundoc = NULL;
3636 struct sljit_jump *notfound;
3637 pcre_uint32 oc, bit;
3638
3639 SLJIT_ASSERT(common->req_char_ptr != 0);
/* TMP2 = cached address of the previous hit. */
3640 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3641 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3642 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3643 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3644
/* TMP1 = where the scan starts. */
3645 if (has_firstchar)
3646 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3647 else
3648 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3649
3650 loop = LABEL();
3651 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3652
3653 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Other-case character, computed at compile time (TABLE_GET is defined
elsewhere; presumably a common->fcc lookup with req_char as fallback). */
3654 oc = req_char;
3655 if (caseless)
3656 {
3657 oc = TABLE_GET(req_char, common->fcc, req_char);
3658 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3659 if (req_char > 127 && common->utf)
3660 oc = UCD_OTHERCASE(req_char);
3661 #endif
3662 }
3663 if (req_char == oc)
3664 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3665 else
3666 {
3667 bit = req_char ^ oc;
3668 if (is_powerof2(bit))
3669 {
/* Cases differ in a single bit: force it on and compare once. */
3670 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3671 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3672 }
3673 else
3674 {
/* Otherwise test both characters separately. */
3675 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3676 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3677 }
3678 }
3679 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3680 JUMPTO(SLJIT_JUMP, loop);
3681
3682 JUMPHERE(found);
3683 if (foundoc)
3684 JUMPHERE(foundoc);
/* Remember the address of the hit for later calls. */
3685 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3686 JUMPHERE(alreadyfound);
3687 JUMPHERE(toolong);
3688 return notfound;
3689 }
3690
/* Emits a fast-call subroutine (entered with the return address in
RETURN_ADDR) that walks the sequence of records starting at STACK_TOP and
copies saved values back into the local area.

Each record starts with one word W, read into TMP2:
  W > 0   two-value record: the next two words are stored at
          (local base + W) and (local base + W + sizeof(sljit_sw));
          the record is three words long
  W == 0  end marker: the routine returns
  W < 0   one-value record: the next word is stored at
          (local base - W); the record is two words long

TMP1 is the read cursor, TMP3 holds the base obtained through
GET_LOCAL_BASE (defined elsewhere; presumably the address of the locals
area). STACK_TOP itself is not modified. */
3691 static void do_revertframes(compiler_common *common)
3692 {
3693 DEFINE_COMPILER;
3694 struct sljit_jump *jump;
3695 struct sljit_label *mainloop;
3696
3697 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3698 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3699 GET_LOCAL_BASE(TMP3, 0, 0);
3700
3701 /* Drop frames until we reach STACK_TOP. */
3702 mainloop = LABEL();
3703 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Signed compare of W against zero; the flags are consumed twice: by the
jump below and by the SIG_LESS jump after it. */
3704 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3705 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3706
/* W > 0: restore two words. */
3707 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3708 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3709 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3710 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3711 JUMPTO(SLJIT_JUMP, mainloop);
3712
3713 JUMPHERE(jump);
3714 jump = JUMP(SLJIT_C_SIG_LESS);
3715 /* End of dropping frames. */
3716 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3717
/* W < 0: restore a single word at offset -W. */
3718 JUMPHERE(jump);
3719 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3720 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3721 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3722 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3723 JUMPTO(SLJIT_JUMP, mainloop);
3724 }
3725
/* Emits a fast-call subroutine that tests whether the current position
is a word boundary.

The routine computes two 0/1 values: "previous character is a word
character" (kept in the LOCALS1 slot) and "next character is a word
character" (kept in TMP2), then XORs them with SLJIT_SET_E, so on return
the zero flag is set exactly when the two values are equal, i.e. when
the position is NOT a boundary. The return address is kept in the LOCALS0
slot because the routine calls other helpers.

At the start of the subject ("begin" in jit_arguments) the previous value
stays 0; at the end of the subject the next value stays 0.

With common->use_ucp a character counts as a word character when it is
an underscore or when the value delivered by the getucd helper lies in
ucp_Ll..ucp_Lu or ucp_Nd..ucp_No (presumably the Unicode letter and
number categories); otherwise the ctype_word bit of common->ctypes is
used and characters above 255 are never word characters. */
3726 static void check_wordboundary(compiler_common *common)
3727 {
3728 DEFINE_COMPILER;
3729 struct sljit_jump *skipread;
3730 jump_list *skipread_list = NULL;
3731 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3732 struct sljit_jump *jump;
3733 #endif
3734
/* The shifts by 4 below rely on this value. */
3735 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3736
3737 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3738 /* Get type of the previous char, and put it to LOCALS1. */
3739 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3740 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3741 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
/* No previous character at the very beginning: keep LOCALS1 == 0. */
3742 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back one character and read it again, which leaves STR_PTR where
it was and the previous character in TMP1. */
3743 skip_char_back(common);
3744 check_start_used_ptr(common);
3745 read_char(common);
3746
3747 /* Testing char type. */
3748 #ifdef SUPPORT_UCP
3749 if (common->use_ucp)
3750 {
/* TMP2 = 1 is the answer for an underscore, which skips the lookup. */
3751 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3752 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3753 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two unsigned range checks on the looked-up value:
[ucp_Ll, ucp_Lu] and [ucp_Nd, ucp_No]. */
3754 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3755 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3756 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3757 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3758 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3759 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3760 JUMPHERE(jump);
3761 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3762 }
3763 else
3764 #endif
3765 {
/* Characters above 255 have no ctypes entry: skip the lookup and leave
LOCALS1 at 0. */
3766 #ifndef COMPILE_PCRE8
3767 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3768 #elif defined SUPPORT_UTF
3769 /* Here LOCALS1 has already been zeroed. */
3770 jump = NULL;
3771 if (common->utf)
3772 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3773 #endif /* COMPILE_PCRE8 */
/* LOCALS1 = (ctypes[c] >> 4) & 1, i.e. the ctype_word bit. */
3774 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3775 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3776 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3777 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3778 #ifndef COMPILE_PCRE8
3779 JUMPHERE(jump);
3780 #elif defined SUPPORT_UTF
3781 if (jump != NULL)
3782 JUMPHERE(jump);
3783 #endif /* COMPILE_PCRE8 */
3784 }
3785 JUMPHERE(skipread);
3786
/* Now the character at the current position; TMP2 = 0 is the result used
when check_str_end() bails out through skipread_list. */
3787 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3788 check_str_end(common, &skipread_list);
3789 peek_char(common);
3790
3791 /* Testing char type. This is a code duplication. */
3792 #ifdef SUPPORT_UCP
3793 if (common->use_ucp)
3794 {
3795 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3796 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3797 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3798 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3799 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3800 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3801 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3802 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3803 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3804 JUMPHERE(jump);
3805 }
3806 else
3807 #endif
3808 {
3809 #ifndef COMPILE_PCRE8
3810 /* TMP2 may be destroyed by peek_char. */
3811 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3812 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3813 #elif defined SUPPORT_UTF
3814 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3815 jump = NULL;
3816 if (common->utf)
3817 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3818 #endif
/* TMP2 = (ctypes[c] >> 4) & 1. */
3819 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3820 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3821 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3822 #ifndef COMPILE_PCRE8
3823 JUMPHERE(jump);
3824 #elif defined SUPPORT_UTF
3825 if (jump != NULL)
3826 JUMPHERE(jump);
3827 #endif /* COMPILE_PCRE8 */
3828 }
3829 set_jumps(skipread_list, LABEL());
3830
/* Zero flag set <=> both sides have the same type <=> no boundary. */
3831 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3832 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3833 }
3834
/* Tries to replace a bitmap lookup for a character class by a few
compare instructions on the character held in TMP1.

Arguments:
  common      compiler state
  bits        class bitmap for characters 0..255 (bit c & 7 of byte
              c >> 3)
  nclass      TRUE if characters above 255 belong to the class
  invert      FALSE: the emitted jumps are taken for characters that are
              NOT in the class; TRUE: they are taken for characters that
              ARE in the class
  backtracks  list that receives the emitted jumps

The bitmap is converted into a list of boundaries, ranges[], holding the
character values at which membership flips. When the value for
characters above 255 (nclass) differs from the value of character 255, a
final boundary at 256 is appended.

Returns FALSE, with nothing emitted, when more than four boundaries (or
more than MAX_RANGE_SIZE) would be needed; the caller must then use
another method. Returns TRUE when the test has been emitted. The emitted
code may modify TMP1. */
3835 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
3836 {
3837 DEFINE_COMPILER;
3838 int ranges[MAX_RANGE_SIZE];
3839 pcre_uint8 bit, cbit, all;
3840 int i, byte, length = 0;
3841
/* "bit" is the membership value of the run being scanned; "all" is the
byte pattern (0x00 or 0xff) of eight characters with that same value. */
3842 bit = bits[0] & 0x1;
3843 /* All bits will be zero or one (since bit is zero or one). */
3844 all = -bit;
3845
3846 for (i = 0; i < 256; )
3847 {
3848 byte = i >> 3;
/* Fast path: a whole byte continues the current run. */
3849 if ((i & 0x7) == 0 && bits[byte] == all)
3850 i += 8;
3851 else
3852 {
3853 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3854 if (cbit != bit)
3855 {
/* Membership flips at character i: record a boundary. */
3856 if (length >= MAX_RANGE_SIZE)
3857 return FALSE;
3858 ranges[length] = i;
3859 length++;
3860 bit = cbit;
3861 all = -cbit;
3862 }
3863 i++;
3864 }
3865 }
3866
/* Membership flips once more between 255 and the characters above it. */
3867 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3868 {
3869 if (length >= MAX_RANGE_SIZE)
3870 return FALSE;
3871 ranges[length] = 256;
3872 length++;
3873 }
3874
/* Only up to four boundaries are handled by the code below. */
3875 if (length < 0 || length > 4)
3876 return FALSE;
3877
/* From here on "bit" is the membership of character 0, flipped when
invert is set. Jumps are emitted for the runs whose (possibly inverted)
value is 0: the runs below ranges[0], between ranges[1] and ranges[2],
... when bit == 0, and the other runs when bit != 0. */
3878 bit = bits[0] & 0x1;
3879 if (invert) bit ^= 0x1;
3880
3881 /* No character is accepted. */
3882 if (length == 0 && bit == 0)
3883 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3884
3885 switch(length)
3886 {
3887 case 0:
3888 /* When bit != 0, all characters are accepted. */
3889 return TRUE;
3890
3891 case 1:
/* One boundary: a single ordered compare. */
3892 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3893 return TRUE;
3894
3895 case 2:
/* Two boundaries delimit one interval [ranges[0], ranges[1]): test it as
(c - ranges[0]) < width, or as an equality when it is one character. */
3896 if (ranges[0] + 1 != ranges[1])
3897 {
3898 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3899 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3900 }
3901 else
3902 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3903 return TRUE;
3904
3905 case 3:
/* Three boundaries: one open-ended run plus one interval. */
3906 if (bit != 0)
3907 {
/* Jump for c >= ranges[2] and for c in [ranges[0], ranges[1]). */
3908 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3909 if (ranges[0] + 1 != ranges[1])
3910 {
3911 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3912 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3913 }
3914 else
3915 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3916 return TRUE;
3917 }
3918
/* Jump for c < ranges[0] and for c in [ranges[1], ranges[2]). */
3919 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
3920 if (ranges[1] + 1 != ranges[2])
3921 {
3922 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
3923 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3924 }
3925 else
3926 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
3927 return TRUE;
3928
3929 case 4:
/* Special case: two intervals of equal width whose start values differ
in exactly one bit (that bit being clear in ranges[0]). OR-ing the bit
into the character folds the first interval onto the second, so one
interval test covers both. */
3930 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
3931 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
3932 && is_powerof2(ranges[2] - ranges[0]))
3933 {
3934 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
3935 if (ranges[2] + 1 != ranges[3])
3936 {
3937 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3938 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3939 }
3940 else
3941 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3942 return TRUE;
3943 }
3944
3945 if (bit != 0)
3946 {
/* Jump for c in [ranges[0], ranges[1]) or [ranges[2], ranges[3]).
"i" remembers how much has already been subtracted from TMP1 so the
second test can be expressed relative to it. */
3947 i = 0;
3948 if (ranges[0] + 1 != ranges[1])
3949 {
3950 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3951 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3952 i = ranges[0];
3953 }
3954 else
3955 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
3956
3957 if (ranges[2] + 1 != ranges[3])
3958 {
3959 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
3960 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3961 }
3962 else
3963 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
3964 return TRUE;
3965 }
3966
/* bit == 0: jump for c outside [ranges[0], ranges[3]) and for c inside
the gap [ranges[1], ranges[2]); TMP1 is already relative to ranges[0]
when the gap is tested. */
3967 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
3968 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
3969 if (ranges[1] + 1 != ranges[2])
3970 {
3971 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
3972 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
3973 }
3974 else
3975 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
3976 return TRUE;
3977
3978 default:
/* Unreachable: length was limited to 0..4 above. */
3979 SLJIT_ASSERT_STOP();
3980 return FALSE;
3981 }
3982 }
3983
/* Emits a fast-call subroutine (return address in RETURN_ADDR) that
tests the character in TMP1 against the "any newline" set:
0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in the 16/32-bit libraries
and in the 8-bit library when common->utf is set.

The result is accumulated in TMP2 and the last instruction sets the
flags from it (SLJIT_SET_E), so the caller can branch on zero /
non-zero. TMP1 is modified as well (0x0a is subtracted and, in the wide
case, bit 0 is set). */
3984 static void check_anynewline(compiler_common *common)
3985 {
3986 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3987 DEFINE_COMPILER;
3988
3989 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3990
/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range check. */
3991 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3992 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3993 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3994 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3995 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3996 #ifdef COMPILE_PCRE8
3997 if (common->utf)
3998 {
3999 #endif
/* Fold in the 0x85 result, then test 0x2028 and 0x2029 together:
setting bit 0 maps both rebased values onto the one compared here. */
4000 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4001 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4002 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4003 #ifdef COMPILE_PCRE8
4004 }
4005 #endif
4006 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and leave the flags describing TMP2. */
4007 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4008 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4009 }
4010
/* Emits a fast-call subroutine (return address in RETURN_ADDR) that
tests the character in TMP1 against the horizontal white space set:
0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f
and 0x3000 in the 16/32-bit libraries and in the 8-bit library when
common->utf is set.

The result is accumulated in TMP2 and the last instruction sets the
flags from it (SLJIT_SET_E). In the wide case TMP1 is modified too
(0x2000 is subtracted). */
4011 static void check_hspace(compiler_common *common)
4012 {
4013 /* Check whether TMP1 contains a horizontal white space character. TMP2 destroyed. */
4014 DEFINE_COMPILER;
4015
4016 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4017
/* Each step compares TMP1 with one value; the following OP_FLAGS folds
the outcome of that comparison into TMP2. */
4018 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
4019 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4020 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
4021 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4022 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
4023 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4024 #ifdef COMPILE_PCRE8
4025 if (common->utf)
4026 {
4027 #endif
4028 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4029 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
4030 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4031 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
4032 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Rebase to 0x2000: the range 0x2000..0x200a becomes one unsigned check
and the remaining constants are written relative to 0x2000. */
4033 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
4034 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
4035 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4036 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
4037 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4038 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
4039 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4040 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
4041 #ifdef COMPILE_PCRE8
4042 }
4043 #endif
4044 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and leave the flags describing TMP2. */
4045 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4046
4047 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4048 }
4049
/* Emits a fast-call subroutine (return address in RETURN_ADDR) that
tests the character in TMP1 against the vertical white space set:
0x0a..0x0d and 0x85, plus 0x2028 and 0x2029 in the 16/32-bit libraries
and in the 8-bit library when common->utf is set.

The result is accumulated in TMP2 and the last instruction sets the
flags from it (SLJIT_SET_E). TMP1 is modified as well (0x0a is
subtracted and, in the wide case, bit 0 is set). */
4050 static void check_vspace(compiler_common *common)
4051 {
4052 /* Check whether TMP1 contains a vertical white space character. TMP2 destroyed. */
4053 DEFINE_COMPILER;
4054
4055 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4056
/* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range check. */
4057 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
4058 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
4059 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4060 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
4061 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4062 #ifdef COMPILE_PCRE8
4063 if (common->utf)
4064 {
4065 #endif
/* Fold in the 0x85 result, then test 0x2028 and 0x2029 together:
setting bit 0 maps both rebased values onto the one compared here. */
4066 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4067 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
4068 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
4069 #ifdef COMPILE_PCRE8
4070 }
4071 #endif
4072 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Fold in the last comparison and leave the flags describing TMP2. */
4073 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4074
4075 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4076 }
4077
/* Inside the compare subroutines below the STR_END and STACK_TOP
registers are borrowed as character scratch registers; each routine
saves them on entry and restores them before returning. */
4078 #define CHAR1 STR_END
4079 #define CHAR2 STACK_TOP
4080
/* Emits a fast-call subroutine (return address in RETURN_ADDR) that
compares two character sequences for exact equality.

On entry TMP1 points to the first sequence, TMP2 holds the length (it is
counted down in steps of IN_UCHARS(1)), and STR_PTR has already been
advanced by that length: the routine first moves it back by TMP2.

The loop stops at the first differing character or when TMP2 reaches
zero. STR_END is kept in TMP3 and STACK_TOP in the LOCALS0 slot while
they serve as CHAR1 / CHAR2. */
4081 static void do_casefulcmp(compiler_common *common)
4082 {
4083 DEFINE_COMPILER;
4084 struct sljit_jump *jump;
4085 struct sljit_label *label;
4086
4087 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4088 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4089 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
4090 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers start one character early because the loads in the loop
address "pointer + 1 character" (MOVU_UCHAR is presumably a load that
also updates the base register - TODO confirm). */
4091 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4092 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4093
4094 label = LABEL();
4095 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4096 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4097 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4098 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4099 JUMPTO(SLJIT_C_NOT_ZERO, label);
4100
4101 JUMPHERE(jump);
/* Compensate for the initial one-character step back, then give the
borrowed registers their original contents. */
4102 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4103 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
4104 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4105 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4106 }
4107
/* do_caselesscmp() additionally borrows STACK_LIMIT to hold the address
of the case-folding table. */
4108 #define LCC_TABLE STACK_LIMIT
4109
/* Emits a fast-call subroutine (return address in RETURN_ADDR) that
compares two character sequences after mapping each character through
the common->lcc table.

On entry TMP1 points to the first sequence, TMP2 holds the length (it is
counted down in steps of IN_UCHARS(1)), and STR_PTR has already been
advanced by that length: the routine first moves it back by TMP2.

In the 16/32-bit libraries characters above 255 are compared unmapped.
The loop stops at the first difference or when TMP2 reaches zero.
STACK_LIMIT is kept in TMP3, STR_END in the LOCALS0 slot and STACK_TOP in
the LOCALS1 slot while they are borrowed. */
4110 static void do_caselesscmp(compiler_common *common)
4111 {
4112 DEFINE_COMPILER;
4113 struct sljit_jump *jump;
4114 struct sljit_label *label;
4115
4116 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4117 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4118
4119 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
4120 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
4121 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
4122 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers start one character early because the loads in the loop
address "pointer + 1 character" (MOVU_UCHAR is presumably a load that
also updates the base register - TODO confirm). */
4123 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
4124 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4125
4126 label = LABEL();
4127 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
4128 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Map each character through the table; wide characters above 255 skip
the lookup. */
4129 #ifndef COMPILE_PCRE8
4130 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
4131 #endif
4132 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
4133 #ifndef COMPILE_PCRE8
4134 JUMPHERE(jump);
4135 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
4136 #endif
4137 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
4138 #ifndef COMPILE_PCRE8
4139 JUMPHERE(jump);
4140 #endif
4141 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
4142 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4143 JUMPTO(SLJIT_C_NOT_ZERO, label);
4144
4145 JUMPHERE(jump);
/* Compensate for the initial one-character step back, then give the
borrowed registers their original contents. */
4146 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4147 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
4148 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4149 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4150 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
4151 }
4152
/* The register aliases are local to the two compare routines above. */
4153 #undef LCC_TABLE
4154 #undef CHAR1
4155 #undef CHAR2
4156
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF strings using the Unicode property tables.
This is an ordinary C function rather than generated code.

Arguments:
  src1   start of the first string
  args   supplies the second string: it starts at args->uchar_ptr and must
         not be read at or beyond args->end
  end1   end of the first string

Returns:
  (pcre_uchar*)1  the second string ran out before the first was consumed
  NULL            a character pair differs even after case folding
  otherwise       the position in the second string after the compared text
*/
static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
pcre_uint32 c1, c2;
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *record;
const pcre_uint32 *set;

while (src1 < end1)
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;
  GETCHARINC(c1, src1);
  GETCHARINC(c2, src2);
  record = GET_UCD(c2);

  /* Same character, or the simple other case of c2. */
  if (c1 == c2 || c1 == c2 + record->other_case)
    continue;

  /* Otherwise c1 has to be a member of the caseless set of c2. The early
  exit assumes that the set is stored in ascending order and ends with a
  value which is greater than any character - TODO confirm against the
  table generator. */
  for (set = PRIV(ucd_caseless_sets) + record->caseset; c1 != *set; set++)
    if (c1 < *set) return NULL;
  }
return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
4189
4190 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
4191 compare_context* context, jump_list **backtracks)
4192 {
4193 DEFINE_COMPILER;
4194 unsigned int othercasebit = 0;
4195 pcre_uchar *othercasechar = NULL;
4196 #ifdef SUPPORT_UTF
4197 int utflength;
4198 #endif
4199
4200 if (caseless && char_has_othercase(common, cc))
4201 {
4202 othercasebit = char_get_othercase_bit(common, cc);
4203 SLJIT_ASSERT(othercasebit);
4204 /* Extracting bit difference info. */
4205 #if defined COMPILE_PCRE8
4206 othercasechar = cc + (othercasebit >> 8);
4207 othercasebit &= 0xff;
4208 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4209 /* Note that this code only handles characters in the BMP. If there
4210 ever are characters outside the BMP whose othercase differs in only one
4211 bit from itself (there currently are none), this code will need to be
4212 revised for COMPILE_PCRE32. */
4213 othercasechar = cc + (othercasebit >> 9);
4214 if ((othercasebit & 0x100) != 0)
4215 othercasebit = (othercasebit & 0xff) << 8;
4216 else
4217 othercasebit &= 0xff;
4218 #endif /* COMPILE_PCRE[8|16|32] */
4219 }
4220
4221 if (context->sourcereg == -1)
4222 {
4223 #if defined COMPILE_PCRE8
4224 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4225 if (context->length >= 4)
4226 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4227 else if (context->length >= 2)
4228 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4229 else
4230 #endif
4231 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4232 #elif defined COMPILE_PCRE16
4233 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4234 if (context->length >= 4)
4235 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4236 else
4237 #endif
4238 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4239 #elif defined COMPILE_PCRE32
4240 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
4241 #endif /* COMPILE_PCRE[8|16|32] */
4242 context->sourcereg = TMP2;
4243 }
4244
4245 #ifdef SUPPORT_UTF
4246 utflength = 1;
4247 if (common->utf && HAS_EXTRALEN(*cc))
4248 utflength += GET_EXTRALEN(*cc);
4249
4250 do
4251 {
4252 #endif
4253
4254 context->length -= IN_UCHARS(1);
4255 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
4256
4257 /* Unaligned read is supported. */
4258 if (othercasebit != 0 && othercasechar == cc)
4259 {
4260 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
4261 context->oc.asuchars[context->ucharptr] = othercasebit;
4262 }
4263 else
4264 {
4265 context->c.asuchars[context->ucharptr] = *cc;
4266 context->oc.asuchars[context->ucharptr] = 0;
4267 }
4268 context->ucharptr++;
4269
4270 #if defined COMPILE_PCRE8
4271 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
4272 #else
4273 if (context->ucharptr >= 2 || context->length == 0)
4274 #endif
4275 {
4276 if (context->length >= 4)
4277 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4278 else if (context->length >= 2)
4279 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4280 #if defined COMPILE_PCRE8
4281 else if (context->length >= 1)
4282 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4283 #endif /* COMPILE_PCRE8 */
4284 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4285
4286 switch(context->ucharptr)
4287 {
4288 case 4 / sizeof(pcre_uchar):
4289 if (context->oc.asint != 0)
4290 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4291 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4292 break;
4293
4294 case 2 / sizeof(pcre_uchar):
4295 if (context->oc.asushort != 0)
4296 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4297 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4298 break;
4299
4300 #ifdef COMPILE_PCRE8
4301 case 1:
4302 if (context->oc.asbyte != 0)
4303 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4304 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4305 break;
4306 #endif
4307
4308 default:
4309 SLJIT_ASSERT_STOP();
4310 break;
4311 }
4312 context->ucharptr = 0;
4313 }
4314
4315 #else
4316
4317 /* Unaligned read is unsupported or in 32 bit mode. */
4318 if (context->length >= 1)
4319 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4320
4321 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4322
4323 if (othercasebit != 0 && othercasechar == cc)
4324 {
4325 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4326 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4327 }
4328 else
4329 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4330
4331 #endif
4332
4333 cc++;
4334 #ifdef SUPPORT_UTF
4335 utflength--;
4336 }
4337 while (utflength > 0);
4338 #endif
4339
4340 return cc;
4341 }
4342
4343 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4344
/* The generated code keeps the character (TMP1) and the character type
(typereg) registers biased by a compile time constant, so a range check can
be done with a single unsigned comparison. These macros emit the add / sub
which moves the register to a new bias, and record the new bias in the
charoffset / typeoffset local variable of the enclosing function.

Both macros are wrapped in do { } while (0), so they expand to exactly one
statement and can be used safely as the body of an unbraced if / else. The
value argument is evaluated more than once, so it must not have side
effects. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
4364
4365 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4366 {
4367 DEFINE_COMPILER;
4368 jump_list *found = NULL;
4369 jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
4370 pcre_int32 c, charoffset;
4371 struct sljit_jump *jump = NULL;
4372 pcre_uchar *ccbegin;
4373 int compares, invertcmp, numberofcmps;
4374
4375 #ifdef SUPPORT_UCP
4376 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4377 BOOL charsaved = FALSE;
4378 int typereg = TMP1, scriptreg = TMP1;
4379 const pcre_uint32 *other_cases;
4380 pcre_int32 typeoffset;
4381 #endif
4382
4383 /* Although SUPPORT_UTF must be defined, we are
4384 not necessary in utf mode even in 8 bit mode. */
4385 detect_partial_match(common, backtracks);
4386 read_char(common);
4387
4388 cc++;
4389 if ((cc[-1] & XCL_HASPROP) == 0)
4390 {
4391 if ((cc[-1] & XCL_MAP) != 0)
4392 {
4393 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4394 #ifdef SUPPORT_UCP
4395 charsaved = TRUE;
4396 #endif
4397 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, FALSE, backtracks))
4398 {
4399 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4400
4401 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4402 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4403 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4404 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4405 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4406 add_jump(compiler, &found, JUMP(SLJIT_C_NOT_ZERO));
4407 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4408
4409 JUMPHERE(jump);
4410 }
4411 else
4412 add_jump(compiler, &found, CMP(SLJIT_C_LESS_EQUAL, TMP3, 0, SLJIT_IMM, 0xff));
4413
4414 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4415 cc += 32 / sizeof(pcre_uchar);
4416 }
4417 else
4418 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff));
4419 }
4420 else if ((cc[-1] & XCL_MAP) != 0)
4421 {
4422 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4423 #ifdef SUPPORT_UCP
4424 charsaved = TRUE;
4425 #endif
4426 if (!check_class_ranges(common, (const pcre_uint8 *)cc, FALSE, TRUE, list))
4427 {
4428 #ifdef COMPILE_PCRE8
4429 SLJIT_ASSERT(common->utf);
4430 #endif
4431 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4432
4433 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4434 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4435 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4436 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4437 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4438 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4439
4440 JUMPHERE(jump);
4441 }
4442
4443 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4444 cc += 32 / sizeof(pcre_uchar);
4445 }
4446
4447 /* Scanning the necessary info. */
4448 ccbegin = cc;
4449 compares = 0;
4450 while (*cc != XCL_END)
4451 {
4452 compares++;
4453 if (*cc == XCL_SINGLE)
4454 {
4455 cc += 2;
4456 #ifdef SUPPORT_UTF
4457 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4458 #endif
4459 #ifdef SUPPORT_UCP
4460 needschar = TRUE;
4461 #endif
4462 }
4463 else if (*cc == XCL_RANGE)
4464 {
4465 cc += 2;
4466 #ifdef SUPPORT_UTF
4467 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4468 #endif
4469 cc++;
4470 #ifdef SUPPORT_UTF
4471 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4472 #endif
4473 #ifdef SUPPORT_UCP
4474 needschar = TRUE;
4475 #endif
4476 }
4477 #ifdef SUPPORT_UCP
4478 else
4479 {
4480 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4481 cc++;
4482 switch(*cc)
4483 {
4484 case PT_ANY:
4485 break;
4486
4487 case PT_LAMP:
4488 case PT_GC:
4489 case PT_PC:
4490 case PT_ALNUM:
4491 needstype = TRUE;
4492 break;
4493
4494 case PT_SC:
4495 needsscript = TRUE;
4496 break;
4497
4498 case PT_SPACE:
4499 case PT_PXSPACE:
4500 case PT_WORD:
4501 case PT_PXGRAPH:
4502 case PT_PXPRINT:
4503 case PT_PXPUNCT:
4504 needstype = TRUE;
4505 needschar = TRUE;
4506 break;
4507
4508 case PT_CLIST:
4509 case PT_UCNC:
4510 needschar = TRUE;
4511 break;
4512
4513 default:
4514 SLJIT_ASSERT_STOP();
4515 break;
4516 }
4517 cc += 2;
4518 }
4519 #endif
4520 }
4521
4522 #ifdef SUPPORT_UCP
4523 /* Simple register allocation. TMP1 is preferred if possible. */
4524 if (needstype || needsscript)
4525 {
4526 if (needschar && !charsaved)
4527 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4528 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4529 if (needschar)
4530 {
4531 if (needstype)
4532 {
4533 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4534 typereg = RETURN_ADDR;
4535 }
4536
4537 if (needsscript)
4538 scriptreg = TMP3;
4539 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4540 }
4541 else if (needstype && needsscript)
4542 scriptreg = TMP3;
4543 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4544
4545 if (needsscript)
4546 {
4547 if (scriptreg == TMP1)
4548 {
4549 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4550 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4551 }
4552 else
4553 {
4554 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4555 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4556 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4557 }
4558 }
4559 }
4560 #endif
4561
4562 /* Generating code. */
4563 cc = ccbegin;
4564 charoffset = 0;
4565 numberofcmps = 0;
4566 #ifdef SUPPORT_UCP
4567 typeoffset = 0;
4568 #endif
4569
4570 while (*cc != XCL_END)
4571 {
4572 compares--;
4573 invertcmp = (compares == 0 && list != backtracks);
4574 jump = NULL;
4575
4576 if (*cc == XCL_SINGLE)
4577 {
4578 cc ++;
4579 #ifdef SUPPORT_UTF
4580 if (common->utf)
4581 {
4582 GETCHARINC(c, cc);
4583 }
4584 else
4585 #endif
4586 c = *cc++;
4587
4588 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4589 {
4590 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4591 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4592 numberofcmps++;
4593 }
4594 else if (numberofcmps > 0)
4595 {
4596 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4597 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4598 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4599 numberofcmps = 0;
4600 }
4601 else
4602 {
4603 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4604 numberofcmps = 0;
4605 }
4606 }
4607 else if (*cc == XCL_RANGE)
4608 {
4609 cc ++;
4610 #ifdef SUPPORT_UTF
4611 if (common->utf)
4612 {
4613 GETCHARINC(c, cc);
4614 }
4615 else
4616 #endif
4617 c = *cc++;
4618 SET_CHAR_OFFSET(c);
4619 #ifdef SUPPORT_UTF
4620 if (common->utf)
4621 {
4622 GETCHARINC(c, cc);
4623 }
4624 else
4625 #endif
4626 c = *cc++;
4627 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4628 {
4629 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4630 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4631 numberofcmps++;
4632 }
4633 else if (numberofcmps > 0)
4634 {
4635 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4636 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4637 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4638 numberofcmps = 0;
4639 }
4640 else
4641 {
4642 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4643 numberofcmps = 0;
4644 }
4645 }
4646 #ifdef SUPPORT_UCP
4647 else
4648 {
4649 if (*cc == XCL_NOTPROP)
4650 invertcmp ^= 0x1;
4651 cc++;
4652 switch(*cc)
4653 {
4654 case PT_ANY:
4655 if (list != backtracks)
4656 {
4657 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4658 continue;
4659 }
4660 else if (cc[-1] == XCL_NOTPROP)
4661 continue;
4662 jump = JUMP(SLJIT_JUMP);
4663 break;
4664
4665 case PT_LAMP:
4666 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4667 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4668 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4669 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4670 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4671 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4672 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4673 break;
4674
4675 case PT_GC:
4676 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4677 SET_TYPE_OFFSET(c);
4678 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4679 break;
4680
4681 case PT_PC:
4682 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4683 break;
4684
4685 case PT_SC:
4686 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4687 break;
4688
4689 case PT_SPACE:
4690 case PT_PXSPACE:
4691 SET_CHAR_OFFSET(9);
4692 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
4693 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4694
4695 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
4696 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4697
4698 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
4699 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4700
4701 SET_TYPE_OFFSET(ucp_Zl);
4702 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4703 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4704 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4705 break;
4706
4707 case PT_WORD:
4708 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4709 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4710 /* Fall through. */
4711
4712 case PT_ALNUM:
4713 SET_TYPE_OFFSET(ucp_Ll);
4714 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4715 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4716 SET_TYPE_OFFSET(ucp_Nd);
4717 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4718 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4719 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4720 break;
4721
4722 case PT_CLIST:
4723 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4724
4725 /* At least three characters are required.
4726 Otherwise this case would be handled by the normal code path. */
4727 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4728 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4729
4730 /* Optimizing character pairs, if their difference is power of 2. */
4731 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4732 {
4733 if (charoffset == 0)
4734 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4735 else
4736 {
4737 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4738 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4739 }
4740 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4741 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4742 other_cases += 2;
4743 }
4744 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4745 {
4746 if (charoffset == 0)
4747 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4748 else
4749 {
4750 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4751 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4752 }
4753 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4754 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4755
4756 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4757 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4758
4759 other_cases += 3;
4760 }
4761 else
4762 {
4763 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4764 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4765 }
4766
4767 while (*other_cases != NOTACHAR)
4768 {
4769 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4770 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4771 }
4772 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4773 break;
4774
4775 case PT_UCNC:
4776 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4777 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4778 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4779 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4780 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4781 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4782
4783 SET_CHAR_OFFSET(0xa0);
4784 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4785 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4786 SET_CHAR_OFFSET(0);
4787 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4788 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4789 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4790 break;
4791
4792 case PT_PXGRAPH:
4793 /* C and Z groups are the farthest two groups. */
4794 SET_TYPE_OFFSET(ucp_Ll);
4795 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4796 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4797
4798 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4799
4800 /* In case of ucp_Cf, we overwrite the result. */
4801 SET_CHAR_OFFSET(0x2066);
4802 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4803 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4804
4805 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4806 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4807
4808 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
4809 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4810
4811 JUMPHERE(jump);
4812 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4813 break;
4814
4815 case PT_PXPRINT:
4816 /* C and Z groups are the farthest two groups. */
4817 SET_TYPE_OFFSET(ucp_Ll);
4818 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
4819 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER);
4820
4821 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
4822 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4823
4824 jump = CMP(SLJIT_C_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
4825
4826 /* In case of ucp_Cf, we overwrite the result. */
4827 SET_CHAR_OFFSET(0x2066);
4828 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
4829 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4830
4831 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
4832 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4833
4834 JUMPHERE(jump);
4835 jump = CMP(SLJIT_C_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
4836 break;
4837
4838 case PT_PXPUNCT:
4839 SET_TYPE_OFFSET(ucp_Sc);
4840 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
4841 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4842
4843 SET_CHAR_OFFSET(0);
4844 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xff);
4845 OP_FLAGS(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4846
4847 SET_TYPE_OFFSET(ucp_Pc);
4848 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
4849 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4850 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4851 break;
4852 }
4853 cc += 2;
4854 }
4855 #endif
4856
4857 if (jump != NULL)
4858 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4859 }
4860
4861 if (found != NULL)
4862 set_jumps(found, LABEL());
4863 }
4864
4865 #undef SET_TYPE_OFFSET
4866 #undef SET_CHAR_OFFSET
4867
4868 #endif
4869
4870 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4871 {
4872 DEFINE_COMPILER;
4873 int length;
4874 unsigned int c, oc, bit;
4875 compare_context context;
4876 struct sljit_jump *jump[4];
4877 jump_list *end_list;
4878 #ifdef SUPPORT_UTF
4879 struct sljit_label *label;
4880 #ifdef SUPPORT_UCP
4881 pcre_uchar propdata[5];
4882 #endif
4883 #endif /* SUPPORT_UTF */
4884
4885 switch(type)
4886 {
4887 case OP_SOD:
4888 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4889 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4890 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4891 return cc;
4892
4893 case OP_SOM:
4894 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4895 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4896 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4897 return cc;
4898
4899 case OP_NOT_WORD_BOUNDARY:
4900 case OP_WORD_BOUNDARY:
4901 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4902 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4903 return cc;
4904
4905 case OP_NOT_DIGIT:
4906 case OP_DIGIT:
4907 /* Digits are usually 0-9, so it is worth to optimize them. */
4908 detect_partial_match(common, backtracks);
4909 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4910 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_digit, FALSE))
4911 read_char7_type(common, type == OP_NOT_DIGIT);
4912 else
4913 #endif
4914 read_char8_type(common, type == OP_NOT_DIGIT);
4915 /* Flip the starting bit in the negative case. */
4916 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4917 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4918 return cc;
4919
4920 case OP_NOT_WHITESPACE:
4921 case OP_WHITESPACE:
4922 detect_partial_match(common, backtracks);
4923 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4924 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_space, FALSE))
4925 read_char7_type(common, type == OP_NOT_WHITESPACE);
4926 else
4927 #endif
4928 read_char8_type(common, type == OP_NOT_WHITESPACE);
4929 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4930 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4931 return cc;
4932
4933 case OP_NOT_WORDCHAR:
4934 case OP_WORDCHAR:
4935 detect_partial_match(common, backtracks);
4936 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
4937 if (common->utf && is_char7_bitset((const pcre_uint8*)common->ctypes - cbit_length + cbit_word, FALSE))
4938 read_char7_type(common, type == OP_NOT_WORDCHAR);
4939 else
4940 #endif
4941 read_char8_type(common, type == OP_NOT_WORDCHAR);
4942 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4943 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4944 return cc;
4945
4946 case OP_ANY:
4947 detect_partial_match(common, backtracks);
4948 read_char(common);
4949 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4950 {
4951 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4952 end_list = NULL;
4953 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4954 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4955 else
4956 check_str_end(common, &end_list);
4957
4958 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4959 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4960 set_jumps(end_list, LABEL());
4961 JUMPHERE(jump[0]);
4962 }
4963 else
4964 check_newlinechar(common, common->nltype, backtracks, TRUE);
4965 return cc;
4966
4967 case OP_ALLANY:
4968 detect_partial_match(common, backtracks);
4969 #ifdef SUPPORT_UTF
4970 if (common->utf)
4971 {
4972 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4973 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4974 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4975 #if defined COMPILE_PCRE8
4976 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4977 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4978 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4979 #elif defined COMPILE_PCRE16
4980 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4981 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4982 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4983 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4984 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4985 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4986 #endif
4987 JUMPHERE(jump[0]);
4988 #endif /* COMPILE_PCRE[8|16] */
4989 return cc;
4990 }
4991 #endif
4992 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4993 return cc;
4994
4995 case OP_ANYBYTE:
4996 detect_partial_match(common, backtracks);
4997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4998 return cc;
4999
5000 #ifdef SUPPORT_UTF
5001 #ifdef SUPPORT_UCP
5002 case OP_NOTPROP:
5003 case OP_PROP:
5004 propdata[0] = XCL_HASPROP;
5005 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
5006 propdata[2] = cc[0];
5007 propdata[3] = cc[1];
5008 propdata[4] = XCL_END;
5009 compile_xclass_matchingpath(common, propdata, backtracks);
5010 return cc + 2;
5011 #endif
5012 #endif
5013
5014 case OP_ANYNL:
5015 detect_partial_match(common, backtracks);
5016 read_char(common);
5017 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5018 /* We don't need to handle soft partial matching case. */
5019 end_list = NULL;
5020 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
5021 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5022 else
5023 check_str_end(common, &end_list);
5024 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5025 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5026 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5027 jump[2] = JUMP(SLJIT_JUMP);
5028 JUMPHERE(jump[0]);
5029 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
5030 set_jumps(end_list, LABEL());
5031 JUMPHERE(jump[1]);
5032 JUMPHERE(jump[2]);
5033 return cc;
5034
5035 case OP_NOT_HSPACE:
5036 case OP_HSPACE:
5037 detect_partial_match(common, backtracks);
5038 read_char(common);
5039 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
5040 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5041 return cc;
5042
5043 case OP_NOT_VSPACE:
5044 case OP_VSPACE:
5045 detect_partial_match(common, backtracks);
5046 read_char(common);
5047 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
5048 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
5049 return cc;
5050
5051 #ifdef SUPPORT_UCP
5052 case OP_EXTUNI:
5053 detect_partial_match(common, backtracks);
5054 read_char(common);
5055 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5056 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5057 /* Optimize register allocation: use a real register. */
5058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5059 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5060
5061 label = LABEL();
5062 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5063 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5064 read_char(common);
5065 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
5066 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
5067 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
5068
5069 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
5070 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
5071 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
5072 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5073 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5074 JUMPTO(SLJIT_C_NOT_ZERO, label);
5075
5076 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5077 JUMPHERE(jump[0]);
5078 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5079
5080 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
5081 {
5082 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5083 /* Since we successfully read a char above, partial matching must occur. */
5084 check_partial(common, TRUE);
5085 JUMPHERE(jump[0]);
5086 }
5087 return cc;
5088 #endif
5089
5090 case OP_EODN:
5091 /* Requires rather complex checks. */
5092 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5093 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5094 {
5095 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5096 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5097 if (common->mode == JIT_COMPILE)
5098 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5099 else
5100 {
5101 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
5102 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5103 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
5104 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
5105 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
5106 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
5107 check_partial(common, TRUE);
5108 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5109 JUMPHERE(jump[1]);
5110 }
5111 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5112 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5113 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5114 }
5115 else if (common->nltype == NLTYPE_FIXED)
5116 {
5117 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5118 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5119 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
5120 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
5121 }
5122 else
5123 {
5124 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5125 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
5126 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5127 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
5128 jump[2] = JUMP(SLJIT_C_GREATER);
5129 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
5130 /* Equal. */
5131 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5132 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
5133 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5134
5135 JUMPHERE(jump[1]);
5136 if (common->nltype == NLTYPE_ANYCRLF)
5137 {
5138 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5139 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
5140 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
5141 }
5142 else
5143 {
5144 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
5145 read_char(common);
5146 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
5147 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
5148 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5149 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
5150 }
5151 JUMPHERE(jump[2]);
5152 JUMPHERE(jump[3]);
5153 }
5154 JUMPHERE(jump[0]);
5155 check_partial(common, FALSE);
5156 return cc;
5157
5158 case OP_EOD:
5159 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5160 check_partial(common, FALSE);
5161 return cc;
5162
5163 case OP_CIRC:
5164 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5165 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5166 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
5167 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5168 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5169 return cc;
5170
5171 case OP_CIRCM:
5172 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5173 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5174 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
5175 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
5176 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5177 jump[0] = JUMP(SLJIT_JUMP);
5178 JUMPHERE(jump[1]);
5179
5180 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
5181 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5182 {
5183 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5184 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
5185 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5186 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5187 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5188 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5189 }
5190 else
5191 {
5192 skip_char_back(common);
5193 read_char(common);
5194 check_newlinechar(common, common->nltype, backtracks, FALSE);
5195 }
5196 JUMPHERE(jump[0]);
5197 return cc;
5198
5199 case OP_DOLL:
5200 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5201 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5202 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5203
5204 if (!common->endonly)
5205 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
5206 else
5207 {
5208 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
5209 check_partial(common, FALSE);
5210 }
5211 return cc;
5212
5213 case OP_DOLLM:
5214 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
5215 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5216 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
5217 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5218 check_partial(common, FALSE);
5219 jump[0] = JUMP(SLJIT_JUMP);
5220 JUMPHERE(jump[1]);
5221
5222 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5223 {
5224 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
5225 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5226 if (common->mode == JIT_COMPILE)
5227 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
5228 else
5229 {
5230 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
5231 /* STR_PTR = STR_END - IN_UCHARS(1) */
5232 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5233 check_partial(common, TRUE);
5234 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5235 JUMPHERE(jump[1]);
5236 }
5237
5238 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
5239 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
5240 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
5241 }
5242 else
5243 {
5244 peek_char(common);
5245 check_newlinechar(common, common->nltype, backtracks, FALSE);
5246 }
5247 JUMPHERE(jump[0]);
5248 return cc;
5249
5250 case OP_CHAR:
5251 case OP_CHARI:
5252 length = 1;
5253 #ifdef SUPPORT_UTF
5254 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
5255 #endif
5256 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
5257 {
5258 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5259 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5260
5261 context.length = IN_UCHARS(length);
5262 context.sourcereg = -1;
5263 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5264 context.ucharptr = 0;
5265 #endif
5266 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
5267 }
5268 detect_partial_match(common, backtracks);
5269 read_char(common);
5270 #ifdef SUPPORT_UTF
5271 if (common->utf)
5272 {
5273 GETCHAR(c, cc);
5274 }
5275 else
5276 #endif
5277 c = *cc;
5278 if (type == OP_CHAR || !char_has_othercase(common, cc))
5279 {
5280 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
5281 return cc + length;
5282 }
5283 oc = char_othercase(common, c);
5284 bit = c ^ oc;
5285 if (is_powerof2(bit))
5286 {
5287 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5288 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5289 return cc + length;
5290 }
5291 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
5292 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5293 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
5294 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5295 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5296 return cc + length;
5297
5298 case OP_NOT:
5299 case OP_NOTI:
5300 detect_partial_match(common, backtracks);
5301 length = 1;
5302 #ifdef SUPPORT_UTF
5303 if (common->utf)
5304 {
5305 #ifdef COMPILE_PCRE8
5306 c = *cc;
5307 if (c < 128)
5308 {
5309 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5310 if (type == OP_NOT || !char_has_othercase(common, cc))
5311 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5312 else
5313 {
5314 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
5315 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
5316 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
5317 }
5318 /* Skip the variable-length character. */
5319 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5320 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5321 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5322 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5323 JUMPHERE(jump[0]);
5324 return cc + 1;
5325 }
5326 else
5327 #endif /* COMPILE_PCRE8 */
5328 {
5329 GETCHARLEN(c, cc, length);
5330 }
5331 }
5332 else
5333 #endif /* SUPPORT_UTF */
5334 c = *cc;
5335
5336 if (type == OP_NOT || !char_has_othercase(common, cc))
5337 {
5338 read_char_max(common, c, TRUE);
5339 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5340 }
5341 else
5342 {
5343 oc = char_othercase(common, c);
5344 read_char_max(common, c > oc ? c : oc, TRUE);
5345 bit = c ^ oc;
5346 if (is_powerof2(bit))
5347 {
5348 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
5349 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
5350 }
5351 else
5352 {
5353 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
5354 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
5355 }
5356 }
5357 return cc + length;
5358
5359 case OP_CLASS:
5360 case OP_NCLASS:
5361 detect_partial_match(common, backtracks);
5362
5363 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5364 bit = (common->utf && is_char7_bitset((const pcre_uint8 *)cc, type == OP_NCLASS)) ? 127 : 255;
5365 read_char_max(common, bit, type == OP_NCLASS);
5366 #else
5367 read_char_max(common, 255, type == OP_NCLASS);
5368 #endif
5369
5370 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, FALSE, backtracks))
5371 return cc + 32 / sizeof(pcre_uchar);
5372
5373 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
5374 jump[0] = NULL;
5375 if (common->utf)
5376 {
5377 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, bit);
5378 if (type == OP_CLASS)
5379 {
5380 add_jump(compiler, backtracks, jump[0]);
5381 jump[0] = NULL;
5382 }
5383 }
5384 #elif !defined COMPILE_PCRE8
5385 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
5386 if (type == OP_CLASS)
5387 {
5388 add_jump(compiler, backtracks, jump[0]);
5389 jump[0] = NULL;
5390 }
5391 #endif /* SUPPORT_UTF && COMPILE_PCRE8 */
5392
5393 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
5394 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
5395 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
5396 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
5397 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
5398 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
5399
5400 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5401 if (jump[0] != NULL)
5402 JUMPHERE(jump[0]);
5403 #endif
5404
5405 return cc + 32 / sizeof(pcre_uchar);
5406
5407 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5408 case OP_XCLASS:
5409 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
5410 return cc + GET(cc, 0) - 1;
5411 #endif
5412
5413 case OP_REVERSE:
5414 length = GET(cc, 0);
5415 if (length == 0)
5416 return cc + LINK_SIZE;
5417 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5418 #ifdef SUPPORT_UTF
5419 if (common->utf)
5420 {
5421 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5422 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
5423 label = LABEL();
5424 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
5425 skip_char_back(common);
5426 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5427 JUMPTO(SLJIT_C_NOT_ZERO, label);
5428 }
5429 else
5430 #endif
5431 {
5432 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
5433 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
5434 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
5435 }
5436 check_start_used_ptr(common);
5437 return cc + LINK_SIZE;
5438 }
5439 SLJIT_ASSERT_STOP();
5440 return cc;
5441 }
5442
5443 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5444 {
5445 /* This function consumes at least one input character. */
5446 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5447 DEFINE_COMPILER;
5448 pcre_uchar *ccbegin = cc;
5449 compare_context context;
5450 int size;
5451
5452 context.length = 0;
5453 do
5454 {
5455 if (cc >= ccend)
5456 break;
5457
5458 if (*cc == OP_CHAR)
5459 {
5460 size = 1;
5461 #ifdef SUPPORT_UTF
5462 if (common->utf && HAS_EXTRALEN(cc[1]))
5463 size += GET_EXTRALEN(cc[1]);
5464 #endif
5465 }
5466 else if (*cc == OP_CHARI)
5467 {
5468 size = 1;
5469 #ifdef SUPPORT_UTF
5470 if (common->utf)
5471 {
5472 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5473 size = 0;
5474 else if (HAS_EXTRALEN(cc[1]))
5475 size += GET_EXTRALEN(cc[1]);
5476 }
5477 else
5478 #endif
5479 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5480 size = 0;
5481 }
5482 else
5483 size = 0;
5484
5485 cc += 1 + size;
5486 context.length += IN_UCHARS(size);
5487 }
5488 while (size > 0 && context.length <= 128);
5489
5490 cc = ccbegin;
5491 if (context.length > 0)
5492 {
5493 /* We have a fixed-length byte sequence. */
5494 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5495 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5496
5497 context.sourcereg = -1;
5498 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5499 context.ucharptr = 0;
5500 #endif
5501 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5502 return cc;
5503 }
5504
5505 /* A non-fixed length character will be checked if length == 0. */
5506 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5507 }
5508
5509 /* Forward definitions. */
5510 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5511 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5512
/* Allocates a zero-filled backtrack record of `size` bytes from the
compiler's memory, stores it in the local variable `backtrack`, records
`ccstart` in it and pushes it on the parent->top list. If the compiler is in
an error state after the allocation, the enclosing function returns `error`.
Expects `compiler`, `backtrack` and `parent` to be in scope. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
5525
/* Variant of the backtrack push for functions returning void: allocates a
zero-filled backtrack record of `size` bytes into the local variable
`backtrack`, records `ccstart` in it and pushes it on the parent->top list.
If the compiler is in an error state after the allocation, the enclosing
function simply returns. Expects `compiler`, `backtrack` and `parent` to be
in scope. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
5538
/* Views the local variable `backtrack` as a pointer to the given, more
specific backtrack structure type. */
#define BACKTRACK_AS(type) ((type *) backtrack)
5540
5541 static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5542 {
5543 /* The OVECTOR offset goes to TMP2. */
5544 DEFINE_COMPILER;
5545 int count = GET2(cc, 1 + IMM2_SIZE);
5546 pcre_uchar *slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
5547 unsigned int offset;
5548 jump_list *found = NULL;
5549
5550 SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
5551
5552 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5553
5554 count--;
5555 while (count-- > 0)
5556 {
5557 offset = GET2(slot, 0) << 1;
5558 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5559 add_jump(compiler, &found, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5560 slot += common->name_entry_size;
5561 }
5562
5563 offset = GET2(slot, 0) << 1;
5564 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
5565 if (backtracks != NULL && !common->jscript_compat)
5566 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0));
5567
5568 set_jumps(found, LABEL());
5569 }
5570
5571 static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5572 {
5573 DEFINE_COMPILER;
5574 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5575 int offset = 0;
5576 struct sljit_jump *jump = NULL;
5577 struct sljit_jump *partial;
5578 struct sljit_jump *nopartial;
5579
5580 if (ref)
5581 {
5582 offset = GET2(cc, 1) << 1;
5583 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5584 /* OVECTOR(1) contains the "string begin - 1" constant. */
5585 if (withchecks && !common->jscript_compat)
5586 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5587 }
5588 else
5589 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5590
5591 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5592 if (common->utf && *cc == OP_REFI)
5593 {
5594 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5595 if (ref)
5596 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5597 else
5598 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5599
5600 if (withchecks)
5601 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5602
5603 /* Needed to save important temporary registers. */
5604 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5605 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5606 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5607 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5608 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5609 if (common->mode == JIT_COMPILE)
5610 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5611 else
5612 {
5613 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5614 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5615 check_partial(common, FALSE);
5616 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5617 JUMPHERE(nopartial);
5618 }
5619 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5620 }
5621 else
5622 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5623 {
5624 if (ref)
5625 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5626 else
5627 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
5628
5629 if (withchecks)
5630 jump = JUMP(SLJIT_C_ZERO);
5631
5632 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5633 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5634 if (common->mode == JIT_COMPILE)
5635 add_jump(compiler, backtracks, partial);
5636
5637 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5638 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5639
5640 if (common->mode != JIT_COMPILE)
5641 {
5642 nopartial = JUMP(SLJIT_JUMP);
5643 JUMPHERE(partial);
5644 /* TMP2 -= STR_END - STR_PTR */
5645 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5646 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5647 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5648 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5649 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5650 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5651 JUMPHERE(partial);
5652 check_partial(common, FALSE);
5653 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5654 JUMPHERE(nopartial);
5655 }
5656 }
5657
5658 if (jump != NULL)
5659 {
5660 if (emptyfail)
5661 add_jump(compiler, backtracks, jump);
5662 else
5663 JUMPHERE(jump);
5664 }
5665 }
5666
5667 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5668 {
5669 DEFINE_COMPILER;
5670 BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
5671 backtrack_common *backtrack;
5672 pcre_uchar type;
5673 int offset = 0;
5674 struct sljit_label *label;
5675 struct sljit_jump *zerolength;
5676 struct sljit_jump *jump = NULL;
5677 pcre_uchar *ccbegin = cc;
5678 int min = 0, max = 0;
5679 BOOL minimize;
5680
5681 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5682
5683 if (ref)
5684 offset = GET2(cc, 1) << 1;
5685 else
5686 cc += IMM2_SIZE;
5687 type = cc[1 + IMM2_SIZE];
5688
5689 SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
5690 minimize = (type & 0x1) != 0;
5691 switch(type)
5692 {
5693 case OP_CRSTAR:
5694 case OP_CRMINSTAR:
5695 min = 0;
5696 max = 0;
5697 cc += 1 + IMM2_SIZE + 1;
5698 break;
5699 case OP_CRPLUS:
5700 case OP_CRMINPLUS:
5701 min = 1;
5702 max = 0;
5703 cc += 1 + IMM2_SIZE + 1;
5704 break;
5705 case OP_CRQUERY:
5706 case OP_CRMINQUERY:
5707 min = 0;
5708 max = 1;
5709 cc += 1 + IMM2_SIZE + 1;
5710 break;
5711 case OP_CRRANGE:
5712 case OP_CRMINRANGE:
5713 min = GET2(cc, 1 + IMM2_SIZE + 1);
5714 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5715 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5716 break;
5717 default:
5718 SLJIT_ASSERT_STOP();
5719 break;
5720 }
5721
5722 if (!minimize)
5723 {
5724 if (min == 0)
5725 {
5726 allocate_stack(common, 2);
5727 if (ref)
5728 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5730 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5731 /* Temporary release of STR_PTR. */
5732 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5733 /* Handles both invalid and empty cases. Since the minimum repeat,
5734 is zero the invalid case is basically the same as an empty case. */
5735 if (ref)
5736 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5737 else
5738 {
5739 compile_dnref_search(common, ccbegin, NULL);
5740 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5741 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5742 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5743 }
5744 /* Restore if not zero length. */
5745 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5746 }
5747 else
5748 {
5749 allocate_stack(common, 1);
5750 if (ref)
5751 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5752 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5753 if (ref)
5754 {
5755 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5756 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5757 }
5758 else
5759 {
5760 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5761 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5762 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, TMP2, 0);
5763 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5764 }
5765 }
5766
5767 if (min > 1 || max > 1)
5768 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5769
5770 label = LABEL();
5771 if (!ref)
5772 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5773 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5774
5775 if (min > 1 || max > 1)
5776 {
5777 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5778 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5779 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5780 if (min > 1)
5781 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5782 if (max > 1)
5783 {
5784 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5785 allocate_stack(common, 1);
5786 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5787 JUMPTO(SLJIT_JUMP, label);
5788 JUMPHERE(jump);
5789 }
5790 }
5791
5792 if (max == 0)
5793 {
5794 /* Includes min > 1 case as well. */
5795 allocate_stack(common, 1);
5796 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5797 JUMPTO(SLJIT_JUMP, label);
5798 }
5799
5800 JUMPHERE(zerolength);
5801 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5802
5803 count_match(common);
5804 return cc;
5805 }
5806
5807 allocate_stack(common, ref ? 2 : 3);
5808 if (ref)
5809 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5810 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5811 if (type != OP_CRMINSTAR)
5812 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5813
5814 if (min == 0)
5815 {
5816 /* Handles both invalid and empty cases. Since the minimum repeat,
5817 is zero the invalid case is basically the same as an empty case. */
5818 if (ref)
5819 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5820 else
5821 {
5822 compile_dnref_search(common, ccbegin, NULL);
5823 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5824 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5825 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5826 }
5827 /* Length is non-zero, we can match real repeats. */
5828 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5829 jump = JUMP(SLJIT_JUMP);
5830 }
5831 else
5832 {
5833 if (ref)
5834 {
5835 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5836 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5837 }
5838 else
5839 {
5840 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
5841 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
5842 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
5843 zerolength = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
5844 }
5845 }
5846
5847 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5848 if (max > 0)
5849 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5850
5851 if (!ref)
5852 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
5853 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5854 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5855
5856 if (min > 1)
5857 {
5858 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5859 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5860 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5861 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5862 }
5863 else if (max > 0)
5864 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5865
5866 if (jump != NULL)
5867 JUMPHERE(jump);
5868 JUMPHERE(zerolength);
5869
5870 count_match(common);
5871 return cc;
5872 }
5873
5874 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5875 {
5876 DEFINE_COMPILER;
5877 backtrack_common *backtrack;
5878 recurse_entry *entry = common->entries;
5879 recurse_entry *prev = NULL;
5880 sljit_sw start = GET(cc, 1);
5881 pcre_uchar *start_cc;
5882 BOOL needs_control_head;
5883
5884 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5885
5886 /* Inlining simple patterns. */
5887 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5888 {
5889 start_cc = common->start + start;
5890 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5891 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5892 return cc + 1 + LINK_SIZE;
5893 }
5894
5895 while (entry != NULL)
5896 {
5897 if (entry->start == start)
5898 break;
5899 prev = entry;
5900 entry = entry->next;
5901 }
5902
5903 if (entry == NULL)
5904 {
5905 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5906 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5907 return NULL;
5908 entry->next = NULL;
5909 entry->entry = NULL;
5910 entry->calls = NULL;
5911 entry->start = start;
5912
5913 if (prev != NULL)
5914 prev->next = entry;
5915 else
5916 common->entries = entry;
5917 }
5918
5919 if (common->has_set_som && common->mark_ptr != 0)
5920 {
5921 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5922 allocate_stack(common, 2);
5923 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5924 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5925 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5926 }
5927 else if (common->has_set_som || common->mark_ptr != 0)
5928 {
5929 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5930 allocate_stack(common, 1);
5931 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5932 }
5933
5934 if (entry->entry == NULL)
5935 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5936 else
5937 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5938 /* Leave if the match is failed. */
5939 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5940 return cc + 1 + LINK_SIZE;
5941 }
5942
5943 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5944 {
5945 const pcre_uchar *begin = arguments->begin;
5946 int *offset_vector = arguments->offsets;
5947 int offset_count = arguments->offset_count;
5948 int i;
5949
5950 if (PUBL(callout) == NULL)
5951 return 0;
5952
5953 callout_block->version = 2;
5954 callout_block->callout_data = arguments->callout_data;
5955
5956 /* Offsets in subject. */
5957 callout_block->subject_length = arguments->end - arguments->begin;
5958 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5959 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5960 #if defined COMPILE_PCRE8
5961 callout_block->subject = (PCRE_SPTR)begin;
5962 #elif defined COMPILE_PCRE16
5963 callout_block->subject = (PCRE_SPTR16)begin;
5964 #elif defined COMPILE_PCRE32
5965 callout_block->subject = (PCRE_SPTR32)begin;
5966 #endif
5967
5968 /* Convert and copy the JIT offset vector to the offset_vector array. */
5969 callout_block->capture_top = 0;
5970 callout_block->offset_vector = offset_vector;
5971 for (i = 2; i < offset_count; i += 2)
5972 {
5973 offset_vector[i] = jit_ovector[i] - begin;
5974 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5975 if (jit_ovector[i] >= begin)
5976 callout_block->capture_top = i;
5977 }
5978
5979 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5980 if (offset_count > 0)
5981 offset_vector[0] = -1;
5982 if (offset_count > 1)
5983 offset_vector[1] = -1;
5984 return (*PUBL(callout))(callout_block);
5985 }
5986
5987 /* Size of the callout block rounded up to a multiple of 8 bytes. */
5988 #define CALLOUT_ARG_SIZE \
5989 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5990
/* Offset of a callout block field, counted back from the base register by
CALLOUT_ARG_SIZE (used below with STACK_TOP as the base). */
5991 #define CALLOUT_ARG_OFFSET(arg) \
5992 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5993
/* Emits the matching path of a callout opcode at cc: a callout block is
built on the stack, do_callout() is called with it, and the result decides
how matching continues. A positive result jumps to the backtrack path, a
negative result jumps to the forced quit, and zero falls through.
Returns cc + 2 + 2 * LINK_SIZE. */
5994 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5995 {
5996 DEFINE_COMPILER;
5997 backtrack_common *backtrack;
5998
5999 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6000
/* Reserve space for the callout block, measured in sljit_sw units. */
6001 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6002
6003 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6004 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6005 SLJIT_ASSERT(common->capture_last_ptr != 0);
6006 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
6007 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
6008
6009 /* These pointer sized fields temporarily store internal variables:
STR_PTR goes into offset_vector and OVECTOR(0) into subject. do_callout()
converts them to offsets before the fields get their final values. */
6010 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6011 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
6012 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
6013
/* The mark field is loaded from the arguments when marks are in use,
otherwise an immediate 0 is stored. */
6014 if (common->mark_ptr != 0)
6015 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
6016 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
6017 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
6018 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
6019
6020 /* Needed to save important temporary registers. STACK_TOP is kept in
LOCALS0 across the call and reloaded afterwards. */
6021 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
6022 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
6023 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
6024 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
6025 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
6026 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
6027 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
6028
6029 /* Check return value: greater than zero backtracks, less than zero
leaves through the forced quit path. */
6030 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
6031 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
6032 if (common->forced_quit_label == NULL)
6033 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
6034 else
6035 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
6036 return cc + 2 + 2 * LINK_SIZE;
6037 }
6038
6039 #undef CALLOUT_ARG_SIZE
6040 #undef CALLOUT_ARG_OFFSET
6041
/* Emits the matching path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT), which may be preceded by OP_BRAZERO
or OP_BRAMINZERO.

  common      - compiler state; the accept / quit / then_trap related
                fields are saved on entry and restored before every return
  cc          - the assertion opcode, or the OP_BRAZERO / OP_BRAMINZERO
                in front of it
  backtrack   - receives framesize and private_data_ptr; matchingpath is
                set for OP_BRAZERO and used as a jump target for
                OP_BRAMINZERO
  conditional - when TRUE, failures are collected in backtrack->condfailed
                instead of backtrack->common.topbacktracks; must not be
                combined with a zero prefix (asserted below)

Returns the position 1 + LINK_SIZE units after the opcode which closes the
last alternative, or NULL if the compiler reported an error. */
6042 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
6043 {
6044 DEFINE_COMPILER;
6045 int framesize;
6046 int extrasize;
6047 BOOL needs_control_head;
6048 int private_data_ptr;
6049 backtrack_common altbacktrack;
6050 pcre_uchar *ccbegin;
6051 pcre_uchar opcode;
6052 pcre_uchar bra = OP_BRA;
6053 jump_list *tmp = NULL;
6054 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
6055 jump_list **found;
6056 /* Saving previous accept variables. */
6057 BOOL save_local_exit = common->local_exit;
6058 BOOL save_positive_assert = common->positive_assert;
6059 then_trap_backtrack *save_then_trap = common->then_trap;
6060 struct sljit_label *save_quit_label = common->quit_label;
6061 struct sljit_label *save_accept_label = common->accept_label;
6062 jump_list *save_quit = common->quit;
6063 jump_list *save_positive_assert_quit = common->positive_assert_quit;
6064 jump_list *save_accept = common->accept;
6065 struct sljit_jump *jump;
6066 struct sljit_jump *brajump = NULL;
6067
6068 /* Assert captures then. */
6069 common->then_trap = NULL;
6070
/* Remember and skip the optional zero prefix. */
6071 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6072 {
6073 SLJIT_ASSERT(!conditional);
6074 bra = *cc;
6075 cc++;
6076 }
6077 private_data_ptr = PRIVATE_DATA(cc);
6078 SLJIT_ASSERT(private_data_ptr != 0);
6079 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6080 backtrack->framesize = framesize;
6081 backtrack->private_data_ptr = private_data_ptr;
6082 opcode = *cc;
6083 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* The jump emitted after a matching alternative goes to the local list
tmp for positive assertions and directly to the failure target for
negative ones. */
6084 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
6085 ccbegin = cc;
6086 cc += GET(cc, 1);
6087
6088 if (bra == OP_BRAMINZERO)
6089 {
6090 /* This is a braminzero backtrack path. */
6091 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6092 free_stack(common, 1);
6093 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6094 }
6095
/* Save STR_PTR (and the control head if needed) on the stack. When a
frame is required, the previous private data value is saved as well and
the frame is initialized. */
6096 if (framesize < 0)
6097 {
6098 extrasize = needs_control_head ? 2 : 1;
6099 if (framesize == no_frame)
6100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6101 allocate_stack(common, extrasize);
6102 if (needs_control_head)
6103 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6104 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6105 if (needs_control_head)
6106 {
6107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6108 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6109 }
6110 }
6111 else
6112 {
6113 extrasize = needs_control_head ? 3 : 2;
6114 allocate_stack(common, framesize + extrasize);
6115 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6116 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6117 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6118 if (needs_control_head)
6119 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6120 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6121 if (needs_control_head)
6122 {
6123 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6124 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6125 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
6126 }
6127 else
6128 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6129 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
6130 }
6131
6132 memset(&altbacktrack, 0, sizeof(backtrack_common));
6133 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6134 {
6135 /* Negative assert is stronger than positive assert. */
6136 common->local_exit = TRUE;
6137 common->quit_label = NULL;
6138 common->quit = NULL;
6139 common->positive_assert = FALSE;
6140 }
6141 else
6142 common->positive_assert = TRUE;
6143 common->positive_assert_quit = NULL;
6144
/* One iteration for each alternative of the assertion. */
6145 while (1)
6146 {
6147 common->accept_label = NULL;
6148 common->accept = NULL;
6149 altbacktrack.top = NULL;
6150 altbacktrack.topbacktracks = NULL;
6151
/* Later alternatives start from the STR_PTR saved on the stack. */
6152 if (*ccbegin == OP_ALT)
6153 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6154
6155 altbacktrack.cc = ccbegin;
6156 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
6157 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6158 {
/* Restore the saved state before reporting the error. */
6159 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6160 {
6161 common->local_exit = save_local_exit;
6162 common->quit_label = save_quit_label;
6163 common->quit = save_quit;
6164 }
6165 common->positive_assert = save_positive_assert;
6166 common->then_trap = save_then_trap;
6167 common->accept_label = save_accept_label;
6168 common->positive_assert_quit = save_positive_assert_quit;
6169 common->accept = save_accept;
6170 return NULL;
6171 }
/* Accept jumps collected inside this alternative land here. */
6172 common->accept_label = LABEL();
6173 if (common->accept != NULL)
6174 set_jumps(common->accept, common->accept_label);
6175
6176 /* Reset stack. */
6177 if (framesize < 0)
6178 {
6179 if (framesize == no_frame)
6180 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6181 else
6182 free_stack(common, extrasize);
6183 if (needs_control_head)
6184 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6185 }
6186 else
6187 {
6188 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
6189 {
6190 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6191 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6192 if (needs_control_head)
6193 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
6194 }
6195 else
6196 {
6197 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6198 if (needs_control_head)
6199 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
6200 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6201 }
6202 }
6203
6204 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6205 {
6206 /* We know that STR_PTR was stored on the top of the stack. */
6207 if (conditional)
6208 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
6209 else if (bra == OP_BRAZERO)
6210 {
6211 if (framesize < 0)
6212 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6213 else
6214 {
6215 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6216 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
6217 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6218 }
6219 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6220 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6221 }
6222 else if (framesize >= 0)
6223 {
6224 /* For OP_BRA and OP_BRAMINZERO. */
6225 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6226 }
6227 }
/* The alternative matched: jump to the list selected by "found". */
6228 add_jump(compiler, found, JUMP(SLJIT_JUMP));
6229
6230 compile_backtrackingpath(common, altbacktrack.top);
6231 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6232 {
/* Restore the saved state before reporting the error. */
6233 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6234 {
6235 common->local_exit = save_local_exit;
6236 common->quit_label = save_quit_label;
6237 common->quit = save_quit;
6238 }
6239 common->positive_assert = save_positive_assert;
6240 common->then_trap = save_then_trap;
6241 common->accept_label = save_accept_label;
6242 common->positive_assert_quit = save_positive_assert_quit;
6243 common->accept = save_accept;
6244 return NULL;
6245 }
6246 set_jumps(altbacktrack.topbacktracks, LABEL());
6247
/* Stop after the last alternative, otherwise step to the next one. */
6248 if (*cc != OP_ALT)
6249 break;
6250
6251 ccbegin = cc;
6252 cc += GET(cc, 1);
6253 }
6254
6255 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6256 {
6257 SLJIT_ASSERT(common->positive_assert_quit == NULL);
6258 /* Makes the check less complicated below. */
6259 common->positive_assert_quit = common->quit;
6260 }
6261
6262 /* None of them matched. */
6263 if (common->positive_assert_quit != NULL)
6264 {
/* The normal fall-through path skips the quit handler emitted here. */
6265 jump = JUMP(SLJIT_JUMP);
6266 set_jumps(common->positive_assert_quit, LABEL());
6267 SLJIT_ASSERT(framesize != no_stack);
6268 if (framesize < 0)
6269 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
6270 else
6271 {
6272 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6273 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6274 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
6275 }
6276 JUMPHERE(jump);
6277 }
6278
/* Reload the control head value which was saved in STACK(1). */
6279 if (needs_control_head)
6280 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
6281
6282 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
6283 {
6284 /* Assert is failed. */
6285 if (conditional || bra == OP_BRAZERO)
6286 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6287
6288 if (framesize < 0)
6289 {
6290 /* The topmost item should be 0. */
6291 if (bra == OP_BRAZERO)
6292 {
6293 if (extrasize == 2)
6294 free_stack(common, 1);
6295 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6296 }
6297 else
6298 free_stack(common, extrasize);
6299 }
6300 else
6301 {
6302 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6303 /* The topmost item should be 0. */
6304 if (bra == OP_BRAZERO)
6305 {
6306 free_stack(common, framesize + extrasize - 1);
6307 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6308 }
6309 else
6310 free_stack(common, framesize + extrasize);
6311 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6312 }
/* With OP_BRAZERO the failure continues on the matching path (label set
below), otherwise it is added to the failure target. */
6313 jump = JUMP(SLJIT_JUMP);
6314 if (bra != OP_BRAZERO)
6315 add_jump(compiler, target, jump);
6316
6317 /* Assert is successful. */
6318 set_jumps(tmp, LABEL());
6319 if (framesize < 0)
6320 {
6321 /* We know that STR_PTR was stored on the top of the stack. */
6322 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
6323 /* Keep the STR_PTR on the top of the stack. */
6324 if (bra == OP_BRAZERO)
6325 {
6326 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6327 if (extrasize == 2)
6328 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6329 }
6330 else if (bra == OP_BRAMINZERO)
6331 {
6332 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6333 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6334 }
6335 }
6336 else
6337 {
6338 if (bra == OP_BRA)
6339 {
6340 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6341 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
6342 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
6343 }
6344 else
6345 {
6346 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
6347 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
6348 if (extrasize == 2)
6349 {
6350 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6351 if (bra == OP_BRAMINZERO)
6352 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6353 }
6354 else
6355 {
6356 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
6357 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
6358 }
6359 }
6360 }
6361
6362 if (bra == OP_BRAZERO)
6363 {
6364 backtrack->matchingpath = LABEL();
6365 SET_LABEL(jump, backtrack->matchingpath);
6366 }
6367 else if (bra == OP_BRAMINZERO)
6368 {
6369 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6370 JUMPHERE(brajump);
6371 if (framesize >= 0)
6372 {
6373 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6374 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6375 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
6376 }
6377 set_jumps(backtrack->common.topbacktracks, LABEL());
6378 }
6379 }
6380 else
6381 {
6382 /* AssertNot is successful. */
6383 if (framesize < 0)
6384 {
6385 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6386 if (bra != OP_BRA)
6387 {
6388 if (extrasize == 2)
6389 free_stack(common, 1);
6390 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6391 }
6392 else
6393 free_stack(common, extrasize);
6394 }
6395 else
6396 {
6397 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6398 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
6399 /* The topmost item should be 0. */
6400 if (bra != OP_BRA)
6401 {
6402 free_stack(common, framesize + extrasize - 1);
6403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6404 }
6405 else
6406 free_stack(common, framesize + extrasize);
6407 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
6408 }
6409
6410 if (bra == OP_BRAZERO)
6411 backtrack->matchingpath = LABEL();
6412 else if (bra == OP_BRAMINZERO)
6413 {
6414 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
6415 JUMPHERE(brajump);
6416 }
6417
/* With a zero prefix, the jumps collected in topbacktracks are bound to
this position and the list is emptied. */
6418 if (bra != OP_BRA)
6419 {
6420 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
6421 set_jumps(backtrack->common.topbacktracks, LABEL());
6422 backtrack->common.topbacktracks = NULL;
6423 }
6424 }
6425
/* Restore the state saved on entry. */
6426 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
6427 {
6428 common->local_exit = save_local_exit;
6429 common->quit_label = save_quit_label;
6430 common->quit = save_quit;
6431 }
6432 common->positive_assert = save_positive_assert;
6433 common->then_trap = save_then_trap;
6434 common->accept_label = save_accept_label;
6435 common->positive_assert_quit = save_positive_assert_quit;
6436 common->accept = save_accept;
6437 return cc + 1 + LINK_SIZE;
6438 }
6439
6440 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6441 {
6442 DEFINE_COMPILER;
6443 int stacksize;
6444
6445 if (framesize < 0)
6446 {
6447 if (framesize == no_frame)
6448 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6449 else
6450 {
6451 stacksize = needs_control_head ? 1 : 0;
6452 if (ket != OP_KET || has_alternatives)
6453 stacksize++;
6454 free_stack(common, stacksize);
6455 }
6456
6457 if (needs_control_head)
6458 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6459
6460 /* TMP2 which is set here used by OP_KETRMAX below. */
6461 if (ket == OP_KETRMAX)
6462 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6463 else if (ket == OP_KETRMIN)
6464 {
6465 /* Move the STR_PTR to the private_data_ptr. */
6466 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6467 }
6468 }
6469 else
6470 {
6471 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6472 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6473 if (needs_control_head)
6474 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6475
6476 if (ket == OP_KETRMAX)
6477 {
6478 /* TMP2 which is set here used by OP_KETRMAX below. */
6479 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6480 }
6481 }
6482 if (needs_control_head)
6483 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6484 }
6485
6486 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6487 {
6488 DEFINE_COMPILER;
6489
6490 if (common->capture_last_ptr != 0)
6491 {
6492 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6493 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6494 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6495 stacksize++;
6496 }
6497 if (common->optimized_cbracket[offset >> 1] == 0)
6498 {
6499 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6500 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6502 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6504 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6505 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6506 stacksize += 2;
6507 }
6508 return stacksize;
6509 }
6510
6511 /*
6512 Handling bracketed expressions is probably the most complex part.
6513
6514 Stack layout naming characters:
6515 S - Push the current STR_PTR
6516 0 - Push a 0 (NULL)
6517 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6518 before the next alternative. Not pushed if there are no alternatives.
6519 M - Any values pushed by the current alternative. Can be empty, or anything.
6520 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6521 L - Push the previous local (pointed by localptr) to the stack
6522 () - optional values stored on the stack
6523 ()* - optional, can be stored multiple times
6524
6525 The following list shows the regular expression templates, their PCRE byte codes
6526 and stack layout supported by pcre-sljit.
6527
6528 (?:) OP_BRA | OP_KET A M
6529 () OP_CBRA | OP_KET C M
6530 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6531 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6532 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6533 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6534 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6535 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6536 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6537 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6538 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6539 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6540 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6541 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6542 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6543 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6544 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6545 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6546 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6547 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6548 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6549 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6550
6551
6552 Stack layout naming characters:
6553 A - Push the alternative index (starting from 0) on the stack.
6554 Not pushed if there is no alternatives.
6555 M - Any values pushed by the current alternative. Can be empty, or anything.
6556
6557 The next list shows the possible content of a bracket:
6558 (|) OP_*BRA | OP_ALT ... M A
6559 (?()|) OP_*COND | OP_ALT M A
6560 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6561 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6562 Or nothing, if trace is unnecessary
6563 */
6564
6565 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6566 {
6567 DEFINE_COMPILER;
6568 backtrack_common *backtrack;
6569 pcre_uchar opcode;
6570 int private_data_ptr = 0;
6571 int offset = 0;
6572 int i, stacksize;
6573 int repeat_ptr = 0, repeat_length = 0;
6574 int repeat_type = 0, repeat_count = 0;
6575 pcre_uchar *ccbegin;
6576 pcre_uchar *matchingpath;
6577 pcre_uchar *slot;
6578 pcre_uchar bra = OP_BRA;
6579 pcre_uchar ket;
6580 assert_backtrack *assert;
6581 BOOL has_alternatives;
6582 BOOL needs_control_head = FALSE;
6583 struct sljit_jump *jump;
6584 struct sljit_jump *skip;
6585 struct sljit_label *rmax_label = NULL;
6586 struct sljit_jump *braminzero = NULL;
6587
6588 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6589
6590 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6591 {
6592 bra = *cc;
6593 cc++;
6594 opcode = *cc;
6595 }
6596
6597 opcode = *cc;
6598 ccbegin = cc;
6599 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
6600 ket = *matchingpath;
6601 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
6602 {
6603 repeat_ptr = PRIVATE_DATA(matchingpath);
6604 repeat_length = PRIVATE_DATA(matchingpath + 1);
6605 repeat_type = PRIVATE_DATA(matchingpath + 2);
6606 repeat_count = PRIVATE_DATA(matchingpath + 3);
6607 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
6608 if (repeat_type == OP_UPTO)
6609 ket = OP_KETRMAX;
6610 if (repeat_type == OP_MINUPTO)
6611 ket = OP_KETRMIN;
6612 }
6613
6614 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6615 {
6616 /* Drop this bracket_backtrack. */
6617 parent->top = backtrack->prev;
6618 return matchingpath + 1 + LINK_SIZE + repeat_length;
6619 }
6620
6621 matchingpath = ccbegin + 1 + LINK_SIZE;
6622 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6623 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
6624 cc += GET(cc, 1);
6625
6626 has_alternatives = *cc == OP_ALT;
6627 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6628 has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
6629
6630 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6631 opcode = OP_SCOND;
6632 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6633 opcode = OP_ONCE;
6634
6635 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6636 {
6637 /* Capturing brackets has a pre-allocated space. */
6638 offset = GET2(ccbegin, 1 + LINK_SIZE);
6639 if (common->optimized_cbracket[offset] == 0)
6640 {
6641 private_data_ptr = OVECTOR_PRIV(offset);
6642 offset <<= 1;
6643 }
6644 else
6645 {
6646 offset <<= 1;
6647 private_data_ptr = OVECTOR(offset);
6648 }
6649 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6650 matchingpath += IMM2_SIZE;
6651 }
6652 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6653 {
6654 /* Other brackets simply allocate the next entry. */
6655 private_data_ptr = PRIVATE_DATA(ccbegin);
6656 SLJIT_ASSERT(private_data_ptr != 0);
6657 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6658 if (opcode == OP_ONCE)
6659 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6660 }
6661
6662 /* Instructions before the first alternative. */
6663 stacksize = 0;
6664 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6665 stacksize++;
6666 if (bra == OP_BRAZERO)
6667 stacksize++;
6668
6669 if (stacksize > 0)
6670 allocate_stack(common, stacksize);
6671
6672 stacksize = 0;
6673 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6674 {
6675 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6676 stacksize++;
6677 }
6678
6679 if (bra == OP_BRAZERO)
6680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6681
6682 if (bra == OP_BRAMINZERO)
6683 {
6684 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6685 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6686 if (ket != OP_KETRMIN)
6687 {
6688 free_stack(common, 1);
6689 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6690 }
6691 else
6692 {
6693 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6694 {
6695 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6696 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6697 /* Nothing stored during the first run. */
6698 skip = JUMP(SLJIT_JUMP);
6699 JUMPHERE(jump);
6700 /* Checking zero-length iteration. */
6701 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6702 {
6703 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6704 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6705 }
6706 else
6707 {
6708 /* Except when the whole stack frame must be saved. */
6709 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6710 braminzero = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6711 }
6712 JUMPHERE(skip);
6713 }
6714 else
6715 {
6716 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6717 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6718 JUMPHERE(jump);
6719 }
6720 }
6721 }
6722
6723 if (repeat_type != 0)
6724 {
6725 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), repeat_ptr, SLJIT_IMM, repeat_count);
6726 if (repeat_type == OP_EXACT)
6727 rmax_label = LABEL();
6728 }
6729
6730 if (ket == OP_KETRMIN)
6731 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6732
6733 if (ket == OP_KETRMAX)
6734 {
6735 rmax_label = LABEL();
6736 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
6737 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
6738 }
6739
6740 /* Handling capturing brackets and alternatives. */
6741 if (opcode == OP_ONCE)
6742 {
6743 stacksize = 0;
6744 if (needs_control_head)
6745 {
6746 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6747 stacksize++;
6748 }
6749