/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1282 - (show annotations)
Fri Mar 15 08:01:41 2013 UTC (6 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 303640 byte(s)
Fix a crash and an invalid return value in JIT when *THEN verb is used.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 int real_offset_count;
172 int offset_count;
173 int call_limit;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
186 } executable_functions;
187
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;   /* The jump referenced by this node. */
  struct jump_list *next;    /* Following node, if any. */
} jump_list;
192
/* Node of a singly linked list of stubs; each node pairs a jump
   (start) with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
198
/* Bit flags attached to bytecode positions. The values are distinct
   powers of two so they can be combined in one flag byte.
   No comma follows the last enumerator: a trailing comma is only
   permitted from C99 onwards and this file is otherwise C89-clean. */
enum bytecode_flag_types {
  flag_optimized_cbracket = 1,
  flag_then_start = 2
};
203
/* Negative sentinel values used where a frame size is expected
   (see get_framesize, which returns one of these when no frame is needed). */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
208
/* Identifiers of the control entry kinds, numbered consecutively from 0. */
enum control_types {
  type_commit    = 0,
  type_prune     = 1,
  type_skip      = 2,
  type_skip_arg  = 3,
  type_mark      = 4,
  type_then_trap = 5
};
217
218 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
219
220 /* The following structure is the key data type for the recursive
221 code generator. It is allocated by compile_matchingpath, and contains
222 the aguments for compile_backtrackingpath. Must be the first member
223 of its descendants. */
224 typedef struct backtrack_common {
225 /* Concatenation stack. */
226 struct backtrack_common *prev;
227 jump_list *nextbacktracks;
228 /* Internal stack (for component operators). */
229 struct backtrack_common *top;
230 jump_list *topbacktracks;
231 /* Opcode pointer. */
232 pcre_uchar *cc;
233 } backtrack_common;
234
235 typedef struct assert_backtrack {
236 backtrack_common common;
237 jump_list *condfailed;
238 /* Less than 0 if a frame is not needed. */
239 int framesize;
240 /* Points to our private memory word on the stack. */
241 int private_data_ptr;
242 /* For iterators. */
243 struct sljit_label *matchingpath;
244 } assert_backtrack;
245
246 typedef struct bracket_backtrack {
247 backtrack_common common;
248 /* Where to coninue if an alternative is successfully matched. */
249 struct sljit_label *alternative_matchingpath;
250 /* For rmin and rmax iterators. */
251 struct sljit_label *recursive_matchingpath;
252 /* For greedy ? operator. */
253 struct sljit_label *zero_matchingpath;
254 /* Contains the branches of a failed condition. */
255 union {
256 /* Both for OP_COND, OP_SCOND. */
257 jump_list *condfailed;
258 assert_backtrack *assert;
259 /* For OP_ONCE. Less than 0 if not needed. */
260 int framesize;
261 } u;
262 /* Points to our private memory word on the stack. */
263 int private_data_ptr;
264 } bracket_backtrack;
265
266 typedef struct bracketpos_backtrack {
267 backtrack_common common;
268 /* Points to our private memory word on the stack. */
269 int private_data_ptr;
270 /* Reverting stack is needed. */
271 int framesize;
272 /* Allocated stack size. */
273 int stacksize;
274 } bracketpos_backtrack;
275
276 typedef struct braminzero_backtrack {
277 backtrack_common common;
278 struct sljit_label *matchingpath;
279 } braminzero_backtrack;
280
281 typedef struct iterator_backtrack {
282 backtrack_common common;
283 /* Next iteration. */
284 struct sljit_label *matchingpath;
285 } iterator_backtrack;
286
287 typedef struct recurse_entry {
288 struct recurse_entry *next;
289 /* Contains the function entry. */
290 struct sljit_label *entry;
291 /* Collects the calls until the function is not created. */
292 jump_list *calls;
293 /* Points to the starting opcode. */
294 sljit_sw start;
295 } recurse_entry;
296
297 typedef struct recurse_backtrack {
298 backtrack_common common;
299 BOOL inlined_pattern;
300 } recurse_backtrack;
301
302 #define OP_THEN_TRAP OP_TABLE_LENGTH
303
304 typedef struct then_trap_backtrack {
305 backtrack_common common;
306 /* If then_trap is not NULL, this structure contains the real
307 then_trap for the backtracking path. */
308 struct then_trap_backtrack *then_trap;
309 /* Points to the starting opcode. */
310 sljit_sw start;
311 /* Exit point for the then opcodes of this alternative. */
312 jump_list *quit;
313 /* Frame size of the current alternative. */
314 int framesize;
315 } then_trap_backtrack;
316
317 #define MAX_RANGE_SIZE 6
318
319 typedef struct compiler_common {
320 /* The sljit ceneric compiler. */
321 struct sljit_compiler *compiler;
322 /* First byte code. */
323 pcre_uchar *start;
324 /* Maps private data offset to each opcode. */
325 int *private_data_ptrs;
326 /* Tells whether the capturing bracket is optimized. */
327 pcre_uint8 *optimized_cbracket;
328 /* Tells whether the starting offset is a target of then. */
329 pcre_uint8 *then_offsets;
330 /* Current position where a THEN must jump. */
331 then_trap_backtrack *then_trap;
332 /* Starting offset of private data for capturing brackets. */
333 int cbra_ptr;
334 /* Output vector starting point. Must be divisible by 2. */
335 int ovector_start;
336 /* Last known position of the requested byte. */
337 int req_char_ptr;
338 /* Head of the last recursion. */
339 int recursive_head_ptr;
340 /* First inspected character for partial matching. */
341 int start_used_ptr;
342 /* Starting pointer for partial soft matches. */
343 int hit_start;
344 /* End pointer of the first line. */
345 int first_line_end;
346 /* Points to the marked string. */
347 int mark_ptr;
348 /* Recursive control verb management chain. */
349 int control_head_ptr;
350 /* Points to the last matched capture block index. */
351 int capture_last_ptr;
352 /* Points to the starting position of the current match. */
353 int start_ptr;
354
355 /* Flipped and lower case tables. */
356 const pcre_uint8 *fcc;
357 sljit_sw lcc;
358 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
359 int mode;
360 /* \K is found in the pattern. */
361 BOOL has_set_som;
362 /* (*SKIP:arg) is found in the pattern. */
363 BOOL has_skip_arg;
364 /* (*THEN) is found in the pattern. */
365 BOOL has_then;
366 /* Needs to know the start position anytime. */
367 BOOL needs_start_ptr;
368 /* Currently in recurse or assert. */
369 BOOL local_exit;
370 /* Newline control. */
371 int nltype;
372 int newline;
373 int bsr_nltype;
374 /* Dollar endonly. */
375 int endonly;
376 /* Tables. */
377 sljit_sw ctypes;
378 int digits[2 + MAX_RANGE_SIZE];
379 /* Named capturing brackets. */
380 sljit_uw name_table;
381 sljit_sw name_count;
382 sljit_sw name_entry_size;
383
384 /* Labels and jump lists. */
385 struct sljit_label *partialmatchlabel;
386 struct sljit_label *quit_label;
387 struct sljit_label *forced_quit_label;
388 struct sljit_label *accept_label;
389 stub_list *stubs;
390 recurse_entry *entries;
391 recurse_entry *currententry;
392 jump_list *partialmatch;
393 jump_list *quit;
394 jump_list *forced_quit;
395 jump_list *accept;
396 jump_list *calllimit;
397 jump_list *stackalloc;
398 jump_list *revertframes;
399 jump_list *wordboundary;
400 jump_list *anynewline;
401 jump_list *hspace;
402 jump_list *vspace;
403 jump_list *casefulcmp;
404 jump_list *caselesscmp;
405 jump_list *reset_match;
406 BOOL jscript_compat;
407 #ifdef SUPPORT_UTF
408 BOOL utf;
409 #ifdef SUPPORT_UCP
410 BOOL use_ucp;
411 #endif
412 #ifndef COMPILE_PCRE32
413 jump_list *utfreadchar;
414 #endif
415 #ifdef COMPILE_PCRE8
416 jump_list *utfreadtype8;
417 #endif
418 #endif /* SUPPORT_UTF */
419 #ifdef SUPPORT_UCP
420 jump_list *getucd;
421 #endif
422 } compiler_common;
423
/* Working state for byte_sequence_compare. The ucharptr member and the
   two unions exist only when SLJIT_UNALIGNED is defined and non-zero;
   the width of asuchars[] depends on the character size being compiled
   (4 x 8 bit, 2 x 16 bit or 1 x 32 bit). */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
457
/* Remove the CMP macro that is still defined at this point; this file
   defines its own CMP further down. */
#undef CMP

/* Byte offset of the i-th element of the stack: element 0 is at
   -sizeof(sljit_sw), element 1 at -2 * sizeof(sljit_sw), and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Register assignment used by the generated code. */
#define TMP1          SLJIT_SCRATCH_REG1
#define TMP2          SLJIT_SCRATCH_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_SCRATCH_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1

/* Local space layout; every slot is one sljit_sw wide. */
/* Slots 0 and 1 can be used by the current opcode. */
#define LOCALS0       (0 * sizeof(sljit_sw))
#define LOCALS1       (1 * sizeof(sljit_sw))
/* Slots 2 and 3 are two local variables for possessive quantifiers
   (char1 cannot use them). */
#define POSSESSIVE0   (2 * sizeof(sljit_sw))
#define POSSESSIVE1   (3 * sizeof(sljit_sw))
/* Slot 4: max limit of recursions. */
#define CALL_LIMIT    (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
   to characters. The vector data is divided into two groups: the first
   group contains the start / end character pointers, and the second is
   the start pointers when the end of the capturing group has not yet
   been reached. These macros read the local variable `common`. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i)    (OVECTOR_START + (i) * sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
492
493 #if defined COMPILE_PCRE8
494 #define MOV_UCHAR SLJIT_MOV_UB
495 #define MOVU_UCHAR SLJIT_MOVU_UB
496 #elif defined COMPILE_PCRE16
497 #define MOV_UCHAR SLJIT_MOV_UH
498 #define MOVU_UCHAR SLJIT_MOVU_UH
499 #elif defined COMPILE_PCRE32
500 #define MOV_UCHAR SLJIT_MOV_UI
501 #define MOVU_UCHAR SLJIT_MOVU_UI
502 #else
503 #error Unsupported compiling mode
504 #endif
505
/* Shortcuts for the sljit emitter API.
   DEFINE_COMPILER declares the local variable `compiler` from
   common->compiler; every other macro below expands to a call that
   passes `compiler`, so it must be in scope where they are used. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler

/* Instruction emitters. */
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))

/* Labels and jumps. */
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))

/* Compare-and-jump; CMPTO additionally binds the jump to `label`. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

/* Flag materialisation and local base address. */
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
531
532 static pcre_uchar* bracketend(pcre_uchar* cc)
533 {
534 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
535 do cc += GET(cc, 1); while (*cc == OP_ALT);
536 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
537 cc += 1 + LINK_SIZE;
538 return cc;
539 }
540
541 /* Functions which might need modification for all new supported opcodes:
542 next_opcode
543 get_private_data_length
544 set_private_data_ptrs
545 get_framesize
546 init_frame
547 get_private_data_copy_length
548 copy_private_data
549 compile_matchingpath
550 compile_backtrackingpath
551 */
552
553 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
554 {
555 SLJIT_UNUSED_ARG(common);
556 switch(*cc)
557 {
558 case OP_SOD:
559 case OP_SOM:
560 case OP_SET_SOM:
561 case OP_NOT_WORD_BOUNDARY:
562 case OP_WORD_BOUNDARY:
563 case OP_NOT_DIGIT:
564 case OP_DIGIT:
565 case OP_NOT_WHITESPACE:
566 case OP_WHITESPACE:
567 case OP_NOT_WORDCHAR:
568 case OP_WORDCHAR:
569 case OP_ANY:
570 case OP_ALLANY:
571 case OP_NOTPROP:
572 case OP_PROP:
573 case OP_ANYNL:
574 case OP_NOT_HSPACE:
575 case OP_HSPACE:
576 case OP_NOT_VSPACE:
577 case OP_VSPACE:
578 case OP_EXTUNI:
579 case OP_EODN:
580 case OP_EOD:
581 case OP_CIRC:
582 case OP_CIRCM:
583 case OP_DOLL:
584 case OP_DOLLM:
585 case OP_CRSTAR:
586 case OP_CRMINSTAR:
587 case OP_CRPLUS:
588 case OP_CRMINPLUS:
589 case OP_CRQUERY:
590 case OP_CRMINQUERY:
591 case OP_CRRANGE:
592 case OP_CRMINRANGE:
593 case OP_CLASS:
594 case OP_NCLASS:
595 case OP_REF:
596 case OP_REFI:
597 case OP_RECURSE:
598 case OP_CALLOUT:
599 case OP_ALT:
600 case OP_KET:
601 case OP_KETRMAX:
602 case OP_KETRMIN:
603 case OP_KETRPOS:
604 case OP_REVERSE:
605 case OP_ASSERT:
606 case OP_ASSERT_NOT:
607 case OP_ASSERTBACK:
608 case OP_ASSERTBACK_NOT:
609 case OP_ONCE:
610 case OP_ONCE_NC:
611 case OP_BRA:
612 case OP_BRAPOS:
613 case OP_CBRA:
614 case OP_CBRAPOS:
615 case OP_COND:
616 case OP_SBRA:
617 case OP_SBRAPOS:
618 case OP_SCBRA:
619 case OP_SCBRAPOS:
620 case OP_SCOND:
621 case OP_CREF:
622 case OP_NCREF:
623 case OP_RREF:
624 case OP_NRREF:
625 case OP_DEF:
626 case OP_BRAZERO:
627 case OP_BRAMINZERO:
628 case OP_BRAPOSZERO:
629 case OP_PRUNE:
630 case OP_SKIP:
631 case OP_THEN:
632 case OP_COMMIT:
633 case OP_FAIL:
634 case OP_ACCEPT:
635 case OP_ASSERT_ACCEPT:
636 case OP_CLOSE:
637 case OP_SKIPZERO:
638 return cc + PRIV(OP_lengths)[*cc];
639
640 case OP_CHAR:
641 case OP_CHARI:
642 case OP_NOT:
643 case OP_NOTI:
644 case OP_STAR:
645 case OP_MINSTAR:
646 case OP_PLUS:
647 case OP_MINPLUS:
648 case OP_QUERY:
649 case OP_MINQUERY:
650 case OP_UPTO:
651 case OP_MINUPTO:
652 case OP_EXACT:
653 case OP_POSSTAR:
654 case OP_POSPLUS:
655 case OP_POSQUERY:
656 case OP_POSUPTO:
657 case OP_STARI:
658 case OP_MINSTARI:
659 case OP_PLUSI:
660 case OP_MINPLUSI:
661 case OP_QUERYI:
662 case OP_MINQUERYI:
663 case OP_UPTOI:
664 case OP_MINUPTOI:
665 case OP_EXACTI:
666 case OP_POSSTARI:
667 case OP_POSPLUSI:
668 case OP_POSQUERYI:
669 case OP_POSUPTOI:
670 case OP_NOTSTAR:
671 case OP_NOTMINSTAR:
672 case OP_NOTPLUS:
673 case OP_NOTMINPLUS:
674 case OP_NOTQUERY:
675 case OP_NOTMINQUERY:
676 case OP_NOTUPTO:
677 case OP_NOTMINUPTO:
678 case OP_NOTEXACT:
679 case OP_NOTPOSSTAR:
680 case OP_NOTPOSPLUS:
681 case OP_NOTPOSQUERY:
682 case OP_NOTPOSUPTO:
683 case OP_NOTSTARI:
684 case OP_NOTMINSTARI:
685 case OP_NOTPLUSI:
686 case OP_NOTMINPLUSI:
687 case OP_NOTQUERYI:
688 case OP_NOTMINQUERYI:
689 case OP_NOTUPTOI:
690 case OP_NOTMINUPTOI:
691 case OP_NOTEXACTI:
692 case OP_NOTPOSSTARI:
693 case OP_NOTPOSPLUSI:
694 case OP_NOTPOSQUERYI:
695 case OP_NOTPOSUPTOI:
696 cc += PRIV(OP_lengths)[*cc];
697 #ifdef SUPPORT_UTF
698 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
699 #endif
700 return cc;
701
702 /* Special cases. */
703 case OP_TYPESTAR:
704 case OP_TYPEMINSTAR:
705 case OP_TYPEPLUS:
706 case OP_TYPEMINPLUS:
707 case OP_TYPEQUERY:
708 case OP_TYPEMINQUERY:
709 case OP_TYPEUPTO:
710 case OP_TYPEMINUPTO:
711 case OP_TYPEEXACT:
712 case OP_TYPEPOSSTAR:
713 case OP_TYPEPOSPLUS:
714 case OP_TYPEPOSQUERY:
715 case OP_TYPEPOSUPTO:
716 return cc + PRIV(OP_lengths)[*cc] - 1;
717
718 case OP_ANYBYTE:
719 #ifdef SUPPORT_UTF
720 if (common->utf) return NULL;
721 #endif
722 return cc + 1;
723
724 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
725 case OP_XCLASS:
726 return cc + GET(cc, 1);
727 #endif
728
729 case OP_MARK:
730 case OP_PRUNE_ARG:
731 case OP_SKIP_ARG:
732 case OP_THEN_ARG:
733 return cc + 1 + 2 + cc[1];
734
735 default:
736 /* All opcodes are supported now! */
737 SLJIT_ASSERT_STOP();
738 return NULL;
739 }
740 }
741
/* Each macro below expands to a run of `case` labels and is meant to be
   placed directly in front of the statements of a switch branch.
   In get_private_data_length and set_private_data_ptrs the *_1 groups
   reserve one private data word, the *_2A and *_2B groups two; the *_2B
   opcodes additionally carry an IMM2_SIZE operand. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
793
794 static int get_class_iterator_size(pcre_uchar *cc)
795 {
796 switch(*cc)
797 {
798 case OP_CRSTAR:
799 case OP_CRPLUS:
800 return 2;
801
802 case OP_CRMINSTAR:
803 case OP_CRMINPLUS:
804 case OP_CRQUERY:
805 case OP_CRMINQUERY:
806 return 1;
807
808 case OP_CRRANGE:
809 case OP_CRMINRANGE:
810 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
811 return 0;
812 return 2;
813
814 default:
815 return 0;
816 }
817 }
818
819 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
820 {
821 int private_data_length = 0;
822 pcre_uchar *alternative;
823 pcre_uchar *name;
824 pcre_uchar *end = NULL;
825 int space, size, i;
826 pcre_uint32 bracketlen;
827
828 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
829 while (cc < ccend)
830 {
831 space = 0;
832 size = 0;
833 bracketlen = 0;
834 switch(*cc)
835 {
836 case OP_SET_SOM:
837 common->has_set_som = TRUE;
838 cc += 1;
839 break;
840
841 case OP_REF:
842 case OP_REFI:
843 common->optimized_cbracket[GET2(cc, 1)] = 0;
844 cc += 1 + IMM2_SIZE;
845 break;
846
847 case OP_ASSERT:
848 case OP_ASSERT_NOT:
849 case OP_ASSERTBACK:
850 case OP_ASSERTBACK_NOT:
851 case OP_ONCE:
852 case OP_ONCE_NC:
853 case OP_BRAPOS:
854 case OP_SBRA:
855 case OP_SBRAPOS:
856 private_data_length += sizeof(sljit_sw);
857 bracketlen = 1 + LINK_SIZE;
858 break;
859
860 case OP_CBRAPOS:
861 case OP_SCBRAPOS:
862 private_data_length += sizeof(sljit_sw);
863 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
864 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
865 break;
866
867 case OP_COND:
868 case OP_SCOND:
869 /* Only AUTO_CALLOUT can insert this opcode. We do
870 not intend to support this case. */
871 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
872 return -1;
873
874 if (*cc == OP_COND)
875 {
876 /* Might be a hidden SCOND. */
877 alternative = cc + GET(cc, 1);
878 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
879 private_data_length += sizeof(sljit_sw);
880 }
881 else
882 private_data_length += sizeof(sljit_sw);
883 bracketlen = 1 + LINK_SIZE;
884 break;
885
886 case OP_CREF:
887 i = GET2(cc, 1);
888 common->optimized_cbracket[i] = 0;
889 cc += 1 + IMM2_SIZE;
890 break;
891
892 case OP_NCREF:
893 bracketlen = GET2(cc, 1);
894 name = (pcre_uchar *)common->name_table;
895 alternative = name;
896 for (i = 0; i < common->name_count; i++)
897 {
898 if (GET2(name, 0) == bracketlen) break;
899 name += common->name_entry_size;
900 }
901 SLJIT_ASSERT(i != common->name_count);
902
903 for (i = 0; i < common->name_count; i++)
904 {
905 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
906 common->optimized_cbracket[GET2(alternative, 0)] = 0;
907 alternative += common->name_entry_size;
908 }
909 bracketlen = 0;
910 cc += 1 + IMM2_SIZE;
911 break;
912
913 case OP_BRA:
914 bracketlen = 1 + LINK_SIZE;
915 break;
916
917 case OP_CBRA:
918 case OP_SCBRA:
919 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
920 break;
921
922 CASE_ITERATOR_PRIVATE_DATA_1
923 space = 1;
924 size = -2;
925 break;
926
927 CASE_ITERATOR_PRIVATE_DATA_2A
928 space = 2;
929 size = -2;
930 break;
931
932 CASE_ITERATOR_PRIVATE_DATA_2B
933 space = 2;
934 size = -(2 + IMM2_SIZE);
935 break;
936
937 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
938 space = 1;
939 size = 1;
940 break;
941
942 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
943 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
944 space = 2;
945 size = 1;
946 break;
947
948 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
949 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
950 space = 2;
951 size = 1 + IMM2_SIZE;
952 break;
953
954 case OP_CLASS:
955 case OP_NCLASS:
956 size += 1 + 32 / sizeof(pcre_uchar);
957 space = get_class_iterator_size(cc + size);
958 break;
959
960 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
961 case OP_XCLASS:
962 size = GET(cc, 1);
963 space = get_class_iterator_size(cc + size);
964 break;
965 #endif
966
967 case OP_RECURSE:
968 /* Set its value only once. */
969 if (common->recursive_head_ptr == 0)
970 {
971 common->recursive_head_ptr = common->ovector_start;
972 common->ovector_start += sizeof(sljit_sw);
973 }
974 cc += 1 + LINK_SIZE;
975 break;
976
977 case OP_CALLOUT:
978 if (common->capture_last_ptr == 0)
979 {
980 common->capture_last_ptr = common->ovector_start;
981 common->ovector_start += sizeof(sljit_sw);
982 }
983 cc += 2 + 2 * LINK_SIZE;
984 break;
985
986 case OP_THEN_ARG:
987 common->has_then = TRUE;
988 /* Fall through. */
989
990 case OP_PRUNE_ARG:
991 common->needs_start_ptr = TRUE;
992 common->control_head_ptr = 1;
993 /* Fall through. */
994
995 case OP_MARK:
996 if (common->mark_ptr == 0)
997 {
998 common->mark_ptr = common->ovector_start;
999 common->ovector_start += sizeof(sljit_sw);
1000 }
1001 cc += 1 + 2 + cc[1];
1002 break;
1003
1004 case OP_THEN:
1005 common->has_then = TRUE;
1006 /* Fall through. */
1007
1008 case OP_PRUNE:
1009 case OP_SKIP:
1010 common->needs_start_ptr = TRUE;
1011 common->control_head_ptr = 1;
1012 cc += 1;
1013 break;
1014
1015 case OP_SKIP_ARG:
1016 common->control_head_ptr = 1;
1017 common->has_skip_arg = TRUE;
1018 cc += 1 + 2 + cc[1];
1019 break;
1020
1021 default:
1022 cc = next_opcode(common, cc);
1023 if (cc == NULL)
1024 return -1;
1025 break;
1026 }
1027
1028 if (space > 0 && cc >= end)
1029 private_data_length += sizeof(sljit_sw) * space;
1030
1031 if (size != 0)
1032 {
1033 if (size < 0)
1034 {
1035 cc += -size;
1036 #ifdef SUPPORT_UTF
1037 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1038 #endif
1039 }
1040 else
1041 cc += size;
1042 }
1043
1044 if (bracketlen != 0)
1045 {
1046 if (cc >= end)
1047 {
1048 end = bracketend(cc);
1049 if (end[-1 - LINK_SIZE] == OP_KET)
1050 end = NULL;
1051 }
1052 cc += bracketlen;
1053 }
1054 }
1055 return private_data_length;
1056 }
1057
1058 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
1059 {
1060 pcre_uchar *cc = common->start;
1061 pcre_uchar *alternative;
1062 pcre_uchar *end = NULL;
1063 int space, size, bracketlen;
1064
1065 while (cc < ccend)
1066 {
1067 space = 0;
1068 size = 0;
1069 bracketlen = 0;
1070 switch(*cc)
1071 {
1072 case OP_ASSERT:
1073 case OP_ASSERT_NOT:
1074 case OP_ASSERTBACK:
1075 case OP_ASSERTBACK_NOT:
1076 case OP_ONCE:
1077 case OP_ONCE_NC:
1078 case OP_BRAPOS:
1079 case OP_SBRA:
1080 case OP_SBRAPOS:
1081 case OP_SCOND:
1082 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1083 private_data_ptr += sizeof(sljit_sw);
1084 bracketlen = 1 + LINK_SIZE;
1085 break;
1086
1087 case OP_CBRAPOS:
1088 case OP_SCBRAPOS:
1089 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1090 private_data_ptr += sizeof(sljit_sw);
1091 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1092 break;
1093
1094 case OP_COND:
1095 /* Might be a hidden SCOND. */
1096 alternative = cc + GET(cc, 1);
1097 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1098 {
1099 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1100 private_data_ptr += sizeof(sljit_sw);
1101 }
1102 bracketlen = 1 + LINK_SIZE;
1103 break;
1104
1105 case OP_BRA:
1106 bracketlen = 1 + LINK_SIZE;
1107 break;
1108
1109 case OP_CBRA:
1110 case OP_SCBRA:
1111 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1112 break;
1113
1114 CASE_ITERATOR_PRIVATE_DATA_1
1115 space = 1;
1116 size = -2;
1117 break;
1118
1119 CASE_ITERATOR_PRIVATE_DATA_2A
1120 space = 2;
1121 size = -2;
1122 break;
1123
1124 CASE_ITERATOR_PRIVATE_DATA_2B
1125 space = 2;
1126 size = -(2 + IMM2_SIZE);
1127 break;
1128
1129 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1130 space = 1;
1131 size = 1;
1132 break;
1133
1134 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1135 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1136 space = 2;
1137 size = 1;
1138 break;
1139
1140 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1141 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1142 space = 2;
1143 size = 1 + IMM2_SIZE;
1144 break;
1145
1146 case OP_CLASS:
1147 case OP_NCLASS:
1148 size += 1 + 32 / sizeof(pcre_uchar);
1149 space = get_class_iterator_size(cc + size);
1150 break;
1151
1152 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1153 case OP_XCLASS:
1154 size = GET(cc, 1);
1155 space = get_class_iterator_size(cc + size);
1156 break;
1157 #endif
1158
1159 default:
1160 cc = next_opcode(common, cc);
1161 SLJIT_ASSERT(cc != NULL);
1162 break;
1163 }
1164
1165 if (space > 0 && cc >= end)
1166 {
1167 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1168 private_data_ptr += sizeof(sljit_sw) * space;
1169 }
1170
1171 if (size != 0)
1172 {
1173 if (size < 0)
1174 {
1175 cc += -size;
1176 #ifdef SUPPORT_UTF
1177 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1178 #endif
1179 }
1180 else
1181 cc += size;
1182 }
1183
1184 if (bracketlen > 0)
1185 {
1186 if (cc >= end)
1187 {
1188 end = bracketend(cc);
1189 if (end[-1 - LINK_SIZE] == OP_KET)
1190 end = NULL;
1191 }
1192 cc += bracketlen;
1193 }
1194 }
1195 }
1196
1197 /* Returns with a frame_types (always < 0) if no need for frame. */
1198 static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL* needs_control_head)
1199 {
1200 int length = 0;
1201 int possessive = 0;
1202 BOOL stack_restore = FALSE;
1203 BOOL setsom_found = recursive;
1204 BOOL setmark_found = recursive;
1205 /* The last capture is a local variable even for recursions. */
1206 BOOL capture_last_found = FALSE;
1207
1208 #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1209 SLJIT_ASSERT(common->control_head_ptr != 0);
1210 *needs_control_head = TRUE;
1211 #else
1212 *needs_control_head = FALSE;
1213 #endif
1214
1215 if (ccend == NULL)
1216 {
1217 ccend = bracketend(cc) - (1 + LINK_SIZE);
1218 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1219 {
1220 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1221 /* This is correct regardless of common->capture_last_ptr. */
1222 capture_last_found = TRUE;
1223 }
1224 cc = next_opcode(common, cc);
1225 }
1226
1227 SLJIT_ASSERT(cc != NULL);
1228 while (cc < ccend)
1229 switch(*cc)
1230 {
1231 case OP_SET_SOM:
1232 SLJIT_ASSERT(common->has_set_som);
1233 stack_restore = TRUE;
1234 if (!setsom_found)
1235 {
1236 length += 2;
1237 setsom_found = TRUE;
1238 }
1239 cc += 1;
1240 break;
1241
1242 case OP_MARK:
1243 case OP_PRUNE_ARG:
1244 case OP_THEN_ARG:
1245 SLJIT_ASSERT(common->mark_ptr != 0);
1246 stack_restore = TRUE;
1247 if (!setmark_found)
1248 {
1249 length += 2;
1250 setmark_found = TRUE;
1251 }
1252 if (common->control_head_ptr != 0)
1253 *needs_control_head = TRUE;
1254 cc += 1 + 2 + cc[1];
1255 break;
1256
1257 case OP_RECURSE:
1258 stack_restore = TRUE;
1259 if (common->has_set_som && !setsom_found)
1260 {
1261 length += 2;
1262 setsom_found = TRUE;
1263 }
1264 if (common->mark_ptr != 0 && !setmark_found)
1265 {
1266 length += 2;
1267 setmark_found = TRUE;
1268 }
1269 if (common->capture_last_ptr != 0 && !capture_last_found)
1270 {
1271 length += 2;
1272 capture_last_found = TRUE;
1273 }
1274 cc += 1 + LINK_SIZE;
1275 break;
1276
1277 case OP_CBRA:
1278 case OP_CBRAPOS:
1279 case OP_SCBRA:
1280 case OP_SCBRAPOS:
1281 stack_restore = TRUE;
1282 if (common->capture_last_ptr != 0 && !capture_last_found)
1283 {
1284 length += 2;
1285 capture_last_found = TRUE;
1286 }
1287 length += 3;
1288 cc += 1 + LINK_SIZE + IMM2_SIZE;
1289 break;
1290
1291 case OP_PRUNE:
1292 case OP_SKIP:
1293 case OP_SKIP_ARG:
1294 case OP_COMMIT:
1295 if (common->control_head_ptr != 0)
1296 *needs_control_head = TRUE;
1297 /* Fall through. */
1298
1299 default:
1300 stack_restore = TRUE;
1301 /* Fall through. */
1302
1303 case OP_NOT_WORD_BOUNDARY:
1304 case OP_WORD_BOUNDARY:
1305 case OP_NOT_DIGIT:
1306 case OP_DIGIT:
1307 case OP_NOT_WHITESPACE:
1308 case OP_WHITESPACE:
1309 case OP_NOT_WORDCHAR:
1310 case OP_WORDCHAR:
1311 case OP_ANY:
1312 case OP_ALLANY:
1313 case OP_ANYBYTE:
1314 case OP_NOTPROP:
1315 case OP_PROP:
1316 case OP_ANYNL:
1317 case OP_NOT_HSPACE:
1318 case OP_HSPACE:
1319 case OP_NOT_VSPACE:
1320 case OP_VSPACE:
1321 case OP_EXTUNI:
1322 case OP_EODN:
1323 case OP_EOD:
1324 case OP_CIRC:
1325 case OP_CIRCM:
1326 case OP_DOLL:
1327 case OP_DOLLM:
1328 case OP_CHAR:
1329 case OP_CHARI:
1330 case OP_NOT:
1331 case OP_NOTI:
1332
1333 case OP_EXACT:
1334 case OP_POSSTAR:
1335 case OP_POSPLUS:
1336 case OP_POSQUERY:
1337 case OP_POSUPTO:
1338
1339 case OP_EXACTI:
1340 case OP_POSSTARI:
1341 case OP_POSPLUSI:
1342 case OP_POSQUERYI:
1343 case OP_POSUPTOI:
1344
1345 case OP_NOTEXACT:
1346 case OP_NOTPOSSTAR:
1347 case OP_NOTPOSPLUS:
1348 case OP_NOTPOSQUERY:
1349 case OP_NOTPOSUPTO:
1350
1351 case OP_NOTEXACTI:
1352 case OP_NOTPOSSTARI:
1353 case OP_NOTPOSPLUSI:
1354 case OP_NOTPOSQUERYI:
1355 case OP_NOTPOSUPTOI:
1356
1357 case OP_TYPEEXACT:
1358 case OP_TYPEPOSSTAR:
1359 case OP_TYPEPOSPLUS:
1360 case OP_TYPEPOSQUERY:
1361 case OP_TYPEPOSUPTO:
1362
1363 case OP_CLASS:
1364 case OP_NCLASS:
1365 case OP_XCLASS:
1366
1367 cc = next_opcode(common, cc);
1368 SLJIT_ASSERT(cc != NULL);
1369 break;
1370 }
1371
1372 /* Possessive quantifiers can use a special case. */
1373 if (SLJIT_UNLIKELY(possessive == length))
1374 return stack_restore ? no_frame : no_stack;
1375
1376 if (length > 0)
1377 return length + 1;
1378 return stack_restore ? no_frame : no_stack;
1379 }
1380
1381 static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
1382 {
1383 DEFINE_COMPILER;
1384 BOOL setsom_found = recursive;
1385 BOOL setmark_found = recursive;
1386 /* The last capture is a local variable even for recursions. */
1387 BOOL capture_last_found = FALSE;
1388 int offset;
1389
1390 /* >= 1 + shortest item size (2) */
1391 SLJIT_UNUSED_ARG(stacktop);
1392 SLJIT_ASSERT(stackpos >= stacktop + 2);
1393
1394 stackpos = STACK(stackpos);
1395 if (ccend == NULL)
1396 {
1397 ccend = bracketend(cc) - (1 + LINK_SIZE);
1398 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1399 cc = next_opcode(common, cc);
1400 }
1401
1402 SLJIT_ASSERT(cc != NULL);
1403 while (cc < ccend)
1404 switch(*cc)
1405 {
1406 case OP_SET_SOM:
1407 SLJIT_ASSERT(common->has_set_som);
1408 if (!setsom_found)
1409 {
1410 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1411 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1412 stackpos += (int)sizeof(sljit_sw);
1413 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1414 stackpos += (int)sizeof(sljit_sw);
1415 setsom_found = TRUE;
1416 }
1417 cc += 1;
1418 break;
1419
1420 case OP_MARK:
1421 case OP_PRUNE_ARG:
1422 case OP_THEN_ARG:
1423 SLJIT_ASSERT(common->mark_ptr != 0);
1424 if (!setmark_found)
1425 {
1426 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1428 stackpos += (int)sizeof(sljit_sw);
1429 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1430 stackpos += (int)sizeof(sljit_sw);
1431 setmark_found = TRUE;
1432 }
1433 cc += 1 + 2 + cc[1];
1434 break;
1435
1436 case OP_RECURSE:
1437 if (common->has_set_som && !setsom_found)
1438 {
1439 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1440 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1441 stackpos += (int)sizeof(sljit_sw);
1442 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1443 stackpos += (int)sizeof(sljit_sw);
1444 setsom_found = TRUE;
1445 }
1446 if (common->mark_ptr != 0 && !setmark_found)
1447 {
1448 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1449 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1450 stackpos += (int)sizeof(sljit_sw);
1451 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1452 stackpos += (int)sizeof(sljit_sw);
1453 setmark_found = TRUE;
1454 }
1455 if (common->capture_last_ptr != 0 && !capture_last_found)
1456 {
1457 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1458 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1459 stackpos += (int)sizeof(sljit_sw);
1460 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1461 stackpos += (int)sizeof(sljit_sw);
1462 capture_last_found = TRUE;
1463 }
1464 cc += 1 + LINK_SIZE;
1465 break;
1466
1467 case OP_CBRA:
1468 case OP_CBRAPOS:
1469 case OP_SCBRA:
1470 case OP_SCBRAPOS:
1471 if (common->capture_last_ptr != 0 && !capture_last_found)
1472 {
1473 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1474 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1475 stackpos += (int)sizeof(sljit_sw);
1476 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1477 stackpos += (int)sizeof(sljit_sw);
1478 capture_last_found = TRUE;
1479 }
1480 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1481 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1482 stackpos += (int)sizeof(sljit_sw);
1483 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1484 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1486 stackpos += (int)sizeof(sljit_sw);
1487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1488 stackpos += (int)sizeof(sljit_sw);
1489
1490 cc += 1 + LINK_SIZE + IMM2_SIZE;
1491 break;
1492
1493 default:
1494 cc = next_opcode(common, cc);
1495 SLJIT_ASSERT(cc != NULL);
1496 break;
1497 }
1498
1499 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1500 SLJIT_ASSERT(stackpos == STACK(stacktop));
1501 }
1502
1503 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1504 {
1505 int private_data_length = needs_control_head ? 3 : 2;
1506 int size;
1507 pcre_uchar *alternative;
1508 /* Calculate the sum of the private machine words. */
1509 while (cc < ccend)
1510 {
1511 size = 0;
1512 switch(*cc)
1513 {
1514 case OP_ASSERT:
1515 case OP_ASSERT_NOT:
1516 case OP_ASSERTBACK:
1517 case OP_ASSERTBACK_NOT:
1518 case OP_ONCE:
1519 case OP_ONCE_NC:
1520 case OP_BRAPOS:
1521 case OP_SBRA:
1522 case OP_SBRAPOS:
1523 case OP_SCOND:
1524 private_data_length++;
1525 cc += 1 + LINK_SIZE;
1526 break;
1527
1528 case OP_CBRA:
1529 case OP_SCBRA:
1530 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1531 private_data_length++;
1532 cc += 1 + LINK_SIZE + IMM2_SIZE;
1533 break;
1534
1535 case OP_CBRAPOS:
1536 case OP_SCBRAPOS:
1537 private_data_length += 2;
1538 cc += 1 + LINK_SIZE + IMM2_SIZE;
1539 break;
1540
1541 case OP_COND:
1542 /* Might be a hidden SCOND. */
1543 alternative = cc + GET(cc, 1);
1544 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1545 private_data_length++;
1546 cc += 1 + LINK_SIZE;
1547 break;
1548
1549 CASE_ITERATOR_PRIVATE_DATA_1
1550 if (PRIVATE_DATA(cc))
1551 private_data_length++;
1552 cc += 2;
1553 #ifdef SUPPORT_UTF
1554 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1555 #endif
1556 break;
1557
1558 CASE_ITERATOR_PRIVATE_DATA_2A
1559 if (PRIVATE_DATA(cc))
1560 private_data_length += 2;
1561 cc += 2;
1562 #ifdef SUPPORT_UTF
1563 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1564 #endif
1565 break;
1566
1567 CASE_ITERATOR_PRIVATE_DATA_2B
1568 if (PRIVATE_DATA(cc))
1569 private_data_length += 2;
1570 cc += 2 + IMM2_SIZE;
1571 #ifdef SUPPORT_UTF
1572 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1573 #endif
1574 break;
1575
1576 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1577 if (PRIVATE_DATA(cc))
1578 private_data_length++;
1579 cc += 1;
1580 break;
1581
1582 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1583 if (PRIVATE_DATA(cc))
1584 private_data_length += 2;
1585 cc += 1;
1586 break;
1587
1588 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1589 if (PRIVATE_DATA(cc))
1590 private_data_length += 2;
1591 cc += 1 + IMM2_SIZE;
1592 break;
1593
1594 case OP_CLASS:
1595 case OP_NCLASS:
1596 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1597 case OP_XCLASS:
1598 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1599 #else
1600 size = 1 + 32 / (int)sizeof(pcre_uchar);
1601 #endif
1602 if (PRIVATE_DATA(cc))
1603 private_data_length += get_class_iterator_size(cc + size);
1604 cc += size;
1605 break;
1606
1607 default:
1608 cc = next_opcode(common, cc);
1609 SLJIT_ASSERT(cc != NULL);
1610 break;
1611 }
1612 }
1613 SLJIT_ASSERT(cc == ccend);
1614 return private_data_length;
1615 }
1616
/* Emits code that saves (save != FALSE) or restores (save == FALSE) the
private data words belonging to the code range [cc, ccend). The words live in
locals (addressed through SLJIT_LOCALS_REG) and are copied to / from stack
slots addressed relative to STACK_TOP, between the positions derived from
stackptr and stacktop. In save mode the first words queued are
common->recursive_head_ptr and, when needs_control_head is set,
common->control_head_ptr. Values are moved through TMP1 and TMP2 in
alternation. */
1617 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1618 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1619 {
1620 DEFINE_COMPILER;
/* Offsets (relative to SLJIT_LOCALS_REG) of the words selected in the current step. */
1621 int srcw[2];
1622 int count, size;
/* tmp1next tells which of TMP1 / TMP2 is used by the next transfer. */
1623 BOOL tmp1next = TRUE;
/* TRUE while the given register holds no value which is waiting to be stored. */
1624 BOOL tmp1empty = TRUE;
1625 BOOL tmp2empty = TRUE;
1626 pcre_uchar *alternative;
1627 enum {
1628 start,
1629 loop,
1630 end
1631 } status;
1632
/* Only the save pass visits the "start" state. */
1633 status = save ? start : loop;
1634 stackptr = STACK(stackptr - 2);
1635 stacktop = STACK(stacktop - 1);
1636
1637 if (!save)
1638 {
/* Restore: step over one slot (two with needs_control_head) - presumably the
slots filled from the head pointers by the save pass, TODO confirm against the
callers - then pre-load up to two saved words into TMP1 and TMP2. */
1639 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1640 if (stackptr < stacktop)
1641 {
1642 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1643 stackptr += sizeof(sljit_sw);
1644 tmp1empty = FALSE;
1645 }
1646 if (stackptr < stacktop)
1647 {
1648 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1649 stackptr += sizeof(sljit_sw);
1650 tmp2empty = FALSE;
1651 }
1652 /* The tmp1next must be TRUE in either way. */
1653 }
1654
/* Each iteration selects zero, one or two local words (count, srcw[]) and
then transfers them. */
1655 do
1656 {
1657 count = 0;
1658 switch(status)
1659 {
1660 case start:
/* Save pass only: queue the recursive head and, if requested, the control head. */
1661 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1662 count = 1;
1663 srcw[0] = common->recursive_head_ptr;
1664 if (needs_control_head)
1665 {
1666 SLJIT_ASSERT(common->control_head_ptr != 0);
1667 count = 2;
1668 srcw[1] = common->control_head_ptr;
1669 }
1670 status = loop;
1671 break;
1672
1673 case loop:
/* Select the words owned by the opcode at cc, then step over the opcode. */
1674 if (cc >= ccend)
1675 {
1676 status = end;
1677 break;
1678 }
1679
1680 switch(*cc)
1681 {
1682 case OP_ASSERT:
1683 case OP_ASSERT_NOT:
1684 case OP_ASSERTBACK:
1685 case OP_ASSERTBACK_NOT:
1686 case OP_ONCE:
1687 case OP_ONCE_NC:
1688 case OP_BRAPOS:
1689 case OP_SBRA:
1690 case OP_SBRAPOS:
1691 case OP_SCOND:
1692 count = 1;
1693 srcw[0] = PRIVATE_DATA(cc);
1694 SLJIT_ASSERT(srcw[0] != 0);
1695 cc += 1 + LINK_SIZE;
1696 break;
1697
1698 case OP_CBRA:
1699 case OP_SCBRA:
/* Only capturing brackets whose optimized_cbracket entry is 0 have a word to copy. */
1700 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1701 {
1702 count = 1;
1703 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1704 }
1705 cc += 1 + LINK_SIZE + IMM2_SIZE;
1706 break;
1707
1708 case OP_CBRAPOS:
1709 case OP_SCBRAPOS:
1710 count = 2;
1711 srcw[0] = PRIVATE_DATA(cc);
1712 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1713 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1714 cc += 1 + LINK_SIZE + IMM2_SIZE;
1715 break;
1716
1717 case OP_COND:
1718 /* Might be a hidden SCOND. */
1719 alternative = cc + GET(cc, 1);
1720 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1721 {
1722 count = 1;
1723 srcw[0] = PRIVATE_DATA(cc);
1724 SLJIT_ASSERT(srcw[0] != 0);
1725 }
1726 cc += 1 + LINK_SIZE;
1727 break;
1728
/* Iterators: copied only when a private data offset was assigned to them. */
1729 CASE_ITERATOR_PRIVATE_DATA_1
1730 if (PRIVATE_DATA(cc))
1731 {
1732 count = 1;
1733 srcw[0] = PRIVATE_DATA(cc);
1734 }
1735 cc += 2;
1736 #ifdef SUPPORT_UTF
1737 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1738 #endif
1739 break;
1740
1741 CASE_ITERATOR_PRIVATE_DATA_2A
1742 if (PRIVATE_DATA(cc))
1743 {
1744 count = 2;
1745 srcw[0] = PRIVATE_DATA(cc);
1746 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1747 }
1748 cc += 2;
1749 #ifdef SUPPORT_UTF
1750 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1751 #endif
1752 break;
1753
1754 CASE_ITERATOR_PRIVATE_DATA_2B
1755 if (PRIVATE_DATA(cc))
1756 {
1757 count = 2;
1758 srcw[0] = PRIVATE_DATA(cc);
1759 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1760 }
1761 cc += 2 + IMM2_SIZE;
1762 #ifdef SUPPORT_UTF
1763 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1764 #endif
1765 break;
1766
1767 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1768 if (PRIVATE_DATA(cc))
1769 {
1770 count = 1;
1771 srcw[0] = PRIVATE_DATA(cc);
1772 }
1773 cc += 1;
1774 break;
1775
1776 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1777 if (PRIVATE_DATA(cc))
1778 {
1779 count = 2;
1780 srcw[0] = PRIVATE_DATA(cc);
1781 srcw[1] = srcw[0] + sizeof(sljit_sw);
1782 }
1783 cc += 1;
1784 break;
1785
1786 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1787 if (PRIVATE_DATA(cc))
1788 {
1789 count = 2;
1790 srcw[0] = PRIVATE_DATA(cc);
1791 srcw[1] = srcw[0] + sizeof(sljit_sw);
1792 }
1793 cc += 1 + IMM2_SIZE;
1794 break;
1795
1796 case OP_CLASS:
1797 case OP_NCLASS:
1798 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1799 case OP_XCLASS:
1800 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1801 #else
1802 size = 1 + 32 / (int)sizeof(pcre_uchar);
1803 #endif
/* The number of words depends on the iterator which follows the class. */
1804 if (PRIVATE_DATA(cc))
1805 switch(get_class_iterator_size(cc + size))
1806 {
1807 case 1:
1808 count = 1;
1809 srcw[0] = PRIVATE_DATA(cc);
1810 break;
1811
1812 case 2:
1813 count = 2;
1814 srcw[0] = PRIVATE_DATA(cc);
1815 srcw[1] = srcw[0] + sizeof(sljit_sw);
1816 break;
1817
1818 default:
1819 SLJIT_ASSERT_STOP();
1820 break;
1821 }
1822 cc += size;
1823 break;
1824
1825 default:
1826 cc = next_opcode(common, cc);
1827 SLJIT_ASSERT(cc != NULL);
1828 break;
1829 }
1830 break;
1831
1832 case end:
1833 SLJIT_ASSERT_STOP();
1834 break;
1835 }
1836
/* Transfer the selected words, highest srcw[] index first. */
1837 while (count > 0)
1838 {
1839 count--;
1840 if (save)
1841 {
/* Save: store the value held by the next register (if any) to the stack,
then load the selected local into that register. */
1842 if (tmp1next)
1843 {
1844 if (!tmp1empty)
1845 {
1846 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1847 stackptr += sizeof(sljit_sw);
1848 }
1849 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1850 tmp1empty = FALSE;
1851 tmp1next = FALSE;
1852 }
1853 else
1854 {
1855 if (!tmp2empty)
1856 {
1857 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1858 stackptr += sizeof(sljit_sw);
1859 }
1860 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1861 tmp2empty = FALSE;
1862 tmp1next = TRUE;
1863 }
1864 }
1865 else
1866 {
/* Restore: write the pre-loaded value back to the selected local, then
pre-load the next saved word into the same register while slots remain. */
1867 if (tmp1next)
1868 {
1869 SLJIT_ASSERT(!tmp1empty);
1870 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1871 tmp1empty = stackptr >= stacktop;
1872 if (!tmp1empty)
1873 {
1874 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1875 stackptr += sizeof(sljit_sw);
1876 }
1877 tmp1next = FALSE;
1878 }
1879 else
1880 {
1881 SLJIT_ASSERT(!tmp2empty);
1882 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1883 tmp2empty = stackptr >= stacktop;
1884 if (!tmp2empty)
1885 {
1886 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1887 stackptr += sizeof(sljit_sw);
1888 }
1889 tmp1next = TRUE;
1890 }
1891 }
1892 }
1893 }
1894 while (status != end);
1895
1896 if (save)
1897 {
/* Save: store the values still held in the registers. The register which
would be used next holds the older value, so it is stored first. */
1898 if (tmp1next)
1899 {
1900 if (!tmp1empty)
1901 {
1902 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1903 stackptr += sizeof(sljit_sw);
1904 }
1905 if (!tmp2empty)
1906 {
1907 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1908 stackptr += sizeof(sljit_sw);
1909 }
1910 }
1911 else
1912 {
1913 if (!tmp2empty)
1914 {
1915 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1916 stackptr += sizeof(sljit_sw);
1917 }
1918 if (!tmp1empty)
1919 {
1920 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1921 stackptr += sizeof(sljit_sw);
1922 }
1923 }
1924 }
/* The whole range was visited, every slot was used, and a restore left no
value behind in the registers. */
1925 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1926 }
1927
1928 static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, pcre_uint8 *current_offset)
1929 {
1930 pcre_uchar *end = bracketend(cc);
1931 BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
1932
1933 /* Assert captures then. */
1934 if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
1935 current_offset = NULL;
1936 /* Conditional block does not. */
1937 if (*cc == OP_COND || *cc == OP_SCOND)
1938 has_alternatives = FALSE;
1939
1940 cc = next_opcode(common, cc);
1941 if (has_alternatives)
1942 current_offset = common->then_offsets + (cc - common->start);
1943
1944 while (cc < end)
1945 {
1946 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
1947 cc = set_then_offsets(common, cc, current_offset);
1948 else
1949 {
1950 if (*cc == OP_ALT && has_alternatives)
1951 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
1952 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
1953 *current_offset = 1;
1954 cc = next_opcode(common, cc);
1955 }
1956 }
1957
1958 return end;
1959 }
1960
1961 #undef CASE_ITERATOR_PRIVATE_DATA_1
1962 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1963 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1964 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1965 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1966 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1967
1968 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1969 {
1970 return (value & (value - 1)) == 0;
1971 }
1972
1973 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1974 {
1975 while (list)
1976 {
1977 /* sljit_set_label is clever enough to do nothing
1978 if either the jump or the label is NULL. */
1979 SET_LABEL(list->jump, label);
1980 list = list->next;
1981 }
1982 }
1983
1984 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1985 {
1986 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1987 if (list_item)
1988 {
1989 list_item->next = *list;
1990 list_item->jump = jump;
1991 *list = list_item;
1992 }
1993 }
1994
1995 static void add_stub(compiler_common *common, struct sljit_jump *start)
1996 {
1997 DEFINE_COMPILER;
1998 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1999
2000 if (list_item)
2001 {
2002 list_item->start = start;
2003 list_item->quit = LABEL();
2004 list_item->next = common->stubs;
2005 common->stubs = list_item;
2006 }
2007 }
2008
2009 static void flush_stubs(compiler_common *common)
2010 {
2011 DEFINE_COMPILER;
2012 stub_list* list_item = common->stubs;
2013
2014 while (list_item)
2015 {
2016 JUMPHERE(list_item->start);
2017 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
2018 JUMPTO(SLJIT_JUMP, list_item->quit);
2019 list_item = list_item->next;
2020 }
2021 common->stubs = NULL;
2022 }
2023
2024 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
2025 {
2026 DEFINE_COMPILER;
2027
2028 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
2029 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
2030 }
2031
2032 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
2033 {
2034 /* May destroy all locals and registers except TMP2. */
2035 DEFINE_COMPILER;
2036
2037 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2038 #ifdef DESTROY_REGISTERS
2039 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
2040 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2041 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
2042 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
2043 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
2044 #endif
2045 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
2046 }
2047
2048 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
2049 {
2050 DEFINE_COMPILER;
2051 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
2052 }
2053
2054 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
2055 {
2056 DEFINE_COMPILER;
2057 struct sljit_label *loop;
2058 int i;
2059
2060 /* At this point we can freely use all temporary registers. */
2061 SLJIT_ASSERT(length > 1);
2062 /* TMP1 returns with begin - 1. */
2063 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
2064 if (length < 8)
2065 {
2066 for (i = 1; i < length; i++)
2067 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
2068 }
2069 else
2070 {
2071 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
2072 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
2073 loop = LABEL();
2074 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
2075 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
2076 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2077 }
2078 }
2079
2080 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
2081 {
2082 DEFINE_COMPILER;
2083 struct sljit_label *loop;
2084 int i;
2085
2086 SLJIT_ASSERT(length > 1);
2087 /* OVECTOR(1) contains the "string begin - 1" constant. */
2088 if (length > 2)
2089 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2090 if (length < 8)
2091 {
2092 for (i = 2; i < length; i++)
2093 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2094 }
2095 else
2096 {
2097 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2098 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2099 loop = LABEL();
2100 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2101 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2102 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2103 }
2104
2105 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2106 if (common->mark_ptr != 0)
2107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2108 SLJIT_ASSERT(common->control_head_ptr != 0);
2109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2110 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2111 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2112 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2113 }
2114
2115 static sljit_sw SLJIT_CALL do_check_control_chain(sljit_sw *current)
2116 {
2117 sljit_sw return_value = 0;
2118 const pcre_uchar *skip_arg = NULL;
2119
2120 SLJIT_ASSERT(current != NULL);
2121 do
2122 {
2123 switch (current[-2])
2124 {
2125 case type_commit:
2126 /* Commit overwrites all. */
2127 return -1;
2128
2129 case type_prune:
2130 case type_then_trap:
2131 break;
2132
2133 case type_skip:
2134 /* Overwrites prune, but not other skips. */
2135 if (return_value == 0 && skip_arg == NULL)
2136 return_value = current[-3];
2137 break;
2138
2139 case type_skip_arg:
2140 if (return_value == 0 && skip_arg == NULL)
2141 skip_arg = (pcre_uchar *)current[-3];
2142 break;
2143
2144 case type_mark:
2145 if (return_value == 0 && skip_arg != NULL)
2146 if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[-3]) == 0)
2147 return_value = current[-4];
2148 break;
2149
2150 default:
2151 SLJIT_ASSERT_STOP();
2152 break;
2153 }
2154 current = (sljit_sw*)current[-1];
2155 }
2156 while (current != NULL);
2157 return (return_value != 0 || skip_arg == NULL) ? return_value : -2;
2158 }
2159
2160 static sljit_sw SLJIT_CALL do_search_then_trap(sljit_sw *current, sljit_sw start)
2161 {
2162 do
2163 {
2164 switch (current[-2])
2165 {
2166 case type_commit:
2167 /* Commit overwrites all. */
2168 return 0;
2169
2170 case type_then_trap:
2171 if (current[-3] == start)
2172 return (sljit_sw)current;
2173 break;
2174
2175 case type_prune:
2176 case type_skip:
2177 case type_skip_arg:
2178 case type_mark:
2179 break;
2180
2181 default:
2182 SLJIT_ASSERT_STOP();
2183 break;
2184 }
2185 current = (sljit_sw*)current[-1];
2186 SLJIT_ASSERT(current != NULL);
2187 }
2188 while (TRUE);
2189 }
2190
/* Emits the code which publishes a successful match. The ovector locals
(machine words) are converted to offsets relative to jit_arguments.begin and
stored as ints into jit_arguments.offsets, offset_count entries in total. The
mark local, if used, is copied to jit_arguments.mark_ptr. The return value is
left in SLJIT_RETURN_REG: 1 when topbracket <= 1, otherwise it is derived from
the highest ovector pair whose first word differs from the original value of
OVECTOR(1). */
2191 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2192 {
2193 DEFINE_COMPILER;
2194 struct sljit_label *loop;
2195 struct sljit_jump *early_quit;
2196
2197 /* At this point we can freely use all registers. */
/* Keep the old value of OVECTOR(1) in SLJIT_SAVED_REG3 (it is compared against
in the return value scan below), then store STR_PTR into OVECTOR(1). */
2198 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2199 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2200
/* Register use from here: SCRATCH_REG1 = arguments, later begin;
SCRATCH_REG2 = remaining entry count; SCRATCH_REG3 = mark value, later the
output pointer (one int before offsets, because the store below is a
SLJIT_MOVU_SI with a sizeof(int) offset). */
2201 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2202 if (common->mark_ptr != 0)
2203 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2204 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2205 if (common->mark_ptr != 0)
2206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2207 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2208 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
/* SLJIT_SAVED_REG1 walks over the ovector locals. */
2209 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2210 /* Unlikely, but possible */
2211 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2212 loop = LABEL();
/* Pointer minus begin gives the offset of the entry. */
2213 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2214 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2215 /* Copy the integer value to the output buffer */
2216 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* The difference is shifted right by UCHAR_SHIFT in the 16 and 32 bit libraries. */
2217 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2218 #endif
2219 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2220 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2221 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2222 JUMPHERE(early_quit);
2223
2224 /* Calculate the return value, which is the maximum ovector value. */
2225 if (topbracket > 1)
2226 {
/* Scan the ovector pairs downward, starting from pair topbracket. The counter
starts at topbracket + 1 and is decremented once per inspected pair; the scan
stops at the first pair whose first word is not the saved OVECTOR(1) value. */
2227 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2228 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2229
2230 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2231 loop = LABEL();
2232 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2233 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2234 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2235 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2236 }
2237 else
2238 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2239 }
2240
/* Emits the code which returns PCRE_ERROR_PARTIAL. The error code is placed
in SLJIT_RETURN_REG. When jit_arguments.real_offset_count is at least 2,
offsets[0] and offsets[1] are set to the begin and end of the partial match,
and offsets[2] is also set when the count is at least 3. All values are
relative to jit_arguments.begin. The emitted code always continues at quit. */
2241 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2242 {
2243 DEFINE_COMPILER;
2244 struct sljit_jump *jump;
2245
2246 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
/* hit_start is expected to be allocated only in soft partial mode. */
2247 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2248 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2249
2250 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2251 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2252 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Fewer than two output slots: nothing can be stored. */
2253 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2254
2255 /* Store match begin and end. */
/* SLJIT_SAVED_REG1 = begin, SLJIT_SCRATCH_REG2 = offsets array. */
2256 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2257 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2258
/* offsets[2] is written only when at least three slots are available. Its
source is the start_ptr local in hard partial mode, otherwise the word which
follows hit_start. */
2259 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2260 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2261 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2262 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2263 #endif
2264 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2265 JUMPHERE(jump);
2266
/* offsets[1] = STR_END - begin. STR_END is SLJIT_SAVED_REG2 (see the compile
time assertion above), so the subtraction overwrites STR_END. */
2267 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2268 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2269 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2270 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2271 #endif
2272 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2273
/* offsets[0] = (start_used_ptr local in hard partial mode, hit_start local
otherwise) - begin. */
2274 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2275 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2276 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2277 #endif
2278 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2279
2280 JUMPTO(SLJIT_JUMP, quit);
2281 }
2282
2283 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2284 {
2285 /* May destroy TMP1. */
2286 DEFINE_COMPILER;
2287 struct sljit_jump *jump;
2288
2289 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2290 {
2291 /* The value of -1 must be kept for start_used_ptr! */
2292 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2293 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2294 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2295 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2296 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2297 JUMPHERE(jump);
2298 }
2299 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2300 {
2301 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2302 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2303 JUMPHERE(jump);
2304 }
2305 }
2306
2307 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2308 {
2309 /* Detects if the character has an othercase. */
2310 unsigned int c;
2311
2312 #ifdef SUPPORT_UTF
2313 if (common->utf)
2314 {
2315 GETCHAR(c, cc);
2316 if (c > 127)
2317 {
2318 #ifdef SUPPORT_UCP
2319 return c != UCD_OTHERCASE(c);
2320 #else
2321 return FALSE;
2322 #endif
2323 }
2324 #ifndef COMPILE_PCRE8
2325 return common->fcc[c] != c;
2326 #endif
2327 }
2328 else
2329 #endif
2330 c = *cc;
2331 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2332 }
2333
2334 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2335 {
2336 /* Returns with the othercase. */
2337 #ifdef SUPPORT_UTF
2338 if (common->utf && c > 127)
2339 {
2340 #ifdef SUPPORT_UCP
2341 return UCD_OTHERCASE(c);
2342 #else
2343 return c;
2344 #endif
2345 }
2346 #endif
2347 return TABLE_GET(c, common->fcc, c);
2348 }
2349
2350 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2351 {
2352 /* Detects if the character and its othercase has only 1 bit difference. */
2353 unsigned int c, oc, bit;
2354 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2355 int n;
2356 #endif
2357
2358 #ifdef SUPPORT_UTF
2359 if (common->utf)
2360 {
2361 GETCHAR(c, cc);
2362 if (c <= 127)
2363 oc = common->fcc[c];
2364 else
2365 {
2366 #ifdef SUPPORT_UCP
2367 oc = UCD_OTHERCASE(c);
2368 #else
2369 oc = c;
2370 #endif
2371 }
2372 }
2373 else
2374 {
2375 c = *cc;
2376 oc = TABLE_GET(c, common->fcc, c);
2377 }
2378 #else
2379 c = *cc;
2380 oc = TABLE_GET(c, common->fcc, c);
2381 #endif
2382
2383 SLJIT_ASSERT(c != oc);
2384
2385 bit = c ^ oc;
2386 /* Optimized for English alphabet. */
2387 if (c <= 127 && bit == 0x20)
2388 return (0 << 8) | 0x20;
2389
2390 /* Since c != oc, they must have at least 1 bit difference. */
2391 if (!is_powerof2(bit))
2392 return 0;
2393
2394 #if defined COMPILE_PCRE8
2395
2396 #ifdef SUPPORT_UTF
2397 if (common->utf && c > 127)
2398 {
2399 n = GET_EXTRALEN(*cc);
2400 while ((bit & 0x3f) == 0)
2401 {
2402 n--;
2403 bit >>= 6;
2404 }
2405 return (n << 8) | bit;
2406 }
2407 #endif /* SUPPORT_UTF */
2408 return (0 << 8) | bit;
2409
2410 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2411
2412 #ifdef SUPPORT_UTF
2413 if (common->utf && c > 65535)
2414 {
2415 if (bit >= (1 << 10))
2416 bit >>= 10;
2417 else
2418 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2419 }
2420 #endif /* SUPPORT_UTF */
2421 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2422
2423 #endif /* COMPILE_PCRE[8|16|32] */
2424 }
2425
2426 static void check_partial(compiler_common *common, BOOL force)
2427 {
2428 /* Checks whether a partial matching is occured. Does not modify registers. */
2429 DEFINE_COMPILER;
2430 struct sljit_jump *jump = NULL;
2431
2432 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2433
2434 if (common->mode == JIT_COMPILE)
2435 return;
2436
2437 if (!force)
2438 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2439 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2440 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2441
2442 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2443 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2444 else
2445 {
2446 if (common->partialmatchlabel != NULL)
2447 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2448 else
2449 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2450 }
2451
2452 if (jump != NULL)
2453 JUMPHERE(jump);
2454 }
2455
2456 static void check_str_end(compiler_common *common, jump_list **end_reached)
2457 {
2458 /* Does not affect registers. Usually used in a tight spot. */
2459 DEFINE_COMPILER;
2460 struct sljit_jump *jump;
2461
2462 if (common->mode == JIT_COMPILE)
2463 {
2464 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2465 return;
2466 }
2467
2468 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2469 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2470 {
2471 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2472 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2473 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2474 }
2475 else
2476 {
2477 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2478 if (common->partialmatchlabel != NULL)
2479 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2480 else
2481 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2482 }
2483 JUMPHERE(jump);
2484 }
2485
2486 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2487 {
2488 DEFINE_COMPILER;
2489 struct sljit_jump *jump;
2490
2491 if (common->mode == JIT_COMPILE)
2492 {
2493 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2494 return;
2495 }
2496
2497 /* Partial matching mode. */
2498 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2499 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2500 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2501 {
2502 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2503 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2504 }
2505 else
2506 {
2507 if (common->partialmatchlabel != NULL)
2508 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2509 else
2510 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2511 }
2512 JUMPHERE(jump);
2513 }
2514
2515 static void read_char(compiler_common *common)
2516 {
2517 /* Reads the character into TMP1, updates STR_PTR.
2518 Does not check STR_END. TMP2 Destroyed. */
2519 DEFINE_COMPILER;
2520 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2521 struct sljit_jump *jump;
2522 #endif
2523
2524 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2525 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2526 if (common->utf)
2527 {
2528 #if defined COMPILE_PCRE8
2529 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2530 #elif defined COMPILE_PCRE16
2531 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2532 #endif /* COMPILE_PCRE[8|16] */
2533 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2534 JUMPHERE(jump);
2535 }
2536 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2537 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2538 }
2539
2540 static void peek_char(compiler_common *common)
2541 {
2542 /* Reads the character into TMP1, keeps STR_PTR.
2543 Does not check STR_END. TMP2 Destroyed. */
2544 DEFINE_COMPILER;
2545 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2546 struct sljit_jump *jump;
2547 #endif
2548
2549 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2550 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2551 if (common->utf)
2552 {
2553 #if defined COMPILE_PCRE8
2554 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2555 #elif defined COMPILE_PCRE16
2556 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2557 #endif /* COMPILE_PCRE[8|16] */
2558 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2559 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2560 JUMPHERE(jump);
2561 }
2562 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2563 }
2564
2565 static void read_char8_type(compiler_common *common)
2566 {
2567 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2568 DEFINE_COMPILER;
2569 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2570 struct sljit_jump *jump;
2571 #endif
2572
2573 #ifdef SUPPORT_UTF
2574 if (common->utf)
2575 {
2576 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2577 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2578 #if defined COMPILE_PCRE8
2579 /* This can be an extra read in some situations, but hopefully
2580 it is needed in most cases. */
2581 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2582 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2583 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2584 JUMPHERE(jump);
2585 #elif defined COMPILE_PCRE16
2586 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2587 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2588 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2589 JUMPHERE(jump);
2590 /* Skip low surrogate if necessary. */
2591 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2592 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2593 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2594 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2595 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2596 #elif defined COMPILE_PCRE32
2597 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2598 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2599 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2600 JUMPHERE(jump);
2601 #endif /* COMPILE_PCRE[8|16|32] */
2602 return;
2603 }
2604 #endif /* SUPPORT_UTF */
2605 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2606 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2607 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2608 /* The ctypes array contains only 256 values. */
2609 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2610 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2611 #endif
2612 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2613 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2614 JUMPHERE(jump);
2615 #endif
2616 }
2617
2618 static void skip_char_back(compiler_common *common)
2619 {
2620 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2621 DEFINE_COMPILER;
2622 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2623 #if defined COMPILE_PCRE8
2624 struct sljit_label *label;
2625
2626 if (common->utf)
2627 {
2628 label = LABEL();
2629 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2630 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2631 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2632 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2633 return;
2634 }
2635 #elif defined COMPILE_PCRE16
2636 if (common->utf)
2637 {
2638 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2639 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2640 /* Skip low surrogate if necessary. */
2641 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2642 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2643 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2644 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2645 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2646 return;
2647 }
2648 #endif /* COMPILE_PCRE[8|16] */
2649 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2650 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2651 }
2652
2653 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2654 {
2655 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2656 DEFINE_COMPILER;
2657
2658 if (nltype == NLTYPE_ANY)
2659 {
2660 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2661 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2662 }
2663 else if (nltype == NLTYPE_ANYCRLF)
2664 {
2665 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2666 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2667 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2668 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2669 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2670 }
2671 else
2672 {
2673 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2674 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2675 }
2676 }
2677
2678 #ifdef SUPPORT_UTF
2679
2680 #if defined COMPILE_PCRE8
2681 static void do_utfreadchar(compiler_common *common)
2682 {
2683 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2684 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2685 DEFINE_COMPILER;
2686 struct sljit_jump *jump;
2687
2688 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2689 /* Searching for the first zero. */
2690 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2691 jump = JUMP(SLJIT_C_NOT_ZERO);
2692 /* Two byte sequence. */
2693 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2694 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2695 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2696 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2697 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2698 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2699 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2700 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2701 JUMPHERE(jump);
2702
2703 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2704 jump = JUMP(SLJIT_C_NOT_ZERO);
2705 /* Three byte sequence. */
2706 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2707 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2708 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2709 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2710 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2711 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2712 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2713 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2714 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2715 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2716 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2717 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2718 JUMPHERE(jump);
2719
2720 /* Four byte sequence. */
2721 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2722 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2723 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2724 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2725 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2726 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2727 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2728 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2729 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2730 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2731 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2732 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2733 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2734 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2735 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2736 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2737 }
2738
2739 static void do_utfreadtype8(compiler_common *common)
2740 {
2741 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2742 of the character (>= 0xc0). Return value in TMP1. */
2743 DEFINE_COMPILER;
2744 struct sljit_jump *jump;
2745 struct sljit_jump *compare;
2746
2747 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2748
2749 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2750 jump = JUMP(SLJIT_C_NOT_ZERO);
2751 /* Two byte sequence. */
2752 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2753 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2754 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2755 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2756 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2757 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2758 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2759 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2760 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2761
2762 JUMPHERE(compare);
2763 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2764 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2765 JUMPHERE(jump);
2766
2767 /* We only have types for characters less than 256. */
2768 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2769 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2770 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2771 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2772 }
2773
2774 #elif defined COMPILE_PCRE16
2775
2776 static void do_utfreadchar(compiler_common *common)
2777 {
2778 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2779 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2780 DEFINE_COMPILER;
2781 struct sljit_jump *jump;
2782
2783 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2784 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2785 /* Do nothing, only return. */
2786 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2787
2788 JUMPHERE(jump);
2789 /* Combine two 16 bit characters. */
2790 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2791 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2792 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2793 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2794 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2795 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2796 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2797 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2798 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2799 }
2800
2801 #endif /* COMPILE_PCRE[8|16] */
2802
2803 #endif /* SUPPORT_UTF */
2804
2805 #ifdef SUPPORT_UCP
2806
2807 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2808 #define UCD_BLOCK_MASK 127
2809 #define UCD_BLOCK_SHIFT 7
2810
2811 static void do_getucd(compiler_common *common)
2812 {
2813 /* Search the UCD record for the character comes in TMP1.
2814 Returns chartype in TMP1 and UCD offset in TMP2. */
2815 DEFINE_COMPILER;
2816
2817 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2818
2819 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2820 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2821 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2822 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2823 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2824 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2825 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2826 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2827 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2828 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2829 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2830 }
2831 #endif
2832
2833 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2834 {
2835 DEFINE_COMPILER;
2836 struct sljit_label *mainloop;
2837 struct sljit_label *newlinelabel = NULL;
2838 struct sljit_jump *start;
2839 struct sljit_jump *end = NULL;
2840 struct sljit_jump *nl = NULL;
2841 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2842 struct sljit_jump *singlechar;
2843 #endif
2844 jump_list *newline = NULL;
2845 BOOL newlinecheck = FALSE;
2846 BOOL readuchar = FALSE;
2847
2848 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2849 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2850 newlinecheck = TRUE;
2851
2852 if (firstline)
2853 {
2854 /* Search for the end of the first line. */
2855 SLJIT_ASSERT(common->first_line_end != 0);
2856 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2857
2858 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2859 {
2860 mainloop = LABEL();
2861 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2862 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2863 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2864 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2865 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2866 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2867 JUMPHERE(end);
2868 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2869 }
2870 else
2871 {
2872 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2873 mainloop = LABEL();
2874 /* Continual stores does not cause data dependency. */
2875 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2876 read_char(common);
2877 check_newlinechar(common, common->nltype, &newline, TRUE);
2878 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2879 JUMPHERE(end);
2880 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2881 set_jumps(newline, LABEL());
2882 }
2883
2884 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2885 }
2886
2887 start = JUMP(SLJIT_JUMP);
2888
2889 if (newlinecheck)
2890 {
2891 newlinelabel = LABEL();
2892 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2893 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2894 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2895 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2896 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2897 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2898 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2899 #endif
2900 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2901 nl = JUMP(SLJIT_JUMP);
2902 }
2903
2904 mainloop = LABEL();
2905
2906 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2907 #ifdef SUPPORT_UTF
2908 if (common->utf) readuchar = TRUE;
2909 #endif
2910 if (newlinecheck) readuchar = TRUE;
2911
2912 if (readuchar)
2913 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2914
2915 if (newlinecheck)
2916 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2917
2918 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2919 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2920 #if defined COMPILE_PCRE8
2921 if (common->utf)
2922 {
2923 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2924 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2925 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2926 JUMPHERE(singlechar);
2927 }
2928 #elif defined COMPILE_PCRE16
2929 if (common->utf)
2930 {
2931 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2932 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2933 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2934 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2935 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2936 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2937 JUMPHERE(singlechar);
2938 }
2939 #endif /* COMPILE_PCRE[8|16] */
2940 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2941 JUMPHERE(start);
2942
2943 if (newlinecheck)
2944 {
2945 JUMPHERE(end);
2946 JUMPHERE(nl);
2947 }
2948
2949 return mainloop;
2950 }
2951
2952 #define MAX_N_CHARS 3
2953
2954 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2955 {
2956 DEFINE_COMPILER;
2957 struct sljit_label *start;
2958 struct sljit_jump *quit;
2959 pcre_uint32 chars[MAX_N_CHARS * 2];
2960 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2961 int location = 0;
2962 pcre_int32 len, c, bit, caseless;
2963 int must_stop;
2964
2965 /* We do not support alternatives now. */
2966 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2967 return FALSE;
2968
2969 while (TRUE)
2970 {
2971 caseless = 0;
2972 must_stop = 1;
2973 switch(*cc)
2974 {
2975 case OP_CHAR:
2976 must_stop = 0;
2977 cc++;
2978 break;
2979
2980 case OP_CHARI:
2981 caseless = 1;
2982 must_stop = 0;
2983 cc++;
2984 break;
2985
2986 case OP_SOD:
2987 case OP_SOM:
2988 case OP_SET_SOM:
2989 case OP_NOT_WORD_BOUNDARY:
2990 case OP_WORD_BOUNDARY:
2991 case OP_EODN:
2992 case OP_EOD:
2993 case OP_CIRC:
2994 case OP_CIRCM:
2995 case OP_DOLL:
2996 case OP_DOLLM:
2997 /* Zero width assertions. */
2998 cc++;
2999 continue;
3000
3001 case OP_PLUS:
3002 case OP_MINPLUS:
3003 case OP_POSPLUS:
3004 cc++;
3005 break;
3006
3007 case OP_EXACT:
3008 cc += 1 + IMM2_SIZE;
3009 break;
3010
3011 case OP_PLUSI:
3012 case OP_MINPLUSI:
3013 case OP_POSPLUSI:
3014 caseless = 1;
3015 cc++;
3016 break;
3017
3018 case OP_EXACTI:
3019 caseless = 1;
3020 cc += 1 + IMM2_SIZE;
3021 break;
3022
3023 default:
3024 must_stop = 2;
3025 break;
3026 }
3027
3028 if (must_stop == 2)
3029 break;
3030
3031 len = 1;
3032 #ifdef SUPPORT_UTF
3033 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
3034 #endif
3035
3036 if (caseless && char_has_othercase(common, cc))
3037 {
3038 caseless = char_get_othercase_bit(common, cc);
3039 if (caseless == 0)
3040 return FALSE;
3041 #ifdef COMPILE_PCRE8
3042 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
3043 #else
3044 if ((caseless & 0x100) != 0)
3045 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
3046 else
3047 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
3048 #endif
3049 }
3050 else
3051 caseless = 0;
3052
3053 while (len > 0 && location < MAX_N_CHARS * 2)
3054 {
3055 c = *cc;
3056 bit = 0;
3057 if (len == (caseless & 0xff))
3058 {
3059 bit = caseless >> 8;
3060 c |= bit;
3061 }
3062
3063 chars[location] = c;
3064 chars[location + 1] = bit;
3065
3066 len--;
3067 location += 2;
3068 cc++;
3069 }
3070
3071 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
3072 break;
3073 }
3074
3075 /* At least two characters are required. */
3076 if (location < 2 * 2)
3077 return FALSE;
3078
3079 if (firstline)
3080 {
3081 SLJIT_ASSERT(common->first_line_end != 0);
3082 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3083 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3084 }
3085 else
3086 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3087
3088 start = LABEL();
3089 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3090
3091 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3092 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3093 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3094 if (chars[1] != 0)
3095 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
3096 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
3097 if (location > 2 * 2)
3098 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3099 if (chars[3] != 0)
3100 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
3101 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
3102 if (location > 2 * 2)
3103 {
3104 if (chars[5] != 0)
3105 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
3106 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
3107 }
3108 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3109
3110 JUMPHERE(quit);
3111
3112 if (firstline)
3113 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3114 else
3115 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
3116 return TRUE;
3117 }
3118
3119 #undef MAX_N_CHARS
3120
3121 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
3122 {
3123 DEFINE_COMPILER;
3124 struct sljit_label *start;
3125 struct sljit_jump *quit;
3126 struct sljit_jump *found;
3127 pcre_uchar oc, bit;
3128
3129 if (firstline)
3130 {
3131 SLJIT_ASSERT(common->first_line_end != 0);
3132 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3133 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3134 }
3135
3136 start = LABEL();
3137 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3138 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3139
3140 oc = first_char;
3141 if (caseless)
3142 {
3143 oc = TABLE_GET(first_char, common->fcc, first_char);
3144 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3145 if (first_char > 127 && common->utf)
3146 oc = UCD_OTHERCASE(first_char);
3147 #endif
3148 }
3149 if (first_char == oc)
3150 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3151 else
3152 {
3153 bit = first_char ^ oc;
3154 if (is_powerof2(bit))
3155 {
3156 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3157 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3158 }
3159 else
3160 {
3161 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3162 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3163 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3164 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3165 found = JUMP(SLJIT_C_NOT_ZERO);
3166 }
3167 }
3168
3169 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3170 JUMPTO(SLJIT_JUMP, start);
3171 JUMPHERE(found);
3172 JUMPHERE(quit);
3173
3174 if (firstline)
3175 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3176 }
3177
3178 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3179 {
3180 DEFINE_COMPILER;
3181 struct sljit_label *loop;
3182 struct sljit_jump *lastchar;
3183 struct sljit_jump *firstchar;
3184 struct sljit_jump *quit;
3185 struct sljit_jump *foundcr = NULL;
3186 struct sljit_jump *notfoundnl;
3187 jump_list *newline = NULL;
3188
3189 if (firstline)
3190 {
3191 SLJIT_ASSERT(common->first_line_end != 0);
3192 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3193 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3194 }
3195
3196 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3197 {
3198 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3199 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3200 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3201 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3202 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3203
3204 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3205 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3206 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3207 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3208 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3209 #endif
3210 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3211
3212 loop = LABEL();
3213 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3214 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3215 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3216 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3217 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3218 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3219
3220 JUMPHERE(quit);
3221 JUMPHERE(firstchar);
3222 JUMPHERE(lastchar);
3223
3224 if (firstline)
3225 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3226 return;
3227 }
3228
3229 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3230 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3231 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3232 skip_char_back(common);
3233
3234 loop = LABEL();
3235 read_char(common);
3236 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3237 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3238 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3239 check_newlinechar(common, common->nltype, &newline, FALSE);
3240 set_jumps(newline, loop);
3241
3242 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3243 {
3244 quit = JUMP(SLJIT_JUMP);
3245 JUMPHERE(foundcr);
3246 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3247 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3248 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3249 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3250 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3251 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3252 #endif
3253 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3254 JUMPHERE(notfoundnl);
3255 JUMPHERE(quit);
3256 }
3257 JUMPHERE(lastchar);
3258 JUMPHERE(firstchar);
3259
3260 if (firstline)
3261 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3262 }
3263
3264 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3265
3266 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3267 {
3268 DEFINE_COMPILER;
3269 struct sljit_label *start;
3270 struct sljit_jump *quit;
3271 struct sljit_jump *found = NULL;
3272 jump_list *matches = NULL;
3273 pcre_uint8 inverted_start_bits[32];
3274 int i;
3275 #ifndef COMPILE_PCRE8
3276 struct sljit_jump *jump;
3277 #endif
3278
3279 for (i = 0; i < 32; ++i)
3280 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3281
3282 if (firstline)
3283 {
3284 SLJIT_ASSERT(common->first_line_end != 0);
3285 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3286 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3287 }
3288
3289 start = LABEL();
3290 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3291 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3292 #ifdef SUPPORT_UTF
3293 if (common->utf)
3294 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3295 #endif
3296
3297 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3298 {
3299 #ifndef COMPILE_PCRE8
3300 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3301 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3302 JUMPHERE(jump);
3303 #endif
3304 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3305 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3306 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3307 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3308 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3309 found = JUMP(SLJIT_C_NOT_ZERO);
3310 }
3311
3312 #ifdef SUPPORT_UTF
3313 if (common->utf)
3314 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3315 #endif
3316 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3317 #ifdef SUPPORT_UTF
3318 #if defined COMPILE_PCRE8
3319 if (common->utf)
3320 {
3321 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3322 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3323 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3324 }
3325 #elif defined COMPILE_PCRE16
3326 if (common->utf)
3327 {
3328 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3329 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3330 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3331 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3332 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3333 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3334 }
3335 #endif /* COMPILE_PCRE[8|16] */
3336 #endif /* SUPPORT_UTF */
3337 JUMPTO(SLJIT_JUMP, start);
3338 if (found != NULL)
3339 JUMPHERE(found);
3340 if (matches != NULL)
3341 set_jumps(matches, LABEL());
3342 JUMPHERE(quit);
3343
3344 if (firstline)
3345 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3346 }
3347
3348 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3349 {
3350 DEFINE_COMPILER;
3351 struct sljit_label *loop;
3352 struct sljit_jump *toolong;
3353 struct sljit_jump *alreadyfound;
3354 struct sljit_jump *found;
3355 struct sljit_jump *foundoc = NULL;
3356 struct sljit_jump *notfound;
3357 pcre_uint32 oc, bit;
3358
3359 SLJIT_ASSERT(common->req_char_ptr != 0);
3360 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3361 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3362 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3363 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3364
3365 if (has_firstchar)
3366 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3367 else
3368 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3369
3370 loop = LABEL();
3371 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3372
3373 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3374 oc = req_char;
3375 if (caseless)
3376 {
3377 oc = TABLE_GET(req_char, common->fcc, req_char);
3378 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3379 if (req_char > 127 && common->utf)
3380 oc = UCD_OTHERCASE(req_char);
3381 #endif
3382 }
3383 if (req_char == oc)
3384 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3385 else
3386 {
3387 bit = req_char ^ oc;
3388 if (is_powerof2(bit))
3389 {
3390 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3391 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3392 }
3393 else
3394 {
3395 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3396 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3397 }
3398 }
3399 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3400 JUMPTO(SLJIT_JUMP, loop);
3401
3402 JUMPHERE(found);
3403 if (foundoc)
3404 JUMPHERE(foundoc);
3405 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3406 JUMPHERE(alreadyfound);
3407 JUMPHERE(toolong);
3408 return notfound;
3409 }
3410
3411 static void do_revertframes(compiler_common *common)
3412 {
3413 DEFINE_COMPILER;
3414 struct sljit_jump *jump;
3415 struct sljit_label *mainloop;
3416
3417 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3418 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3419 GET_LOCAL_BASE(TMP3, 0, 0);
3420
3421 /* Drop frames until we reach STACK_TOP. */
3422 mainloop = LABEL();
3423 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3424 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3425 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3426
3427 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3428 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3429 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3430 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3431 JUMPTO(SLJIT_JUMP, mainloop);
3432
3433 JUMPHERE(jump);
3434 jump = JUMP(SLJIT_C_SIG_LESS);
3435 /* End of dropping frames. */
3436 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3437
3438 JUMPHERE(jump);
3439 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3440 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3441 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3442 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3443 JUMPTO(SLJIT_JUMP, mainloop);
3444 }
3445
3446 static void check_wordboundary(compiler_common *common)
3447 {
3448 DEFINE_COMPILER;
3449 struct sljit_jump *skipread;
3450 jump_list *skipread_list = NULL;
3451 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3452 struct sljit_jump *jump;
3453 #endif
3454
3455 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3456
3457 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3458 /* Get type of the previous char, and put it to LOCALS1. */
3459 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3460 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3461 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3462 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3463 skip_char_back(common);
3464 check_start_used_ptr(common);
3465 read_char(common);
3466
3467 /* Testing char type. */
3468 #ifdef SUPPORT_UCP
3469 if (common->use_ucp)
3470 {
3471 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3472 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3473 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3474 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3475 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3476 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3477 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3478 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3479 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3480 JUMPHERE(jump);
3481 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3482 }
3483 else
3484 #endif
3485 {
3486 #ifndef COMPILE_PCRE8
3487 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3488 #elif defined SUPPORT_UTF
3489 /* Here LOCALS1 has already been zeroed. */
3490 jump = NULL;
3491 if (common->utf)
3492 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3493 #endif /* COMPILE_PCRE8 */
3494 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3495 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3496 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3497 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3498 #ifndef COMPILE_PCRE8
3499 JUMPHERE(jump);
3500 #elif defined SUPPORT_UTF
3501 if (jump != NULL)
3502 JUMPHERE(jump);
3503 #endif /* COMPILE_PCRE8 */
3504 }
3505 JUMPHERE(skipread);
3506
3507 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3508 check_str_end(common, &skipread_list);
3509 peek_char(common);
3510
3511 /* Testing char type. This is a code duplication. */
3512 #ifdef SUPPORT_UCP
3513 if (common->use_ucp)
3514 {
3515 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3516 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3517 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3518 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3519 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3520 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3521 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3522 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3523 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3524 JUMPHERE(jump);
3525 }
3526 else
3527 #endif
3528 {
3529 #ifndef COMPILE_PCRE8
3530 /* TMP2 may be destroyed by peek_char. */
3531 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3532 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3533 #elif defined SUPPORT_UTF
3534 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3535 jump = NULL;
3536 if (common->utf)
3537 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3538 #endif
3539 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3540 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3541 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3542 #ifndef COMPILE_PCRE8
3543 JUMPHERE(jump);
3544 #elif defined SUPPORT_UTF
3545 if (jump != NULL)
3546 JUMPHERE(jump);
3547 #endif /* COMPILE_PCRE8 */
3548 }
3549 set_jumps(skipread_list, LABEL());
3550
3551 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3552 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3553 }
3554
/*
Range list format:

ranges[0] = number of bit-change positions stored from ranges[2] onwards
            (at most MAX_RANGE_SIZE; -1 means the range list is invalid).
ranges[1] = value of the first bit (0 or 1).
ranges[2] .. ranges[ranges[0] + 1] = positions where the bit changes, i.e.
            where the current bit is not equal to the previous one.
*/
3562
3563 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3564 {
3565 DEFINE_COMPILER;
3566 struct sljit_jump *jump;
3567
3568 if (ranges[0] < 0)
3569 return FALSE;
3570
3571 switch(ranges[0])
3572 {
3573 case 1:
3574 if (readch)
3575 read_char(common);
3576 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3577 return TRUE;
3578
3579 case 2:
3580 if (readch)
3581 read_char(common);
3582 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3583 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3584 return TRUE;
3585
3586 case 4:
3587 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3588 {
3589 if (readch)
3590 read_char(common);
3591 if (ranges[1] != 0)
3592 {
3593 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3594 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3595 }
3596 else
3597 {
3598 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3599 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3600 JUMPHERE(jump);
3601 }
3602 return TRUE;
3603 }
3604 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3605 {
3606 if (readch)
3607 read_char(common);
3608 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3609 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3610 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3611 return TRUE;
3612 }
3613 return FALSE;
3614
3615 default:
3616 return FALSE;
3617 }
3618 }
3619
3620 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3621 {
3622 int i, bit, length;
3623 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3624
3625 bit = ctypes[0] & flag;
3626 ranges[0] = -1;
3627 ranges[1] = bit != 0 ? 1 : 0;
3628 length = 0;
3629
3630 for (i = 1; i < 256; i++)
3631 if ((ctypes[i] & flag) != bit)
3632 {
3633 if (length >= MAX_RANGE_SIZE)
3634 return;
3635 ranges[2 + length] = i;
3636 length++;
3637 bit ^= flag;
3638 }
3639
3640 if (bit != 0)
3641 {
3642 if (length >= MAX_RANGE_SIZE)
3643 return;
3644 ranges[2 + length] = 256;
3645 length++;
3646 }
3647 ranges[0] = length;
3648 }
3649
3650 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3651 {
3652 int ranges[2 + MAX_RANGE_SIZE];
3653 pcre_uint8 bit, cbit, all;
3654 int i, byte, length = 0;
3655
3656 bit = bits[0] & 0x1;
3657 ranges[1] = bit;
3658 /* Can be 0 or 255. */
3659 all = -bit;
3660
3661 for (i = 0; i < 256; )
3662 {
3663 byte = i >> 3;
3664 if ((i & 0x7) == 0 && bits[byte] == all)
3665 i += 8;
3666 else
3667 {
3668 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3669 if (cbit != bit)
3670 {
3671 if (length >= MAX_RANGE_SIZE)
3672 return FALSE;
3673 ranges[2 + length] = i;
3674 length++;
3675 bit = cbit;
3676 all = -cbit;
3677 }
3678 i++;
3679 }
3680 }
3681
3682 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3683 {
3684 if (length >= MAX_RANGE_SIZE)
3685 return FALSE;
3686 ranges[2 + length] = 256;
3687 length++;
3688 }
3689 ranges[0] = length;
3690
3691 return check_ranges(common, ranges, backtracks, FALSE);
3692 }
3693
3694 static void check_anynewline(compiler_common *common)
3695 {
3696 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3697 DEFINE_COMPILER;
3698
3699 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3700
3701 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3702 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3703 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3704 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3705 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3706 #ifdef COMPILE_PCRE8
3707 if (common->utf)
3708 {
3709 #endif
3710 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3711 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3712 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3713 #ifdef COMPILE_PCRE8
3714 }
3715 #endif
3716 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3717 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3718 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3719 }
3720
3721 static void check_hspace(compiler_common *common)
3722 {
3723 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3724 DEFINE_COMPILER;
3725
3726 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3727
3728 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3729 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3730 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3731 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3732 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3733 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3734 #ifdef COMPILE_PCRE8
3735 if (common->utf)
3736 {
3737 #endif
3738 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3739 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3740 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3741 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3742 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3743 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3744 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3745 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3746 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3747 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3748 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3749 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3750 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3751 #ifdef COMPILE_PCRE8
3752 }
3753 #endif
3754 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3755 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3756
3757 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3758 }
3759
3760 static void check_vspace(compiler_common *common)
3761 {
3762 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3763 DEFINE_COMPILER;
3764
3765 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3766
3767 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3768 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3769 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3770 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3771 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3772 #ifdef COMPILE_PCRE8
3773 if (common->utf)
3774 {
3775 #endif
3776 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3777 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3778 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3779 #ifdef COMPILE_PCRE8
3780 }
3781 #endif
3782 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3783 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3784
3785 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3786 }
3787
3788 #define CHAR1 STR_END
3789 #define CHAR2 STACK_TOP
3790
3791 static void do_casefulcmp(compiler_common *common)
3792 {
3793 DEFINE_COMPILER;
3794 struct sljit_jump *jump;
3795 struct sljit_label *label;
3796
3797 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3798 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3799 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3800 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3801 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3802 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3803
3804 label = LABEL();
3805 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3806 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3807 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3808 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3809 JUMPTO(SLJIT_C_NOT_ZERO, label);
3810
3811 JUMPHERE(jump);
3812 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3813 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3814 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3815 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3816 }
3817
3818 #define LCC_TABLE STACK_LIMIT
3819
3820 static void do_caselesscmp(compiler_common *common)
3821 {
3822 DEFINE_COMPILER;
3823 struct sljit_jump *jump;
3824 struct sljit_label *label;
3825
3826 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3827 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3828
3829 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3830 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3832 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3833 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3834 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3835
3836 label = LABEL();
3837 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3838 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3839 #ifndef COMPILE_PCRE8
3840 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3841 #endif
3842 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3843 #ifndef COMPILE_PCRE8
3844 JUMPHERE(jump);
3845 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3846 #endif
3847 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3848 #ifndef COMPILE_PCRE8
3849 JUMPHERE(jump);
3850 #endif
3851 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3852 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3853 JUMPTO(SLJIT_C_NOT_ZERO, label);
3854
3855 JUMPHERE(jump);
3856 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3857 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3858 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3859 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3860 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3861 }
3862
3863 #undef LCC_TABLE
3864 #undef CHAR1
3865 #undef CHAR2
3866
3867 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3868
3869 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3870 {
3871 /* This function would be ineffective to do in JIT level. */
3872 pcre_uint32 c1, c2;
3873 const pcre_uchar *src2 = args->uchar_ptr;
3874 const pcre_uchar *end2 = args->end;
3875 const ucd_record *ur;
3876 const pcre_uint32 *pp;
3877
3878 while (src1 < end1)
3879 {
3880 if (src2 >= end2)
3881 return (pcre_uchar*)1;
3882 GETCHARINC(c1, src1);
3883 GETCHARINC(c2, src2);
3884 ur = GET_UCD(c2);
3885 if (c1 != c2 && c1 != c2 + ur->other_case)
3886 {
3887 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3888 for (;;)
3889 {
3890 if (c1 < *pp) return NULL;
3891 if (c1 == *pp++) break;
3892 }
3893 }
3894 }
3895 return src2;
3896 }
3897
3898 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3899
3900 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3901 compare_context* context, jump_list **backtracks)
3902 {
3903 DEFINE_COMPILER;
3904 unsigned int othercasebit = 0;
3905 pcre_uchar *othercasechar = NULL;
3906 #ifdef SUPPORT_UTF
3907 int utflength;
3908 #endif
3909
3910 if (caseless && char_has_othercase(common, cc))
3911 {
3912 othercasebit = char_get_othercase_bit(common, cc);
3913 SLJIT_ASSERT(othercasebit);
3914 /* Extracting bit difference info. */
3915 #if defined COMPILE_PCRE8
3916 othercasechar = cc + (othercasebit >> 8);
3917 othercasebit &= 0xff;
3918 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3919 /* Note that this code only handles characters in the BMP. If there
3920 ever are characters outside the BMP whose othercase differs in only one
3921 bit from itself (there currently are none), this code will need to be
3922 revised for COMPILE_PCRE32. */
3923 othercasechar = cc + (othercasebit >> 9);
3924 if ((othercasebit & 0x100) != 0)
3925 othercasebit = (othercasebit & 0xff) << 8;
3926 else
3927 othercasebit &= 0xff;
3928 #endif /* COMPILE_PCRE[8|16|32] */
3929 }
3930
3931 if (context->sourcereg == -1)
3932 {
3933 #if defined COMPILE_PCRE8
3934 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3935 if (context->length >= 4)
3936 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3937 else if (context->length >= 2)
3938 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3939 else
3940 #endif
3941 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3942 #elif defined COMPILE_PCRE16
3943 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3944 if (context->length >= 4)
3945 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3946 else
3947 #endif
3948 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3949 #elif defined COMPILE_PCRE32
3950 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3951 #endif /* COMPILE_PCRE[8|16|32] */
3952 context->sourcereg = TMP2;
3953 }
3954
3955 #ifdef SUPPORT_UTF
3956 utflength = 1;
3957 if (common->utf && HAS_EXTRALEN(*cc))
3958 utflength += GET_EXTRALEN(*cc);
3959
3960 do
3961 {
3962 #endif
3963
3964 context->length -= IN_UCHARS(1);
3965 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3966
3967 /* Unaligned read is supported. */
3968 if (othercasebit != 0 && othercasechar == cc)
3969 {
3970 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3971 context->oc.asuchars[context->ucharptr] = othercasebit;
3972 }
3973 else
3974 {
3975 context->c.asuchars[context->ucharptr] = *cc;
3976 context->oc.asuchars[context->ucharptr] = 0;
3977 }
3978 context->ucharptr++;
3979
3980 #if defined COMPILE_PCRE8
3981 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3982 #else
3983 if (context->ucharptr >= 2 || context->length == 0)
3984 #endif
3985 {
3986 if (context->length >= 4)
3987 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3988 else if (context->length >= 2)
3989 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3990 #if defined COMPILE_PCRE8
3991 else if (context->length >= 1)
3992 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3993 #endif /* COMPILE_PCRE8 */
3994 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3995
3996 switch(context->ucharptr)
3997 {
3998 case 4 / sizeof(pcre_uchar):
3999 if (context->oc.asint != 0)
4000 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
4001 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
4002 break;
4003
4004 case 2 / sizeof(pcre_uchar):
4005 if (context->oc.asushort != 0)
4006 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
4007 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
4008 break;
4009
4010 #ifdef COMPILE_PCRE8
4011 case 1:
4012 if (context->oc.asbyte != 0)
4013 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
4014 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
4015 break;
4016 #endif
4017
4018 default:
4019 SLJIT_ASSERT_STOP();
4020 break;
4021 }
4022 context->ucharptr = 0;
4023 }
4024
4025 #else
4026
4027 /* Unaligned read is unsupported or in 32 bit mode. */
4028 if (context->length >= 1)
4029 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
4030
4031 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
4032
4033 if (othercasebit != 0 && othercasechar == cc)
4034 {
4035 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
4036 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
4037 }
4038 else
4039 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
4040
4041 #endif
4042
4043 cc++;
4044 #ifdef SUPPORT_UTF
4045 utflength--;
4046 }
4047 while (utflength > 0);
4048 #endif
4049
4050 return cc;
4051 }
4052
4053 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4054
/* Adjusts typereg by the difference between value and the current
typeoffset (SUB when value is larger, ADD when it is smaller, nothing when
they are equal), then records value as the new typeoffset. The macro expands
to more than one statement, evaluates its argument several times, and relies
on typereg and typeoffset being in scope at the point of use. */
#define SET_TYPE_OFFSET(value) \
  if ((value) > typeoffset) \
    { \
    OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    } \
  else if ((value) < typeoffset) \
    { \
    OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
    } \
  typeoffset = (value);

/* Same as SET_TYPE_OFFSET, but for the character held in TMP1 and the
charoffset variable. */
#define SET_CHAR_OFFSET(value) \
  if ((value) > charoffset) \
    { \
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
    } \
  else if ((value) < charoffset) \
    { \
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
    } \
  charoffset = (value);
4074
4075 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4076 {
4077 DEFINE_COMPILER;
4078 jump_list *found = NULL;
4079 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
4080 pcre_int32 c, charoffset;
4081 const pcre_uint32 *other_cases;
4082 struct sljit_jump *jump = NULL;
4083 pcre_uchar *ccbegin;
4084 int compares, invertcmp, numberofcmps;
4085 #ifdef SUPPORT_UCP
4086 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
4087 BOOL charsaved = FALSE;
4088 int typereg = TMP1, scriptreg = TMP1;
4089 pcre_int32 typeoffset;
4090 #endif
4091
4092 /* Although SUPPORT_UTF must be defined, we are
4093 not necessary in utf mode even in 8 bit mode. */
4094 detect_partial_match(common, backtracks);
4095 read_char(common);
4096
4097 if ((*cc++ & XCL_MAP) != 0)
4098 {
4099 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4100 #ifndef COMPILE_PCRE8
4101 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4102 #elif defined SUPPORT_UTF
4103 if (common->utf)
4104 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4105 #endif
4106
4107 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
4108 {
4109 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4110 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4111 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4112 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4113 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4114 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
4115 }
4116
4117 #ifndef COMPILE_PCRE8
4118 JUMPHERE(jump);
4119 #elif defined SUPPORT_UTF
4120 if (common->utf)
4121 JUMPHERE(jump);
4122 #endif
4123 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4124 #ifdef SUPPORT_UCP
4125 charsaved = TRUE;
4126 #endif
4127 cc += 32 / sizeof(pcre_uchar);
4128 }
4129
4130 /* Scanning the necessary info. */
4131 ccbegin = cc;
4132 compares = 0;
4133 while (*cc != XCL_END)
4134 {
4135 compares++;
4136 if (*cc == XCL_SINGLE)
4137 {
4138 cc += 2;
4139 #ifdef SUPPORT_UTF
4140 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4141 #endif
4142 #ifdef SUPPORT_UCP
4143 needschar = TRUE;
4144 #endif
4145 }
4146 else if (*cc == XCL_RANGE)
4147 {
4148 cc += 2;
4149 #ifdef SUPPORT_UTF
4150 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4151 #endif
4152 cc++;
4153 #ifdef SUPPORT_UTF
4154 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4155 #endif
4156 #ifdef SUPPORT_UCP
4157 needschar = TRUE;
4158 #endif
4159 }
4160 #ifdef SUPPORT_UCP
4161 else
4162 {
4163 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4164 cc++;
4165 switch(*cc)
4166 {
4167 case PT_ANY:
4168 break;
4169
4170 case PT_LAMP:
4171 case PT_GC:
4172 case PT_PC:
4173 case PT_ALNUM:
4174 needstype = TRUE;
4175 break;
4176
4177 case PT_SC:
4178 needsscript = TRUE;
4179 break;
4180
4181 case PT_SPACE:
4182 case PT_PXSPACE:
4183 case PT_WORD:
4184 needstype = TRUE;
4185 needschar = TRUE;
4186 break;
4187
4188 case PT_CLIST:
4189 case PT_UCNC:
4190 needschar = TRUE;
4191 break;
4192
4193 default:
4194 SLJIT_ASSERT_STOP();
4195 break;
4196 }
4197 cc += 2;
4198 }
4199 #endif
4200 }
4201
4202 #ifdef SUPPORT_UCP
4203 /* Simple register allocation. TMP1 is preferred if possible. */
4204 if (needstype || needsscript)
4205 {
4206 if (needschar && !charsaved)
4207 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4208 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4209 if (needschar)
4210 {
4211 if (needstype)
4212 {
4213 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4214 typereg = RETURN_ADDR;
4215 }
4216
4217 if (needsscript)
4218 scriptreg = TMP3;
4219 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4220 }
4221 else if (needstype && needsscript)
4222 scriptreg = TMP3;
4223 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4224
4225 if (needsscript)
4226 {
4227 if (scriptreg == TMP1)
4228 {
4229 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4230 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4231 }
4232 else
4233 {
4234 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4235 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4236 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4237 }
4238 }
4239 }
4240 #endif
4241
4242 /* Generating code. */
4243 cc = ccbegin;
4244 charoffset = 0;
4245 numberofcmps = 0;
4246 #ifdef SUPPORT_UCP
4247 typeoffset = 0;
4248 #endif
4249
4250 while (*cc != XCL_END)
4251 {
4252 compares--;
4253 invertcmp = (compares == 0 && list != backtracks);
4254 jump = NULL;
4255
4256 if (*cc == XCL_SINGLE)
4257 {
4258 cc ++;
4259 #ifdef SUPPORT_UTF
4260 if (common->utf)
4261 {
4262 GETCHARINC(c, cc);
4263 }
4264 else
4265 #endif
4266 c = *cc++;
4267
4268 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4269 {
4270 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4271 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4272 numberofcmps++;
4273 }
4274 else if (numberofcmps > 0)
4275 {
4276 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4277 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4278 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4279 numberofcmps = 0;
4280 }
4281 else
4282 {
4283 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4284 numberofcmps = 0;
4285 }
4286 }
4287 else if (*cc == XCL_RANGE)
4288 {
4289 cc ++;
4290 #ifdef SUPPORT_UTF
4291 if (common->utf)
4292 {
4293 GETCHARINC(c, cc);
4294 }
4295 else
4296 #endif
4297 c = *cc++;
4298 SET_CHAR_OFFSET(c);
4299 #ifdef SUPPORT_UTF
4300 if (common->utf)
4301 {
4302 GETCHARINC(c, cc);
4303 }
4304 else
4305 #endif
4306 c = *cc++;
4307 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4308 {
4309 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4310 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4311 numberofcmps++;
4312 }
4313 else if (numberofcmps > 0)
4314 {
4315 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4316 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4317 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4318 numberofcmps = 0;
4319 }
4320 else
4321 {
4322 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4323 numberofcmps = 0;
4324 }
4325 }
4326 #ifdef SUPPORT_UCP
4327 else
4328 {
4329 if (*cc == XCL_NOTPROP)
4330 invertcmp ^= 0x1;
4331 cc++;
4332 switch(*cc)
4333 {
4334 case PT_ANY:
4335 if (list != backtracks)
4336 {
4337 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4338 continue;
4339 }
4340 else if (cc[-1] == XCL_NOTPROP)
4341 continue;
4342 jump = JUMP(SLJIT_JUMP);
4343 break;
4344
4345 case PT_LAMP:
4346 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4347 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4348 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4349 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4350 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4351 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4352 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4353 break;
4354
4355 case PT_GC:
4356 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4357 SET_TYPE_OFFSET(c);
4358 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4359 break;
4360
4361 case PT_PC:
4362 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4363 break;
4364
4365 case PT_SC:
4366 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4367 break;
4368
4369 case PT_SPACE:
4370 case PT_PXSPACE:
4371 if (*cc == PT_SPACE)
4372 {
4373 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4374 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4375 }
4376 SET_CHAR_OFFSET(9);
4377 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4378 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4379 if (*cc == PT_SPACE)
4380 JUMPHERE(jump);
4381
4382 SET_TYPE_OFFSET(ucp_Zl);
4383 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4384 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4385 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4386 break;
4387
4388 case PT_WORD:
4389 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4390 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4391 /* Fall through. */
4392
4393 case PT_ALNUM:
4394 SET_TYPE_OFFSET(ucp_Ll);
4395 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4396 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4397 SET_TYPE_OFFSET(ucp_Nd);
4398 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4399 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4400 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4401 break;
4402
4403 case PT_CLIST:
4404 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4405
4406 /* At least three characters are required.
4407 Otherwise this case would be handled by the normal code path. */
4408 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4409 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4410
4411 /* Optimizing character pairs, if their difference is power of 2. */
4412 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4413 {
4414 if (charoffset == 0)
4415 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4416 else
4417 {
4418 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4419 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4420 }
4421 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4422 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4423 other_cases += 2;
4424 }
4425 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4426 {
4427 if (charoffset == 0)
4428 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4429 else
4430 {
4431 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4432 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4433 }
4434 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4435 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4436
4437 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4438 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4439
4440 other_cases += 3;
4441 }
4442 else
4443 {
4444 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4445 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4446 }
4447
4448 while (*other_cases != NOTACHAR)
4449 {
4450 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4451 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4452 }
4453 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4454 break;
4455
4456 case PT_UCNC:
4457 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4458 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4459 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4460 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4461 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4462 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4463
4464 SET_CHAR_OFFSET(0xa0);
4465 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4466 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4467 SET_CHAR_OFFSET(0);
4468 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4469 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4470 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4471 break;
4472 }
4473 cc += 2;
4474 }
4475 #endif
4476
4477 if (jump != NULL)
4478 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4479 }
4480
4481 if (found != NULL)
4482 set_jumps(found, LABEL());
4483 }
4484
4485 #undef SET_TYPE_OFFSET
4486 #undef SET_CHAR_OFFSET
4487
4488 #endif
4489
/* Emits the matching path of one single-item opcode (assertions such as
OP_SOD / OP_CIRC / OP_DOLL, character types, newline sequences, literal
characters, classes, OP_REVERSE).

Arguments:
  common      the JIT compiler state
  type        the opcode to compile
  cc          points at the operand bytes that follow the opcode
  backtracks  list that collects every jump taken when the item fails

Returns:      cc advanced past the operand bytes consumed by this opcode
              (cc itself for opcodes without operands)

The switch has no default case: an opcode that is not handled here reaches
the SLJIT_ASSERT_STOP() at the end of the function. */

static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
int length;
unsigned int c, oc, bit;
compare_context context;
struct sljit_jump *jump[4];
jump_list *end_list;
#ifdef SUPPORT_UTF
struct sljit_label *label;
#ifdef SUPPORT_UCP
pcre_uchar propdata[5];
#endif
#endif

switch(type)
  {
  case OP_SOD:
  /* Fails unless STR_PTR equals the "begin" field of the arguments. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_SOM:
  /* Fails unless STR_PTR equals the "str" field of the arguments. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
  return cc;

  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  /* The shared wordboundary helper leaves its answer in the zero flag. */
  add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
  return cc;

  case OP_NOT_DIGIT:
  case OP_DIGIT:
  /* Digits are usually 0-9, so it is worth to optimize them. */
  if (common->digits[0] == -2)
    get_ctype_ranges(common, ctype_digit, common->digits);
  detect_partial_match(common, backtracks);
  /* Flip the starting bit in the negative case. */
  if (type == OP_NOT_DIGIT)
    common->digits[1] ^= 1;
  if (!check_ranges(common, common->digits, backtracks, TRUE))
    {
    /* No range based code was generated: test the ctype bit instead. */
    read_char8_type(common);
    OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
    add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
    }
  /* Undo the flip above, because common->digits is kept for later use. */
  if (type == OP_NOT_DIGIT)
    common->digits[1] ^= 1;
  return cc;

  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  detect_partial_match(common, backtracks);
  read_char8_type(common);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
  add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
  return cc;

  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  detect_partial_match(common, backtracks);
  read_char8_type(common);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
  add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
  return cc;

  case OP_ANY:
  detect_partial_match(common, backtracks);
  read_char(common);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two character newline: the first character is in bits 8..15 of
    common->newline, the second one in bits 0..7. The item only fails when
    both of them follow each other in the subject. */
    jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
    end_list = NULL;
    if (common->mode != JIT_PARTIAL_HARD_COMPILE)
      add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    else
      check_str_end(common, &end_list);

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
    set_jumps(end_list, LABEL());
    JUMPHERE(jump[0]);
    }
  else
    check_newlinechar(common, common->nltype, backtracks, TRUE);
  return cc;

  case OP_ALLANY:
  detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    /* The character value is not needed, only its length: read the first
    code unit and step over the additional ones. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
#if defined COMPILE_PCRE8
    /* Units below 0xc0 need no skip; otherwise the number of additional
    bytes is loaded from utf8_table4[unit - 0xc0]. */
    jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
    OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif defined COMPILE_PCRE16
    /* If (unit & 0xfc00) == 0xd800 one more 16 bit unit is skipped: the
    0 / 1 comparison result is shifted left by one and added to STR_PTR. */
    jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif
    JUMPHERE(jump[0]);
#endif /* COMPILE_PCRE[8|16] */
    return cc;
    }
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

  case OP_ANYBYTE:
  detect_partial_match(common, backtracks);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  return cc;

#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
  case OP_NOTPROP:
  case OP_PROP:
  /* Build a one item extended class (flag byte 0, no bitmap) on the stack
  and let the xclass compiler generate the property test. */
  propdata[0] = 0;
  propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
  propdata[2] = cc[0];
  propdata[3] = cc[1];
  propdata[4] = XCL_END;
  compile_xclass_matchingpath(common, propdata, backtracks);
  return cc + 2;
#endif
#endif

  case OP_ANYNL:
  detect_partial_match(common, backtracks);
  read_char(common);
  jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  /* We don't need to handle soft partial matching case. */
  end_list = NULL;
  if (common->mode != JIT_PARTIAL_HARD_COMPILE)
    add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  else
    check_str_end(common, &end_list);
  /* After a CR, a directly following NL is consumed as well. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  jump[2] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[0]);
  /* Not a CR: the character has to be a newline according to bsr_nltype. */
  check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
  set_jumps(end_list, LABEL());
  JUMPHERE(jump[1]);
  JUMPHERE(jump[2]);
  return cc;

  case OP_NOT_HSPACE:
  case OP_HSPACE:
  detect_partial_match(common, backtracks);
  read_char(common);
  /* The shared hspace helper leaves its answer in the zero flag. */
  add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
  return cc;

  case OP_NOT_VSPACE:
  case OP_VSPACE:
  detect_partial_match(common, backtracks);
  read_char(common);
  /* The shared vspace helper leaves its answer in the zero flag. */
  add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
  return cc;

#ifdef SUPPORT_UCP
  case OP_EXTUNI:
  detect_partial_match(common, backtracks);
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  /* Optimize register allocation: use a real register. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
  /* STACK_TOP holds the gbprop value of the previous character while the
  loop runs; its real value is restored from LOCALS0 after the loop. */
  OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  label = LABEL();
  jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  /* Remember the position, in case the next character is not consumed. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
  read_char(common);
  add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);

  /* Continue while bit (1 << current gbprop) is set in the 32 bit entry
  ucp_gbtable[previous gbprop]. */
  OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
  OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
  OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  JUMPTO(SLJIT_C_NOT_ZERO, label);

  /* The last character read does not belong to the sequence. */
  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  JUMPHERE(jump[0]);
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);

  if (common->mode == JIT_PARTIAL_HARD_COMPILE)
    {
    jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
    /* Since we successfully read a char above, partial matching must occur. */
    check_partial(common, TRUE);
    JUMPHERE(jump[0]);
    }
  return cc;
#endif

  case OP_EODN:
  /* Requires rather complex checks. */
  /* jump[0] is taken when STR_PTR is already at (or after) STR_END. */
  jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Two character newline: it must occupy exactly the last two units. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == JIT_COMPILE)
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
    else
      {
      jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
      /* Backtrack if more than one unit is left or the unit is not the
      first newline character; otherwise only the first half of the
      newline is present, which is reported through check_partial. */
      OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
      OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
      add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else if (common->nltype == NLTYPE_FIXED)
    {
    /* One character newline: it must be the very last unit. */
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
    }
  else
    {
    /* Other newline types: CR followed by NL is checked first. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
    /* Greater: the CR is the last unit. Less: more than two units left. */
    jump[2] = JUMP(SLJIT_C_GREATER);
    add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
    /* Equal. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

    JUMPHERE(jump[1]);
    if (common->nltype == NLTYPE_ANYCRLF)
      {
      /* Not a CR: only an NL in the last position is accepted. */
      OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
      }
    else
      {
      /* Read one character; it must end the subject and pass the
      anynewline helper. STR_PTR is saved in LOCALS1 and restored, since
      this item does not consume anything. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
      read_char(common);
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
      add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
      add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
      }
    JUMPHERE(jump[2]);
    JUMPHERE(jump[3]);
    }
  JUMPHERE(jump[0]);
  check_partial(common, FALSE);
  return cc;

  case OP_EOD:
  add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
  check_partial(common, FALSE);
  return cc;

  case OP_CIRC:
  /* Fails if STR_PTR is after "begin" or if the notbol byte is non-zero. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  return cc;

  case OP_CIRCM:
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
  /* jump[1]: STR_PTR is after "begin", so a preceding newline is required. */
  jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  /* No match at (or after) STR_END on this path. */
  add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Both newline characters must precede STR_PTR, not before "begin". */
    OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    /* Step back one character, read it again and test it. */
    skip_char_back(common);
    read_char(common);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_DOLL:
  /* Fails if the noteol byte is non-zero. */
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  /* Without endonly this is the OP_EODN test, otherwise the OP_EOD test. */
  if (!common->endonly)
    compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
  else
    {
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
    check_partial(common, FALSE);
    }
  return cc;

  case OP_DOLLM:
  /* jump[1]: not at STR_END, so a newline must follow STR_PTR. */
  jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
  OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
  add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
  check_partial(common, FALSE);
  jump[0] = JUMP(SLJIT_JUMP);
  JUMPHERE(jump[1]);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (common->mode == JIT_COMPILE)
      add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
    else
      {
      jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
      /* STR_PTR = STR_END - IN_UCHARS(1) */
      add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
      check_partial(common, TRUE);
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
      JUMPHERE(jump[1]);
      }

    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
    }
  else
    {
    peek_char(common);
    check_newlinechar(common, common->nltype, backtracks, FALSE);
    }
  JUMPHERE(jump[0]);
  return cc;

  case OP_CHAR:
  case OP_CHARI:
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif
  if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
    {
    /* Non-partial matching: advance STR_PTR by the whole character first,
    then compare its code units through byte_sequence_compare. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));

    context.length = IN_UCHARS(length);
    context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
    context.ucharptr = 0;
#endif
    return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
    }
  /* Otherwise read the character and compare its value. */
  detect_partial_match(common, backtracks);
  read_char(common);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    GETCHAR(c, cc);
    }
  else
#endif
    c = *cc;
  if (type == OP_CHAR || !char_has_othercase(common, cc))
    {
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
    return cc + length;
    }
  oc = char_othercase(common, c);
  bit = c ^ oc;
  if (is_powerof2(bit))
    {
    /* The two cases differ in a single bit: force it and compare once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
    add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
    return cc + length;
    }
  /* General case: backtrack unless the character equals c or oc. */
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
  OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
  add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
  return cc + length;

  case OP_NOT:
  case OP_NOTI:
  detect_partial_match(common, backtracks);
  length = 1;
#ifdef SUPPORT_UTF
  if (common->utf)
    {
#ifdef COMPILE_PCRE8
    c = *cc;
    if (c < 128)
      {
      /* The excluded character is a single byte: comparing the first byte
      of the subject character is enough, its value is not decoded. */
      OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      if (type == OP_NOT || !char_has_othercase(common, cc))
        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
      else
        {
        /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
        OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
        add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
        }
      /* Skip the variable-length character. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(jump[0]);
      return cc + 1;
      }
    else
#endif /* COMPILE_PCRE8 */
      {
      GETCHARLEN(c, cc, length);
      read_char(common);
      }
    }
  else
#endif /* SUPPORT_UTF */
    {
    read_char(common);
    c = *cc;
    }

  /* Backtrack when the character read equals c (or its other case). */
  if (type == OP_NOT || !char_has_othercase(common, cc))
    add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
  else
    {
    oc = char_othercase(common, c);
    bit = c ^ oc;
    if (is_powerof2(bit))
      {
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
      add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      }
    }
  return cc + length;

  case OP_CLASS:
  case OP_NCLASS:
  detect_partial_match(common, backtracks);
  read_char(common);
  /* Nothing more to do if range based code could be generated. */
  if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
    return cc + 32 / sizeof(pcre_uchar);

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  jump[0] = NULL;
#ifdef COMPILE_PCRE8
  /* This check only affects 8 bit mode. In other modes, we
  always need to compare the value with 255. */
  if (common->utf)
#endif /* COMPILE_PCRE8 */
    {
    /* Values above 255 are outside the bitmap: OP_CLASS backtracks for
    them, OP_NCLASS skips the bitmap test and accepts them. */
    jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (type == OP_CLASS)
      {
      add_jump(compiler, backtracks, jump[0]);
      jump[0] = NULL;
      }
    }
#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
  /* Bitmap lookup: test bit (ch & 7) of byte (ch >> 3) at cc. */
  OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
  OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
  OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
  OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
  OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
  add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  if (jump[0] != NULL)
    JUMPHERE(jump[0]);
#endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
  return cc + 32 / sizeof(pcre_uchar);

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  case OP_XCLASS:
  /* GET(cc, 0) is the length stored in the link field; the class data
  itself starts after the link. */
  compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
  return cc + GET(cc, 0) - 1;
#endif

  case OP_REVERSE:
  /* Move STR_PTR back by "length" characters; backtrack if that would
  leave the subject at its "begin" side. */
  length = GET(cc, 0);
  if (length == 0)
    return cc + LINK_SIZE;
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    /* Characters have variable length: step back one character at a time,
    counting down in TMP2. */
    OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
    label = LABEL();
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
    skip_char_back(common);
    OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_C_NOT_ZERO, label);
    }
  else
#endif
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
    add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
    }
  check_start_used_ptr(common);
  return cc + LINK_SIZE;
  }
/* Only reached for an opcode that has no case above. */
SLJIT_ASSERT_STOP();
return cc;
}
5043
5044 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
5045 {
5046 /* This function consumes at least one input character. */
5047 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
5048 DEFINE_COMPILER;
5049 pcre_uchar *ccbegin = cc;
5050 compare_context context;
5051 int size;
5052
5053 context.length = 0;
5054 do
5055 {
5056 if (cc >= ccend)
5057 break;
5058
5059 if (*cc == OP_CHAR)
5060 {
5061 size = 1;
5062 #ifdef SUPPORT_UTF
5063 if (common->utf && HAS_EXTRALEN(cc[1]))
5064 size += GET_EXTRALEN(cc[1]);
5065 #endif
5066 }
5067 else if (*cc == OP_CHARI)
5068 {
5069 size = 1;
5070 #ifdef SUPPORT_UTF
5071 if (common->utf)
5072 {
5073 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5074 size = 0;
5075 else if (HAS_EXTRALEN(cc[1]))
5076 size += GET_EXTRALEN(cc[1]);
5077 }
5078 else
5079 #endif
5080 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
5081 size = 0;
5082 }
5083 else
5084 size = 0;
5085
5086 cc += 1 + size;
5087 context.length += IN_UCHARS(size);
5088 }
5089 while (size > 0 && context.length <= 128);
5090
5091 cc = ccbegin;
5092 if (context.length > 0)
5093 {
5094 /* We have a fixed-length byte sequence. */
5095 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
5096 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
5097
5098 context.sourcereg = -1;
5099 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
5100 context.ucharptr = 0;
5101 #endif
5102 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
5103 return cc;
5104 }
5105
5106 /* A non-fixed length character will be checked if length == 0. */
5107 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
5108 }
5109
5110 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
5111 {
5112 DEFINE_COMPILER;
5113 int offset = GET2(cc, 1) << 1;
5114
5115 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5116 if (!common->jscript_compat)
5117 {
5118 if (backtracks == NULL)
5119 {
5120 /* OVECTOR(1) contains the "string begin - 1" constant. */
5121 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5122 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
5123 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5124 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
5125 return JUMP(SLJIT_C_NOT_ZERO);
5126 }
5127 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5128 }
5129 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5130 }
5131
5132 /* Forward definitions. */
5133 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5134 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5135
/* Allocates a new backtrack record of `size` bytes with sljit_alloc_memory(),
zero-fills it, stores `ccstart` in its cc field and pushes it on the list headed
by parent->top. If the compiler reports an error after the allocation, the
enclosing function returns `error`.
The expansion site must have `compiler`, `backtrack` and `parent` in scope. */
5136 #define PUSH_BACKTRACK(size, ccstart, error) \
5137 do \
5138 { \
5139 backtrack = sljit_alloc_memory(compiler, (size)); \
5140 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5141 return error; \
5142 memset(backtrack, 0, size); \
5143 backtrack->prev = parent->top; \
5144 backtrack->cc = (ccstart); \
5145 parent->top = backtrack; \
5146 } \
5147 while (0)
5148
/* Variant of the backtrack-record push for functions returning void: allocates
`size` bytes with sljit_alloc_memory(), zero-fills the record, stores `ccstart`
in its cc field and pushes it on the list headed by parent->top. On a compiler
error the enclosing function simply returns.
The expansion site must have `compiler`, `backtrack` and `parent` in scope. */
5149 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
5150 do \
5151 { \
5152 backtrack = sljit_alloc_memory(compiler, (size)); \
5153 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
5154 return; \
5155 memset(backtrack, 0, size); \
5156 backtrack->prev = parent->top; \
5157 backtrack->cc = (ccstart); \
5158 parent->top = backtrack; \
5159 } \
5160 while (0)
5161
/* Casts the local `backtrack` pointer to a pointer to the given record type. */
5162 #define BACKTRACK_AS(type) ((type *)backtrack)
5163
/* Emits the matching path of a single back reference (OP_REF or OP_REFI) at cc.

The capture slot index is GET2(cc, 1) << 1; OVECTOR(offset) and
OVECTOR(offset + 1) are the two values compared/subtracted below (presumably the
start and end of the captured text - confirm against the OVECTOR layout).

withchecks - when TRUE, also emit the comparison against OVECTOR(1) (skipped in
             jscript_compat mode) and the test for an empty capture.
emptyfail  - only meaningful with withchecks: when TRUE the empty-capture jump
             is added to backtracks, otherwise it lands after the comparison.

Returns cc + 1 + IMM2_SIZE. */
5164 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5165 {
5166 DEFINE_COMPILER;
5167 int offset = GET2(cc, 1) << 1;
5168 struct sljit_jump *jump = NULL;
5169 struct sljit_jump *partial;
5170 struct sljit_jump *nopartial;
5171
5172 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5173 /* OVECTOR(1) contains the "string begin - 1" constant. */
5174 if (withchecks && !common->jscript_compat)
5175 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5176
5177 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* Caseless reference in UTF mode: the comparison is delegated to the C helper
do_utf_caselesscmp() through a three-argument call. */
5178 if (common->utf && *cc == OP_REFI)
5179 {
5180 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5181 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
/* Equal slot values mean the capture is empty. */
5182 if (withchecks)
5183 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5184
5185 /* Needed to save important temporary registers. */
5186 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5187 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5188 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5189 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5190 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return value as used below: 0 and 1 are failure codes (1 is routed through
check_partial() outside JIT_COMPILE mode); any other value becomes the new
STR_PTR. */
5191 if (common->mode == JIT_COMPILE)
5192 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5193 else
5194 {
5195 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5196 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5197 check_partial(common, FALSE);
5198 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5199 JUMPHERE(nopartial);
5200 }
5201 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5202 }
5203 else
5204 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5205 {
/* TMP2 = OVECTOR(offset + 1) - OVECTOR(offset); a zero result is an empty capture. */
5206 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5207 if (withchecks)
5208 jump = JUMP(SLJIT_C_ZERO);
5209
/* Advance STR_PTR by that length and check it against STR_END. */
5210 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5211 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5212 if (common->mode == JIT_COMPILE)
5213 add_jump(compiler, backtracks, partial);
5214
/* Call the shared caseful/caseless compare routine; a non-zero TMP2 afterwards is a mismatch. */
5215 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5216 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5217
/* Outside JIT_COMPILE mode the overrun case is handled here: compare only
the part that fits before STR_END, then go through check_partial(). */
5218 if (common->mode != JIT_COMPILE)
5219 {
5220 nopartial = JUMP(SLJIT_JUMP);
5221 JUMPHERE(partial);
5222 /* TMP2 -= STR_END - STR_PTR */
5223 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5224 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5225 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5226 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5227 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5228 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5229 JUMPHERE(partial);
5230 check_partial(common, FALSE);
5231 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5232 JUMPHERE(nopartial);
5233 }
5234 }
5235
/* Resolve the empty-capture jump (only created when withchecks is TRUE). */
5236 if (jump != NULL)
5237 {
5238 if (emptyfail)
5239 add_jump(compiler, backtracks, jump);
5240 else
5241 JUMPHERE(jump);
5242 }
5243 return cc + 1 + IMM2_SIZE;
5244 }
5245
/* Emits the matching path of a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE, read from cc[1 + IMM2_SIZE]).

A new iterator_backtrack record is pushed on parent; its matchingpath label is
set here. min/max are decoded from the repeat opcode; max == 0 is used for
"no upper limit" (OP_CRSTAR / OP_CRPLUS and their minimizing forms).

Returns the opcode pointer after the repeat item, or NULL if the backtrack
record could not be allocated. */
5246 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5247 {
5248 DEFINE_COMPILER;
5249 backtrack_common *backtrack;
5250 pcre_uchar type;
5251 struct sljit_label *label;
5252 struct sljit_jump *zerolength;
5253 struct sljit_jump *jump = NULL;
5254 pcre_uchar *ccbegin = cc;
5255 int min = 0, max = 0;
5256 BOOL minimize;
5257
5258 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5259
5260 type = cc[1 + IMM2_SIZE];
/* NOTE(review): relies on the minimizing repeat opcodes having odd values -
confirm against the opcode table. */
5261 minimize = (type & 0x1) != 0;
5262 switch(type)
5263 {
5264 case OP_CRSTAR:
5265 case OP_CRMINSTAR:
5266 min = 0;
5267 max = 0;
5268 cc += 1 + IMM2_SIZE + 1;
5269 break;
5270 case OP_CRPLUS:
5271 case OP_CRMINPLUS:
5272 min = 1;
5273 max = 0;
5274 cc += 1 + IMM2_SIZE + 1;
5275 break;
5276 case OP_CRQUERY:
5277 case OP_CRMINQUERY:
5278 min = 0;
5279 max = 1;
5280 cc += 1 + IMM2_SIZE + 1;
5281 break;
5282 case OP_CRRANGE:
5283 case OP_CRMINRANGE:
5284 min = GET2(cc, 1 + IMM2_SIZE + 1);
5285 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5286 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5287 break;
5288 default:
5289 SLJIT_ASSERT_STOP();
5290 break;
5291 }
5292
/* Non-minimizing repeat: loop on the reference, saving STR_PTR on the stack
after each iteration that may be given back. */
5293 if (!minimize)
5294 {
5295 if (min == 0)
5296 {
5297 allocate_stack(common, 2);
5298 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5299 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5300 /* Temporary release of STR_PTR. */
5301 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5302 zerolength = compile_ref_checks(common, ccbegin, NULL);
5303 /* Restore if not zero length. */
5304 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5305 }
5306 else
5307 {
5308 allocate_stack(common, 1);
5309 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5310 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5311 }
5312
/* An iteration counter is only needed when a limit above 1 exists; it lives
in the POSSESSIVE0 local. */
5313 if (min > 1 || max > 1)
5314 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5315
5316 label = LABEL();
5317 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5318
5319 if (min > 1 || max > 1)
5320 {
5321 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5322 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5323 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: iterate again without saving a position. */
5324 if (min > 1)
5325 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5326 if (max > 1)
5327 {
5328 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5329 allocate_stack(common, 1);
5330 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5331 JUMPTO(SLJIT_JUMP, label);
5332 JUMPHERE(jump);
5333 }
5334 }
5335
5336 if (max == 0)
5337 {
5338 /* Includes min > 1 case as well. */
5339 allocate_stack(common, 1);
5340 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5341 JUMPTO(SLJIT_JUMP, label);
5342 }
5343
5344 JUMPHERE(zerolength);
5345 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5346
5347 decrease_call_count(common);
5348 return cc;
5349 }
5350
/* Minimizing repeat: two stack slots are reserved, STACK(0) receives STR_PTR
and STACK(1) is used as the iteration counter. */
5351 allocate_stack(common, 2);
5352 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5353 if (type != OP_CRMINSTAR)
5354 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5355
5356 if (min == 0)
5357 {
5358 zerolength = compile_ref_checks(common, ccbegin, NULL);
5359 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
/* With min == 0 the first pass skips the reference entirely. */
5360 jump = JUMP(SLJIT_JUMP);
5361 }
5362 else
5363 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5364
5365 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Fail once the counter in STACK(1) has reached max. */
5366 if (max > 0)
5367 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5368
5369 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5370 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5371
5372 if (min > 1)
5373 {
5374 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5375 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5377 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5378 }
5379 else if (max > 0)
5380 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5381
5382 if (jump != NULL)
5383 JUMPHERE(jump);
5384 JUMPHERE(zerolength);
5385
5386 decrease_call_count(common);
5387 return cc;
5388 }
5389
5390 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5391 {
5392 DEFINE_COMPILER;
5393 backtrack_common *backtrack;
5394 recurse_entry *entry = common->entries;
5395 recurse_entry *prev = NULL;
5396 sljit_sw start = GET(cc, 1);
5397 pcre_uchar *start_cc;
5398 BOOL needs_control_head;
5399
5400 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5401
5402 /* Inlining simple patterns. */
5403 if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
5404 {
5405 start_cc = common->start + start;
5406 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5407 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5408 return cc + 1 + LINK_SIZE;
5409 }
5410
5411 while (entry != NULL)
5412 {
5413 if (entry->start == start)
5414 break;
5415 prev = entry;
5416 entry = entry->next;
5417 }
5418
5419 if (entry == NULL)
5420 {
5421 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5422 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5423 return NULL;
5424 entry->next = NULL;
5425 entry->entry = NULL;
5426 entry->calls = NULL;
5427 entry->start = start;
5428
5429 if (prev != NULL)
5430 prev->next = entry;
5431 else
5432 common->entries = entry;
5433 }
5434
5435 if (common->has_set_som && common->mark_ptr != 0)
5436 {
5437 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5438 allocate_stack(common, 2);
5439 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5440 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5441 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5442 }
5443 else if (common->has_set_som || common->mark_ptr != 0)
5444 {
5445 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5446 allocate_stack(common, 1);
5447 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5448 }
5449
5450 if (entry->entry == NULL)
5451 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5452 else
5453 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5454 /* Leave if the match is failed. */
5455 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5456 return cc + 1 + LINK_SIZE;
5457 }
5458
5459 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5460 {
5461 const pcre_uchar *begin = arguments->begin;
5462 int *offset_vector = arguments->offsets;
5463 int offset_count = arguments->offset_count;
5464 int i;
5465
5466 if (PUBL(callout) == NULL)
5467 return 0;
5468
5469 callout_block->version = 2;
5470 callout_block->callout_data = arguments->callout_data;
5471
5472 /* Offsets in subject. */
5473 callout_block->subject_length = arguments->end - arguments->begin;
5474 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5475 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5476 #if defined COMPILE_PCRE8
5477 callout_block->subject = (PCRE_SPTR)begin;
5478 #elif defined COMPILE_PCRE16
5479 callout_block->subject = (PCRE_SPTR16)begin;
5480 #elif defined COMPILE_PCRE32
5481 callout_block->subject = (PCRE_SPTR32)begin;
5482 #endif
5483
5484 /* Convert and copy the JIT offset vector to the offset_vector array. */
5485 callout_block->capture_top = 0;
5486 callout_block->offset_vector = offset_vector;
5487 for (i = 2; i < offset_count; i += 2)
5488 {
5489 offset_vector[i] = jit_ovector[i] - begin;
5490 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5491 if (jit_ovector[i] >= begin)
5492 callout_block->capture_top = i;
5493 }
5494
5495 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5496 if (offset_count > 0)
5497 offset_vector[0] = -1;
5498 if (offset_count > 1)
5499 offset_vector[1] = -1;
5500 return (*PUBL(callout))(callout_block);
5501 }
5502
5503 /* Size of the callout block, rounded up to a multiple of 8 bytes. */
5504 #define CALLOUT_ARG_SIZE \
5505 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5506
/* Offset of the callout block field `arg`, measured back from the end of the
CALLOUT_ARG_SIZE byte area (the result is therefore negative). */
5507 #define CALLOUT_ARG_OFFSET(arg) \
5508 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5509
/* Emits the matching path of a callout item at cc.

A callout block of CALLOUT_ARG_SIZE bytes is built on the JIT stack, filled in
and passed to do_callout(). The returned value is then tested: a positive value
jumps to the backtrack list, a negative value jumps to the forced quit
label/list, and zero falls through.

Returns cc + 2 + 2 * LINK_SIZE, or NULL if the backtrack record could not be
allocated. */
5510 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5511 {
5512 DEFINE_COMPILER;
5513 backtrack_common *backtrack;
5514
5515 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5516
5517 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5518
5519 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5520 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5521 SLJIT_ASSERT(common->capture_last_ptr != 0);
5522 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5523 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5524
5525 /* These pointer sized fields temporarily store internal variables:
STR_PTR goes into offset_vector and OVECTOR(0) into subject; do_callout()
converts them to offsets before giving the fields their real values. */
5526 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5527 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5528 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5529
/* The mark field receives the value loaded from jit_arguments when a mark
pointer is in use, otherwise the immediate 0. */
5530 if (common->mark_ptr != 0)
5531 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5532 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5533 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5534 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5535
5536 /* Needed to save important temporary registers. */
5537 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* Second argument: address of the callout block; third: base of the JIT offset vector. */
5538 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5539 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5540 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5541 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5542 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5543 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5544
5545 /* Check return value. */
5546 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5547 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5548 if (common->forced_quit_label == NULL)
5549 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5550 else
5551 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5552 return cc + 2 + 2 * LINK_SIZE;
5553 }
5554
5555 #undef CALLOUT_ARG_SIZE
5556 #undef CALLOUT_ARG_OFFSET
5557
/* Emits the matching path of an assertion (opcode in the range
OP_ASSERT ... OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO or
OP_BRAMINZERO.

backtrack   - the assert_backtrack record of this assertion; its framesize,
              private_data_ptr and (for the zero-repeat forms) matchingpath
              fields are used here.
conditional - TRUE when the assertion is used as a condition: failures are then
              collected in backtrack->condfailed instead of
              backtrack->common.topbacktracks.

Every alternative of the assertion is compiled in turn with its own temporary
backtrack_common. The accept/quit/then_trap/local_exit state of `common` is
replaced while the assertion body is compiled and restored before returning.

Returns the opcode pointer after the assertion (cc + 1 + LINK_SIZE past its last
alternative), or NULL if the compiler reported an error. */
5558 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5559 {
5560 DEFINE_COMPILER;
5561 int framesize;
5562 int extrasize;
5563 BOOL needs_control_head;
5564 int private_data_ptr;
5565 backtrack_common altbacktrack;
5566 pcre_uchar *ccbegin;
5567 pcre_uchar opcode;
5568 pcre_uchar bra = OP_BRA;
5569 jump_list *tmp = NULL;
5570 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5571 jump_list **found;
5572 /* Saving previous accept variables. */
5573 BOOL save_local_exit = common->local_exit;
5574 then_trap_backtrack *save_then_trap = common->then_trap;
5575 struct sljit_label *save_quit_label = common->quit_label;
5576 struct sljit_label *save_accept_label = common->accept_label;
5577 jump_list *save_quit = common->quit;
5578 jump_list *save_accept = common->accept;
5579 struct sljit_jump *jump;
5580 struct sljit_jump *brajump = NULL;
5581
5582 /* Assert captures then. */
5583 common->then_trap = NULL;
5584
5585 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5586 {
5587 SLJIT_ASSERT(!conditional);
5588 bra = *cc;
5589 cc++;
5590 }
5591 private_data_ptr = PRIVATE_DATA(cc);
5592 SLJIT_ASSERT(private_data_ptr != 0);
5593 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
5594 backtrack->framesize = framesize;
5595 backtrack->private_data_ptr = private_data_ptr;
5596 opcode = *cc;
5597 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* For positive assertions a matching alternative jumps to the local list tmp
(resolved at "Assert is successful" below); for negative assertions a matching
alternative means failure and jumps directly to target. */
5598 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin .. cc delimit the current alternative. */
5599 ccbegin = cc;
5600 cc += GET(cc, 1);
5601
5602 if (bra == OP_BRAMINZERO)
5603 {
5604 /* This is a braminzero backtrack path. */
5605 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5606 free_stack(common, 1);
5607 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5608 }
5609
/* Set up the stack. Without a frame (framesize < 0) only STR_PTR and, if
needed, the previous control head are saved; with a frame the previous
private data value is saved as well and init_frame() is called. In both cases
the control head local is reset to 0 when needs_control_head is set. */
5610 if (framesize < 0)
5611 {
5612 extrasize = needs_control_head ? 2 : 1;
5613 if (framesize == no_frame)
5614 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5615 allocate_stack(common, extrasize);
5616 if (needs_control_head)
5617 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5619 if (needs_control_head)
5620 {
5621 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5622 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5623 }
5624 }
5625 else
5626 {
5627 extrasize = needs_control_head ? 3 : 2;
5628 allocate_stack(common, framesize + extrasize);
5629 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5630 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5631 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5632 if (needs_control_head)
5633 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5634 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5635 if (needs_control_head)
5636 {
5637 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5638 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5639 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5640 }
5641 else
5642 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5643 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
5644 }
5645
5646 memset(&altbacktrack, 0, sizeof(backtrack_common));
5647 common->local_exit = TRUE;
5648 common->quit_label = NULL;
5649 common->quit = NULL;
/* One iteration per alternative of the assertion. */
5650 while (1)
5651 {
5652 common->accept_label = NULL;
5653 common->accept = NULL;
5654 altbacktrack.top = NULL;
5655 altbacktrack.topbacktracks = NULL;
5656
/* Second and later alternatives restart from the saved STR_PTR. */
5657 if (*ccbegin == OP_ALT)
5658 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5659
5660 altbacktrack.cc = ccbegin;
5661 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5662 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5663 {
5664 common->local_exit = save_local_exit;
5665 common->then_trap = save_then_trap;
5666 common->quit_label = save_quit_label;
5667 common->accept_label = save_accept_label;
5668 common->quit = save_quit;
5669 common->accept = save_accept;
5670 return NULL;
5671 }
/* Accept jumps collected while compiling this alternative land here. */
5672 common->accept_label = LABEL();
5673 if (common->accept != NULL)
5674 set_jumps(common->accept, common->accept_label);
5675
5676 /* Reset stack. */
5677 if (framesize < 0)
5678 {
5679 if (framesize == no_frame)
5680 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5681 else
5682 free_stack(common, extrasize);
5683 if (needs_control_head)
5684 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5685 }
5686 else
5687 {
5688 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5689 {
5690 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5691 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5692 if (needs_control_head)
5693 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5694 }
5695 else
5696 {
5697 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5698 if (needs_control_head)
5699 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5700 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5701 }
5702 }
5703
5704 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5705 {
5706 /* We know that STR_PTR was stored on the top of the stack. */
5707 if (conditional)
5708 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5709 else if (bra == OP_BRAZERO)
5710 {
5711 if (framesize < 0)
5712 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5713 else
5714 {
5715 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5716 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5717 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5718 }
5719 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5720 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5721 }
5722 else if (framesize >= 0)
5723 {
5724 /* For OP_BRA and OP_BRAMINZERO. */
5725 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5726 }
5727 }
/* The alternative matched: leave through the `found` list. */
5728 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5729
/* Backtracking code of this alternative; its failures continue below. */
5730 compile_backtrackingpath(common, altbacktrack.top);
5731 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5732 {
5733 common->local_exit = save_local_exit;
5734 common->then_trap = save_then_trap;
5735 common->quit_label = save_quit_label;
5736 common->accept_label = save_accept_label;
5737 common->quit = save_quit;
5738 common->accept = save_accept;
5739 return NULL;
5740 }
5741 set_jumps(altbacktrack.topbacktracks, LABEL());
5742
5743 if (*cc != OP_ALT)
5744 break;
5745
5746 ccbegin = cc;
5747 cc += GET(cc, 1);
5748 }
5749
5750 /* None of them matched. */
/* Quit jumps recorded inside the assertion get their own landing code that
restores STACK_TOP; the normal fall-through path skips over it. */
5751 if (common->quit != NULL)
5752 {
5753 jump = JUMP(SLJIT_JUMP);
5754 set_jumps(common->quit, LABEL());
5755 SLJIT_ASSERT(framesize != no_stack);
5756 if (framesize < 0)
5757 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5758 else
5759 {
5760 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5761 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5762 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5763 }
5764 JUMPHERE(jump);
5765 }
5766
/* Restore the control head that was saved in STACK(1). */
5767 if (needs_control_head)
5768 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5769
5770 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5771 {
5772 /* Assert is failed. */
5773 if (conditional || bra == OP_BRAZERO)
5774 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5775
5776 if (framesize < 0)
5777 {
5778 /* The topmost item should be 0. */
5779 if (bra == OP_BRAZERO)
5780 {
5781 if (extrasize == 2)
5782 free_stack(common, 1);
5783 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5784 }
5785 else
5786 free_stack(common, extrasize);
5787 }
5788 else
5789 {
5790 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5791 /* The topmost item should be 0. */
5792 if (bra == OP_BRAZERO)
5793 {
5794 free_stack(common, framesize + extrasize - 1);
5795 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5796 }
5797 else
5798 free_stack(common, framesize + extrasize);
5799 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5800 }
/* With OP_BRAZERO a failed assertion still continues on the matching path
(the jump is bound to backtrack->matchingpath below); otherwise it is a
failure and goes to target. */
5801 jump = JUMP(SLJIT_JUMP);
5802 if (bra != OP_BRAZERO)
5803 add_jump(compiler, target, jump);
5804
5805 /* Assert is successful. */
5806 set_jumps(tmp, LABEL());
5807 if (framesize < 0)
5808 {
5809 /* We know that STR_PTR was stored on the top of the stack. */
5810 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5811 /* Keep the STR_PTR on the top of the stack. */
5812 if (bra == OP_BRAZERO)
5813 {
5814 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5815 if (extrasize == 2)
5816 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5817 }
5818 else if (bra == OP_BRAMINZERO)
5819 {
5820 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5821 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5822 }
5823 }
5824 else
5825 {
5826 if (bra == OP_BRA)
5827 {
5828 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5829 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5830 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5831 }
5832 else
5833 {
5834 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5835 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5836 if (extrasize == 2)
5837 {
5838 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5839 if (bra == OP_BRAMINZERO)
5840 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5841 }
5842 else
5843 {
5844 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5845 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5846 }
5847 }
5848 }
5849
5850 if (bra == OP_BRAZERO)
5851 {
5852 backtrack->matchingpath = LABEL();
5853 SET_LABEL(jump, backtrack->matchingpath);
5854 }
5855 else if (bra == OP_BRAMINZERO)
5856 {
/* NOTE(review): backtrack->matchingpath is not assigned in this function for
OP_BRAMINZERO - presumably the caller sets it beforehand; confirm. */
5857 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5858 JUMPHERE(brajump);
5859 if (framesize >= 0)
5860 {
5861 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5862 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5863 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5864 }
5865 set_jumps(backtrack->common.topbacktracks, LABEL());
5866 }
5867 }
5868 else
5869 {
5870 /* AssertNot is successful. */
5871 if (framesize < 0)
5872 {
5873 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5874 if (bra != OP_BRA)
5875 {
5876 if (extrasize == 2)
5877 free_stack(common, 1);
5878 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5879 }
5880 else
5881 free_stack(common, extrasize);
5882 }
5883 else
5884 {
5885 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5886 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5887 /* The topmost item should be 0. */
5888 if (bra != OP_BRA)
5889 {
5890 free_stack(common, framesize + extrasize - 1);
5891 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5892 }
5893 else
5894 free_stack(common, framesize + extrasize);
5895 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5896 }
5897
5898 if (bra == OP_BRAZERO)
5899 backtrack->matchingpath = LABEL();
5900 else if (bra == OP_BRAMINZERO)
5901 {
5902 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5903 JUMPHERE(brajump);
5904 }
5905
/* For the zero-repeat forms a failed negative assertion is not a failure of
the item: the collected jumps are bound here and the list is cleared. */
5906 if (bra != OP_BRA)
5907 {
5908 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5909 set_jumps(backtrack->common.topbacktracks, LABEL());
5910 backtrack->common.topbacktracks = NULL;
5911 }
5912 }
5913
/* Restore the accept/quit state saved on entry. */
5914 common->local_exit = save_local_exit;
5915 common->then_trap = save_then_trap;
5916 common->quit_label = save_quit_label;
5917 common->accept_label = save_accept_label;
5918 common->quit = save_quit;
5919 common->accept = save_accept;
5920 return cc + 1 + LINK_SIZE;
5921 }
5922
5923 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5924 {
5925 int condition = FALSE;
5926 pcre_uchar *slotA = name_table;
5927 pcre_uchar *slotB;
5928 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5929 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5930 sljit_sw no_capture;
5931 int i;
5932
5933 locals += refno & 0xff;
5934 refno >>= 8;
5935 no_capture = locals[1];
5936
5937 for (i = 0; i < name_count; i++)
5938 {
5939 if (GET2(slotA, 0) == refno) break;
5940 slotA += name_entry_size;
5941 }
5942
5943 if (i < name_count)
5944 {
5945 /* Found a name for the number - there can be only one; duplicate names
5946 for different numbers are allowed, but not vice versa. First scan down
5947 for duplicates. */
5948
5949 slotB = slotA;
5950 while (slotB > name_table)
5951 {
5952 slotB -= name_entry_size;
5953 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5954 {
5955 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5956 if (condition) break;
5957 }
5958 else break;
5959 }
5960
5961 /* Scan up for duplicates */
5962 if (!condition)
5963 {
5964 slotB = slotA;
5965 for (i++; i < name_count; i++)
5966 {
5967 slotB += name_entry_size;
5968 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5969 {
5970 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5971 if (condition) break;
5972 }
5973 else break;
5974 }
5975 }
5976 }
5977 return condition;
5978 }
5979
5980 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5981 {
5982 int condition = FALSE;
5983 pcre_uchar *slotA = name_table;
5984 pcre_uchar *slotB;
5985 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5986 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5987 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5988 sljit_uw i;
5989
5990 for (i = 0; i < name_count; i++)
5991 {
5992 if (GET2(slotA, 0) == recno) break;
5993 slotA += name_entry_size;
5994 }
5995
5996 if (i < name_count)
5997 {
5998 /* Found a name for the number - there can be only one; duplicate
5999 names for different numbers are allowed, but not vice versa. First
6000 scan down for duplicates. */
6001
6002 slotB = slotA;
6003 while (slotB > name_table)
6004 {
6005 slotB -= name_entry_size;
6006 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
6007 {
6008 condition = GET2(slotB, 0) == group_num;
6009 if (condition) break;
6010 }
6011 else break;
6012 }
6013
6014 /* Scan up for duplicates */
6015 if (!condition)
6016 {
6017 slotB = slotA;
6018 for (i++; i < name_count; i++)
6019 {
6020 slotB += name_entry_size;
6021 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
6022 {
6023 condition = GET2(slotB, 0) == group_num;
6024 if (condition) break;
6025 }
6026 else break;
6027 }
6028 }
6029 }
6030 return condition;
6031 }
6032
6033 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
6034 {
6035 DEFINE_COMPILER;
6036 int stacksize;
6037
6038 if (framesize < 0)
6039 {
6040 if (framesize == no_frame)
6041 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6042 else
6043 {
6044 stacksize = needs_control_head ? 1 : 0;
6045 if (ket != OP_KET || has_alternatives)
6046 stacksize++;
6047 free_stack(common, stacksize);
6048 }
6049
6050 if (needs_control_head)
6051 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
6052
6053 /* TMP2 which is set here used by OP_KETRMAX below. */
6054 if (ket == OP_KETRMAX)
6055 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6056 else if (ket == OP_KETRMIN)
6057 {
6058 /* Move the STR_PTR to the private_data_ptr. */
6059 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6060 }
6061 }
6062 else
6063 {
6064 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
6065 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
6066 if (needs_control_head)
6067 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
6068
6069 if (ket == OP_KETRMAX)
6070 {
6071 /* TMP2 which is set here used by OP_KETRMAX below. */
6072 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6073 }
6074 }
6075 if (needs_control_head)
6076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
6077 }
6078
6079 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
6080 {
6081 DEFINE_COMPILER;
6082
6083 if (common->capture_last_ptr != 0)
6084 {
6085 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6086 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6087 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6088 stacksize++;
6089 }
6090 if (common->optimized_cbracket[offset >> 1] == 0)
6091 {
6092 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6093 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6094 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6095 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6096 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6097 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6099 stacksize += 2;
6100 }
6101 return stacksize;
6102 }
6103
6104 /*
6105 Handling bracketed expressions is probably the most complex part.
6106
6107 Stack layout naming characters:
6108 S - Push the current STR_PTR
6109 0 - Push a 0 (NULL)
6110 A - Push the current STR_PTR. Needed for restoring the STR_PTR
6111 before the next alternative. Not pushed if there are no alternatives.
6112 M - Any values pushed by the current alternative. Can be empty, or anything.
6113 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
6114 L - Push the previous local (pointed by localptr) to the stack
6115 () - optional values stored on the stack
6116 ()* - optional, can be stored multiple times
6117
6118 The following list shows the regular expression templates, their PCRE byte codes
6119 and stack layout supported by pcre-sljit.
6120
6121 (?:) OP_BRA | OP_KET A M
6122 () OP_CBRA | OP_KET C M
6123 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
6124 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
6125 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
6126 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
6127 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
6128 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
6129 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
6130 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
6131 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
6132 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
6133 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
6134 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
6135 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
6136 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
6137 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
6138 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
6139 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
6140 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6141 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6142 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6143
6144
6145 Stack layout naming characters:
6146 A - Push the alternative index (starting from 0) on the stack.
6147 Not pushed if there are no alternatives.
6148 M - Any values pushed by the current alternative. Can be empty, or anything.
6149
6150 The next list shows the possible content of a bracket:
6151 (|) OP_*BRA | OP_ALT ... M A
6152 (?()|) OP_*COND | OP_ALT M A
6153 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6154 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6155 Or nothing, if trace is unnecessary
6156 */
6157
/* Emits the matching-path code for one bracketed group and returns the
position in the pattern right after the closing ket of the group.

 common - shared compiler state
 cc     - start of the group; may point at an OP_BRAZERO / OP_BRAMINZERO
          that precedes the bracket opcode
 parent - backtrack record that receives the new bracket_backtrack

Returns NULL when sljit reports a compiler error. For a condition whose
first item is OP_DEF the bracket_backtrack is unlinked again and
bracketend(cc) is returned immediately.

Note that stacksize is reused as scratch storage for reference numbers
while conditions are processed. */
6158 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6159 {
6160 DEFINE_COMPILER;
6161 backtrack_common *backtrack;
6162 pcre_uchar opcode;
6163 int private_data_ptr = 0;
6164 int offset = 0;
6165 int stacksize;
6166 pcre_uchar *ccbegin;
6167 pcre_uchar *matchingpath;
6168 pcre_uchar bra = OP_BRA;
6169 pcre_uchar ket;
6170 assert_backtrack *assert;
6171 BOOL has_alternatives;
6172 BOOL needs_control_head = FALSE;
6173 struct sljit_jump *jump;
6174 struct sljit_jump *skip;
6175 struct sljit_label *rmaxlabel = NULL;
6176 struct sljit_jump *braminzerojump = NULL;
6177
6178 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6179
/* Remember a leading OP_BRAZERO / OP_BRAMINZERO in bra and step over it. */
6180 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6181 {
6182 bra = *cc;
6183 cc++;
/* NOTE(review): redundant store, opcode is assigned again right after this block. */
6184 opcode = *cc;
6185 }
6186
6187 opcode = *cc;
6188 ccbegin = cc;
6189 matchingpath = ccbegin + 1 + LINK_SIZE;
6190
6191 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6192 {
6193 /* Drop this bracket_backtrack. */
6194 parent->top = backtrack->prev;
6195 return bracketend(cc);
6196 }
6197
6198 ket = *(bracketend(cc) - 1 - LINK_SIZE);
6199 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6200 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* From here on cc points at the item that ends the first alternative
(an OP_ALT or the ket). */
6201 cc += GET(cc, 1);
6202
6203 has_alternatives = *cc == OP_ALT;
/* For conditions has_alternatives is decided from the condition item
found at matchingpath instead. */
6204 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6205 {
6206 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
6207 if (*matchingpath == OP_NRREF)
6208 {
/* stacksize is only a scratch variable for the reference number here. */
6209 stacksize = GET2(matchingpath, 1);
6210 if (common->currententry == NULL || stacksize == RREF_ANY)
6211 has_alternatives = FALSE;
6212 else if (common->currententry->start == 0)
6213 has_alternatives = stacksize != 0;
6214 else
6215 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6216 }
6217 }
6218
/* An OP_COND whose first alternative is ended by a repeating ket is
handled as OP_SCOND, and OP_ONCE_NC shares the OP_ONCE code path. */
6219 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6220 opcode = OP_SCOND;
6221 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6222 opcode = OP_ONCE;
6223
6224 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6225 {
6226 /* Capturing brackets has a pre-allocated space. */
6227 offset = GET2(ccbegin, 1 + LINK_SIZE);
6228 if (common->optimized_cbracket[offset] == 0)
6229 {
6230 private_data_ptr = OVECTOR_PRIV(offset);
6231 offset <<= 1;
6232 }
6233 else
6234 {
6235 offset <<= 1;
6236 private_data_ptr = OVECTOR(offset);
6237 }
6238 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6239 matchingpath += IMM2_SIZE;
6240 }
6241 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6242 {
6243 /* Other brackets simply allocate the next entry. */
6244 private_data_ptr = PRIVATE_DATA(ccbegin);
6245 SLJIT_ASSERT(private_data_ptr != 0);
6246 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6247 if (opcode == OP_ONCE)
6248 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
6249 }
6250
6251 /* Instructions before the first alternative. */
/* The slots are counted first, allocated in one step, and then filled
in the same order. */
6252 stacksize = 0;
6253 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6254 stacksize++;
6255 if (bra == OP_BRAZERO)
6256 stacksize++;
6257
6258 if (stacksize > 0)
6259 allocate_stack(common, stacksize);
6260
6261 stacksize = 0;
6262 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6263 {
6264 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6265 stacksize++;
6266 }
6267
6268 if (bra == OP_BRAZERO)
6269 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6270
6271 if (bra == OP_BRAMINZERO)
6272 {
6273 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6274 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6275 if (ket != OP_KETRMIN)
6276 {
6277 free_stack(common, 1);
6278 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6279 }
6280 else
6281 {
6282 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6283 {
6284 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6285 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6286 /* Nothing stored during the first run. */
6287 skip = JUMP(SLJIT_JUMP);
6288 JUMPHERE(jump);
6289 /* Checking zero-length iteration. */
6290 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6291 {
6292 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6293 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6294 }
6295 else
6296 {
6297 /* Except when the whole stack frame must be saved. */
6298 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6299 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6300 }
6301 JUMPHERE(skip);
6302 }
6303 else
6304 {
6305 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6306 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6307 JUMPHERE(jump);
6308 }
6309 }
6310 }
6311
6312 if (ket == OP_KETRMIN)
6313 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6314
6315 if (ket == OP_KETRMAX)
6316 {
/* rmaxlabel is the target of the loop-back jumps emitted near the end
of this function. */
6317 rmaxlabel = LABEL();
6318 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
6319 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
6320 }
6321
6322 /* Handling capturing brackets and alternatives. */
6323 if (opcode == OP_ONCE)
6324 {
6325 stacksize = 0;
6326 if (needs_control_head)
6327 {
6328 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6329 stacksize++;
6330 }
6331
6332 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6333 {
6334 /* Neither capturing brackets nor recursions are found in the block. */
6335 if (ket == OP_KETRMIN)
6336 {
6337 stacksize += 2;
6338 if (!needs_control_head)
6339 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6340 }
6341 else
6342 {
6343 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6344 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6345 if (ket == OP_KETRMAX || has_alternatives)
6346 stacksize++;
6347 }
6348
6349 if (stacksize > 0)
6350 allocate_stack(common, stacksize);
6351
6352 stacksize = 0;
6353 if (needs_control_head)
6354 {
6355 stacksize++;
6356 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6357 }
6358
6359 if (ket == OP_KETRMIN)
6360 {
/* When a control head is saved, TMP2 was used for it above, so the
private data value is loaded into TMP2 only now. */
6361 if (needs_control_head)
6362 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6363 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6364 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6365 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6366 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6367 }
6368 else if (ket == OP_KETRMAX || has_alternatives)
6369 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6370 }
6371 else
6372 {
6373 if (ket != OP_KET || has_alternatives)
6374 stacksize++;
6375
6376 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6377 allocate_stack(common, stacksize);
6378
6379 if (needs_control_head)
6380 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6381
/* TMP1 keeps the previous private data value; TMP2 becomes the new one
(STACK_TOP minus the size that has just been allocated). */
6382 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6383 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6384
6385 stacksize = needs_control_head ? 1 : 0;
6386 if (ket != OP_KET || has_alternatives)
6387 {
6388 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6389 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6390 stacksize++;
6391 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6392 }
6393 else
6394 {
6395 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6396 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6397 }
6398 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6399 }
6400 }
6401 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6402 {
6403 /* Saving the previous values. */
6404 if (common->optimized_cbracket[offset >> 1] != 0)
6405 {
6406 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6407 allocate_stack(common, 2);
6408 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6409 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6410 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6411 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6412 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6413 }
6414 else
6415 {
6416 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6417 allocate_stack(common, 1);
6418 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6419 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6420 }
6421 }
6422 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6423 {
6424 /* Saving the previous value. */
6425 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6426 allocate_stack(common, 1);
6427 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6428 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6429 }
6430 else if (has_alternatives)
6431 {
6432 /* Pushing the starting string pointer. */
6433 allocate_stack(common, 1);
6434 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6435 }
6436
6437 /* Generating code for the first alternative. */
6438 if (opcode == OP_COND || opcode == OP_SCOND)
6439 {
6440 if (*matchingpath == OP_CREF)
6441 {
6442 SLJIT_ASSERT(has_alternatives);
/* The condition fails when the start entry of the referenced group
equals OVECTOR(1). */
6443 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6444 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6445 matchingpath += 1 + IMM2_SIZE;
6446 }
6447 else if (*matchingpath == OP_NCREF)
6448 {
6449 SLJIT_ASSERT(has_alternatives);
/* stacksize temporarily holds the referenced group number. */
6450 stacksize = GET2(matchingpath, 1);
6451 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6452
/* Slow path: call do_searchovector. STACK_TOP is parked in POSSESSIVE1
across the call and reloaded afterwards. The first argument packs the
group number (upper bits) together with the ovector start expressed in
sljit_sw units (low 8 bits), which is what do_searchovector unpacks. */
6453 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6454 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6455 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6456 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6457 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6458 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6459 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6460 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6461 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6462
6463 JUMPHERE(jump);
6464 matchingpath += 1 + IMM2_SIZE;
6465 }
6466 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6467 {
6468 /* Never has other case. */
6469 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6470
/* stacksize first holds the reference number and is then turned into
the compile-time result of the recursion check (zero or non-zero). */
6471 stacksize = GET2(matchingpath, 1);
6472 if (common->currententry == NULL)
6473 stacksize = 0;
6474 else if (stacksize == RREF_ANY)
6475 stacksize = 1;
6476 else if (common->currententry->start == 0)
6477 stacksize = stacksize == 0;
6478 else
6479 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6480
6481 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6482 {
6483 SLJIT_ASSERT(!has_alternatives);
/* The result is known now: compile only the branch that is taken. */
6484 if (stacksize != 0)
6485 matchingpath += 1 + IMM2_SIZE;
6486 else
6487 {
6488 if (*cc == OP_ALT)
6489 {
6490 matchingpath = cc + 1 + LINK_SIZE;
6491 cc += GET(cc, 1);
6492 }
6493 else
6494 matchingpath = cc;
6495 }
6496 }
6497 else
6498 {
6499 SLJIT_ASSERT(has_alternatives);
6500
/* The decision is made at run time by do_searchgroups. STACK_TOP is
parked in POSSESSIVE1 across the call and reloaded afterwards. */
6501 stacksize = GET2(matchingpath, 1);
6502 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6503 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6504 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6505 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6506 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6507 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6508 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6509 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6510 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6511 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6512 matchingpath += 1 + IMM2_SIZE;
6513 }
6514 }
6515 else
6516 {
6517 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6518 /* Similar code as PUSH_BACKTRACK macro. */
6519 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6520 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6521 return NULL;
6522 memset(assert, 0, sizeof(assert_backtrack));
6523 assert->common.cc = matchingpath;
6524 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6525 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6526 }
6527 }
6528
/* Compile the body of the first alternative (from matchingpath up to cc). */
6529 compile_matchingpath(common, matchingpath, cc, backtrack);
6530 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6531 return NULL;
6532
6533 if (opcode == OP_ONCE)
6534 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6535
/* Count the slots needed after the alternative, allocate them in one
step, then fill them in the same order. */
6536 stacksize = 0;
6537 if (ket != OP_KET || bra != OP_BRA)
6538 stacksize++;
6539 if (offset != 0)
6540 {
6541 if (common->capture_last_ptr != 0)
6542 stacksize++;
6543 if (common->optimized_cbracket[offset >> 1] == 0)
6544 stacksize += 2;
6545 }
6546 if (has_alternatives && opcode != OP_ONCE)
6547 stacksize++;
6548
6549 if (stacksize > 0)
6550 allocate_stack(common, stacksize);
6551
6552 stacksize = 0;
6553 if (ket != OP_KET || bra != OP_BRA)
6554 {
6555 if (ket != OP_KET)
6556 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6557 else
6558 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6559 stacksize++;
6560 }
6561
6562 if (offset != 0)
6563 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6564
6565 if (has_alternatives)
6566 {
6567 if (opcode != OP_ONCE)
6568 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6569 if (ket != OP_KETRMAX)
6570 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6571 }
6572
6573 /* Must be after the matchingpath label. */
6574 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6575 {
6576 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6577 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6578 }
6579
6580 if (ket == OP_KETRMAX)
6581 {
6582 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6583 {
6584 if (has_alternatives)
6585 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6586 /* Checking zero-length iteration. */
6587 if (opcode != OP_ONCE)
6588 {
6589 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6590 /* Drop STR_PTR for greedy plus quantifier. */
6591 if (bra != OP_BRAZERO)
6592 free_stack(common, 1);
6593 }
6594 else
6595 /* TMP2 must contain the starting STR_PTR. */
6596 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6597 }
6598 else
6599 JUMPTO(SLJIT_JUMP, rmaxlabel);
6600 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6601 }
6602
6603 if (bra == OP_BRAZERO)
6604 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6605
6606 if (bra == OP_BRAMINZERO)
6607 {
6608 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6609 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6610 if (braminzerojump != NULL)
6611 {
6612 JUMPHERE(braminzerojump);
6613 /* We need to release the end pointer to perform the
6614 backtrack for the zero-length iteration. When
6615 framesize is < 0, OP_ONCE will do the release itself. */
6616 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6617 {
6618 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6619 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6620 }
6621 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6622 free_stack(common, 1);
6623 }
6624 /* Continue to the normal backtrack. */
6625 }
6626
6627 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6628 decrease_call_count(common);
6629
6630 /* Skip the other alternatives. */
6631 while (*cc == OP_ALT)
6632 cc += GET(cc, 1);
/* Step over the closing ket and its link field. */
6633 cc += 1 + LINK_SIZE;
6634
6635 /* Temporarily encoding the needs_control_head in framesize. */
/* The flag goes into the lowest bit and the frame size is shifted up by one. */
6636 if (opcode == OP_ONCE)
6637 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6638 return cc;
6639 }
6640
6641 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6642 {
6643 DEFINE_COMPILER;
6644 backtrack_common *backtrack;
6645 pcre_uchar opcode;
6646 int private_data_ptr;
6647 int cbraprivptr = 0;
6648 BOOL needs_control_head;
6649 int framesize;
6650 int stacksize;
6651 int offset = 0;
6652 BOOL zero = FALSE;
6653 pcre_uchar *ccbegin = NULL;
6654 int stack; /* Also contains the offset of control head. */
6655 struct sljit_label *loop = NULL;
6656 struct jump_list *emptymatch = NULL;
6657
6658 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6659 if (*cc == OP_BRAPOSZERO)
6660 {
6661 zero = TRUE;
6662 cc++;
6663 }
6664
6665 opcode = *cc;
6666 private_data_ptr = PRIVATE_DATA(cc);
6667 SLJIT_ASSERT(private_data_ptr != 0);
6668 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6669 switch(opcode)
6670 {
6671 case OP_BRAPOS:
6672 case OP_SBRAPOS:
6673 ccbegin = cc + 1 + LINK_SIZE;
6674 break;
6675
6676 case OP_CBRAPOS:
6677 case OP_SCBRAPOS:
6678 offset = GET2(cc, 1 + LINK_SIZE);
6679 /* This case cannot be optimized in the same way as
6680 normal capturing brackets. */
6681 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6682 cbraprivptr = OVECTOR_PRIV(offset);
6683 offset <<= 1;
6684 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6685 break;
6686
6687 default:
6688 SLJIT_ASSERT_STOP();
6689 break;
6690 }
6691
6692 framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
6693 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6694 if (framesize < 0)
6695 {
6696 if (offset != 0)
6697 {
6698 stacksize = 2;
6699 if (common->capture_last_ptr != 0)
6700 stacksize++;
6701 }
6702 else
6703 stacksize = 1;
6704
6705 if (needs_control_head)
6706 stacksize++;
6707 if (!zero)
6708 stacksize++;
6709
6710 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6711 allocate_stack(common, stacksize);
6712 if (framesize == no_frame)
6713 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6714
6715 stack = 0;
6716 if (offset != 0)
6717 {
6718 stack = 2;
6719 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6720 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6721 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6722 if (common->capture_last_ptr != 0)
6723 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6725 if (needs_control_head)
6726 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6727 if (common->capture_last_ptr != 0)
6728 {
6729 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6730 stack = 3;
6731 }
6732 }
6733 else
6734 {
6735 if (needs_control_head)
6736 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6737 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6738 stack = 1;
6739 }
6740
6741 if (needs_control_head)
6742 stack++;
6743 if (!zero)
6744 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
6745 if (needs_control_head)
6746 {
6747 stack--;
6748 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6749 }
6750 }