/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1275 - (show annotations)
Sun Mar 10 05:32:10 2013 UTC (6 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 289698 byte(s)
Error occurred while calculating annotation data.
Experimental support of (*SKIP) backtracking verb in the JIT compiler.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows the code generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block taken by functions of type jit_function. The pointer
members are laid out first; the integer and byte-sized members follow.
NOTE(review): str / begin / end presumably delimit the subject string and
the current start position - confirm against the code that fills this in. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 int real_offset_count;
172 int offset_count;
173 int call_limit;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Holds one function pointer and one size for each compile mode (both
arrays have JIT_NUMBER_OF_COMPILE_MODES entries), together with a callback,
its user data and a top bracket value.
NOTE(review): presumably the per-pattern result of JIT compilation - confirm
against the code that allocates and fills this structure. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
186 } executable_functions;
187
/* Node of a singly linked list of sljit jumps. */
188 typedef struct jump_list {
189 struct sljit_jump *jump;
190 struct jump_list *next;
191 } jump_list;
192
/* Node of a singly linked list of stubs. Each node pairs a jump (start)
with a label (quit).
NOTE(review): presumably start enters the stub and quit is where execution
resumes afterwards - confirm against the code that emits the stubs. */
193 typedef struct stub_list {
194 struct sljit_jump *start;
195 struct sljit_label *quit;
196 struct stub_list *next;
197 } stub_list;
198
/* Negative marker values. get_framesize documents that it returns one of
these (always < 0) when no frame is needed. */
199 enum frame_types {
200 no_frame = -1,
201 no_stack = -2
202 };
203
/* Kinds of control entries.
NOTE(review): the names suggest the (*COMMIT), (*PRUNE) and (*SKIP)
backtracking verbs - confirm against the code that uses these values. */
204 enum control_types {
205 type_commit = 0,
206 type_prune = 1,
207 type_skip = 2
208 };
209
/* Function pointer type using the SLJIT_CALL calling convention: takes a
pointer to a jit_arguments block and returns an int. */
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type for the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the arguments for compile_backtrackingpath. Must be the first member
215 of its descendants. */
216 typedef struct backtrack_common {
217 /* Concatenation stack. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
/* Backtrack record that extends backtrack_common (which is its first
member) with a failed-condition jump list, a frame size, a private data
offset and a matching path label. */
227 typedef struct assert_backtrack {
228 backtrack_common common;
229 jump_list *condfailed;
230 /* Less than 0 (-1) if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
/* Backtrack record for brackets. It extends backtrack_common (its first
member) with the labels used to re-enter the matching path and a union
whose active member depends on the bracket opcode, as noted on each member. */
238 typedef struct bracket_backtrack {
239 backtrack_common common;
240 /* Where to continue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. -1 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
/* Backtrack record that extends backtrack_common (its first member) with a
private data offset, a frame size and an allocated stack size.
NOTE(review): the name suggests the possessive bracket opcodes
(OP_BRAPOS and friends) - confirm against the code that allocates it. */
258 typedef struct bracketpos_backtrack {
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
/* Backtrack record that extends backtrack_common (its first member) with a
single matching path label.
NOTE(review): the name suggests OP_BRAMINZERO - confirm against its user. */
268 typedef struct braminzero_backtrack {
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
/* Backtrack record for iterators. It extends backtrack_common (its first
member) with the label where the next iteration starts. */
273 typedef struct iterator_backtrack {
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
/* Node of a singly linked list that describes one recursion target: where
its opcodes start, the entry label of the function generated for it, and
the calls collected before that function exists. */
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls until the function is not created. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 int start;
287 } recurse_entry;
288
/* Backtrack record that extends backtrack_common (its first member) with a
flag telling whether the pattern was inlined.
NOTE(review): presumably used for OP_RECURSE - confirm against its user. */
289 typedef struct recurse_backtrack {
290 backtrack_common common;
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
/* Sizes the digits[] member of compiler_common (2 + MAX_RANGE_SIZE entries). */
294 #define MAX_RANGE_SIZE 6
295
/* Shared state of one JIT compilation. It is passed (as "common") to the
code generator helpers and holds the sljit compiler, the byte code start,
the offsets of the various private data slots, option and table values,
and the labels / jump lists collected while the code is generated.
The *_ptr integer members that are assigned in get_private_data_length
(recursive_head_ptr, capture_last_ptr, mark_ptr) take their value from
ovector_start, and a value of 0 is tested there as "not assigned yet". */
296 typedef struct compiler_common {
297 /* The sljit generic compiler. */
298 struct sljit_compiler *compiler;
299 /* First byte code. */
300 pcre_uchar *start;
301 /* Maps private data offset to each opcode. */
302 int *private_data_ptrs;
303 /* Tells whether the capturing bracket is optimized. */
304 pcre_uint8 *optimized_cbracket;
305 /* Starting offset of private data for capturing brackets. */
306 int cbra_ptr;
307 /* Output vector starting point. Must be divisible by 2. */
308 int ovector_start;
309 /* Last known position of the requested byte. */
310 int req_char_ptr;
311 /* Head of the last recursion. */
312 int recursive_head_ptr;
313 /* First inspected character for partial matching. */
314 int start_used_ptr;
315 /* Starting pointer for partial soft matches. */
316 int hit_start;
317 /* End pointer of the first line. */
318 int first_line_end;
319 /* Points to the marked string. */
320 int mark_ptr;
321 /* Recursive control verb management chain. */
322 int control_head_ptr;
323 /* Points to the last matched capture block index. */
324 int capture_last_ptr;
325 /* Points to the starting position of the current match. */
326 int start_ptr;
327
328 /* Flipped and lower case tables. */
329 const pcre_uint8 *fcc;
330 sljit_sw lcc;
331 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
332 int mode;
333 /* \K is in the pattern. */
334 BOOL has_set_som;
335 /* Needs to know the start position anytime. */
336 BOOL needs_start_ptr;
337 /* Currently in recurse or assert. */
338 BOOL local_exit;
339 /* Newline control. */
340 int nltype;
341 int newline;
342 int bsr_nltype;
343 /* Dollar endonly. */
344 int endonly;
345 /* Tables. */
346 sljit_sw ctypes;
347 int digits[2 + MAX_RANGE_SIZE];
348 /* Named capturing brackets. */
349 sljit_uw name_table;
350 sljit_sw name_count;
351 sljit_sw name_entry_size;
352
353 /* Labels and jump lists. */
354 struct sljit_label *partialmatchlabel;
355 struct sljit_label *quit_label;
356 struct sljit_label *forced_quit_label;
357 struct sljit_label *accept_label;
358 stub_list *stubs;
359 recurse_entry *entries;
360 recurse_entry *currententry;
361 jump_list *partialmatch;
362 jump_list *quit;
363 jump_list *forced_quit;
364 jump_list *accept;
365 jump_list *calllimit;
366 jump_list *stackalloc;
367 jump_list *revertframes;
368 jump_list *wordboundary;
369 jump_list *anynewline;
370 jump_list *hspace;
371 jump_list *vspace;
372 jump_list *casefulcmp;
373 jump_list *caselesscmp;
374 jump_list *reset_match;
375 BOOL jscript_compat;
/* The members below exist only in builds with UTF / UCP support. */
376 #ifdef SUPPORT_UTF
377 BOOL utf;
378 #ifdef SUPPORT_UCP
379 BOOL use_ucp;
380 #endif
381 #ifndef COMPILE_PCRE32
382 jump_list *utfreadchar;
383 #endif
384 #ifdef COMPILE_PCRE8
385 jump_list *utfreadtype8;
386 #endif
387 #endif /* SUPPORT_UTF */
388 #ifdef SUPPORT_UCP
389 jump_list *getucd;
390 #endif
391 } compiler_common;
392
393 /* For byte_sequence_compare. */
394
/* Comparison state: a length and a source register. When SLJIT_UNALIGNED is
enabled it additionally carries a position (ucharptr) and two unions, c and
oc, each of which overlays an int, a 16 bit value and an array of code
units whose element type and count depend on the code unit width.
NOTE(review): c and oc presumably hold the characters and their other-case
counterparts - confirm against byte_sequence_compare. */
395 typedef struct compare_context {
396 int length;
397 int sourcereg;
398 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
399 int ucharptr;
400 union {
401 sljit_si asint;
402 sljit_uh asushort;
403 #if defined COMPILE_PCRE8
404 sljit_ub asbyte;
405 sljit_ub asuchars[4];
406 #elif defined COMPILE_PCRE16
407 sljit_uh asuchars[2];
408 #elif defined COMPILE_PCRE32
409 sljit_ui asuchars[1];
410 #endif
411 } c;
412 union {
413 sljit_si asint;
414 sljit_uh asushort;
415 #if defined COMPILE_PCRE8
416 sljit_ub asbyte;
417 sljit_ub asuchars[4];
418 #elif defined COMPILE_PCRE16
419 sljit_uh asuchars[2];
420 #elif defined COMPILE_PCRE32
421 sljit_ui asuchars[1];
422 #endif
423 } oc;
424 #endif
425 } compare_context;
426
/* CMP is redefined further below as a shortcut around sljit_emit_cmp. */
427 /* Undefine sljit macros. */
428 #undef CMP
429
/* STACK(i) evaluates to a negative byte offset: STACK(0) is
-sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw), and so on. */
430 /* Used for accessing the elements of the stack. */
431 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
432
/* Symbolic names for the sljit registers used by the code generator. */
433 #define TMP1 SLJIT_SCRATCH_REG1
434 #define TMP2 SLJIT_SCRATCH_REG3
435 #define TMP3 SLJIT_TEMPORARY_EREG2
436 #define STR_PTR SLJIT_SAVED_REG1
437 #define STR_END SLJIT_SAVED_REG2
438 #define STACK_TOP SLJIT_SCRATCH_REG2
439 #define STACK_LIMIT SLJIT_SAVED_REG3
440 #define ARGUMENTS SLJIT_SAVED_EREG1
441 #define CALL_COUNT SLJIT_SAVED_EREG2
442 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
443
/* All offsets below are byte offsets expressed in multiples of
sizeof(sljit_sw); the first five words are fixed slots. */
444 /* Local space layout. */
445 /* These two locals can be used by the current opcode. */
446 #define LOCALS0 (0 * sizeof(sljit_sw))
447 #define LOCALS1 (1 * sizeof(sljit_sw))
448 /* Two local variables for possessive quantifiers (char1 cannot use them). */
449 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
450 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
451 /* Max limit of recursions. */
452 #define CALL_LIMIT (4 * sizeof(sljit_sw))
453 /* The output vector is stored on the stack, and contains pointers
454 to characters. The vector data is divided into two groups: the first
455 group contains the start / end character pointers, and the second is
456 the start pointers when the end of the capturing group has not yet been reached. */
/* These macros read a variable named "common" from the enclosing scope.
PRIVATE_DATA(cc) indexes private_data_ptrs by the distance of cc from the
first byte code. */
457 #define OVECTOR_START (common->ovector_start)
458 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
459 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))
460 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
461
/* Select the sljit move opcodes (plain and MOVU variant) according to which
of COMPILE_PCRE8, COMPILE_PCRE16 or COMPILE_PCRE32 is defined; compilation
fails with an error when none of them is.
NOTE(review): the UB / UH / UI suffixes presumably mean unsigned 8 / 16 / 32
bit loads matching the code unit width - confirm in the sljit headers. */
462 #if defined COMPILE_PCRE8
463 #define MOV_UCHAR SLJIT_MOV_UB
464 #define MOVU_UCHAR SLJIT_MOVU_UB
465 #elif defined COMPILE_PCRE16
466 #define MOV_UCHAR SLJIT_MOV_UH
467 #define MOVU_UCHAR SLJIT_MOVU_UH
468 #elif defined COMPILE_PCRE32
469 #define MOV_UCHAR SLJIT_MOV_UI
470 #define MOVU_UCHAR SLJIT_MOVU_UI
471 #else
472 #error Unsupported compiling mode
473 #endif
474
/* Thin wrappers around the sljit emitter functions. Every wrapper except
DEFINE_COMPILER expects a local variable named "compiler" in scope;
DEFINE_COMPILER declares it from common->compiler. JUMPTO and CMPTO emit a
jump / compare and bind it to an existing label, JUMPHERE binds an existing
jump to a label emitted at the current position. */
475 /* Shortcuts. */
476 #define DEFINE_COMPILER \
477 struct sljit_compiler *compiler = common->compiler
478 #define OP1(op, dst, dstw, src, srcw) \
479 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
480 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
481 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
482 #define LABEL() \
483 sljit_emit_label(compiler)
484 #define JUMP(type) \
485 sljit_emit_jump(compiler, (type))
486 #define JUMPTO(type, label) \
487 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
488 #define JUMPHERE(jump) \
489 sljit_set_label((jump), sljit_emit_label(compiler))
490 #define SET_LABEL(jump, label) \
491 sljit_set_label((jump), (label))
492 #define CMP(type, src1, src1w, src2, src2w) \
493 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
494 #define CMPTO(type, src1, src1w, src2, src2w, label) \
495 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
496 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
497 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
498 #define GET_LOCAL_BASE(dst, dstw, offset) \
499 sljit_get_local_base(compiler, (dst), (dstw), (offset))
500
/* Returns the address just after the closing opcode of the bracket that
starts at cc.
cc must point to an opcode in the OP_ASSERT..OP_ASSERTBACK_NOT or the
OP_ONCE..OP_SCOND range (checked by assertion only). */
501 static pcre_uchar* bracketend(pcre_uchar* cc)
502 {
503 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
/* Follow the link stored after the opcode, and keep following it for as long
as it leads to another OP_ALT. */
504 do cc += GET(cc, 1); while (*cc == OP_ALT);
/* The chain must end on one of the closing opcodes. */
505 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
/* Step over the closing opcode and its link. */
506 cc += 1 + LINK_SIZE;
507 return cc;
508 }
509
510 /* Functions which might need modification for all new supported opcodes:
511 next_opcode
512 get_private_data_length
513 set_private_data_ptrs
514 get_framesize
515 init_frame
516 get_private_data_length_for_copy
517 copy_private_data
518 compile_matchingpath
519 compile_backtrackingpath
520 */
521
/* Returns the address of the opcode that follows the one at cc, or NULL when
the opcode at cc is not handled here (any opcode not listed, and OP_ANYBYTE
when common->utf is set).
common is only read for its utf flag, and only in SUPPORT_UTF builds. */
522 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
523 {
524 SLJIT_UNUSED_ARG(common);
525 switch(*cc)
526 {
/* Opcodes whose length is taken directly from the OP_lengths table. */
527 case OP_SOD:
528 case OP_SOM:
529 case OP_SET_SOM:
530 case OP_NOT_WORD_BOUNDARY:
531 case OP_WORD_BOUNDARY:
532 case OP_NOT_DIGIT:
533 case OP_DIGIT:
534 case OP_NOT_WHITESPACE:
535 case OP_WHITESPACE:
536 case OP_NOT_WORDCHAR:
537 case OP_WORDCHAR:
538 case OP_ANY:
539 case OP_ALLANY:
540 case OP_NOTPROP:
541 case OP_PROP:
542 case OP_ANYNL:
543 case OP_NOT_HSPACE:
544 case OP_HSPACE:
545 case OP_NOT_VSPACE:
546 case OP_VSPACE:
547 case OP_EXTUNI:
548 case OP_EODN:
549 case OP_EOD:
550 case OP_CIRC:
551 case OP_CIRCM:
552 case OP_DOLL:
553 case OP_DOLLM:
554 case OP_CRSTAR:
555 case OP_CRMINSTAR:
556 case OP_CRPLUS:
557 case OP_CRMINPLUS:
558 case OP_CRQUERY:
559 case OP_CRMINQUERY:
560 case OP_CRRANGE:
561 case OP_CRMINRANGE:
562 case OP_CLASS:
563 case OP_NCLASS:
564 case OP_REF:
565 case OP_REFI:
566 case OP_RECURSE:
567 case OP_CALLOUT:
568 case OP_ALT:
569 case OP_KET:
570 case OP_KETRMAX:
571 case OP_KETRMIN:
572 case OP_KETRPOS:
573 case OP_REVERSE:
574 case OP_ASSERT:
575 case OP_ASSERT_NOT:
576 case OP_ASSERTBACK:
577 case OP_ASSERTBACK_NOT:
578 case OP_ONCE:
579 case OP_ONCE_NC:
580 case OP_BRA:
581 case OP_BRAPOS:
582 case OP_CBRA:
583 case OP_CBRAPOS:
584 case OP_COND:
585 case OP_SBRA:
586 case OP_SBRAPOS:
587 case OP_SCBRA:
588 case OP_SCBRAPOS:
589 case OP_SCOND:
590 case OP_CREF:
591 case OP_NCREF:
592 case OP_RREF:
593 case OP_NRREF:
594 case OP_DEF:
595 case OP_BRAZERO:
596 case OP_BRAMINZERO:
597 case OP_BRAPOSZERO:
598 case OP_PRUNE:
599 case OP_SKIP:
600 case OP_COMMIT:
601 case OP_FAIL:
602 case OP_ACCEPT:
603 case OP_ASSERT_ACCEPT:
604 case OP_CLOSE:
605 case OP_SKIPZERO:
606 return cc + PRIV(OP_lengths)[*cc];
607
/* Opcodes that end with a character: the table length is extended by the
extra code units of that character when UTF is active. */
608 case OP_CHAR:
609 case OP_CHARI:
610 case OP_NOT:
611 case OP_NOTI:
612 case OP_STAR:
613 case OP_MINSTAR:
614 case OP_PLUS:
615 case OP_MINPLUS:
616 case OP_QUERY:
617 case OP_MINQUERY:
618 case OP_UPTO:
619 case OP_MINUPTO:
620 case OP_EXACT:
621 case OP_POSSTAR:
622 case OP_POSPLUS:
623 case OP_POSQUERY:
624 case OP_POSUPTO:
625 case OP_STARI:
626 case OP_MINSTARI:
627 case OP_PLUSI:
628 case OP_MINPLUSI:
629 case OP_QUERYI:
630 case OP_MINQUERYI:
631 case OP_UPTOI:
632 case OP_MINUPTOI:
633 case OP_EXACTI:
634 case OP_POSSTARI:
635 case OP_POSPLUSI:
636 case OP_POSQUERYI:
637 case OP_POSUPTOI:
638 case OP_NOTSTAR:
639 case OP_NOTMINSTAR:
640 case OP_NOTPLUS:
641 case OP_NOTMINPLUS:
642 case OP_NOTQUERY:
643 case OP_NOTMINQUERY:
644 case OP_NOTUPTO:
645 case OP_NOTMINUPTO:
646 case OP_NOTEXACT:
647 case OP_NOTPOSSTAR:
648 case OP_NOTPOSPLUS:
649 case OP_NOTPOSQUERY:
650 case OP_NOTPOSUPTO:
651 case OP_NOTSTARI:
652 case OP_NOTMINSTARI:
653 case OP_NOTPLUSI:
654 case OP_NOTMINPLUSI:
655 case OP_NOTQUERYI:
656 case OP_NOTMINQUERYI:
657 case OP_NOTUPTOI:
658 case OP_NOTMINUPTOI:
659 case OP_NOTEXACTI:
660 case OP_NOTPOSSTARI:
661 case OP_NOTPOSPLUSI:
662 case OP_NOTPOSQUERYI:
663 case OP_NOTPOSUPTOI:
664 cc += PRIV(OP_lengths)[*cc];
665 #ifdef SUPPORT_UTF
/* cc[-1] is the last code unit covered by the table length. */
666 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
667 #endif
668 return cc;
669
/* The type iterators advance by one unit less than their table length.
NOTE(review): presumably so that the character type opcode that follows is
visited as an opcode of its own - confirm against the callers. */
670 /* Special cases. */
671 case OP_TYPESTAR:
672 case OP_TYPEMINSTAR:
673 case OP_TYPEPLUS:
674 case OP_TYPEMINPLUS:
675 case OP_TYPEQUERY:
676 case OP_TYPEMINQUERY:
677 case OP_TYPEUPTO:
678 case OP_TYPEMINUPTO:
679 case OP_TYPEEXACT:
680 case OP_TYPEPOSSTAR:
681 case OP_TYPEPOSPLUS:
682 case OP_TYPEPOSQUERY:
683 case OP_TYPEPOSUPTO:
684 return cc + PRIV(OP_lengths)[*cc] - 1;
685
/* OP_ANYBYTE is rejected (NULL) when UTF is active. */
686 case OP_ANYBYTE:
687 #ifdef SUPPORT_UTF
688 if (common->utf) return NULL;
689 #endif
690 return cc + 1;
691
/* OP_XCLASS stores its own total length after the opcode. */
692 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
693 case OP_XCLASS:
694 return cc + GET(cc, 1);
695 #endif
696
/* Variable length: cc[1] is added to a fixed part of three units. */
697 case OP_MARK:
698 case OP_PRUNE_ARG:
699 return cc + 1 + 2 + cc[1];
700
/* Anything else is reported as unsupported. */
701 default:
702 return NULL;
703 }
704 }
705
/* Groups of case labels shared by get_private_data_length and
set_private_data_ptrs. In both functions the _1 groups reserve one private
data word and the _2A / _2B groups reserve two; for the TYPE _2A / _2B
groups the two words are reserved only when the following type opcode is
neither OP_ANYNL nor OP_EXTUNI. The A and B variants differ in how far the
opcode pointer is advanced (B adds IMM2_SIZE). */
706 #define CASE_ITERATOR_PRIVATE_DATA_1 \
707 case OP_MINSTAR: \
708 case OP_MINPLUS: \
709 case OP_QUERY: \
710 case OP_MINQUERY: \
711 case OP_MINSTARI: \
712 case OP_MINPLUSI: \
713 case OP_QUERYI: \
714 case OP_MINQUERYI: \
715 case OP_NOTMINSTAR: \
716 case OP_NOTMINPLUS: \
717 case OP_NOTQUERY: \
718 case OP_NOTMINQUERY: \
719 case OP_NOTMINSTARI: \
720 case OP_NOTMINPLUSI: \
721 case OP_NOTQUERYI: \
722 case OP_NOTMINQUERYI:
723
724 #define CASE_ITERATOR_PRIVATE_DATA_2A \
725 case OP_STAR: \
726 case OP_PLUS: \
727 case OP_STARI: \
728 case OP_PLUSI: \
729 case OP_NOTSTAR: \
730 case OP_NOTPLUS: \
731 case OP_NOTSTARI: \
732 case OP_NOTPLUSI:
733
734 #define CASE_ITERATOR_PRIVATE_DATA_2B \
735 case OP_UPTO: \
736 case OP_MINUPTO: \
737 case OP_UPTOI: \
738 case OP_MINUPTOI: \
739 case OP_NOTUPTO: \
740 case OP_NOTMINUPTO: \
741 case OP_NOTUPTOI: \
742 case OP_NOTMINUPTOI:
743
744 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
745 case OP_TYPEMINSTAR: \
746 case OP_TYPEMINPLUS: \
747 case OP_TYPEQUERY: \
748 case OP_TYPEMINQUERY:
749
750 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
751 case OP_TYPESTAR: \
752 case OP_TYPEPLUS:
753
754 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
755 case OP_TYPEUPTO: \
756 case OP_TYPEMINUPTO:
757
/* Returns the number of private data words (0, 1 or 2) for the repeat opcode
at cc. The callers pass the position just after a class and multiply the
result by sizeof(sljit_sw). Any opcode that is not one of the OP_CR*
repeats yields 0. */
758 static int get_class_iterator_size(pcre_uchar *cc)
759 {
760 switch(*cc)
761 {
762 case OP_CRSTAR:
763 case OP_CRPLUS:
764 return 2;
765
766 case OP_CRMINSTAR:
767 case OP_CRMINPLUS:
768 case OP_CRQUERY:
769 case OP_CRMINQUERY:
770 return 1;
771
772 case OP_CRRANGE:
773 case OP_CRMINRANGE:
/* No private data when the two 16 bit operands are equal.
NOTE(review): presumably these are the minimum and maximum repeat counts,
so equality means a fixed count - confirm against the opcode layout. */
774 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
775 return 0;
776 return 2;
777
778 default:
779 return 0;
780 }
781 }
782
/* Walks the opcodes in [cc, ccend) and returns the number of bytes of private
data they require, or -1 when the pattern cannot be handled (an opcode that
next_opcode rejects, or an OP_COND / OP_SCOND directly followed by
OP_CALLOUT).

Side effects on common while walking:
- has_set_som is set when OP_SET_SOM is seen;
- optimized_cbracket[] entries are cleared for brackets that are referenced
  (OP_REF, OP_REFI, OP_CREF, OP_NCREF) or possessive (OP_CBRAPOS, OP_SCBRAPOS);
- recursive_head_ptr, capture_last_ptr and mark_ptr are assigned, once, the
  current ovector_start, which is then advanced by one word;
- needs_start_ptr and control_head_ptr are set for the control verbs.

The counterpart set_private_data_ptrs performs the same walk to assign the
offsets, so the two functions must stay in step. */
783 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
784 {
785 int private_data_length = 0;
786 pcre_uchar *alternative;
787 pcre_uchar *name;
/* While cc is below end, iterators get no private data; see the bottom of
the loop for how end is set. */
788 pcre_uchar *end = NULL;
/* space: private data words needed by the current opcode.
size: how far to advance cc afterwards; a negative value means "advance by
-size, then also skip the UTF extra units of the character".
bracketlen: length of a bracket opening opcode, 0 if cc is not a bracket. */
789 int space, size, i;
790 pcre_uint32 bracketlen;
791
792 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
793 while (cc < ccend)
794 {
795 space = 0;
796 size = 0;
797 bracketlen = 0;
798 switch(*cc)
799 {
800 case OP_SET_SOM:
801 common->has_set_som = TRUE;
802 cc += 1;
803 break;
804
805 case OP_REF:
806 case OP_REFI:
807 common->optimized_cbracket[GET2(cc, 1)] = 0;
808 cc += 1 + IMM2_SIZE;
809 break;
810
/* Each of these brackets needs one private data word. */
811 case OP_ASSERT:
812 case OP_ASSERT_NOT:
813 case OP_ASSERTBACK:
814 case OP_ASSERTBACK_NOT:
815 case OP_ONCE:
816 case OP_ONCE_NC:
817 case OP_BRAPOS:
818 case OP_SBRA:
819 case OP_SBRAPOS:
820 private_data_length += sizeof(sljit_sw);
821 bracketlen = 1 + LINK_SIZE;
822 break;
823
824 case OP_CBRAPOS:
825 case OP_SCBRAPOS:
826 private_data_length += sizeof(sljit_sw);
827 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
828 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
829 break;
830
831 case OP_COND:
832 case OP_SCOND:
833 /* Only AUTO_CALLOUT can insert this opcode. We do
834 not intend to support this case. */
835 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
836 return -1;
837
/* OP_SCOND always gets a word; OP_COND only when its link leads to
OP_KETRMAX or OP_KETRMIN. */
838 if (*cc == OP_COND)
839 {
840 /* Might be a hidden SCOND. */
841 alternative = cc + GET(cc, 1);
842 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
843 private_data_length += sizeof(sljit_sw);
844 }
845 else
846 private_data_length += sizeof(sljit_sw);
847 bracketlen = 1 + LINK_SIZE;
848 break;
849
850 case OP_CREF:
851 i = GET2(cc, 1);
852 common->optimized_cbracket[i] = 0;
853 cc += 1 + IMM2_SIZE;
854 break;
855
856 case OP_NCREF:
/* bracketlen is borrowed here as a temporary for the referenced bracket
number; it is reset to 0 before leaving the case. */
857 bracketlen = GET2(cc, 1);
858 name = (pcre_uchar *)common->name_table;
859 alternative = name;
/* First pass: find the name table entry whose number matches. */
860 for (i = 0; i < common->name_count; i++)
861 {
862 if (GET2(name, 0) == bracketlen) break;
863 name += common->name_entry_size;
864 }
865 SLJIT_ASSERT(i != common->name_count);
866
/* Second pass: clear the optimized flag of every bracket whose name equals
the name found above. */
867 for (i = 0; i < common->name_count; i++)
868 {
869 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
870 common->optimized_cbracket[GET2(alternative, 0)] = 0;
871 alternative += common->name_entry_size;
872 }
873 bracketlen = 0;
874 cc += 1 + IMM2_SIZE;
875 break;
876
877 case OP_BRA:
878 bracketlen = 1 + LINK_SIZE;
879 break;
880
881 case OP_CBRA:
882 case OP_SCBRA:
883 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
884 break;
885
886 CASE_ITERATOR_PRIVATE_DATA_1
887 space = 1;
888 size = -2;
889 break;
890
891 CASE_ITERATOR_PRIVATE_DATA_2A
892 space = 2;
893 size = -2;
894 break;
895
896 CASE_ITERATOR_PRIVATE_DATA_2B
897 space = 2;
898 size = -(2 + IMM2_SIZE);
899 break;
900
901 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
902 space = 1;
903 size = 1;
904 break;
905
906 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
907 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
908 space = 2;
909 size = 1;
910 break;
911
912 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
913 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
914 space = 2;
915 size = 1 + IMM2_SIZE;
916 break;
917
/* A class is the opcode plus 32 bytes of data; the repeat opcode that may
follow it decides how many words are needed. */
918 case OP_CLASS:
919 case OP_NCLASS:
920 size += 1 + 32 / sizeof(pcre_uchar);
921 space = get_class_iterator_size(cc + size);
922 break;
923
924 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
925 case OP_XCLASS:
926 size = GET(cc, 1);
927 space = get_class_iterator_size(cc + size);
928 break;
929 #endif
930
931 case OP_RECURSE:
932 /* Set its value only once. */
933 if (common->recursive_head_ptr == 0)
934 {
935 common->recursive_head_ptr = common->ovector_start;
936 common->ovector_start += sizeof(sljit_sw);
937 }
938 cc += 1 + LINK_SIZE;
939 break;
940
941 case OP_CALLOUT:
942 if (common->capture_last_ptr == 0)
943 {
944 common->capture_last_ptr = common->ovector_start;
945 common->ovector_start += sizeof(sljit_sw);
946 }
947 cc += 2 + 2 * LINK_SIZE;
948 break;
949
950 case OP_PRUNE_ARG:
951 common->needs_start_ptr = TRUE;
/* NOTE(review): 1 looks like a "needed" marker rather than a real offset -
confirm where control_head_ptr receives its final value. */
952 common->control_head_ptr = 1;
953 /* Fall through. */
954
955 case OP_MARK:
956 if (common->mark_ptr == 0)
957 {
958 common->mark_ptr = common->ovector_start;
959 common->ovector_start += sizeof(sljit_sw);
960 }
961 cc += 1 + 2 + cc[1];
962 break;
963
964 case OP_PRUNE:
965 case OP_SKIP:
966 common->needs_start_ptr = TRUE;
967 /* Fall through. */
968
969 case OP_COMMIT:
970 common->control_head_ptr = 1;
971 cc += 1;
972 break;
973
974 default:
975 cc = next_opcode(common, cc);
976 if (cc == NULL)
977 return -1;
978 break;
979 }
980
/* Reserve the iterator's words unless cc is still below end. */
981 if (space > 0 && cc >= end)
982 private_data_length += sizeof(sljit_sw) * space;
983
984 if (size != 0)
985 {
986 if (size < 0)
987 {
988 cc += -size;
989 #ifdef SUPPORT_UTF
990 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
991 #endif
992 }
993 else
994 cc += size;
995 }
996
/* On entering a bracket while not already below end: remember where the
bracket ends, unless it is closed by a plain OP_KET, in which case end is
cleared again. */
997 if (bracketlen != 0)
998 {
999 if (cc >= end)
1000 {
1001 end = bracketend(cc);
1002 if (end[-1 - LINK_SIZE] == OP_KET)
1003 end = NULL;
1004 }
1005 cc += bracketlen;
1006 }
1007 }
1008 return private_data_length;
1009 }
1010
/* Walks the opcodes from common->start up to ccend and records, in
common->private_data_ptrs[] (indexed by the opcode's distance from
common->start), the private data offset of every opcode that needs one.
Offsets are handed out sequentially starting at private_data_ptr.
The walk mirrors get_private_data_length: the same opcodes receive the same
number of words under the same conditions. Unsupported opcodes are only
caught by an assertion here. */
1011 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
1012 {
1013 pcre_uchar *cc = common->start;
1014 pcre_uchar *alternative;
/* While cc is below end, iterators get no private data slot. */
1015 pcre_uchar *end = NULL;
/* space: words needed by the current opcode; size: how far to advance
(negative: advance by -size plus the UTF extra units of the character);
bracketlen: length of a bracket opening opcode, 0 otherwise. */
1016 int space, size, bracketlen;
1017
1018 while (cc < ccend)
1019 {
1020 space = 0;
1021 size = 0;
1022 bracketlen = 0;
1023 switch(*cc)
1024 {
/* Brackets that always own one word. */
1025 case OP_ASSERT:
1026 case OP_ASSERT_NOT:
1027 case OP_ASSERTBACK:
1028 case OP_ASSERTBACK_NOT:
1029 case OP_ONCE:
1030 case OP_ONCE_NC:
1031 case OP_BRAPOS:
1032 case OP_SBRA:
1033 case OP_SBRAPOS:
1034 case OP_SCOND:
1035 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1036 private_data_ptr += sizeof(sljit_sw);
1037 bracketlen = 1 + LINK_SIZE;
1038 break;
1039
1040 case OP_CBRAPOS:
1041 case OP_SCBRAPOS:
1042 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1043 private_data_ptr += sizeof(sljit_sw);
1044 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1045 break;
1046
1047 case OP_COND:
1048 /* Might be a hidden SCOND. */
1049 alternative = cc + GET(cc, 1);
1050 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1051 {
1052 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1053 private_data_ptr += sizeof(sljit_sw);
1054 }
1055 bracketlen = 1 + LINK_SIZE;
1056 break;
1057
1058 case OP_BRA:
1059 bracketlen = 1 + LINK_SIZE;
1060 break;
1061
1062 case OP_CBRA:
1063 case OP_SCBRA:
1064 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1065 break;
1066
1067 CASE_ITERATOR_PRIVATE_DATA_1
1068 space = 1;
1069 size = -2;
1070 break;
1071
1072 CASE_ITERATOR_PRIVATE_DATA_2A
1073 space = 2;
1074 size = -2;
1075 break;
1076
1077 CASE_ITERATOR_PRIVATE_DATA_2B
1078 space = 2;
1079 size = -(2 + IMM2_SIZE);
1080 break;
1081
1082 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1083 space = 1;
1084 size = 1;
1085 break;
1086
1087 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1088 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1089 space = 2;
1090 size = 1;
1091 break;
1092
1093 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1094 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1095 space = 2;
1096 size = 1 + IMM2_SIZE;
1097 break;
1098
/* A class is the opcode plus 32 bytes of data; the repeat opcode that may
follow it decides how many words are needed. */
1099 case OP_CLASS:
1100 case OP_NCLASS:
1101 size += 1 + 32 / sizeof(pcre_uchar);
1102 space = get_class_iterator_size(cc + size);
1103 break;
1104
1105 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1106 case OP_XCLASS:
1107 size = GET(cc, 1);
1108 space = get_class_iterator_size(cc + size);
1109 break;
1110 #endif
1111
1112 default:
1113 cc = next_opcode(common, cc);
1114 SLJIT_ASSERT(cc != NULL);
1115 break;
1116 }
1117
/* Record the slot of an iterator (cc still points at its opcode here),
unless cc is below end. */
1118 if (space > 0 && cc >= end)
1119 {
1120 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1121 private_data_ptr += sizeof(sljit_sw) * space;
1122 }
1123
1124 if (size != 0)
1125 {
1126 if (size < 0)
1127 {
1128 cc += -size;
1129 #ifdef SUPPORT_UTF
1130 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1131 #endif
1132 }
1133 else
1134 cc += size;
1135 }
1136
/* On entering a bracket while not already below end: remember where the
bracket ends, unless it is closed by a plain OP_KET, in which case end is
cleared again. */
1137 if (bracketlen > 0)
1138 {
1139 if (cc >= end)
1140 {
1141 end = bracketend(cc);
1142 if (end[-1 - LINK_SIZE] == OP_KET)
1143 end = NULL;
1144 }
1145 cc += bracketlen;
1146 }
1147 }
1148 }
1149
1150 /* Returns with a frame_types (always < 0) if no need for frame. */
1151 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1152 {
1153 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1154 int length = 0;
1155 int possessive = 0;
1156 BOOL stack_restore = FALSE;
1157 BOOL setsom_found = recursive;
1158 BOOL setmark_found = recursive;
1159 /* The last capture is a local variable even for recursions. */
1160 BOOL capture_last_found = FALSE;
1161
1162 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1163 {
1164 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1165 /* This is correct regardless of common->capture_last_ptr. */
1166 capture_last_found = TRUE;
1167 }
1168
1169 cc = next_opcode(common, cc);
1170 SLJIT_ASSERT(cc != NULL);
1171 while (cc < ccend)
1172 switch(*cc)
1173 {
1174 case OP_SET_SOM:
1175 SLJIT_ASSERT(common->has_set_som);
1176 stack_restore = TRUE;
1177 if (!setsom_found)
1178 {
1179 length += 2;
1180 setsom_found = TRUE;
1181 }
1182 cc += 1;
1183 break;
1184
1185 case OP_MARK:
1186 case OP_PRUNE_ARG:
1187 SLJIT_ASSERT(common->mark_ptr != 0);
1188 stack_restore = TRUE;
1189 if (!setmark_found)
1190 {
1191 length += 2;
1192 setmark_found = TRUE;
1193 }
1194 cc += 1 + 2 + cc[1];
1195 break;
1196
1197 case OP_RECURSE:
1198 stack_restore = TRUE;
1199 if (common->has_set_som && !setsom_found)
1200 {
1201 length += 2;
1202 setsom_found = TRUE;
1203 }
1204 if (common->mark_ptr != 0 && !setmark_found)
1205 {
1206 length += 2;
1207 setmark_found = TRUE;
1208 }
1209 if (common->capture_last_ptr != 0 && !capture_last_found)
1210 {
1211 length += 2;
1212 capture_last_found = TRUE;
1213 }
1214 cc += 1 + LINK_SIZE;
1215 break;
1216
1217 case OP_CBRA:
1218 case OP_CBRAPOS:
1219 case OP_SCBRA:
1220 case OP_SCBRAPOS:
1221 stack_restore = TRUE;
1222 if (common->capture_last_ptr != 0 && !capture_last_found)
1223 {
1224 length += 2;
1225 capture_last_found = TRUE;
1226 }
1227 length += 3;
1228 cc += 1 + LINK_SIZE + IMM2_SIZE;
1229 break;
1230
1231 default:
1232 stack_restore = TRUE;
1233 /* Fall through. */
1234
1235 case OP_NOT_WORD_BOUNDARY:
1236 case OP_WORD_BOUNDARY:
1237 case OP_NOT_DIGIT:
1238 case OP_DIGIT:
1239 case OP_NOT_WHITESPACE:
1240 case OP_WHITESPACE:
1241 case OP_NOT_WORDCHAR:
1242 case OP_WORDCHAR:
1243 case OP_ANY:
1244 case OP_ALLANY:
1245 case OP_ANYBYTE:
1246 case OP_NOTPROP:
1247 case OP_PROP:
1248 case OP_ANYNL:
1249 case OP_NOT_HSPACE:
1250 case OP_HSPACE:
1251 case OP_NOT_VSPACE:
1252 case OP_VSPACE:
1253 case OP_EXTUNI:
1254 case OP_EODN:
1255 case OP_EOD:
1256 case OP_CIRC:
1257 case OP_CIRCM:
1258 case OP_DOLL:
1259 case OP_DOLLM:
1260 case OP_CHAR:
1261 case OP_CHARI:
1262 case OP_NOT:
1263 case OP_NOTI:
1264
1265 case OP_EXACT:
1266 case OP_POSSTAR:
1267 case OP_POSPLUS:
1268 case OP_POSQUERY:
1269 case OP_POSUPTO:
1270
1271 case OP_EXACTI:
1272 case OP_POSSTARI:
1273 case OP_POSPLUSI:
1274 case OP_POSQUERYI:
1275 case OP_POSUPTOI:
1276
1277 case OP_NOTEXACT:
1278 case OP_NOTPOSSTAR:
1279 case OP_NOTPOSPLUS:
1280 case OP_NOTPOSQUERY:
1281 case OP_NOTPOSUPTO:
1282
1283 case OP_NOTEXACTI:
1284 case OP_NOTPOSSTARI:
1285 case OP_NOTPOSPLUSI:
1286 case OP_NOTPOSQUERYI:
1287 case OP_NOTPOSUPTOI:
1288
1289 case OP_TYPEEXACT:
1290 case OP_TYPEPOSSTAR:
1291 case OP_TYPEPOSPLUS:
1292 case OP_TYPEPOSQUERY:
1293 case OP_TYPEPOSUPTO:
1294
1295 case OP_CLASS:
1296 case OP_NCLASS:
1297 case OP_XCLASS:
1298
1299 cc = next_opcode(common, cc);
1300 SLJIT_ASSERT(cc != NULL);
1301 break;
1302 }
1303
1304 /* Possessive quantifiers can use a special case. */
1305 if (SLJIT_UNLIKELY(possessive == length))
1306 return stack_restore ? no_frame : no_stack;
1307
1308 if (length > 0)
1309 return length + 1;
1310 return stack_restore ? no_frame : no_stack;
1311 }
1312
1313 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1314 {
1315 DEFINE_COMPILER;
1316 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1317 BOOL setsom_found = recursive;
1318 BOOL setmark_found = recursive;
1319 /* The last capture is a local variable even for recursions. */
1320 BOOL capture_last_found = FALSE;
1321 int offset;
1322
1323 /* >= 1 + shortest item size (2) */
1324 SLJIT_UNUSED_ARG(stacktop);
1325 SLJIT_ASSERT(stackpos >= stacktop + 2);
1326
1327 stackpos = STACK(stackpos);
1328 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1329 cc = next_opcode(common, cc);
1330 SLJIT_ASSERT(cc != NULL);
1331 while (cc < ccend)
1332 switch(*cc)
1333 {
1334 case OP_SET_SOM:
1335 SLJIT_ASSERT(common->has_set_som);
1336 if (!setsom_found)
1337 {
1338 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1339 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1340 stackpos += (int)sizeof(sljit_sw);
1341 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1342 stackpos += (int)sizeof(sljit_sw);
1343 setsom_found = TRUE;
1344 }
1345 cc += 1;
1346 break;
1347
1348 case OP_MARK:
1349 case OP_PRUNE_ARG:
1350 SLJIT_ASSERT(common->mark_ptr != 0);
1351 if (!setmark_found)
1352 {
1353 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1354 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1355 stackpos += (int)sizeof(sljit_sw);
1356 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1357 stackpos += (int)sizeof(sljit_sw);
1358 setmark_found = TRUE;
1359 }
1360 cc += 1 + 2 + cc[1];
1361 break;
1362
1363 case OP_RECURSE:
1364 if (common->has_set_som && !setsom_found)
1365 {
1366 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1367 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1368 stackpos += (int)sizeof(sljit_sw);
1369 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1370 stackpos += (int)sizeof(sljit_sw);
1371 setsom_found = TRUE;
1372 }
1373 if (common->mark_ptr != 0 && !setmark_found)
1374 {
1375 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1377 stackpos += (int)sizeof(sljit_sw);
1378 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1379 stackpos += (int)sizeof(sljit_sw);
1380 setmark_found = TRUE;
1381 }
1382 if (common->capture_last_ptr != 0 && !capture_last_found)
1383 {
1384 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1385 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1386 stackpos += (int)sizeof(sljit_sw);
1387 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1388 stackpos += (int)sizeof(sljit_sw);
1389 capture_last_found = TRUE;
1390 }
1391 cc += 1 + LINK_SIZE;
1392 break;
1393
1394 case OP_CBRA:
1395 case OP_CBRAPOS:
1396 case OP_SCBRA:
1397 case OP_SCBRAPOS:
1398 if (common->capture_last_ptr != 0 && !capture_last_found)
1399 {
1400 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1402 stackpos += (int)sizeof(sljit_sw);
1403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1404 stackpos += (int)sizeof(sljit_sw);
1405 capture_last_found = TRUE;
1406 }
1407 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1408 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1409 stackpos += (int)sizeof(sljit_sw);
1410 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1411 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1412 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1413 stackpos += (int)sizeof(sljit_sw);
1414 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1415 stackpos += (int)sizeof(sljit_sw);
1416
1417 cc += 1 + LINK_SIZE + IMM2_SIZE;
1418 break;
1419
1420 default:
1421 cc = next_opcode(common, cc);
1422 SLJIT_ASSERT(cc != NULL);
1423 break;
1424 }
1425
1426 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1427 SLJIT_ASSERT(stackpos == STACK(stacktop));
1428 }
1429
1430 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1431 {
1432 int private_data_length = common->control_head_ptr ? 3 : 2;
1433 int size;
1434 pcre_uchar *alternative;
1435 /* Calculate the sum of the private machine words. */
1436 while (cc < ccend)
1437 {
1438 size = 0;
1439 switch(*cc)
1440 {
1441 case OP_ASSERT:
1442 case OP_ASSERT_NOT:
1443 case OP_ASSERTBACK:
1444 case OP_ASSERTBACK_NOT:
1445 case OP_ONCE:
1446 case OP_ONCE_NC:
1447 case OP_BRAPOS:
1448 case OP_SBRA:
1449 case OP_SBRAPOS:
1450 case OP_SCOND:
1451 private_data_length++;
1452 cc += 1 + LINK_SIZE;
1453 break;
1454
1455 case OP_CBRA:
1456 case OP_SCBRA:
1457 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1458 private_data_length++;
1459 cc += 1 + LINK_SIZE + IMM2_SIZE;
1460 break;
1461
1462 case OP_CBRAPOS:
1463 case OP_SCBRAPOS:
1464 private_data_length += 2;
1465 cc += 1 + LINK_SIZE + IMM2_SIZE;
1466 break;
1467
1468 case OP_COND:
1469 /* Might be a hidden SCOND. */
1470 alternative = cc + GET(cc, 1);
1471 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1472 private_data_length++;
1473 cc += 1 + LINK_SIZE;
1474 break;
1475
1476 CASE_ITERATOR_PRIVATE_DATA_1
1477 if (PRIVATE_DATA(cc))
1478 private_data_length++;
1479 cc += 2;
1480 #ifdef SUPPORT_UTF
1481 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1482 #endif
1483 break;
1484
1485 CASE_ITERATOR_PRIVATE_DATA_2A
1486 if (PRIVATE_DATA(cc))
1487 private_data_length += 2;
1488 cc += 2;
1489 #ifdef SUPPORT_UTF
1490 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1491 #endif
1492 break;
1493
1494 CASE_ITERATOR_PRIVATE_DATA_2B
1495 if (PRIVATE_DATA(cc))
1496 private_data_length += 2;
1497 cc += 2 + IMM2_SIZE;
1498 #ifdef SUPPORT_UTF
1499 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1500 #endif
1501 break;
1502
1503 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1504 if (PRIVATE_DATA(cc))
1505 private_data_length++;
1506 cc += 1;
1507 break;
1508
1509 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1510 if (PRIVATE_DATA(cc))
1511 private_data_length += 2;
1512 cc += 1;
1513 break;
1514
1515 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1516 if (PRIVATE_DATA(cc))
1517 private_data_length += 2;
1518 cc += 1 + IMM2_SIZE;
1519 break;
1520
1521 case OP_CLASS:
1522 case OP_NCLASS:
1523 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1524 case OP_XCLASS:
1525 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1526 #else
1527 size = 1 + 32 / (int)sizeof(pcre_uchar);
1528 #endif
1529 if (PRIVATE_DATA(cc))
1530 private_data_length += get_class_iterator_size(cc + size);
1531 cc += size;
1532 break;
1533
1534 default:
1535 cc = next_opcode(common, cc);
1536 SLJIT_ASSERT(cc != NULL);
1537 break;
1538 }
1539 }
1540 SLJIT_ASSERT(cc == ccend);
1541 return private_data_length;
1542 }
1543
1544 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1545 BOOL save, int stackptr, int stacktop)
1546 {
1547 DEFINE_COMPILER;
1548 int srcw[2];
1549 int count, size;
1550 BOOL tmp1next = TRUE;
1551 BOOL tmp1empty = TRUE;
1552 BOOL tmp2empty = TRUE;
1553 pcre_uchar *alternative;
1554 enum {
1555 start,
1556 loop,
1557 end
1558 } status;
1559
1560 status = save ? start : loop;
1561 stackptr = STACK(stackptr - 2);
1562 stacktop = STACK(stacktop - 1);
1563
1564 if (!save)
1565 {
1566 stackptr += (common->control_head_ptr ? 2 : 1) * sizeof(sljit_sw);
1567 if (stackptr < stacktop)
1568 {
1569 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1570 stackptr += sizeof(sljit_sw);
1571 tmp1empty = FALSE;
1572 }
1573 if (stackptr < stacktop)
1574 {
1575 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1576 stackptr += sizeof(sljit_sw);
1577 tmp2empty = FALSE;
1578 }
1579 /* The tmp1next must be TRUE in either way. */
1580 }
1581
1582 do
1583 {
1584 count = 0;
1585 switch(status)
1586 {
1587 case start:
1588 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1589 count = 1;
1590 srcw[0] = common->recursive_head_ptr;
1591 if (common->control_head_ptr != 0)
1592 {
1593 count = 2;
1594 srcw[1] = common->control_head_ptr;
1595 }
1596 status = loop;
1597 break;
1598
1599 case loop:
1600 if (cc >= ccend)
1601 {
1602 status = end;
1603 break;
1604 }
1605
1606 switch(*cc)
1607 {
1608 case OP_ASSERT:
1609 case OP_ASSERT_NOT:
1610 case OP_ASSERTBACK:
1611 case OP_ASSERTBACK_NOT:
1612 case OP_ONCE:
1613 case OP_ONCE_NC:
1614 case OP_BRAPOS:
1615 case OP_SBRA:
1616 case OP_SBRAPOS:
1617 case OP_SCOND:
1618 count = 1;
1619 srcw[0] = PRIVATE_DATA(cc);
1620 SLJIT_ASSERT(srcw[0] != 0);
1621 cc += 1 + LINK_SIZE;
1622 break;
1623
1624 case OP_CBRA:
1625 case OP_SCBRA:
1626 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1627 {
1628 count = 1;
1629 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1630 }
1631 cc += 1 + LINK_SIZE + IMM2_SIZE;
1632 break;
1633
1634 case OP_CBRAPOS:
1635 case OP_SCBRAPOS:
1636 count = 2;
1637 srcw[0] = PRIVATE_DATA(cc);
1638 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1639 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1640 cc += 1 + LINK_SIZE + IMM2_SIZE;
1641 break;
1642
1643 case OP_COND:
1644 /* Might be a hidden SCOND. */
1645 alternative = cc + GET(cc, 1);
1646 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1647 {
1648 count = 1;
1649 srcw[0] = PRIVATE_DATA(cc);
1650 SLJIT_ASSERT(srcw[0] != 0);
1651 }
1652 cc += 1 + LINK_SIZE;
1653 break;
1654
1655 CASE_ITERATOR_PRIVATE_DATA_1
1656 if (PRIVATE_DATA(cc))
1657 {
1658 count = 1;
1659 srcw[0] = PRIVATE_DATA(cc);
1660 }
1661 cc += 2;
1662 #ifdef SUPPORT_UTF
1663 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1664 #endif
1665 break;
1666
1667 CASE_ITERATOR_PRIVATE_DATA_2A
1668 if (PRIVATE_DATA(cc))
1669 {
1670 count = 2;
1671 srcw[0] = PRIVATE_DATA(cc);
1672 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1673 }
1674 cc += 2;
1675 #ifdef SUPPORT_UTF
1676 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1677 #endif
1678 break;
1679
1680 CASE_ITERATOR_PRIVATE_DATA_2B
1681 if (PRIVATE_DATA(cc))
1682 {
1683 count = 2;
1684 srcw[0] = PRIVATE_DATA(cc);
1685 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1686 }
1687 cc += 2 + IMM2_SIZE;
1688 #ifdef SUPPORT_UTF
1689 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1690 #endif
1691 break;
1692
1693 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1694 if (PRIVATE_DATA(cc))
1695 {
1696 count = 1;
1697 srcw[0] = PRIVATE_DATA(cc);
1698 }
1699 cc += 1;
1700 break;
1701
1702 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1703 if (PRIVATE_DATA(cc))
1704 {
1705 count = 2;
1706 srcw[0] = PRIVATE_DATA(cc);
1707 srcw[1] = srcw[0] + sizeof(sljit_sw);
1708 }
1709 cc += 1;
1710 break;
1711
1712 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1713 if (PRIVATE_DATA(cc))
1714 {
1715 count = 2;
1716 srcw[0] = PRIVATE_DATA(cc);
1717 srcw[1] = srcw[0] + sizeof(sljit_sw);
1718 }
1719 cc += 1 + IMM2_SIZE;
1720 break;
1721
1722 case OP_CLASS:
1723 case OP_NCLASS:
1724 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1725 case OP_XCLASS:
1726 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1727 #else
1728 size = 1 + 32 / (int)sizeof(pcre_uchar);
1729 #endif
1730 if (PRIVATE_DATA(cc))
1731 switch(get_class_iterator_size(cc + size))
1732 {
1733 case 1:
1734 count = 1;
1735 srcw[0] = PRIVATE_DATA(cc);
1736 break;
1737
1738 case 2:
1739 count = 2;
1740 srcw[0] = PRIVATE_DATA(cc);
1741 srcw[1] = srcw[0] + sizeof(sljit_sw);
1742 break;
1743
1744 default:
1745 SLJIT_ASSERT_STOP();
1746 break;
1747 }
1748 cc += size;
1749 break;
1750
1751 default:
1752 cc = next_opcode(common, cc);
1753 SLJIT_ASSERT(cc != NULL);
1754 break;
1755 }
1756 break;
1757
1758 case end:
1759 SLJIT_ASSERT_STOP();
1760 break;
1761 }
1762
1763 while (count > 0)
1764 {
1765 count--;
1766 if (save)
1767 {
1768 if (tmp1next)
1769 {
1770 if (!tmp1empty)
1771 {
1772 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1773 stackptr += sizeof(sljit_sw);
1774 }
1775 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1776 tmp1empty = FALSE;
1777 tmp1next = FALSE;
1778 }
1779 else
1780 {
1781 if (!tmp2empty)
1782 {
1783 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1784 stackptr += sizeof(sljit_sw);
1785 }
1786 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1787 tmp2empty = FALSE;
1788 tmp1next = TRUE;
1789 }
1790 }
1791 else
1792 {
1793 if (tmp1next)
1794 {
1795 SLJIT_ASSERT(!tmp1empty);
1796 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1797 tmp1empty = stackptr >= stacktop;
1798 if (!tmp1empty)
1799 {
1800 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1801 stackptr += sizeof(sljit_sw);
1802 }
1803 tmp1next = FALSE;
1804 }
1805 else
1806 {
1807 SLJIT_ASSERT(!tmp2empty);
1808 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1809 tmp2empty = stackptr >= stacktop;
1810 if (!tmp2empty)
1811 {
1812 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1813 stackptr += sizeof(sljit_sw);
1814 }
1815 tmp1next = TRUE;
1816 }
1817 }
1818 }
1819 }
1820 while (status != end);
1821
1822 if (save)
1823 {
1824 if (tmp1next)
1825 {
1826 if (!tmp1empty)
1827 {
1828 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1829 stackptr += sizeof(sljit_sw);
1830 }
1831 if (!tmp2empty)
1832 {
1833 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1834 stackptr += sizeof(sljit_sw);
1835 }
1836 }
1837 else
1838 {
1839 if (!tmp2empty)
1840 {
1841 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1842 stackptr += sizeof(sljit_sw);
1843 }
1844 if (!tmp1empty)
1845 {
1846 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1847 stackptr += sizeof(sljit_sw);
1848 }
1849 }
1850 }
1851 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1852 }
1853
1854 #undef CASE_ITERATOR_PRIVATE_DATA_1
1855 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1856 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1857 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1858 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1859 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1860
1861 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1862 {
1863 return (value & (value - 1)) == 0;
1864 }
1865
1866 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1867 {
1868 while (list)
1869 {
1870 /* sljit_set_label is clever enough to do nothing
1871 if either the jump or the label is NULL. */
1872 SET_LABEL(list->jump, label);
1873 list = list->next;
1874 }
1875 }
1876
1877 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1878 {
1879 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1880 if (list_item)
1881 {
1882 list_item->next = *list;
1883 list_item->jump = jump;
1884 *list = list_item;
1885 }
1886 }
1887
1888 static void add_stub(compiler_common *common, struct sljit_jump *start)
1889 {
1890 DEFINE_COMPILER;
1891 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1892
1893 if (list_item)
1894 {
1895 list_item->start = start;
1896 list_item->quit = LABEL();
1897 list_item->next = common->stubs;
1898 common->stubs = list_item;
1899 }
1900 }
1901
1902 static void flush_stubs(compiler_common *common)
1903 {
1904 DEFINE_COMPILER;
1905 stub_list* list_item = common->stubs;
1906
1907 while (list_item)
1908 {
1909 JUMPHERE(list_item->start);
1910 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1911 JUMPTO(SLJIT_JUMP, list_item->quit);
1912 list_item = list_item->next;
1913 }
1914 common->stubs = NULL;
1915 }
1916
1917 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1918 {
1919 DEFINE_COMPILER;
1920
1921 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1922 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1923 }
1924
1925 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1926 {
1927 /* May destroy all locals and registers except TMP2. */
1928 DEFINE_COMPILER;
1929
1930 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1931 #ifdef DESTROY_REGISTERS
1932 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1933 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1934 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1935 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1936 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1937 #endif
1938 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1939 }
1940
1941 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1942 {
1943 DEFINE_COMPILER;
1944 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1945 }
1946
1947 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1948 {
1949 DEFINE_COMPILER;
1950 struct sljit_label *loop;
1951 int i;
1952
1953 /* At this point we can freely use all temporary registers. */
1954 SLJIT_ASSERT(length > 1);
1955 /* TMP1 returns with begin - 1. */
1956 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1957 if (length < 8)
1958 {
1959 for (i = 1; i < length; i++)
1960 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1961 }
1962 else
1963 {
1964 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
1965 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
1966 loop = LABEL();
1967 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1968 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1969 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1970 }
1971 }
1972
1973 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
1974 {
1975 DEFINE_COMPILER;
1976 struct sljit_label *loop;
1977 int i;
1978
1979 SLJIT_ASSERT(length > 1);
1980 /* OVECTOR(1) contains the "string begin - 1" constant. */
1981 if (length > 2)
1982 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1983 if (length < 8)
1984 {
1985 for (i = 2; i < length; i++)
1986 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
1987 }
1988 else
1989 {
1990 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
1991 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
1992 loop = LABEL();
1993 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
1994 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
1995 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1996 }
1997
1998 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
1999 if (common->mark_ptr != 0)
2000 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2001 SLJIT_ASSERT(common->control_head_ptr != 0);
2002 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2003 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2004 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2005 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2006 }
2007
2008 static sljit_sw do_check_control_chain(sljit_sw *current)
2009 {
2010 sljit_sw return_value = 0;
2011
2012 SLJIT_ASSERT(current != NULL);
2013 do
2014 {
2015 switch (current[-2])
2016 {
2017 case type_commit:
2018 /* Commit overwrites all. */
2019 return -1;
2020
2021 case type_prune:
2022 break;
2023
2024 case type_skip:
2025 /* Overwrites prune, but not other skips. */
2026 if (return_value == 0)
2027 return_value = current[-3];
2028 break;
2029
2030 default:
2031 SLJIT_ASSERT_STOP();
2032 break;
2033 }
2034 current = (sljit_sw*)current[-1];
2035 }
2036 while (current != NULL);
2037 return return_value;
2038 }
2039
2040 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2041 {
2042 DEFINE_COMPILER;
2043 struct sljit_label *loop;
2044 struct sljit_jump *early_quit;
2045
2046 /* At this point we can freely use all registers. */
2047 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2048 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2049
2050 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2051 if (common->mark_ptr != 0)
2052 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2053 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2054 if (common->mark_ptr != 0)
2055 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2056 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2057 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2058 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2059 /* Unlikely, but possible */
2060 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2061 loop = LABEL();
2062 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2063 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2064 /* Copy the integer value to the output buffer */
2065 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2066 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2067 #endif
2068 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2069 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2070 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2071 JUMPHERE(early_quit);
2072
2073 /* Calculate the return value, which is the maximum ovector value. */
2074 if (topbracket > 1)
2075 {
2076 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2077 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2078
2079 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2080 loop = LABEL();
2081 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2082 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2083 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2084 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2085 }
2086 else
2087 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2088 }
2089
2090 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2091 {
2092 DEFINE_COMPILER;
2093 struct sljit_jump *jump;
2094
2095 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2096 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2097 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2098
2099 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2100 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2101 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2102 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2103
2104 /* Store match begin and end. */
2105 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2106 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2107
2108 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2109 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2110 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2111 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2112 #endif
2113 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2114 JUMPHERE(jump);
2115
2116 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2117 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2118 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2119 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2120 #endif
2121 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2122
2123 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2124 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2125 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2126 #endif
2127 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2128
2129 JUMPTO(SLJIT_JUMP, quit);
2130 }
2131
2132 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2133 {
2134 /* May destroy TMP1. */
2135 DEFINE_COMPILER;
2136 struct sljit_jump *jump;
2137
2138 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2139 {
2140 /* The value of -1 must be kept for start_used_ptr! */
2141 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2142 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2143 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2144 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2145 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2146 JUMPHERE(jump);
2147 }
2148 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2149 {
2150 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2151 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2152 JUMPHERE(jump);
2153 }
2154 }
2155
2156 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2157 {
2158 /* Detects if the character has an othercase. */
2159 unsigned int c;
2160
2161 #ifdef SUPPORT_UTF
2162 if (common->utf)
2163 {
2164 GETCHAR(c, cc);
2165 if (c > 127)
2166 {
2167 #ifdef SUPPORT_UCP
2168 return c != UCD_OTHERCASE(c);
2169 #else
2170 return FALSE;
2171 #endif
2172 }
2173 #ifndef COMPILE_PCRE8
2174 return common->fcc[c] != c;
2175 #endif
2176 }
2177 else
2178 #endif
2179 c = *cc;
2180 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2181 }
2182
2183 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2184 {
2185 /* Returns with the othercase. */
2186 #ifdef SUPPORT_UTF
2187 if (common->utf && c > 127)
2188 {
2189 #ifdef SUPPORT_UCP
2190 return UCD_OTHERCASE(c);
2191 #else
2192 return c;
2193 #endif
2194 }
2195 #endif
2196 return TABLE_GET(c, common->fcc, c);
2197 }
2198
2199 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2200 {
2201 /* Detects if the character and its othercase has only 1 bit difference. */
2202 unsigned int c, oc, bit;
2203 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2204 int n;
2205 #endif
2206
2207 #ifdef SUPPORT_UTF
2208 if (common->utf)
2209 {
2210 GETCHAR(c, cc);
2211 if (c <= 127)
2212 oc = common->fcc[c];
2213 else
2214 {
2215 #ifdef SUPPORT_UCP
2216 oc = UCD_OTHERCASE(c);
2217 #else
2218 oc = c;
2219 #endif
2220 }
2221 }
2222 else
2223 {
2224 c = *cc;
2225 oc = TABLE_GET(c, common->fcc, c);
2226 }
2227 #else
2228 c = *cc;
2229 oc = TABLE_GET(c, common->fcc, c);
2230 #endif
2231
2232 SLJIT_ASSERT(c != oc);
2233
2234 bit = c ^ oc;
2235 /* Optimized for English alphabet. */
2236 if (c <= 127 && bit == 0x20)
2237 return (0 << 8) | 0x20;
2238
2239 /* Since c != oc, they must have at least 1 bit difference. */
2240 if (!is_powerof2(bit))
2241 return 0;
2242
2243 #if defined COMPILE_PCRE8
2244
2245 #ifdef SUPPORT_UTF
2246 if (common->utf && c > 127)
2247 {
2248 n = GET_EXTRALEN(*cc);
2249 while ((bit & 0x3f) == 0)
2250 {
2251 n--;
2252 bit >>= 6;
2253 }
2254 return (n << 8) | bit;
2255 }
2256 #endif /* SUPPORT_UTF */
2257 return (0 << 8) | bit;
2258
2259 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2260
2261 #ifdef SUPPORT_UTF
2262 if (common->utf && c > 65535)
2263 {
2264 if (bit >= (1 << 10))
2265 bit >>= 10;
2266 else
2267 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2268 }
2269 #endif /* SUPPORT_UTF */
2270 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2271
2272 #endif /* COMPILE_PCRE[8|16|32] */
2273 }
2274
2275 static void check_partial(compiler_common *common, BOOL force)
2276 {
2277 /* Checks whether a partial matching is occured. Does not modify registers. */
2278 DEFINE_COMPILER;
2279 struct sljit_jump *jump = NULL;
2280
2281 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2282
2283 if (common->mode == JIT_COMPILE)
2284 return;
2285
2286 if (!force)
2287 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2288 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2289 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2290
2291 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2292 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2293 else
2294 {
2295 if (common->partialmatchlabel != NULL)
2296 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2297 else
2298 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2299 }
2300
2301 if (jump != NULL)
2302 JUMPHERE(jump);
2303 }
2304
2305 static void check_str_end(compiler_common *common, jump_list **end_reached)
2306 {
2307 /* Does not affect registers. Usually used in a tight spot. */
2308 DEFINE_COMPILER;
2309 struct sljit_jump *jump;
2310
2311 if (common->mode == JIT_COMPILE)
2312 {
2313 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2314 return;
2315 }
2316
2317 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2318 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2319 {
2320 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2321 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2322 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2323 }
2324 else
2325 {
2326 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2327 if (common->partialmatchlabel != NULL)
2328 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2329 else
2330 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2331 }
2332 JUMPHERE(jump);
2333 }
2334
2335 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2336 {
2337 DEFINE_COMPILER;
2338 struct sljit_jump *jump;
2339
2340 if (common->mode == JIT_COMPILE)
2341 {
2342 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2343 return;
2344 }
2345
2346 /* Partial matching mode. */
2347 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2348 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2349 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2350 {
2351 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2352 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2353 }
2354 else
2355 {
2356 if (common->partialmatchlabel != NULL)
2357 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2358 else
2359 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2360 }
2361 JUMPHERE(jump);
2362 }
2363
2364 static void read_char(compiler_common *common)
2365 {
2366 /* Reads the character into TMP1, updates STR_PTR.
2367 Does not check STR_END. TMP2 Destroyed. */
2368 DEFINE_COMPILER;
2369 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2370 struct sljit_jump *jump;
2371 #endif
2372
2373 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2374 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2375 if (common->utf)
2376 {
2377 #if defined COMPILE_PCRE8
2378 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2379 #elif defined COMPILE_PCRE16
2380 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2381 #endif /* COMPILE_PCRE[8|16] */
2382 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2383 JUMPHERE(jump);
2384 }
2385 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2386 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2387 }
2388
2389 static void peek_char(compiler_common *common)
2390 {
2391 /* Reads the character into TMP1, keeps STR_PTR.
2392 Does not check STR_END. TMP2 Destroyed. */
2393 DEFINE_COMPILER;
2394 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2395 struct sljit_jump *jump;
2396 #endif
2397
2398 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2399 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2400 if (common->utf)
2401 {
2402 #if defined COMPILE_PCRE8
2403 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2404 #elif defined COMPILE_PCRE16
2405 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2406 #endif /* COMPILE_PCRE[8|16] */
2407 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2408 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2409 JUMPHERE(jump);
2410 }
2411 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2412 }
2413
2414 static void read_char8_type(compiler_common *common)
2415 {
2416 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2417 DEFINE_COMPILER;
2418 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2419 struct sljit_jump *jump;
2420 #endif
2421
2422 #ifdef SUPPORT_UTF
2423 if (common->utf)
2424 {
2425 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2426 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2427 #if defined COMPILE_PCRE8
2428 /* This can be an extra read in some situations, but hopefully
2429 it is needed in most cases. */
2430 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2431 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2432 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2433 JUMPHERE(jump);
2434 #elif defined COMPILE_PCRE16
2435 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2436 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2437 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2438 JUMPHERE(jump);
2439 /* Skip low surrogate if necessary. */
2440 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2441 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2442 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2443 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2444 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2445 #elif defined COMPILE_PCRE32
2446 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2447 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2448 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2449 JUMPHERE(jump);
2450 #endif /* COMPILE_PCRE[8|16|32] */
2451 return;
2452 }
2453 #endif /* SUPPORT_UTF */
2454 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2455 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2456 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2457 /* The ctypes array contains only 256 values. */
2458 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2459 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2460 #endif
2461 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2462 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2463 JUMPHERE(jump);
2464 #endif
2465 }
2466
2467 static void skip_char_back(compiler_common *common)
2468 {
2469 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2470 DEFINE_COMPILER;
2471 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2472 #if defined COMPILE_PCRE8
2473 struct sljit_label *label;
2474
2475 if (common->utf)
2476 {
2477 label = LABEL();
2478 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2479 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2480 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2481 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2482 return;
2483 }
2484 #elif defined COMPILE_PCRE16
2485 if (common->utf)
2486 {
2487 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2488 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2489 /* Skip low surrogate if necessary. */
2490 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2491 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2492 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2493 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2494 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2495 return;
2496 }
2497 #endif /* COMPILE_PCRE[8|16] */
2498 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2499 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2500 }
2501
2502 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2503 {
2504 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2505 DEFINE_COMPILER;
2506
2507 if (nltype == NLTYPE_ANY)
2508 {
2509 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2510 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2511 }
2512 else if (nltype == NLTYPE_ANYCRLF)
2513 {
2514 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2515 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2516 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2517 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2518 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2519 }
2520 else
2521 {
2522 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2523 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2524 }
2525 }
2526
2527 #ifdef SUPPORT_UTF
2528
2529 #if defined COMPILE_PCRE8
2530 static void do_utfreadchar(compiler_common *common)
2531 {
2532 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2533 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2534 DEFINE_COMPILER;
2535 struct sljit_jump *jump;
2536
2537 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2538 /* Searching for the first zero. */
2539 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2540 jump = JUMP(SLJIT_C_NOT_ZERO);
2541 /* Two byte sequence. */
2542 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2543 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2544 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2545 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2546 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2547 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2548 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2549 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2550 JUMPHERE(jump);
2551
2552 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2553 jump = JUMP(SLJIT_C_NOT_ZERO);
2554 /* Three byte sequence. */
2555 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2556 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2557 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2558 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2559 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2560 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2561 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2562 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2563 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2564 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2565 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2566 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2567 JUMPHERE(jump);
2568
2569 /* Four byte sequence. */
2570 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2571 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2572 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2573 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2574 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2575 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2576 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2577 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2578 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2579 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2580 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2581 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2582 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2583 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2584 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2585 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2586 }
2587
2588 static void do_utfreadtype8(compiler_common *common)
2589 {
2590 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2591 of the character (>= 0xc0). Return value in TMP1. */
2592 DEFINE_COMPILER;
2593 struct sljit_jump *jump;
2594 struct sljit_jump *compare;
2595
2596 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2597
2598 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2599 jump = JUMP(SLJIT_C_NOT_ZERO);
2600 /* Two byte sequence. */
2601 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2602 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2603 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2604 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2605 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2606 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2607 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2608 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2609 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2610
2611 JUMPHERE(compare);
2612 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2613 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2614 JUMPHERE(jump);
2615
2616 /* We only have types for characters less than 256. */
2617 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2618 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2619 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2620 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2621 }
2622
2623 #elif defined COMPILE_PCRE16
2624
2625 static void do_utfreadchar(compiler_common *common)
2626 {
2627 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2628 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2629 DEFINE_COMPILER;
2630 struct sljit_jump *jump;
2631
2632 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2633 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2634 /* Do nothing, only return. */
2635 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2636
2637 JUMPHERE(jump);
2638 /* Combine two 16 bit characters. */
2639 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2640 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2641 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2642 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2643 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2644 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2645 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2646 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2647 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2648 }
2649
2650 #endif /* COMPILE_PCRE[8|16] */
2651
2652 #endif /* SUPPORT_UTF */
2653
2654 #ifdef SUPPORT_UCP
2655
2656 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2657 #define UCD_BLOCK_MASK 127
2658 #define UCD_BLOCK_SHIFT 7
2659
2660 static void do_getucd(compiler_common *common)
2661 {
2662 /* Search the UCD record for the character comes in TMP1.
2663 Returns chartype in TMP1 and UCD offset in TMP2. */
2664 DEFINE_COMPILER;
2665
2666 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2667
2668 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2669 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2670 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2671 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2672 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2673 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2674 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2675 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2676 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2677 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2678 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2679 }
2680 #endif
2681
2682 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2683 {
2684 DEFINE_COMPILER;
2685 struct sljit_label *mainloop;
2686 struct sljit_label *newlinelabel = NULL;
2687 struct sljit_jump *start;
2688 struct sljit_jump *end = NULL;
2689 struct sljit_jump *nl = NULL;
2690 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2691 struct sljit_jump *singlechar;
2692 #endif
2693 jump_list *newline = NULL;
2694 BOOL newlinecheck = FALSE;
2695 BOOL readuchar = FALSE;
2696
2697 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2698 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2699 newlinecheck = TRUE;
2700
2701 if (firstline)
2702 {
2703 /* Search for the end of the first line. */
2704 SLJIT_ASSERT(common->first_line_end != 0);
2705 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2706
2707 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2708 {
2709 mainloop = LABEL();
2710 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2711 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2712 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2713 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2714 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2715 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2716 JUMPHERE(end);
2717 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2718 }
2719 else
2720 {
2721 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2722 mainloop = LABEL();
2723 /* Continual stores does not cause data dependency. */
2724 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2725 read_char(common);
2726 check_newlinechar(common, common->nltype, &newline, TRUE);
2727 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2728 JUMPHERE(end);
2729 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2730 set_jumps(newline, LABEL());
2731 }
2732
2733 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2734 }
2735
2736 start = JUMP(SLJIT_JUMP);
2737
2738 if (newlinecheck)
2739 {
2740 newlinelabel = LABEL();
2741 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2742 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2743 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2744 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2745 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2746 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2747 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2748 #endif
2749 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2750 nl = JUMP(SLJIT_JUMP);
2751 }
2752
2753 mainloop = LABEL();
2754
2755 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2756 #ifdef SUPPORT_UTF
2757 if (common->utf) readuchar = TRUE;
2758 #endif
2759 if (newlinecheck) readuchar = TRUE;
2760
2761 if (readuchar)
2762 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2763
2764 if (newlinecheck)
2765 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2766
2767 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2768 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2769 #if defined COMPILE_PCRE8
2770 if (common->utf)
2771 {
2772 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2773 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2774 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2775 JUMPHERE(singlechar);
2776 }
2777 #elif defined COMPILE_PCRE16
2778 if (common->utf)
2779 {
2780 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2781 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2782 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2783 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2784 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2785 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2786 JUMPHERE(singlechar);
2787 }
2788 #endif /* COMPILE_PCRE[8|16] */
2789 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2790 JUMPHERE(start);
2791
2792 if (newlinecheck)
2793 {
2794 JUMPHERE(end);
2795 JUMPHERE(nl);
2796 }
2797
2798 return mainloop;
2799 }
2800
2801 #define MAX_N_CHARS 3
2802
2803 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2804 {
2805 DEFINE_COMPILER;
2806 struct sljit_label *start;
2807 struct sljit_jump *quit;
2808 pcre_uint32 chars[MAX_N_CHARS * 2];
2809 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2810 int location = 0;
2811 pcre_int32 len, c, bit, caseless;
2812 int must_stop;
2813
2814 /* We do not support alternatives now. */
2815 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2816 return FALSE;
2817
2818 while (TRUE)
2819 {
2820 caseless = 0;
2821 must_stop = 1;
2822 switch(*cc)
2823 {
2824 case OP_CHAR:
2825 must_stop = 0;
2826 cc++;
2827 break;
2828
2829 case OP_CHARI:
2830 caseless = 1;
2831 must_stop = 0;
2832 cc++;
2833 break;
2834
2835 case OP_SOD:
2836 case OP_SOM:
2837 case OP_SET_SOM:
2838 case OP_NOT_WORD_BOUNDARY:
2839 case OP_WORD_BOUNDARY:
2840 case OP_EODN:
2841 case OP_EOD:
2842 case OP_CIRC:
2843 case OP_CIRCM:
2844 case OP_DOLL:
2845 case OP_DOLLM:
2846 /* Zero width assertions. */
2847 cc++;
2848 continue;
2849
2850 case OP_PLUS:
2851 case OP_MINPLUS:
2852 case OP_POSPLUS:
2853 cc++;
2854 break;
2855
2856 case OP_EXACT:
2857 cc += 1 + IMM2_SIZE;
2858 break;
2859
2860 case OP_PLUSI:
2861 case OP_MINPLUSI:
2862 case OP_POSPLUSI:
2863 caseless = 1;
2864 cc++;
2865 break;
2866
2867 case OP_EXACTI:
2868 caseless = 1;
2869 cc += 1 + IMM2_SIZE;
2870 break;
2871
2872 default:
2873 must_stop = 2;
2874 break;
2875 }
2876
2877 if (must_stop == 2)
2878 break;
2879
2880 len = 1;
2881 #ifdef SUPPORT_UTF
2882 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2883 #endif
2884
2885 if (caseless && char_has_othercase(common, cc))
2886 {
2887 caseless = char_get_othercase_bit(common, cc);
2888 if (caseless == 0)
2889 return FALSE;
2890 #ifdef COMPILE_PCRE8
2891 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2892 #else
2893 if ((caseless & 0x100) != 0)
2894 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2895 else
2896 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2897 #endif
2898 }
2899 else
2900 caseless = 0;
2901
2902 while (len > 0 && location < MAX_N_CHARS * 2)
2903 {
2904 c = *cc;
2905 bit = 0;
2906 if (len == (caseless & 0xff))
2907 {
2908 bit = caseless >> 8;
2909 c |= bit;
2910 }
2911
2912 chars[location] = c;
2913 chars[location + 1] = bit;
2914
2915 len--;
2916 location += 2;
2917 cc++;
2918 }
2919
2920 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2921 break;
2922 }
2923
2924 /* At least two characters are required. */
2925 if (location < 2 * 2)
2926 return FALSE;
2927
2928 if (firstline)
2929 {
2930 SLJIT_ASSERT(common->first_line_end != 0);
2931 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2932 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2933 }
2934 else
2935 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2936
2937 start = LABEL();
2938 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2939
2940 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2941 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2942 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2943 if (chars[1] != 0)
2944 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2945 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2946 if (location > 2 * 2)
2947 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2948 if (chars[3] != 0)
2949 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2950 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2951 if (location > 2 * 2)
2952 {
2953 if (chars[5] != 0)
2954 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2955 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2956 }
2957 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2958
2959 JUMPHERE(quit);
2960
2961 if (firstline)
2962 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2963 else
2964 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2965 return TRUE;
2966 }
2967
2968 #undef MAX_N_CHARS
2969
2970 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2971 {
2972 DEFINE_COMPILER;
2973 struct sljit_label *start;
2974 struct sljit_jump *quit;
2975 struct sljit_jump *found;
2976 pcre_uchar oc, bit;
2977
2978 if (firstline)
2979 {
2980 SLJIT_ASSERT(common->first_line_end != 0);
2981 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2982 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2983 }
2984
2985 start = LABEL();
2986 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2987 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2988
2989 oc = first_char;
2990 if (caseless)
2991 {
2992 oc = TABLE_GET(first_char, common->fcc, first_char);
2993 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2994 if (first_char > 127 && common->utf)
2995 oc = UCD_OTHERCASE(first_char);
2996 #endif
2997 }
2998 if (first_char == oc)
2999 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3000 else
3001 {
3002 bit = first_char ^ oc;
3003 if (is_powerof2(bit))
3004 {
3005 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3006 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3007 }
3008 else
3009 {
3010 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3011 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3012 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3013 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3014 found = JUMP(SLJIT_C_NOT_ZERO);
3015 }
3016 }
3017
3018 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3019 JUMPTO(SLJIT_JUMP, start);
3020 JUMPHERE(found);
3021 JUMPHERE(quit);
3022
3023 if (firstline)
3024 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3025 }
3026
3027 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3028 {
3029 DEFINE_COMPILER;
3030 struct sljit_label *loop;
3031 struct sljit_jump *lastchar;
3032 struct sljit_jump *firstchar;
3033 struct sljit_jump *quit;
3034 struct sljit_jump *foundcr = NULL;
3035 struct sljit_jump *notfoundnl;
3036 jump_list *newline = NULL;
3037
3038 if (firstline)
3039 {
3040 SLJIT_ASSERT(common->first_line_end != 0);
3041 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3042 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3043 }
3044
3045 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3046 {
3047 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3048 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3049 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3050 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3051 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3052
3053 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3054 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3055 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3056 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3057 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3058 #endif
3059 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3060
3061 loop = LABEL();
3062 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3063 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3064 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3065 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3066 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3067 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3068
3069 JUMPHERE(quit);
3070 JUMPHERE(firstchar);
3071 JUMPHERE(lastchar);
3072
3073 if (firstline)
3074 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3075 return;
3076 }
3077
3078 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3079 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3080 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3081 skip_char_back(common);
3082
3083 loop = LABEL();
3084 read_char(common);
3085 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3086 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3087 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3088 check_newlinechar(common, common->nltype, &newline, FALSE);
3089 set_jumps(newline, loop);
3090
3091 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3092 {
3093 quit = JUMP(SLJIT_JUMP);
3094 JUMPHERE(foundcr);
3095 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3096 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3097 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3098 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3099 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3100 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3101 #endif
3102 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3103 JUMPHERE(notfoundnl);
3104 JUMPHERE(quit);
3105 }
3106 JUMPHERE(lastchar);
3107 JUMPHERE(firstchar);
3108
3109 if (firstline)
3110 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3111 }
3112
3113 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3114
3115 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3116 {
3117 DEFINE_COMPILER;
3118 struct sljit_label *start;
3119 struct sljit_jump *quit;
3120 struct sljit_jump *found = NULL;
3121 jump_list *matches = NULL;
3122 pcre_uint8 inverted_start_bits[32];
3123 int i;
3124 #ifndef COMPILE_PCRE8
3125 struct sljit_jump *jump;
3126 #endif
3127
3128 for (i = 0; i < 32; ++i)
3129 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3130
3131 if (firstline)
3132 {
3133 SLJIT_ASSERT(common->first_line_end != 0);
3134 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3135 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3136 }
3137
3138 start = LABEL();
3139 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3140 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3141 #ifdef SUPPORT_UTF
3142 if (common->utf)
3143 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3144 #endif
3145
3146 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3147 {
3148 #ifndef COMPILE_PCRE8
3149 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3150 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3151 JUMPHERE(jump);
3152 #endif
3153 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3154 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3155 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3156 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3157 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3158 found = JUMP(SLJIT_C_NOT_ZERO);
3159 }
3160
3161 #ifdef SUPPORT_UTF
3162 if (common->utf)
3163 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3164 #endif
3165 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3166 #ifdef SUPPORT_UTF
3167 #if defined COMPILE_PCRE8
3168 if (common->utf)
3169 {
3170 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3171 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3172 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3173 }
3174 #elif defined COMPILE_PCRE16
3175 if (common->utf)
3176 {
3177 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3178 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3179 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3180 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3181 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3182 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3183 }
3184 #endif /* COMPILE_PCRE[8|16] */
3185 #endif /* SUPPORT_UTF */
3186 JUMPTO(SLJIT_JUMP, start);
3187 if (found != NULL)
3188 JUMPHERE(found);
3189 if (matches != NULL)
3190 set_jumps(matches, LABEL());
3191 JUMPHERE(quit);
3192
3193 if (firstline)
3194 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3195 }
3196
3197 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3198 {
3199 DEFINE_COMPILER;
3200 struct sljit_label *loop;
3201 struct sljit_jump *toolong;
3202 struct sljit_jump *alreadyfound;
3203 struct sljit_jump *found;
3204 struct sljit_jump *foundoc = NULL;
3205 struct sljit_jump *notfound;
3206 pcre_uint32 oc, bit;
3207
3208 SLJIT_ASSERT(common->req_char_ptr != 0);
3209 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3210 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3211 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3212 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3213
3214 if (has_firstchar)
3215 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3216 else
3217 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3218
3219 loop = LABEL();
3220 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3221
3222 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3223 oc = req_char;
3224 if (caseless)
3225 {
3226 oc = TABLE_GET(req_char, common->fcc, req_char);
3227 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3228 if (req_char > 127 && common->utf)
3229 oc = UCD_OTHERCASE(req_char);
3230 #endif
3231 }
3232 if (req_char == oc)
3233 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3234 else
3235 {
3236 bit = req_char ^ oc;
3237 if (is_powerof2(bit))
3238 {
3239 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3240 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3241 }
3242 else
3243 {
3244 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3245 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3246 }
3247 }
3248 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3249 JUMPTO(SLJIT_JUMP, loop);
3250
3251 JUMPHERE(found);
3252 if (foundoc)
3253 JUMPHERE(foundoc);
3254 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3255 JUMPHERE(alreadyfound);
3256 JUMPHERE(toolong);
3257 return notfound;
3258 }
3259
3260 static void do_revertframes(compiler_common *common)
3261 {
3262 DEFINE_COMPILER;
3263 struct sljit_jump *jump;
3264 struct sljit_label *mainloop;
3265
3266 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3267 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3268 GET_LOCAL_BASE(TMP3, 0, 0);
3269
3270 /* Drop frames until we reach STACK_TOP. */
3271 mainloop = LABEL();
3272 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3273 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3274 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3275
3276 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3277 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3278 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3279 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3280 JUMPTO(SLJIT_JUMP, mainloop);
3281
3282 JUMPHERE(jump);
3283 jump = JUMP(SLJIT_C_SIG_LESS);
3284 /* End of dropping frames. */
3285 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3286
3287 JUMPHERE(jump);
3288 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3289 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3290 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3291 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3292 JUMPTO(SLJIT_JUMP, mainloop);
3293 }
3294
3295 static void check_wordboundary(compiler_common *common)
3296 {
3297 DEFINE_COMPILER;
3298 struct sljit_jump *skipread;
3299 jump_list *skipread_list = NULL;
3300 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3301 struct sljit_jump *jump;
3302 #endif
3303
3304 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3305
3306 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3307 /* Get type of the previous char, and put it to LOCALS1. */
3308 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3309 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3310 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3311 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3312 skip_char_back(common);
3313 check_start_used_ptr(common);
3314 read_char(common);
3315
3316 /* Testing char type. */
3317 #ifdef SUPPORT_UCP
3318 if (common->use_ucp)
3319 {
3320 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3321 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3322 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3323 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3324 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3325 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3326 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3327 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3328 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3329 JUMPHERE(jump);
3330 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3331 }
3332 else
3333 #endif
3334 {
3335 #ifndef COMPILE_PCRE8
3336 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3337 #elif defined SUPPORT_UTF
3338 /* Here LOCALS1 has already been zeroed. */
3339 jump = NULL;
3340 if (common->utf)
3341 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3342 #endif /* COMPILE_PCRE8 */
3343 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3344 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3345 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3346 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3347 #ifndef COMPILE_PCRE8
3348 JUMPHERE(jump);
3349 #elif defined SUPPORT_UTF
3350 if (jump != NULL)
3351 JUMPHERE(jump);
3352 #endif /* COMPILE_PCRE8 */
3353 }
3354 JUMPHERE(skipread);
3355
3356 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3357 check_str_end(common, &skipread_list);
3358 peek_char(common);
3359
3360 /* Testing char type. This is a code duplication. */
3361 #ifdef SUPPORT_UCP
3362 if (common->use_ucp)
3363 {
3364 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3365 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3366 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3367 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3368 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3369 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3370 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3371 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3372 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3373 JUMPHERE(jump);
3374 }
3375 else
3376 #endif
3377 {
3378 #ifndef COMPILE_PCRE8
3379 /* TMP2 may be destroyed by peek_char. */
3380 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3381 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3382 #elif defined SUPPORT_UTF
3383 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3384 jump = NULL;
3385 if (common->utf)
3386 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3387 #endif
3388 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3389 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3390 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3391 #ifndef COMPILE_PCRE8
3392 JUMPHERE(jump);
3393 #elif defined SUPPORT_UTF
3394 if (jump != NULL)
3395 JUMPHERE(jump);
3396 #endif /* COMPILE_PCRE8 */
3397 }
3398 set_jumps(skipread_list, LABEL());
3399
3400 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3401 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3402 }
3403
/*
Range format:

ranges[0] = number of bit change positions stored (at most MAX_RANGE_SIZE;
            -1 means the range list is invalid).
ranges[1] = value of the first bit (0 or 1).
ranges[2] ... ranges[1 + ranges[0]] = positions of the bit changes, i.e. the
            positions where the current bit is not equal to the previous one.
*/
3411
3412 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3413 {
3414 DEFINE_COMPILER;
3415 struct sljit_jump *jump;
3416
3417 if (ranges[0] < 0)
3418 return FALSE;
3419
3420 switch(ranges[0])
3421 {
3422 case 1:
3423 if (readch)
3424 read_char(common);
3425 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3426 return TRUE;
3427
3428 case 2:
3429 if (readch)
3430 read_char(common);
3431 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3432 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3433 return TRUE;
3434
3435 case 4:
3436 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3437 {
3438 if (readch)
3439 read_char(common);
3440 if (ranges[1] != 0)
3441 {
3442 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3443 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3444 }
3445 else
3446 {
3447 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3448 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3449 JUMPHERE(jump);
3450 }
3451 return TRUE;
3452 }
3453 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3454 {
3455 if (readch)
3456 read_char(common);
3457 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3458 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3459 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3460 return TRUE;
3461 }
3462 return FALSE;
3463
3464 default:
3465 return FALSE;
3466 }
3467 }
3468
3469 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3470 {
3471 int i, bit, length;
3472 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3473
3474 bit = ctypes[0] & flag;
3475 ranges[0] = -1;
3476 ranges[1] = bit != 0 ? 1 : 0;
3477 length = 0;
3478
3479 for (i = 1; i < 256; i++)
3480 if ((ctypes[i] & flag) != bit)
3481 {
3482 if (length >= MAX_RANGE_SIZE)
3483 return;
3484 ranges[2 + length] = i;
3485 length++;
3486 bit ^= flag;
3487 }
3488
3489 if (bit != 0)
3490 {
3491 if (length >= MAX_RANGE_SIZE)
3492 return;
3493 ranges[2 + length] = 256;
3494 length++;
3495 }
3496 ranges[0] = length;
3497 }
3498
3499 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3500 {
3501 int ranges[2 + MAX_RANGE_SIZE];
3502 pcre_uint8 bit, cbit, all;
3503 int i, byte, length = 0;
3504
3505 bit = bits[0] & 0x1;
3506 ranges[1] = bit;
3507 /* Can be 0 or 255. */
3508 all = -bit;
3509
3510 for (i = 0; i < 256; )
3511 {
3512 byte = i >> 3;
3513 if ((i & 0x7) == 0 && bits[byte] == all)
3514 i += 8;
3515 else
3516 {
3517 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3518 if (cbit != bit)
3519 {
3520 if (length >= MAX_RANGE_SIZE)
3521 return FALSE;
3522 ranges[2 + length] = i;
3523 length++;
3524 bit = cbit;
3525 all = -cbit;
3526 }
3527 i++;
3528 }
3529 }
3530
3531 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3532 {
3533 if (length >= MAX_RANGE_SIZE)
3534 return FALSE;
3535 ranges[2 + length] = 256;
3536 length++;
3537 }
3538 ranges[0] = length;
3539
3540 return check_ranges(common, ranges, backtracks, FALSE);
3541 }
3542
3543 static void check_anynewline(compiler_common *common)
3544 {
3545 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3546 DEFINE_COMPILER;
3547
3548 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3549
3550 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3551 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3552 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3553 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3554 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3555 #ifdef COMPILE_PCRE8
3556 if (common->utf)
3557 {
3558 #endif
3559 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3560 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3561 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3562 #ifdef COMPILE_PCRE8
3563 }
3564 #endif
3565 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3566 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3567 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3568 }
3569
3570 static void check_hspace(compiler_common *common)
3571 {
3572 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3573 DEFINE_COMPILER;
3574
3575 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3576
3577 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3578 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3579 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3580 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3581 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3582 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3583 #ifdef COMPILE_PCRE8
3584 if (common->utf)
3585 {
3586 #endif
3587 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3588 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3589 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3590 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3591 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3592 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3593 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3594 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3595 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3596 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3597 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3598 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3599 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3600 #ifdef COMPILE_PCRE8
3601 }
3602 #endif
3603 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3604 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3605
3606 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3607 }
3608
3609 static void check_vspace(compiler_common *common)
3610 {
3611 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3612 DEFINE_COMPILER;
3613
3614 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3615
3616 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3617 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3618 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3619 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3620 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3621 #ifdef COMPILE_PCRE8
3622 if (common->utf)
3623 {
3624 #endif
3625 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3626 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3627 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3628 #ifdef COMPILE_PCRE8
3629 }
3630 #endif
3631 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3632 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3633
3634 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3635 }
3636
3637 #define CHAR1 STR_END
3638 #define CHAR2 STACK_TOP
3639
3640 static void do_casefulcmp(compiler_common *common)
3641 {
3642 DEFINE_COMPILER;
3643 struct sljit_jump *jump;
3644 struct sljit_label *label;
3645
3646 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3647 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3648 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3649 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3650 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3651 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3652
3653 label = LABEL();
3654 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3655 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3656 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3657 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3658 JUMPTO(SLJIT_C_NOT_ZERO, label);
3659
3660 JUMPHERE(jump);
3661 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3662 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3663 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3665 }
3666
3667 #define LCC_TABLE STACK_LIMIT
3668
3669 static void do_caselesscmp(compiler_common *common)
3670 {
3671 DEFINE_COMPILER;
3672 struct sljit_jump *jump;
3673 struct sljit_label *label;
3674
3675 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3676 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3677
3678 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3679 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3680 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3681 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3682 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3683 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3684
3685 label = LABEL();
3686 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3687 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3688 #ifndef COMPILE_PCRE8
3689 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3690 #endif
3691 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3692 #ifndef COMPILE_PCRE8
3693 JUMPHERE(jump);
3694 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3695 #endif
3696 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3697 #ifndef COMPILE_PCRE8
3698 JUMPHERE(jump);
3699 #endif
3700 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3701 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3702 JUMPTO(SLJIT_C_NOT_ZERO, label);
3703
3704 JUMPHERE(jump);
3705 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3706 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3707 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3708 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3709 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3710 }
3711
3712 #undef LCC_TABLE
3713 #undef CHAR1
3714 #undef CHAR2
3715
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF strings with the help of the Unicode
property tables. It is a plain C function called from the generated code,
because generating it at JIT level would be inefficient.

Arguments:
  src1   start of the first string
  args   JIT arguments; uchar_ptr is the start and end the limit of the
         second string
  end1   end of the first string

Returns: the position reached in the second string when every character of
         the first string was matched,
         NULL when two characters differ,
         (pcre_uchar*)1 when the second string ends first. */

static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
pcre_uint32 ch1, ch2;
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
const ucd_record *record;
const pcre_uint32 *set;

while (src1 < end1)
  {
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(ch1, src1);
  GETCHARINC(ch2, src2);
  record = GET_UCD(ch2);

  if (ch1 == ch2 || ch1 == ch2 + record->other_case)
    continue;

  /* Neither the same character nor its simple other case: look for ch1 in
  the caseless set of ch2. The loop stops as soon as an entry above ch1 is
  seen. */
  set = PRIV(ucd_caseless_sets) + record->caseset;
  while (ch1 != *set)
    {
    if (ch1 < *set)
      return NULL;
    set++;
    }
  }

return src2;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
3748
3749 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3750 compare_context* context, jump_list **backtracks)
3751 {
3752 DEFINE_COMPILER;
3753 unsigned int othercasebit = 0;
3754 pcre_uchar *othercasechar = NULL;
3755 #ifdef SUPPORT_UTF
3756 int utflength;
3757 #endif
3758
3759 if (caseless && char_has_othercase(common, cc))
3760 {
3761 othercasebit = char_get_othercase_bit(common, cc);
3762 SLJIT_ASSERT(othercasebit);
3763 /* Extracting bit difference info. */
3764 #if defined COMPILE_PCRE8
3765 othercasechar = cc + (othercasebit >> 8);
3766 othercasebit &= 0xff;
3767 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3768 /* Note that this code only handles characters in the BMP. If there
3769 ever are characters outside the BMP whose othercase differs in only one
3770 bit from itself (there currently are none), this code will need to be
3771 revised for COMPILE_PCRE32. */
3772 othercasechar = cc + (othercasebit >> 9);
3773 if ((othercasebit & 0x100) != 0)
3774 othercasebit = (othercasebit & 0xff) << 8;
3775 else
3776 othercasebit &= 0xff;
3777 #endif /* COMPILE_PCRE[8|16|32] */
3778 }
3779
3780 if (context->sourcereg == -1)
3781 {
3782 #if defined COMPILE_PCRE8
3783 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3784 if (context->length >= 4)
3785 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3786 else if (context->length >= 2)
3787 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3788 else
3789 #endif
3790 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3791 #elif defined COMPILE_PCRE16
3792 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3793 if (context->length >= 4)
3794 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3795 else
3796 #endif
3797 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3798 #elif defined COMPILE_PCRE32
3799 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3800 #endif /* COMPILE_PCRE[8|16|32] */
3801 context->sourcereg = TMP2;
3802 }
3803
3804 #ifdef SUPPORT_UTF
3805 utflength = 1;
3806 if (common->utf && HAS_EXTRALEN(*cc))
3807 utflength += GET_EXTRALEN(*cc);
3808
3809 do
3810 {
3811 #endif
3812
3813 context->length -= IN_UCHARS(1);
3814 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3815
3816 /* Unaligned read is supported. */
3817 if (othercasebit != 0 && othercasechar == cc)
3818 {
3819 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3820 context->oc.asuchars[context->ucharptr] = othercasebit;
3821 }
3822 else
3823 {
3824 context->c.asuchars[context->ucharptr] = *cc;
3825 context->oc.asuchars[context->ucharptr] = 0;
3826 }
3827 context->ucharptr++;
3828
3829 #if defined COMPILE_PCRE8
3830 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3831 #else
3832 if (context->ucharptr >= 2 || context->length == 0)
3833 #endif
3834 {
3835 if (context->length >= 4)
3836 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3837 else if (context->length >= 2)
3838 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3839 #if defined COMPILE_PCRE8
3840 else if (context->length >= 1)
3841 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3842 #endif /* COMPILE_PCRE8 */
3843 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3844
3845 switch(context->ucharptr)
3846 {
3847 case 4 / sizeof(pcre_uchar):
3848 if (context->oc.asint != 0)
3849 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3850 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3851 break;
3852
3853 case 2 / sizeof(pcre_uchar):
3854 if (context->oc.asushort != 0)
3855 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3856 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3857 break;
3858
3859 #ifdef COMPILE_PCRE8
3860 case 1:
3861 if (context->oc.asbyte != 0)
3862 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3863 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3864 break;
3865 #endif
3866
3867 default:
3868 SLJIT_ASSERT_STOP();
3869 break;
3870 }
3871 context->ucharptr = 0;
3872 }
3873
3874 #else
3875
3876 /* Unaligned read is unsupported or in 32 bit mode. */
3877 if (context->length >= 1)
3878 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3879
3880 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3881
3882 if (othercasebit != 0 && othercasechar == cc)
3883 {
3884 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3885 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3886 }
3887 else
3888 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3889
3890 #endif
3891
3892 cc++;
3893 #ifdef SUPPORT_UTF
3894 utflength--;
3895 }
3896 while (utflength > 0);
3897 #endif
3898
3899 return cc;
3900 }
3901
3902 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3903
/* Both macros keep a register biased by a known offset: the register holds
the real value minus the offset. They emit the SUB or ADD which moves the bias
to the requested value (nothing when it is unchanged) and record the new bias.
Each macro expands to more than one statement and must be used as a statement
of its own. */

#define SET_TYPE_OFFSET(value) \
  if ((value) > typeoffset) \
    OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
  else if ((value) < typeoffset) \
    OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
  typeoffset = (value);

#define SET_CHAR_OFFSET(value) \
  if ((value) > charoffset) \
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
  else if ((value) < charoffset) \
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
  charoffset = (value);
3923
3924 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3925 {
3926 DEFINE_COMPILER;
3927 jump_list *found = NULL;
3928 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3929 pcre_int32 c, charoffset;
3930 const pcre_uint32 *other_cases;
3931 struct sljit_jump *jump = NULL;
3932 pcre_uchar *ccbegin;
3933 int compares, invertcmp, numberofcmps;
3934 #ifdef SUPPORT_UCP
3935 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3936 BOOL charsaved = FALSE;
3937 int typereg = TMP1, scriptreg = TMP1;
3938 pcre_int32 typeoffset;
3939 #endif
3940
3941 /* Although SUPPORT_UTF must be defined, we are
3942 not necessary in utf mode even in 8 bit mode. */
3943 detect_partial_match(common, backtracks);
3944 read_char(common);
3945
3946 if ((*cc++ & XCL_MAP) != 0)
3947 {
3948 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3949 #ifndef COMPILE_PCRE8
3950 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3951 #elif defined SUPPORT_UTF
3952 if (common->utf)
3953 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3954 #endif
3955
3956 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3957 {
3958 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3959 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3960 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3961 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3962 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3963 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3964 }
3965
3966 #ifndef COMPILE_PCRE8
3967 JUMPHERE(jump);
3968 #elif defined SUPPORT_UTF
3969 if (common->utf)
3970 JUMPHERE(jump);
3971 #endif
3972 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3973 #ifdef SUPPORT_UCP
3974 charsaved = TRUE;
3975 #endif
3976 cc += 32 / sizeof(pcre_uchar);
3977 }
3978
3979 /* Scanning the necessary info. */
3980 ccbegin = cc;
3981 compares = 0;
3982 while (*cc != XCL_END)
3983 {
3984 compares++;
3985 if (*cc == XCL_SINGLE)
3986 {
3987 cc += 2;
3988 #ifdef SUPPORT_UTF
3989 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3990 #endif
3991 #ifdef SUPPORT_UCP
3992 needschar = TRUE;
3993 #endif
3994 }
3995 else if (*cc == XCL_RANGE)
3996 {
3997 cc += 2;
3998 #ifdef SUPPORT_UTF
3999 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4000 #endif
4001 cc++;
4002 #ifdef SUPPORT_UTF
4003 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4004 #endif
4005 #ifdef SUPPORT_UCP
4006 needschar = TRUE;
4007 #endif
4008 }
4009 #ifdef SUPPORT_UCP
4010 else
4011 {
4012 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4013 cc++;
4014 switch(*cc)
4015 {
4016 case PT_ANY:
4017 break;
4018
4019 case PT_LAMP:
4020 case PT_GC:
4021 case PT_PC:
4022 case PT_ALNUM:
4023 needstype = TRUE;
4024 break;
4025
4026 case PT_SC:
4027 needsscript = TRUE;
4028 break;
4029
4030 case PT_SPACE:
4031 case PT_PXSPACE:
4032 case PT_WORD:
4033 needstype = TRUE;
4034 needschar = TRUE;
4035 break;
4036
4037 case PT_CLIST:
4038 case PT_UCNC:
4039 needschar = TRUE;
4040 break;
4041
4042 default:
4043 SLJIT_ASSERT_STOP();
4044 break;
4045 }
4046 cc += 2;
4047 }
4048 #endif
4049 }
4050
4051 #ifdef SUPPORT_UCP
4052 /* Simple register allocation. TMP1 is preferred if possible. */
4053 if (needstype || needsscript)
4054 {
4055 if (needschar && !charsaved)
4056 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4057 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4058 if (needschar)
4059 {
4060 if (needstype)
4061 {
4062 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4063 typereg = RETURN_ADDR;
4064 }
4065
4066 if (needsscript)
4067 scriptreg = TMP3;
4068 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4069 }
4070 else if (needstype && needsscript)
4071 scriptreg = TMP3;
4072 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4073
4074 if (needsscript)
4075 {
4076 if (scriptreg == TMP1)
4077 {
4078 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4079 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4080 }
4081 else
4082 {
4083 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4084 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4085 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4086 }
4087 }
4088 }
4089 #endif
4090
4091 /* Generating code. */
4092 cc = ccbegin;
4093 charoffset = 0;
4094 numberofcmps = 0;
4095 #ifdef SUPPORT_UCP
4096 typeoffset = 0;
4097 #endif
4098
4099 while (*cc != XCL_END)
4100 {
4101 compares--;
4102 invertcmp = (compares == 0 && list != backtracks);
4103 jump = NULL;
4104
4105 if (*cc == XCL_SINGLE)
4106 {
4107 cc ++;
4108 #ifdef SUPPORT_UTF
4109 if (common->utf)
4110 {
4111 GETCHARINC(c, cc);
4112 }
4113 else
4114 #endif
4115 c = *cc++;
4116
4117 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4118 {
4119 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4120 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4121 numberofcmps++;
4122 }
4123 else if (numberofcmps > 0)
4124 {
4125 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4126 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4127 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4128 numberofcmps = 0;
4129 }
4130 else
4131 {
4132 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4133 numberofcmps = 0;
4134 }
4135 }
4136 else if (*cc == XCL_RANGE)
4137 {
4138 cc ++;
4139 #ifdef SUPPORT_UTF
4140 if (common->utf)
4141 {
4142 GETCHARINC(c, cc);
4143 }
4144 else
4145 #endif
4146 c = *cc++;
4147 SET_CHAR_OFFSET(c);
4148 #ifdef SUPPORT_UTF
4149 if (common->utf)
4150 {
4151 GETCHARINC(c, cc);
4152 }
4153 else
4154 #endif
4155 c = *cc++;
4156 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4157 {
4158 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4159 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4160 numberofcmps++;
4161 }
4162 else if (numberofcmps > 0)
4163 {
4164 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4165 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4166 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4167 numberofcmps = 0;
4168 }
4169 else
4170 {
4171 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4172 numberofcmps = 0;
4173 }
4174 }
4175 #ifdef SUPPORT_UCP
4176 else
4177 {
4178 if (*cc == XCL_NOTPROP)
4179 invertcmp ^= 0x1;
4180 cc++;
4181 switch(*cc)
4182 {
4183 case PT_ANY:
4184 if (list != backtracks)
4185 {
4186 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4187 continue;
4188 }
4189 else if (cc[-1] == XCL_NOTPROP)
4190 continue;
4191 jump = JUMP(SLJIT_JUMP);
4192 break;
4193
4194 case PT_LAMP:
4195 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4196 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4197 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4198 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4199 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4200 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4201 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4202 break;
4203
4204 case PT_GC:
4205 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4206 SET_TYPE_OFFSET(c);
4207 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4208 break;
4209
4210 case PT_PC:
4211 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4212 break;
4213
4214 case PT_SC:
4215 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4216 break;
4217
4218 case PT_SPACE:
4219 case PT_PXSPACE:
4220 if (*cc == PT_SPACE)
4221 {
4222 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4223 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4224 }
4225 SET_CHAR_OFFSET(9);
4226 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4227 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4228 if (*cc == PT_SPACE)
4229 JUMPHERE(jump);
4230
4231 SET_TYPE_OFFSET(ucp_Zl);
4232 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4233 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4234 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4235 break;
4236
4237 case PT_WORD:
4238 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4239 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4240 /* Fall through. */
4241
4242 case PT_ALNUM:
4243 SET_TYPE_OFFSET(ucp_Ll);
4244 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4245 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4246 SET_TYPE_OFFSET(ucp_Nd);
4247 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4248 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4249 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4250 break;
4251
4252 case PT_CLIST:
4253 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4254
4255 /* At least three characters are required.
4256 Otherwise this case would be handled by the normal code path. */
4257 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4258 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4259
4260 /* Optimizing character pairs, if their difference is power of 2. */
4261 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4262 {
4263 if (charoffset == 0)
4264 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4265 else
4266 {
4267 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4268 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4269 }
4270 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4271 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4272 other_cases += 2;
4273 }
4274 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4275 {
4276 if (charoffset == 0)
4277 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4278 else
4279 {
4280 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4281 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4282 }
4283 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4284 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4285
4286 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4287 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4288
4289 other_cases += 3;
4290 }
4291 else
4292 {
4293 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4294 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4295 }
4296
4297 while (*other_cases != NOTACHAR)
4298 {
4299 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4300 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4301 }
4302 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4303 break;
4304
4305 case PT_UCNC:
4306 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4307 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4308 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4309 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4310 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4311 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4312
4313 SET_CHAR_OFFSET(0xa0);
4314 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4315 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4316 SET_CHAR_OFFSET(0);
4317 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4318 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4319 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4320 break;
4321 }
4322 cc += 2;
4323 }
4324 #endif
4325
4326 if (jump != NULL)
4327 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4328 }
4329
4330 if (found != NULL)
4331 set_jumps(found, LABEL());
4332 }
4333
4334 #undef SET_TYPE_OFFSET
4335 #undef SET_CHAR_OFFSET
4336
4337 #endif
4338
4339 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4340 {
4341 DEFINE_COMPILER;
4342 int length;
4343 unsigned int c, oc, bit;
4344 compare_context context;
4345 struct sljit_jump *jump[4];
4346 jump_list *end_list;
4347 #ifdef SUPPORT_UTF
4348 struct sljit_label *label;
4349 #ifdef SUPPORT_UCP
4350 pcre_uchar propdata[5];
4351 #endif
4352 #endif
4353
4354 switch(type)
4355 {
4356 case OP_SOD:
4357 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4358 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4359 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4360 return cc;
4361
4362 case OP_SOM:
4363 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4364 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4365 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4366 return cc;
4367
4368 case OP_NOT_WORD_BOUNDARY:
4369 case OP_WORD_BOUNDARY:
4370 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4371 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4372 return cc;
4373
4374 case OP_NOT_DIGIT:
4375 case OP_DIGIT:
4376 /* Digits are usually 0-9, so it is worth to optimize them. */
4377 if (common->digits[0] == -2)
4378 get_ctype_ranges(common, ctype_digit, common->digits);
4379 detect_partial_match(common, backtracks);
4380 /* Flip the starting bit in the negative case. */
4381 if (type == OP_NOT_DIGIT)
4382 common->digits[1] ^= 1;
4383 if (!check_ranges(common, common->digits, backtracks, TRUE))
4384 {
4385 read_char8_type(common);
4386 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4387 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4388 }
4389 if (type == OP_NOT_DIGIT)
4390 common->digits[1] ^= 1;
4391 return cc;
4392
4393 case OP_NOT_WHITESPACE:
4394 case OP_WHITESPACE:
4395 detect_partial_match(common, backtracks);
4396 read_char8_type(common);
4397 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4398 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4399 return cc;
4400
4401 case OP_NOT_WORDCHAR:
4402 case OP_WORDCHAR:
4403 detect_partial_match(common, backtracks);
4404 read_char8_type(common);
4405 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4406 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4407 return cc;
4408
4409 case OP_ANY:
4410 detect_partial_match(common, backtracks);
4411 read_char(common);
4412 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4413 {
4414 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4415 end_list = NULL;
4416 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4417 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4418 else
4419 check_str_end(common, &end_list);
4420
4421 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4422 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4423 set_jumps(end_list, LABEL());
4424 JUMPHERE(jump[0]);
4425 }
4426 else
4427 check_newlinechar(common, common->nltype, backtracks, TRUE);
4428 return cc;
4429
4430 case OP_ALLANY:
4431 detect_partial_match(common, backtracks);
4432 #ifdef SUPPORT_UTF
4433 if (common->utf)
4434 {
4435 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4436 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4437 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4438 #if defined COMPILE_PCRE8
4439 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4440 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4441 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4442 #elif defined COMPILE_PCRE16
4443 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4444 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4445 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4446 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4447 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4448 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4449 #endif
4450 JUMPHERE(jump[0]);
4451 #endif /* COMPILE_PCRE[8|16] */
4452 return cc;
4453 }
4454 #endif
4455 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4456 return cc;
4457
4458 case OP_ANYBYTE:
4459 detect_partial_match(common, backtracks);
4460 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4461 return cc;
4462
4463 #ifdef SUPPORT_UTF
4464 #ifdef SUPPORT_UCP
4465 case OP_NOTPROP:
4466 case OP_PROP:
4467 propdata[0] = 0;
4468 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4469 propdata[2] = cc[0];
4470 propdata[3] = cc[1];
4471 propdata[4] = XCL_END;
4472 compile_xclass_matchingpath(common, propdata, backtracks);
4473 return cc + 2;
4474 #endif
4475 #endif
4476
4477 case OP_ANYNL:
4478 detect_partial_match(common, backtracks);
4479 read_char(common);
4480 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4481 /* We don't need to handle soft partial matching case. */
4482 end_list = NULL;
4483 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4484 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4485 else
4486 check_str_end(common, &end_list);
4487 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4488 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4489 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4490 jump[2] = JUMP(SLJIT_JUMP);
4491 JUMPHERE(jump[0]);
4492 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4493 set_jumps(end_list, LABEL());
4494 JUMPHERE(jump[1]);
4495 JUMPHERE(jump[2]);
4496 return cc;
4497
4498 case OP_NOT_HSPACE:
4499 case OP_HSPACE:
4500 detect_partial_match(common, backtracks);
4501 read_char(common);
4502 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4503 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4504 return cc;
4505
4506 case OP_NOT_VSPACE:
4507 case OP_VSPACE:
4508 detect_partial_match(common, backtracks);
4509 read_char(common);
4510 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4511 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4512 return cc;
4513
4514 #ifdef SUPPORT_UCP
4515 case OP_EXTUNI:
4516 detect_partial_match(common, backtracks);
4517 read_char(common);
4518 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4519 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4520 /* Optimize register allocation: use a real register. */
4521 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4522 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4523
4524 label = LABEL();
4525 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4526 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4527 read_char(common);
4528 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4529 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4530 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4531
4532 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4533 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4534 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4535 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4536 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4537 JUMPTO(SLJIT_C_NOT_ZERO, label);
4538
4539 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4540 JUMPHERE(jump[0]);
4541 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4542
4543 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4544 {
4545 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4546 /* Since we successfully read a char above, partial matching must occure. */
4547 check_partial(common, TRUE);
4548 JUMPHERE(jump[0]);
4549 }
4550 return cc;
4551 #endif
4552
4553 case OP_EODN:
4554 /* Requires rather complex checks. */
4555 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4556 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4557 {
4558 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4559 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4560 if (common->mode == JIT_COMPILE)
4561 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4562 else
4563 {
4564 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4565 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4566 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4567 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4568 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4569 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4570 check_partial(common, TRUE);
4571 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4572 JUMPHERE(jump[1]);
4573 }
4574 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4575 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4576 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4577 }
4578 else if (common->nltype == NLTYPE_FIXED)
4579 {
4580 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4581 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4582 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4583 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4584 }
4585 else
4586 {
4587 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4588 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4589 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4590 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4591 jump[2] = JUMP(SLJIT_C_GREATER);
4592 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4593 /* Equal. */
4594 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4595 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4596 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4597
4598 JUMPHERE(jump[1]);
4599 if (common->nltype == NLTYPE_ANYCRLF)
4600 {
4601 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4602 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4603 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4604 }
4605 else
4606 {
4607 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4608 read_char(common);
4609 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4610 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4611 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4612 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4613 }
4614 JUMPHERE(jump[2]);
4615 JUMPHERE(jump[3]);
4616 }
4617 JUMPHERE(jump[0]);
4618 check_partial(common, FALSE);
4619 return cc;
4620
4621 case OP_EOD:
4622 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4623 check_partial(common, FALSE);
4624 return cc;
4625
4626 case OP_CIRC:
4627 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4628 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4629 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4630 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4631 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4632 return cc;
4633
4634 case OP_CIRCM:
4635 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4636 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4637 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4638 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4639 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4640 jump[0] = JUMP(SLJIT_JUMP);
4641 JUMPHERE(jump[1]);
4642
4643 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4644 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4645 {
4646 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4647 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4648 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4649 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4650 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4651 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4652 }
4653 else
4654 {
4655 skip_char_back(common);
4656 read_char(common);
4657 check_newlinechar(common, common->nltype, backtracks, FALSE);
4658 }
4659 JUMPHERE(jump[0]);
4660 return cc;
4661
4662 case OP_DOLL:
4663 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4664 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4665 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4666
4667 if (!common->endonly)
4668 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4669 else
4670 {
4671 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4672 check_partial(common, FALSE);
4673 }
4674 return cc;
4675
4676 case OP_DOLLM:
4677 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4678 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4679 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4680 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4681 check_partial(common, FALSE);
4682 jump[0] = JUMP(SLJIT_JUMP);
4683 JUMPHERE(jump[1]);
4684
4685 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4686 {
4687 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4688 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4689 if (common->mode == JIT_COMPILE)
4690 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4691 else
4692 {
4693 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4694 /* STR_PTR = STR_END - IN_UCHARS(1) */
4695 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4696 check_partial(common, TRUE);
4697 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4698 JUMPHERE(jump[1]);
4699 }
4700
4701 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4702 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4703 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4704 }
4705 else
4706 {
4707 peek_char(common);
4708 check_newlinechar(common, common->nltype, backtracks, FALSE);
4709 }
4710 JUMPHERE(jump[0]);
4711 return cc;
4712
4713 case OP_CHAR:
4714 case OP_CHARI:
4715 length = 1;
4716 #ifdef SUPPORT_UTF
4717 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4718 #endif
4719 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4720 {
4721 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4722 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4723
4724 context.length = IN_UCHARS(length);
4725 context.sourcereg = -1;
4726 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4727 context.ucharptr = 0;
4728 #endif
4729 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4730 }
4731 detect_partial_match(common, backtracks);
4732 read_char(common);
4733 #ifdef SUPPORT_UTF
4734 if (common->utf)
4735 {
4736 GETCHAR(c, cc);
4737 }
4738 else
4739 #endif
4740 c = *cc;
4741 if (type == OP_CHAR || !char_has_othercase(common, cc))
4742 {
4743 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4744 return cc + length;
4745 }
4746 oc = char_othercase(common, c);
4747 bit = c ^ oc;
4748 if (is_powerof2(bit))
4749 {
4750 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4751 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4752 return cc + length;
4753 }
4754 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4755 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4756 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4757 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4758 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4759 return cc + length;
4760
4761 case OP_NOT:
4762 case OP_NOTI:
4763 detect_partial_match(common, backtracks);
4764 length = 1;
4765 #ifdef SUPPORT_UTF
4766 if (common->utf)
4767 {
4768 #ifdef COMPILE_PCRE8
4769 c = *cc;
4770 if (c < 128)
4771 {
4772 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4773 if (type == OP_NOT || !char_has_othercase(common, cc))
4774 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4775 else
4776 {
4777 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4778 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4779 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4780 }
4781 /* Skip the variable-length character. */
4782 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4783 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4784 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4785 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4786 JUMPHERE(jump[0]);
4787 return cc + 1;
4788 }
4789 else
4790 #endif /* COMPILE_PCRE8 */
4791 {
4792 GETCHARLEN(c, cc, length);
4793 read_char(common);
4794 }
4795 }
4796 else
4797 #endif /* SUPPORT_UTF */
4798 {
4799 read_char(common);
4800 c = *cc;
4801 }
4802
4803 if (type == OP_NOT || !char_has_othercase(common, cc))
4804 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4805 else
4806 {
4807 oc = char_othercase(common, c);
4808 bit = c ^ oc;
4809 if (is_powerof2(bit))
4810 {
4811 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4812 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4813 }
4814 else
4815 {
4816 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4817 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4818 }
4819 }
4820 return cc + length;
4821
4822 case OP_CLASS:
4823 case OP_NCLASS:
4824 detect_partial_match(common, backtracks);
4825 read_char(common);
4826 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4827 return cc + 32 / sizeof(pcre_uchar);
4828
4829 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4830 jump[0] = NULL;
4831 #ifdef COMPILE_PCRE8
4832 /* This check only affects 8 bit mode. In other modes, we
4833 always need to compare the value with 255. */
4834 if (common->utf)
4835 #endif /* COMPILE_PCRE8 */
4836 {
4837 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4838 if (type == OP_CLASS)
4839 {
4840 add_jump(compiler, backtracks, jump[0]);
4841 jump[0] = NULL;
4842 }
4843 }
4844 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4845 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4846 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4847 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4848 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4849 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4850 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4851 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4852 if (jump[0] != NULL)
4853 JUMPHERE(jump[0]);
4854 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4855 return cc + 32 / sizeof(pcre_uchar);
4856
4857 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4858 case OP_XCLASS:
4859 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4860 return cc + GET(cc, 0) - 1;
4861 #endif
4862
4863 case OP_REVERSE:
4864 length = GET(cc, 0);
4865 if (length == 0)
4866 return cc + LINK_SIZE;
4867 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4868 #ifdef SUPPORT_UTF
4869 if (common->utf)
4870 {
4871 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4872 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4873 label = LABEL();
4874 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4875 skip_char_back(common);
4876 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4877 JUMPTO(SLJIT_C_NOT_ZERO, label);
4878 }
4879 else
4880 #endif
4881 {
4882 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4883 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4884 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4885 }
4886 check_start_used_ptr(common);
4887 return cc + LINK_SIZE;
4888 }
4889 SLJIT_ASSERT_STOP();
4890 return cc;
4891 }
4892
4893 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4894 {
4895 /* This function consumes at least one input character. */
4896 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4897 DEFINE_COMPILER;
4898 pcre_uchar *ccbegin = cc;
4899 compare_context context;
4900 int size;
4901
4902 context.length = 0;
4903 do
4904 {
4905 if (cc >= ccend)
4906 break;
4907
4908 if (*cc == OP_CHAR)
4909 {
4910 size = 1;
4911 #ifdef SUPPORT_UTF
4912 if (common->utf && HAS_EXTRALEN(cc[1]))
4913 size += GET_EXTRALEN(cc[1]);
4914 #endif
4915 }
4916 else if (*cc == OP_CHARI)
4917 {
4918 size = 1;
4919 #ifdef SUPPORT_UTF
4920 if (common->utf)
4921 {
4922 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4923 size = 0;
4924 else if (HAS_EXTRALEN(cc[1]))
4925 size += GET_EXTRALEN(cc[1]);
4926 }
4927 else
4928 #endif
4929 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4930 size = 0;
4931 }
4932 else
4933 size = 0;
4934
4935 cc += 1 + size;
4936 context.length += IN_UCHARS(size);
4937 }
4938 while (size > 0 && context.length <= 128);
4939
4940 cc = ccbegin;
4941 if (context.length > 0)
4942 {
4943 /* We have a fixed-length byte sequence. */
4944 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4945 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4946
4947 context.sourcereg = -1;
4948 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4949 context.ucharptr = 0;
4950 #endif
4951 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4952 return cc;
4953 }
4954
4955 /* A non-fixed length character will be checked if length == 0. */
4956 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4957 }
4958
4959 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4960 {
4961 DEFINE_COMPILER;
4962 int offset = GET2(cc, 1) << 1;
4963
4964 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4965 if (!common->jscript_compat)
4966 {
4967 if (backtracks == NULL)
4968 {
4969 /* OVECTOR(1) contains the "string begin - 1" constant. */
4970 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4971 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4972 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4973 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4974 return JUMP(SLJIT_C_NOT_ZERO);
4975 }
4976 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4977 }
4978 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4979 }
4980
4981 /* Forward definitions. */
4982 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4983 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4984
/* Allocates a zero-filled backtrack record of `size` bytes with
sljit_alloc_memory(), stores it in the local variable `backtrack`, records
`ccstart` in it and links it on top of `parent`'s backtrack chain.

Expects `compiler`, `backtrack` and `parent` to be in scope at the point of
use. If the compiler reports an error after the allocation, the enclosing
function returns `error`. Note that `size` is evaluated twice, so it must not
have side effects. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
4997
/* Variant of the backtrack-record push for functions returning void:
allocates a zero-filled record of `size` bytes with sljit_alloc_memory(),
stores it in the local variable `backtrack`, records `ccstart` in it and links
it on top of `parent`'s backtrack chain.

Expects `compiler`, `backtrack` and `parent` to be in scope at the point of
use. If the compiler reports an error after the allocation, the enclosing
function returns immediately. Note that `size` is evaluated twice, so it must
not have side effects. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
5010
/* Reinterprets the local `backtrack` pointer as a pointer to the given
backtrack structure type. A variable named `backtrack` must be in scope at the
point of use. */
5011 #define BACKTRACK_AS(type) ((type *)backtrack)
5012
5013 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5014 {
5015 DEFINE_COMPILER;
5016 int offset = GET2(cc, 1) << 1;
5017 struct sljit_jump *jump = NULL;
5018 struct sljit_jump *partial;
5019 struct sljit_jump *nopartial;
5020
5021 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5022 /* OVECTOR(1) contains the "string begin - 1" constant. */
5023 if (withchecks && !common->jscript_compat)
5024 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5025
5026 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5027 if (common->utf && *cc == OP_REFI)
5028 {
5029 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5030 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5031 if (withchecks)
5032 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5033
5034 /* Needed to save important temporary registers. */
5035 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5036 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5037 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5038 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5039 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5040 if (common->mode == JIT_COMPILE)
5041 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5042 else
5043 {
5044 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5045 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5046 check_partial(common, FALSE);
5047 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5048 JUMPHERE(nopartial);
5049 }
5050 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5051 }
5052 else
5053 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5054 {
5055 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5056 if (withchecks)
5057 jump = JUMP(SLJIT_C_ZERO);
5058
5059 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5060 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5061 if (common->mode == JIT_COMPILE)
5062 add_jump(compiler, backtracks, partial);
5063
5064 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5065 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5066
5067 if (common->mode != JIT_COMPILE)
5068 {
5069 nopartial = JUMP(SLJIT_JUMP);
5070 JUMPHERE(partial);
5071 /* TMP2 -= STR_END - STR_PTR */
5072 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5073 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5074 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5075 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5076 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5077 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5078 JUMPHERE(partial);
5079 check_partial(common, FALSE);
5080 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5081 JUMPHERE(nopartial);
5082 }
5083 }
5084
5085 if (jump != NULL)
5086 {
5087 if (emptyfail)
5088 add_jump(compiler, backtracks, jump);
5089 else
5090 JUMPHERE(jump);
5091 }
5092 return cc + 1 + IMM2_SIZE;
5093 }
5094
/* Generates the matching path for the back reference at cc when it is followed
by a repeat operator (OP_CRSTAR .. OP_CRMINRANGE, read from cc[1 + IMM2_SIZE]).
An iterator_backtrack record is pushed onto parent. Returns the bytecode
position just after the repeat operator, or NULL when PUSH_BACKTRACK detects a
compiler error. */
5095 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5096 {
5097 DEFINE_COMPILER;
5098 backtrack_common *backtrack;
5099 pcre_uchar type;
5100 struct sljit_label *label;
5101 struct sljit_jump *zerolength;
5102 struct sljit_jump *jump = NULL;
5103 pcre_uchar *ccbegin = cc;
/* Repeat limits; max == 0 is treated as "no upper limit" by the code below. */
5104 int min = 0, max = 0;
5105 BOOL minimize;
5106
5107 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5108
5109 type = cc[1 + IMM2_SIZE];
/* The low bit of the repeat opcode selects the minimizing variant here.
NOTE(review): this relies on the opcode numbering, which is not visible in
this part of the file - confirm against the opcode table. */
5110 minimize = (type & 0x1) != 0;
/* Decode the limits and advance cc past the repeat operator. */
5111 switch(type)
5112 {
5113 case OP_CRSTAR:
5114 case OP_CRMINSTAR:
5115 min = 0;
5116 max = 0;
5117 cc += 1 + IMM2_SIZE + 1;
5118 break;
5119 case OP_CRPLUS:
5120 case OP_CRMINPLUS:
5121 min = 1;
5122 max = 0;
5123 cc += 1 + IMM2_SIZE + 1;
5124 break;
5125 case OP_CRQUERY:
5126 case OP_CRMINQUERY:
5127 min = 0;
5128 max = 1;
5129 cc += 1 + IMM2_SIZE + 1;
5130 break;
5131 case OP_CRRANGE:
5132 case OP_CRMINRANGE:
5133 min = GET2(cc, 1 + IMM2_SIZE + 1);
5134 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5135 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5136 break;
5137 default:
5138 SLJIT_ASSERT_STOP();
5139 break;
5140 }
5141
/* Maximizing repeat. */
5142 if (!minimize)
5143 {
5144 if (min == 0)
5145 {
/* Zero iterations are acceptable: STR_PTR and a 0 are pushed, and the
compile_ref_checks() jump is taken without adding anything to the backtrack
list (NULL list argument). */
5146 allocate_stack(common, 2);
5147 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5148 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5149 /* Temporary release of STR_PTR. */
5150 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5151 zerolength = compile_ref_checks(common, ccbegin, NULL);
5152 /* Restore if not zero length. */
5153 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5154 }
5155 else
5156 {
/* At least one iteration is required: only a 0 is pushed, and the checks
report to the backtrack list. */
5157 allocate_stack(common, 1);
5158 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5159 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5160 }
5161
/* The POSSESSIVE0 local serves as the iteration counter when a count has to
be tracked. */
5162 if (min > 1 || max > 1)
5163 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5164
/* Loop head: match one more copy of the referenced text. */
5165 label = LABEL();
5166 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5167
5168 if (min > 1 || max > 1)
5169 {
5170 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5171 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5172 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: loop again without pushing a position. */
5173 if (min > 1)
5174 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5175 if (max > 1)
5176 {
/* Below the maximum: push the current position and loop again. */
5177 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5178 allocate_stack(common, 1);
5179 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5180 JUMPTO(SLJIT_JUMP, label);
5181 JUMPHERE(jump);
5182 }
5183 }
5184
5185 if (max == 0)
5186 {
5187 /* Includes min > 1 case as well. */
5188 allocate_stack(common, 1);
5189 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5190 JUMPTO(SLJIT_JUMP, label);
5191 }
5192
/* Target of the jump returned by compile_ref_checks(). */
5193 JUMPHERE(zerolength);
5194 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5195
5196 decrease_call_count(common);
5197 return cc;
5198 }
5199
/* Minimizing repeat. STACK(0) starts as 0 and later receives STR_PTR;
STACK(1) holds the iteration count. STACK(1) is left uninitialised for
OP_CRMINSTAR, where this function never reads it. */
5200 allocate_stack(common, 2);
5201 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5202 if (type != OP_CRMINSTAR)
5203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5204
5205 if (min == 0)
5206 {
/* Zero iterations are tried first: save STR_PTR and skip the first match. */
5207 zerolength = compile_ref_checks(common, ccbegin, NULL);
5208 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5209 jump = JUMP(SLJIT_JUMP);
5210 }
5211 else
5212 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5213
/* Entry point recorded for matching one more iteration. */
5214 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Give up once the counter in STACK(1) has reached the maximum. */
5215 if (max > 0)
5216 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5217
5218 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5219 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5220
5221 if (min > 1)
5222 {
/* Count the iteration and keep matching until the minimum is reached. */
5223 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5224 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5226 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5227 }
5228 else if (max > 0)
5229 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5230
5231 if (jump != NULL)
5232 JUMPHERE(jump);
5233 JUMPHERE(zerolength);
5234
5235 decrease_call_count(common);
5236 return cc;
5237 }
5238
5239 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5240 {
5241 DEFINE_COMPILER;
5242 backtrack_common *backtrack;
5243 recurse_entry *entry = common->entries;
5244 recurse_entry *prev = NULL;
5245 int start = GET(cc, 1);
5246 pcre_uchar *start_cc;
5247
5248 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5249
5250 /* Inlining simple patterns. */
5251 if (get_framesize(common, common->start + start, TRUE) == no_stack)
5252 {
5253 start_cc = common->start + start;
5254 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5255 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5256 return cc + 1 + LINK_SIZE;
5257 }
5258
5259 while (entry != NULL)
5260 {
5261 if (entry->start == start)
5262 break;
5263 prev = entry;
5264 entry = entry->next;
5265 }
5266
5267 if (entry == NULL)
5268 {
5269 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5270 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5271 return NULL;
5272 entry->next = NULL;
5273 entry->entry = NULL;
5274 entry->calls = NULL;
5275 entry->start = start;
5276
5277 if (prev != NULL)
5278 prev->next = entry;
5279 else
5280 common->entries = entry;
5281 }
5282
5283 if (common->has_set_som && common->mark_ptr != 0)
5284 {
5285 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5286 allocate_stack(common, 2);
5287 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5288 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5289 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5290 }
5291 else if (common->has_set_som || common->mark_ptr != 0)
5292 {
5293 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5294 allocate_stack(common, 1);
5295 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5296 }
5297
5298 if (entry->entry == NULL)
5299 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5300 else
5301 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5302 /* Leave if the match is failed. */
5303 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5304 return cc + 1 + LINK_SIZE;
5305 }
5306
5307 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5308 {
5309 const pcre_uchar *begin = arguments->begin;
5310 int *offset_vector = arguments->offsets;
5311 int offset_count = arguments->offset_count;
5312 int i;
5313
5314 if (PUBL(callout) == NULL)
5315 return 0;
5316
5317 callout_block->version = 2;
5318 callout_block->callout_data = arguments->callout_data;
5319
5320 /* Offsets in subject. */
5321 callout_block->subject_length = arguments->end - arguments->begin;
5322 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5323 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5324 #if defined COMPILE_PCRE8
5325 callout_block->subject = (PCRE_SPTR)begin;
5326 #elif defined COMPILE_PCRE16
5327 callout_block->subject = (PCRE_SPTR16)begin;
5328 #elif defined COMPILE_PCRE32
5329 callout_block->subject = (PCRE_SPTR32)begin;
5330 #endif
5331
5332 /* Convert and copy the JIT offset vector to the offset_vector array. */
5333 callout_block->capture_top = 0;
5334 callout_block->offset_vector = offset_vector;
5335 for (i = 2; i < offset_count; i += 2)
5336 {
5337 offset_vector[i] = jit_ovector[i] - begin;
5338 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5339 if (jit_ovector[i] >= begin)
5340 callout_block->capture_top = i;
5341 }
5342
5343 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5344 if (offset_count > 0)
5345 offset_vector[0] = -1;
5346 if (offset_count > 1)
5347 offset_vector[1] = -1;
5348 return (*PUBL(callout))(callout_block);
5349 }
5350
5351 /* Size of the callout block, rounded up to a multiple of 8 bytes. */
5352 #define CALLOUT_ARG_SIZE \
5353 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5354
/* Offset of a callout block field relative to STACK_TOP after the block has
been allocated; the result is negative, as the block lies CALLOUT_ARG_SIZE
bytes below STACK_TOP. */
5355 #define CALLOUT_ARG_OFFSET(arg) \
5356 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5357
/* Generates the matching path for the callout opcode at cc: builds a callout
block on the JIT stack, calls do_callout() and dispatches on its result.
A positive result jumps to the backtrack list, a negative result jumps to the
forced quit exit, and zero falls through. Returns the bytecode position after
the callout (cc + 2 + 2 * LINK_SIZE), or NULL when PUSH_BACKTRACK detects a
compiler error. */
5358 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5359 {
5360 DEFINE_COMPILER;
5361 backtrack_common *backtrack;
5362
5363 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5364
/* Reserve room for the callout block on the stack. */
5365 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5366
5367 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5368 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5369 SLJIT_ASSERT(common->capture_last_ptr != 0);
5370 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5371 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5372
5373 /* These pointer sized fields temporarily store internal variables:
STR_PTR goes into offset_vector and OVECTOR(0) into subject. do_callout()
converts them into current_position and start_match. */
5374 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5375 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5377
/* The mark field is taken from jit_arguments when the pattern uses marks,
and is set to 0 otherwise. */
5378 if (common->mark_ptr != 0)
5379 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5380 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5381 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5382 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5383
5384 /* Needed to save important temporary registers. */
5385 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* Second argument: address of the callout block. Third argument: address of
the JIT offset vector. */
5386 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5387 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5388 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5389 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5390 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5391 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5392
5393 /* Check return value. */
5394 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5395 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
/* A negative result leaves through the forced quit exit; the jump is queued
when that label has not been emitted yet. */
5396 if (common->forced_quit_label == NULL)
5397 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5398 else
5399 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5400 return cc + 2 + 2 * LINK_SIZE;
5401 }
5402
5403 #undef CALLOUT_ARG_SIZE
5404 #undef CALLOUT_ARG_OFFSET
5405
/* Generates the matching path of an assertion (OP_ASSERT .. OP_ASSERTBACK_NOT)
starting at cc, optionally preceded by OP_BRAZERO or OP_BRAMINZERO.

 backtrack    the assert_backtrack record of this assertion; its framesize,
              private_data_ptr and (for the zero variants) matchingpath fields
              are used here
 conditional  when TRUE, assertion failure jumps are collected in
              backtrack->condfailed instead of backtrack->common.topbacktracks

Every alternative is compiled with compile_matchingpath() and
compile_backtrackingpath() using private accept/quit lists; the previous
values of these common fields are restored before returning.

Returns the bytecode position after the closing ket (cc + 1 + LINK_SIZE), or
NULL on a compiler error. */
5406 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5407 {
5408 DEFINE_COMPILER;
5409 int framesize;
5410 int extrasize;
5411 BOOL needs_control_head = common->control_head_ptr != 0;
5412 int private_data_ptr;
5413 backtrack_common altbacktrack;
5414 pcre_uchar *ccbegin;
5415 pcre_uchar opcode;
5416 pcre_uchar bra = OP_BRA;
5417 jump_list *tmp = NULL;
5418 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5419 jump_list **found;
5420 /* Saving previous accept variables. */
5421 struct sljit_label *save_quit_label = common->quit_label;
5422 struct sljit_label *save_accept_label = common->accept_label;
5423 jump_list *save_quit = common->quit;
5424 jump_list *save_accept = common->accept;
5425 BOOL save_local_exit = common->local_exit;
5426 struct sljit_jump *jump;
5427 struct sljit_jump *brajump = NULL;
5428
/* Remember a leading OP_BRAZERO / OP_BRAMINZERO and step over it. */
5429 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5430 {
5431 SLJIT_ASSERT(!conditional);
5432 bra = *cc;
5433 cc++;
5434 }
5435 private_data_ptr = PRIVATE_DATA(cc);
5436 SLJIT_ASSERT(private_data_ptr != 0);
5437 framesize = get_framesize(common, cc, FALSE);
5438 backtrack->framesize = framesize;
5439 backtrack->private_data_ptr = private_data_ptr;
5440 opcode = *cc;
5441 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Positive assertions collect the "alternative matched" jumps in tmp; for
negative assertions a matching alternative jumps straight to the failure
target. */
5442 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin..cc delimits the first alternative. */
5443 ccbegin = cc;
5444 cc += GET(cc, 1);
5445
5446 if (bra == OP_BRAMINZERO)
5447 {
5448 /* This is a braminzero backtrack path. */
5449 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5450 free_stack(common, 1);
5451 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5452 }
5453
/* Set up the stack. extrasize counts the extra words pushed: the saved
STR_PTR, the control head when needed and, in the frame case, the previous
value of the private data slot. */
5454 if (framesize < 0)
5455 {
5456 extrasize = needs_control_head ? 2 : 1;
5457 if (framesize != no_stack)
5458 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5459 allocate_stack(common, extrasize);
5460 if (needs_control_head)
5461 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5463 if (needs_control_head)
5464 {
5465 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5466 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5467 }
5468 }
5469 else
5470 {
5471 extrasize = needs_control_head ? 3 : 2;
5472 allocate_stack(common, framesize + extrasize);
5473 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5474 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5475 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5476 if (needs_control_head)
5477 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5478 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5479 if (needs_control_head)
5480 {
5481 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5483 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5484 }
5485 else
5486 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5487 init_frame(common, ccbegin, framesize + extrasize - 1, extrasize, FALSE);
5488 }
5489
/* The assertion body gets its own quit and accept lists. */
5490 memset(&altbacktrack, 0, sizeof(backtrack_common));
5491 common->local_exit = TRUE;
5492 common->quit_label = NULL;
5493 common->quit = NULL;
/* One iteration per alternative. */
5494 while (1)
5495 {
5496 common->accept_label = NULL;
5497 common->accept = NULL;
5498 altbacktrack.top = NULL;
5499 altbacktrack.topbacktracks = NULL;
5500
/* Later alternatives restart from the subject position saved at STACK(0). */
5501 if (*ccbegin == OP_ALT)
5502 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5503
5504 altbacktrack.cc = ccbegin;
5505 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5506 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5507 {
/* Restore the saved state before bailing out. */
5508 common->local_exit = save_local_exit;
5509 common->quit_label = save_quit_label;
5510 common->accept_label = save_accept_label;
5511 common->quit = save_quit;
5512 common->accept = save_accept;
5513 return NULL;
5514 }
/* Accept jumps raised inside this alternative land here. */
5515 common->accept_label = LABEL();
5516 if (common->accept != NULL)
5517 set_jumps(common->accept, common->accept_label);
5518
5519 /* Reset stack. */
5520 if (framesize < 0)
5521 {
5522 if (framesize != no_stack)
5523 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5524 else
5525 free_stack(common, extrasize);
5526 if (needs_control_head)
5527 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5528 }
5529 else
5530 {
5531 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5532 {
5533 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5534 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5535 if (needs_control_head)
5536 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5537 }
5538 else
5539 {
5540 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5541 if (needs_control_head)
5542 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5543 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5544 }
5545 }
5546
/* For a negative assertion a matching alternative means failure: restore
what the failure target expects before jumping there. */
5547 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5548 {
5549 /* We know that STR_PTR was stored on the top of the stack. */
5550 if (conditional)
5551 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5552 else if (bra == OP_BRAZERO)
5553 {
5554 if (framesize < 0)
5555 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5556 else
5557 {
5558 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5559 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5560 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5561 }
5562 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5563 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5564 }
5565 else if (framesize >= 0)
5566 {
5567 /* For OP_BRA and OP_BRAMINZERO. */
5568 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5569 }
5570 }
/* The alternative matched: leave through the `found` list. */
5571 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5572
/* Emit the backtracking code of this alternative; its failure jumps continue
at the code emitted after it. */
5573 compile_backtrackingpath(common, altbacktrack.top);
5574 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5575 {
/* Restore the saved state before bailing out. */
5576 common->local_exit = save_local_exit;
5577 common->quit_label = save_quit_label;
5578 common->accept_label = save_accept_label;
5579 common->quit = save_quit;
5580 common->accept = save_accept;
5581 return NULL;
5582 }
5583 set_jumps(altbacktrack.topbacktracks, LABEL());
5584
5585 if (*cc != OP_ALT)
5586 break;
5587
/* Advance to the next alternative. */
5588 ccbegin = cc;
5589 cc += GET(cc, 1);
5590 }
5591
5592 /* None of them matched. */
5593 if (common->quit != NULL)
5594 {
/* Quit jumps raised inside the assertion land here; the stack is brought back
to the layout the code below expects. */
5595 jump = JUMP(SLJIT_JUMP);
5596 set_jumps(common->quit, LABEL());
5597 SLJIT_ASSERT(framesize != no_stack);
5598 if (framesize < 0)
5599 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5600 else
5601 {
5602 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5603 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5604 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5605 }
5606 JUMPHERE(jump);
5607 }
5608
/* Restore the control head saved at STACK(1). */
5609 if (needs_control_head)
5610 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5611
5612 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5613 {
5614 /* Assert is failed. */
5615 if (conditional || bra == OP_BRAZERO)
5616 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5617
5618 if (framesize < 0)
5619 {
5620 /* The topmost item should be 0. */
5621 if (bra == OP_BRAZERO)
5622 {
5623 if (extrasize == 2)
5624 free_stack(common, 1);
5625 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5626 }
5627 else
5628 free_stack(common, extrasize);
5629 }
5630 else
5631 {
5632 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5633 /* The topmost item should be 0. */
5634 if (bra == OP_BRAZERO)
5635 {
5636 free_stack(common, framesize + extrasize - 1);
5637 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5638 }
5639 else
5640 free_stack(common, framesize + extrasize);
5641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5642 }
/* With OP_BRAZERO the failure jump is bound to the matching path below;
otherwise it goes to the failure target. */
5643 jump = JUMP(SLJIT_JUMP);
5644 if (bra != OP_BRAZERO)
5645 add_jump(compiler, target, jump);
5646
5647 /* Assert is successful. */
5648 set_jumps(tmp, LABEL());
5649 if (framesize < 0)
5650 {
5651 /* We know that STR_PTR was stored on the top of the stack. */
5652 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5653 /* Keep the STR_PTR on the top of the stack. */
5654 if (bra == OP_BRAZERO)
5655 {
5656 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5657 if (extrasize == 2)
5658 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5659 }
5660 else if (bra == OP_BRAMINZERO)
5661 {
5662 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5663 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5664 }
5665 }
5666 else
5667 {
5668 if (bra == OP_BRA)
5669 {
5670 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5671 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5672 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5673 }
5674 else
5675 {
5676 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5677 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5678 if (extrasize == 2)
5679 {
5680 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5681 if (bra == OP_BRAMINZERO)
5682 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5683 }
5684 else
5685 {
5686 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5687 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5688 }
5689 }
5690 }
5691
5692 if (bra == OP_BRAZERO)
5693 {
/* Both the failed and the successful case continue on the matching path. */
5694 backtrack->matchingpath = LABEL();
5695 SET_LABEL(jump, backtrack->matchingpath);
5696 }
5697 else if (bra == OP_BRAMINZERO)
5698 {
/* Success continues at the recorded matching path; the code below is reached
through brajump (STR_PTR == 0 test at the top) and through the failure
jumps. */
5699 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5700 JUMPHERE(brajump);
5701 if (framesize >= 0)
5702 {
5703 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5704 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5705 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5706 }
5707 set_jumps(backtrack->common.topbacktracks, LABEL());
5708 }
5709 }
5710 else
5711 {
5712 /* AssertNot is successful. */
5713 if (framesize < 0)
5714 {
5715 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5716 if (bra != OP_BRA)
5717 {
5718 if (extrasize == 2)
5719 free_stack(common, 1);
5720 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5721 }
5722 else
5723 free_stack(common, extrasize);
5724 }
5725 else
5726 {
5727 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5728 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5729 /* The topmost item should be 0. */
5730 if (bra != OP_BRA)
5731 {
5732 free_stack(common, framesize + extrasize - 1);
5733 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5734 }
5735 else
5736 free_stack(common, framesize + extrasize);
5737 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5738 }
5739
5740 if (bra == OP_BRAZERO)
5741 backtrack->matchingpath = LABEL();
5742 else if (bra == OP_BRAMINZERO)
5743 {
5744 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5745 JUMPHERE(brajump);
5746 }
5747
/* With a zero variant the failure jumps are bound here and the list is
emptied. */
5748 if (bra != OP_BRA)
5749 {
5750 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5751 set_jumps(backtrack->common.topbacktracks, LABEL());
5752 backtrack->common.topbacktracks = NULL;
5753 }
5754 }
5755
/* Restore the accept/quit state saved on entry. */
5756 common->local_exit = save_local_exit;
5757 common->quit_label = save_quit_label;
5758 common->accept_label = save_accept_label;
5759 common->quit = save_quit;
5760 common->accept = save_accept;
5761 return cc + 1 + LINK_SIZE;
5762 }
5763
5764 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5765 {
5766 int condition = FALSE;
5767 pcre_uchar *slotA = name_table;
5768 pcre_uchar *slotB;
5769 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5770 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5771 sljit_sw no_capture;
5772 int i;
5773
5774 locals += refno & 0xff;
5775 refno >>= 8;
5776 no_capture = locals[1];
5777
5778 for (i = 0; i < name_count; i++)
5779 {
5780 if (GET2(slotA, 0) == refno) break;
5781 slotA += name_entry_size;
5782 }
5783
5784 if (i < name_count)
5785 {
5786 /* Found a name for the number - there can be only one; duplicate names
5787 for different numbers are allowed, but not vice versa. First scan down
5788 for duplicates. */
5789
5790 slotB = slotA;
5791 while (slotB > name_table)
5792 {
5793 slotB -= name_entry_size;
5794 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5795 {
5796 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5797 if (condition) break;
5798 }
5799 else break;
5800 }
5801
5802 /* Scan up for duplicates */
5803 if (!condition)
5804 {
5805 slotB = slotA;
5806 for (i++; i < name_count; i++)
5807 {
5808 slotB += name_entry_size;
5809 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5810 {
5811 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5812 if (condition) break;
5813 }
5814 else break;
5815 }
5816 }
5817 }
5818 return condition;
5819 }
5820
5821 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5822 {
5823 int condition = FALSE;
5824 pcre_uchar *slotA = name_table;
5825 pcre_uchar *slotB;
5826 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5827 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5828 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5829 sljit_uw i;
5830
5831 for (i = 0; i < name_count; i++)
5832 {
5833 if (GET2(slotA, 0) == recno) break;
5834 slotA += name_entry_size;
5835 }
5836
5837 if (i < name_count)
5838 {
5839 /* Found a name for the number - there can be only one; duplicate
5840 names for different numbers are allowed, but not vice versa. First
5841 scan down for duplicates. */
5842
5843 slotB = slotA;
5844 while (slotB > name_table)
5845 {
5846 slotB -= name_entry_size;
5847 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5848 {
5849 condition = GET2(slotB, 0) == group_num;
5850 if (condition) break;
5851 }
5852 else break;
5853 }
5854
5855 /* Scan up for duplicates */
5856 if (!condition)
5857 {
5858 slotB = slotA;
5859 for (i++; i < name_count; i++)
5860 {
5861 slotB += name_entry_size;
5862 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5863 {
5864 condition = GET2(slotB, 0) == group_num;
5865 if (condition) break;
5866 }
5867 else break;
5868 }
5869 }
5870 }
5871 return condition;
5872 }
5873
5874 /*
5875 Handling bracketed expressions is probably the most complex part.
5876
5877 Stack layout naming characters:
5878 S - Push the current STR_PTR
5879 0 - Push a 0 (NULL)
5880 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5881 before the next alternative. Not pushed if there are no alternatives.
5882 M - Any values pushed by the current alternative. Can be empty, or anything.
5883 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5884 L - Push the previous local (pointed by localptr) to the stack
5885 () - optional values stored on the stack
5886 ()* - optional, can be stored multiple times
5887
5888 The following list shows the regular expression templates, their PCRE byte codes
5889 and stack layout supported by pcre-sljit.
5890
5891 (?:) OP_BRA | OP_KET A M
5892 () OP_CBRA | OP_KET C M
5893 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5894 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5895 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5896 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5897 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5898 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5899 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5900 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5901 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5902 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5903 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5904 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5905 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5906 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5907 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5908 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5909 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5910 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5911 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5912 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5913
5914
5915 Stack layout naming characters:
5916 A - Push the alternative index (starting from 0) on the stack.
5917 Not pushed if there are no alternatives.
5918 M - Any values pushed by the current alternative. Can be empty, or anything.
5919
5920 The next list shows the possible content of a bracket:
5921 (|) OP_*BRA | OP_ALT ... M A
5922 (?()|) OP_*COND | OP_ALT M A
5923 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5924 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5925 Or nothing, if trace is unnecessary
5926 */
5927
/* Emits the matching-path code for one bracketed group and returns the
position in the pattern after it.

The group may be prefixed by OP_BRAZERO or OP_BRAMINZERO. The code below
has dedicated handling for OP_CBRA, OP_SCBRA, OP_ONCE, OP_ONCE_NC, OP_SBRA,
OP_COND and OP_SCOND; the closing opcode must be OP_KET, OP_KETRMAX or
OP_KETRMIN (asserted).

Arguments:
  common  compiler state
  cc      first opcode of the group (the BRAZERO / BRAMINZERO prefix if
          there is one)
  parent  backtrack node of the enclosing construct; when the prefix is
          OP_BRAMINZERO it is also accessed as a braminzero_backtrack

Only one alternative is compiled here: the first one, or, for a recursion
condition that is known to be false at compile time, the branch after the
first OP_ALT (an empty range when there is none). For conditional groups
the condition test is emitted in front of it. The remaining OP_ALT branches
are only skipped over at the end of this function.

Returns:  the address after the closing ket; bracketend(cc) for a
          condition starting with OP_DEF (nothing is emitted for it); NULL
          when the sljit compiler reports an error.
          NOTE(review): PUSH_BACKTRACK presumably also returns its third
          argument (NULL) on allocation failure - confirm against the macro.
*/
5928 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5929 {
5930 DEFINE_COMPILER;
5931 backtrack_common *backtrack;
5932 pcre_uchar opcode;
5933 int private_data_ptr = 0;
5934 int offset = 0;
5935 int stacksize;
5936 pcre_uchar *ccbegin;
5937 pcre_uchar *matchingpath;
5938 pcre_uchar bra = OP_BRA;
5939 pcre_uchar ket;
5940 assert_backtrack *assert;
5941 BOOL has_alternatives;
5942 struct sljit_jump *jump;
5943 struct sljit_jump *skip;
5944 struct sljit_label *rmaxlabel = NULL;
5945 struct sljit_jump *braminzerojump = NULL;
5946
5947 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5948
5949 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5950 {
5951 bra = *cc;
5952 cc++;
/* NOTE(review): this assignment looks redundant; opcode is assigned
unconditionally right after this block. */
5953 opcode = *cc;
5954 }
5955
5956 opcode = *cc;
5957 ccbegin = cc;
5958 matchingpath = ccbegin + 1 + LINK_SIZE;
5959
5960 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5961 {
5962 /* Drop this bracket_backtrack. */
5963 parent->top = backtrack->prev;
5964 return bracketend(cc);
5965 }
5966
5967 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5968 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5969 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Follow the link of the opening bracket; the opcode found there is then
tested for OP_ALT and for the repeating kets. */
5970 cc += GET(cc, 1);
5971
5972 has_alternatives = *cc == OP_ALT;
5973 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5974 {
5975 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5976 if (*matchingpath == OP_NRREF)
5977 {
/* stacksize is only a scratch variable here: it holds the reference
number stored after OP_NRREF. */
5978 stacksize = GET2(matchingpath, 1);
5979 if (common->currententry == NULL || stacksize == RREF_ANY)
5980 has_alternatives = FALSE;
5981 else if (common->currententry->start == 0)
5982 has_alternatives = stacksize != 0;
5983 else
5984 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5985 }
5986 }
5987
/* From here on a plain OP_COND closed by a repeating ket is handled as
OP_SCOND, and OP_ONCE_NC is handled as OP_ONCE. */
5988 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5989 opcode = OP_SCOND;
5990 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5991 opcode = OP_ONCE;
5992
5993 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5994 {
5995 /* Capturing brackets has a pre-allocated space. */
5996 offset = GET2(ccbegin, 1 + LINK_SIZE);
5997 if (common->optimized_cbracket[offset] == 0)
5998 {
5999 private_data_ptr = OVECTOR_PRIV(offset);
6000 offset <<= 1;
6001 }
6002 else
6003 {
6004 offset <<= 1;
6005 private_data_ptr = OVECTOR(offset);
6006 }
6007 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6008 matchingpath += IMM2_SIZE;
6009 }
6010 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6011 {
6012 /* Other brackets simply allocate the next entry. */
6013 private_data_ptr = PRIVATE_DATA(ccbegin);
6014 SLJIT_ASSERT(private_data_ptr != 0);
6015 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6016 if (opcode == OP_ONCE)
6017 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
6018 }
6019
6020 /* Instructions before the first alternative. */
/* Count the slots first (one for the 0 marker of a repeated group, one
for the STR_PTR of OP_BRAZERO), allocate them in one step, then fill them
in the same order. */
6021 stacksize = 0;
6022 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6023 stacksize++;
6024 if (bra == OP_BRAZERO)
6025 stacksize++;
6026
6027 if (stacksize > 0)
6028 allocate_stack(common, stacksize);
6029
6030 stacksize = 0;
6031 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6032 {
6033 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6034 stacksize++;
6035 }
6036
6037 if (bra == OP_BRAZERO)
6038 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6039
6040 if (bra == OP_BRAMINZERO)
6041 {
6042 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6043 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6044 if (ket != OP_KETRMIN)
6045 {
6046 free_stack(common, 1);
6047 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6048 }
6049 else
6050 {
6051 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6052 {
6053 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6054 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6055 /* Nothing stored during the first run. */
6056 skip = JUMP(SLJIT_JUMP);
6057 JUMPHERE(jump);
6058 /* Checking zero-length iteration. */
6059 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6060 {
6061 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6062 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6063 }
6064 else
6065 {
6066 /* Except when the whole stack frame must be saved. */
6067 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6068 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6069 }
6070 JUMPHERE(skip);
6071 }
6072 else
6073 {
6074 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6075 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6076 JUMPHERE(jump);
6077 }
6078 }
6079 }
6080
6081 if (ket == OP_KETRMIN)
6082 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6083
6084 if (ket == OP_KETRMAX)
6085 {
6086 rmaxlabel = LABEL();
6087 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
6088 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
6089 }
6090
6091 /* Handling capturing brackets and alternatives. */
6092 if (opcode == OP_ONCE)
6093 {
6094 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6095 {
6096 /* Neither capturing brackets nor recursions are found in the block. */
6097 if (ket == OP_KETRMIN)
6098 {
6099 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6100 allocate_stack(common, 2);
6101 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6102 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6103 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6104 }
6105 else if (ket == OP_KETRMAX || has_alternatives)
6106 {
6107 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6108 allocate_stack(common, 1);
6109 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6110 }
6111 else
6112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6113 }
6114 else
6115 {
6116 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
6117 {
6118 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
6119 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6120 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
6121 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6123 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6124 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
6125 }
6126 else
6127 {
6128 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
6129 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6130 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
6131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6132 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6133 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
6134 }
6135 }
6136 }
6137 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6138 {
6139 /* Saving the previous values. */
6140 if (common->optimized_cbracket[offset >> 1] != 0)
6141 {
6142 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6143 allocate_stack(common, 2);
6144 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6145 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6146 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6147 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6148 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6149 }
6150 else
6151 {
6152 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6153 allocate_stack(common, 1);
6154 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6155 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6156 }
6157 }
6158 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6159 {
6160 /* Saving the previous value. */
6161 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6162 allocate_stack(common, 1);
6163 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6164 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6165 }
6166 else if (has_alternatives)
6167 {
6168 /* Pushing the starting string pointer. */
6169 allocate_stack(common, 1);
6170 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6171 }
6172
6173 /* Generating code for the first alternative. */
6174 if (opcode == OP_COND || opcode == OP_SCOND)
6175 {
6176 if (*matchingpath == OP_CREF)
6177 {
6178 SLJIT_ASSERT(has_alternatives);
/* The condition fails when the referenced OVECTOR slot equals OVECTOR(1). */
6179 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6180 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6181 matchingpath += 1 + IMM2_SIZE;
6182 }
6183 else if (*matchingpath == OP_NCREF)
6184 {
6185 SLJIT_ASSERT(has_alternatives);
6186 stacksize = GET2(matchingpath, 1);
/* The helper call below is skipped when the referenced OVECTOR slot
already differs from OVECTOR(1). */
6187 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6188
/* STACK_TOP is saved in POSSESSIVE1 before the call and reloaded after
it. The name count and entry size are handed over through LOCALS0 and
LOCALS1; the reference number and the ovector start (in sljit_sw units)
are packed into the single value loaded into SLJIT_SCRATCH_REG1. */
6189 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6191 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6192 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6193 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6194 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6195 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6196 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
/* SLJIT_SCRATCH_REG1 == 0 after the call is treated as a failed condition. */
6197 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6198
6199 JUMPHERE(jump);
6200 matchingpath += 1 + IMM2_SIZE;
6201 }
6202 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6203 {
6204 /* Never has other case. */
6205 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6206
/* stacksize is reused as a boolean below: non-zero when the recursion
condition is already known to hold at compile time. */
6207 stacksize = GET2(matchingpath, 1);
6208 if (common->currententry == NULL)
6209 stacksize = 0;
6210 else if (stacksize == RREF_ANY)
6211 stacksize = 1;
6212 else if (common->currententry->start == 0)
6213 stacksize = stacksize == 0;
6214 else
6215 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6216
6217 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6218 {
6219 SLJIT_ASSERT(!has_alternatives);
6220 if (stacksize != 0)
6221 matchingpath += 1 + IMM2_SIZE;
6222 else
6223 {
/* Condition known to be false: compile the branch after OP_ALT instead,
or an empty range when there is no such branch. */
6224 if (*cc == OP_ALT)
6225 {
6226 matchingpath = cc + 1 + LINK_SIZE;
6227 cc += GET(cc, 1);
6228 }
6229 else
6230 matchingpath = cc;
6231 }
6232 }
6233 else
6234 {
6235 SLJIT_ASSERT(has_alternatives);
6236
/* Run-time check through do_searchgroups: the group number of the current
entry is passed in POSSESSIVE0, the rest as for do_searchovector above
(except that SLJIT_SCRATCH_REG1 carries only the reference number). */
6237 stacksize = GET2(matchingpath, 1);
6238 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6239 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6240 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6241 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6242 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6243 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6244 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6245 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6246 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6247 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6248 matchingpath += 1 + IMM2_SIZE;
6249 }
6250 }
6251 else
6252 {
6253 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6254 /* Similar code as PUSH_BACKTRACK macro. */
6255 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6256 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6257 return NULL;
6258 memset(assert, 0, sizeof(assert_backtrack));
6259 assert->common.cc = matchingpath;
6260 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6261 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6262 }
6263 }
6264
/* Compile the selected alternative: the opcodes from matchingpath up to cc. */
6265 compile_matchingpath(common, matchingpath, cc, backtrack);
6266 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6267 return NULL;
6268
6269 if (opcode == OP_ONCE)
6270 {
6271 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6272 {
6273 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6274 /* TMP2 which is set here used by OP_KETRMAX below. */
6275 if (ket == OP_KETRMAX)
6276 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6277 else if (ket == OP_KETRMIN)
6278 {
6279 /* Move the STR_PTR to the private_data_ptr. */
6280 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6281 }
6282 }
6283 else
6284 {
6285 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
6286 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw));
6287 if (ket == OP_KETRMAX)
6288 {
6289 /* TMP2 which is set here used by OP_KETRMAX below. */
6290 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6291 }
6292 }
6293 }
6294
/* Count the stack slots needed once the alternative has matched, allocate
them in one step, then fill them in the same order as they were counted. */
6295 stacksize = 0;
6296 if (ket != OP_KET || bra != OP_BRA)
6297 stacksize++;
6298 if (offset != 0)
6299 {
6300 if (common->capture_last_ptr != 0)
6301 stacksize++;
6302 if (common->optimized_cbracket[offset >> 1] == 0)
6303 stacksize += 2;
6304 }
6305 if (has_alternatives && opcode != OP_ONCE)
6306 stacksize++;
6307
6308 if (stacksize > 0)
6309 allocate_stack(common, stacksize);
6310
6311 stacksize = 0;
6312 if (ket != OP_KET || bra != OP_BRA)
6313 {
6314 if (ket != OP_KET)
6315 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6316 else
6317 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6318 stacksize++;
6319 }
6320
6321 if (offset != 0)
6322 {
6323 if (common->capture_last_ptr != 0)
6324 {
6325 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6326 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6327 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0);
6328 stacksize++;
6329 }
6330 if (common->optimized_cbracket[offset >> 1] == 0)
6331 {
/* Save the old OVECTOR pair on the stack, then store the new one: the
start comes from private_data_ptr, the end is the current STR_PTR. */
6332 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6333 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6334 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6335 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6336 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6337 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6339 stacksize += 2;
6340 }
6341 }
6342
6343 if (has_alternatives)
6344 {
6345 if (opcode != OP_ONCE)
6346 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6347 if (ket != OP_KETRMAX)
6348 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6349 }
6350
6351 /* Must be after the matchingpath label. */
6352 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6353 {
6354 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6355 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6356 }
6357
6358 if (ket == OP_KETRMAX)
6359 {
6360 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6361 {
6362 if (has_alternatives)
6363 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6364 /* Checking zero-length iteration. */
6365 if (opcode != OP_ONCE)
6366 {
6367 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6368 /* Drop STR_PTR for greedy plus quantifier. */
6369 if (bra != OP_BRAZERO)
6370 free_stack(common, 1);
6371 }
6372 else
6373 /* TMP2 must contain the starting STR_PTR. */
6374 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6375 }
6376 else
6377 JUMPTO(SLJIT_JUMP, rmaxlabel);
6378 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6379 }
6380
6381 if (bra == OP_BRAZERO)
6382 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6383
6384 if (bra == OP_BRAMINZERO)
6385 {
6386 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6387 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6388 if (braminzerojump != NULL)
6389 {
6390 JUMPHERE(braminzerojump);
6391 /* We need to release the end pointer to perform the
6392 backtrack for the zero-length iteration. When
6393 framesize is < 0, OP_ONCE will do the release itself. */
6394 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6395 {
6396 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6397 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6398 }
6399 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6400 free_stack(common, 1);
6401 }
6402 /* Continue to the normal backtrack. */
6403 }
6404
6405 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6406 decrease_call_count(common);
6407
6408 /* Skip the other alternatives. */
6409 while (*cc == OP_ALT)
6410 cc += GET(cc, 1);
6411 cc += 1 + LINK_SIZE;
6412 return cc;
6413 }
6414
/* Emits the code for a group opened by OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS or
OP_SCBRAPOS (any other opcode hits SLJIT_ASSERT_STOP), optionally preceded
by OP_BRAPOSZERO, and closed by OP_KETRPOS.

Unlike compile_bracket_matchingpath, every alternative is processed here:
for each one the matching path is emitted, followed by a jump back to the
common loop label, followed by that alternative's backtracking path.

Arguments:
  common  compiler state
  cc      first opcode of the group (OP_BRAPOSZERO if present)
  parent  backtrack node of the enclosing construct; not referenced
          directly in this function (NOTE(review): presumably consumed by
          the PUSH_BACKTRACK macro - confirm)

Without OP_BRAPOSZERO an extra stack slot is used as a flag: it is set to 1
before the loop, cleared after every completed iteration, and a jump to the
backtrack list is taken at the end if it is still non-zero.

Returns:  the address after OP_KETRPOS, or NULL when the sljit compiler
          reports an error.
*/
6415 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6416 {
6417 DEFINE_COMPILER;
6418 backtrack_common *backtrack;
6419 pcre_uchar opcode;
6420 int private_data_ptr;
6421 int cbraprivptr = 0;
6422 int framesize;
6423 int stacksize;
6424 int offset = 0;
6425 BOOL zero = FALSE;
6426 pcre_uchar *ccbegin = NULL;
6427 int stack;
6428 struct sljit_label *loop = NULL;
6429 struct jump_list *emptymatch = NULL;
6430
6431 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6432 if (*cc == OP_BRAPOSZERO)
6433 {
6434 zero = TRUE;
6435 cc++;
6436 }
6437
6438 opcode = *cc;
6439 private_data_ptr = PRIVATE_DATA(cc);
6440 SLJIT_ASSERT(private_data_ptr != 0);
6441 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6442 switch(opcode)
6443 {
6444 case OP_BRAPOS:
6445 case OP_SBRAPOS:
6446 ccbegin = cc + 1 + LINK_SIZE;
6447 break;
6448
6449 case OP_CBRAPOS:
6450 case OP_SCBRAPOS:
6451 offset = GET2(cc, 1 + LINK_SIZE);
6452 /* This case cannot be optimized in the same way as
6453 normal capturing brackets. */
6454 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6455 cbraprivptr = OVECTOR_PRIV(offset);
6456 offset <<= 1;
6457 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6458 break;
6459
6460 default:
6461 SLJIT_ASSERT_STOP();
6462 break;
6463 }
6464
6465 framesize = get_framesize(common, cc, FALSE);
6466 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6467 if (framesize < 0)
6468 {
/* No frame is built in this arm. Slots: the saved OVECTOR pair (plus the
capture_last value when it is tracked) for a capturing group, otherwise
the STR_PTR; plus the flag slot when there is no OP_BRAPOSZERO. */
6469 if (offset != 0)
6470 {
6471 stacksize = 2;
6472 if (common->capture_last_ptr != 0)
6473 stacksize++;
6474 }
6475 else
6476 stacksize = 1;
6477
6478 if (!zero)
6479 stacksize++;
6480
6481 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6482 allocate_stack(common, stacksize);
6483 if (framesize == no_frame)
6484 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6485
6486 if (offset != 0)
6487 {
6488 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6489 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6490 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6491 if (common->capture_last_ptr != 0)
6492 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6493 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6494 if (common->capture_last_ptr != 0)
6495 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6496 }
6497 else
6498 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6499
/* Flag slot: 1 until an iteration has completed. */
6500 if (!zero)
6501 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6502 }
6503 else
6504 {
/* A frame of framesize slots is needed. Additional slots: the previous
private_data_ptr value, the flag (no OP_BRAPOSZERO) and, for the
non-capturing opcodes, the STR_PTR. */
6505 stacksize = framesize + 1;
6506 if (!zero)
6507 stacksize++;
6508 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6509 stacksize++;
6510 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6511
6512 allocate_stack(common, stacksize);
6513 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6514 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6515 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6516
6517 stack = 0;
6518 if (!zero)
6519 {
6520 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6521 stack++;
6522 }
6523 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6524 {
6525 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6526 stack++;
6527 }
6528 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6529 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6530 }
6531
6532 if (offset != 0)
6533 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6534
6535 loop = LABEL();
/* One pass per alternative; cc is moved to the end of the current
alternative and ccbegin to its start. */
6536 while (*cc != OP_KETRPOS)
6537 {
6538 backtrack->top = NULL;
6539 backtrack->topbacktracks = NULL;
6540 cc += GET(cc, 1);
6541
6542 compile_matchingpath(common, ccbegin, cc, backtrack);
6543 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6544 return NULL;
6545
6546 if (framesize < 0)
6547 {
6548 if (framesize == no_frame)
6549 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6550
6551 if (offset != 0)
6552 {
/* TMP1 receives the start of this iteration; it becomes the new
OVECTOR(offset) and is also what the empty-match test below compares
against STR_PTR. */
6553 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6554 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6555 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6556 if (common->capture_last_ptr != 0)
6557 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6558 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6559 }
6560 else
6561 {
6562 if (opcode == OP_SBRAPOS)
6563 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6564 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6565 }
6566
/* For the S* opcodes an iteration that did not move STR_PTR leaves the loop. */
6567 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6568 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6569
6570 if (!zero)
6571 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6572 }
6573 else
6574 {
6575 if (offset != 0)
6576 {
6577 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6578 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6579 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6580 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6581 if (common->capture_last_ptr != 0)
6582 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6583 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6584 }
6585 else
6586 {
6587 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6588 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6589 if (opcode == OP_SBRAPOS)
6590 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6591 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6592 }
6593
6594 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6595 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6596
6597 if (!zero)
6598 {
/* NOTE(review): this code is inside the else arm of the enclosing
"if (framesize < 0)" test and framesize is not modified in between, so the
first branch below looks unreachable - confirm before simplifying. */
6599 if (framesize < 0)
6600 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6601 else
6602 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6603 }
6604 }
6605 JUMPTO(SLJIT_JUMP, loop);
6606 flush_stubs(common);
6607
/* The backtracking path of this alternative is emitted right here, after
the jump back to the loop label. */
6608 compile_backtrackingpath(common, backtrack->top);
6609 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6610 return NULL;
6611 set_jumps(backtrack->topbacktracks, LABEL());
6612
/* Reload STR_PTR from where the code above stored it (cbraprivptr for a
capturing group, otherwise the stack slot) before the next alternative or
the final check. */
6613 if (framesize < 0)
6614 {
6615 if (offset != 0)
6616 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6617 else
6618 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6619 }
6620 else
6621 {
6622 if (offset != 0)
6623 {
6624 /* Last alternative. */
6625 if (*cc == OP_KETRPOS)
6626 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6627 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6628 }
6629 else
6630 {
6631 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6632 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6633 }
6634 }
6635
6636 if (*cc == OP_KETRPOS)
6637 break;
6638 ccbegin = cc + 1 + LINK_SIZE;
6639 }
6640
6641 backtrack->topbacktracks = NULL;
/* Without OP_BRAPOSZERO: take the backtrack jump when the flag slot is
still non-zero, i.e. it was never cleared by a completed iteration. */
6642 if (!zero)
6643 {
6644 if (framesize < 0)
6645 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6646 else /* TMP2 is set to [private_data_ptr] above. */
6647 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
6648 }
6649
6650 /* None of them matched. */
6651 set_jumps(emptymatch, LABEL());
6652 decrease_call_count(common);
6653 return cc + 1 + LINK_SIZE;
6654 }
6655
6656 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6657 {
6658 int class_len;
6659
6660 *opcode = *cc;
6661 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6662 {
6663 cc++;
6664 *type = OP_CHAR;
6665 }
6666 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6667 {
6668 cc++;
6669 *type = OP_CHARI;
6670 *opcode -= OP_STARI - OP_STAR;
6671 }
6672 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6673 {
6674 cc++;
6675 *type = OP_NOT;
6676 *opcode -= OP_NOTSTAR - OP_STAR;
6677 }
6678 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6679 {
6680 cc++;
6681 *type = OP_NOTI;
6682 *opcode -= OP_NOTSTARI - OP_STAR;
6683 }
6684 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6685 {
6686 cc++;
6687 *opcode -= OP_TYPESTAR - OP_STAR;
6688 *type = 0;
6689 }
6690 else
6691 {
6692 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6693 *type = *opcode;
6694 cc++;
6695 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6696 *opcode = cc[class_len - 1];
6697 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6698 {
6699 *opcode -= OP_CRSTAR - OP_STAR;
6700 if (end != NULL)
6701 *end = cc + class_len;
6702 }
6703 else
6704 {
6705 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6706 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6707 *arg2 = GET2(cc, class_len);
6708
6709 if (*arg2 == 0)
6710 {
6711 SLJIT_ASSERT(*arg1 != 0);
6712 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6713 }
6714 if (*arg1 == *arg2)
6715 *opcode = OP_EXACT;
6716
6717 if (end != NULL)
6718 *end = cc + class_len + 2 * IMM2_SIZE;
6719 }
6720 return cc;
6721 }
6722
6723 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6724 {
6725 *arg1 = GET2(cc, 0);
6726 cc += IMM2_SIZE;
6727 }
6728
6729 if (*type == 0)
6730 {
6731 *type = *cc;
6732 if (end != NULL)
6733 *end = next_opcode(common, cc);
6734 cc++;
6735 return cc;
6736 }
6737
6738 if (end != NULL)
6739 {
6740 *end = cc + 1;
6741 #ifdef SUPPORT_UTF
6742 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6743 #endif
6744 }
6745