/[pcre]/code/tags/pcre-8.37/pcre_jit_compile.c
ViewVC logotype

Contents of /code/tags/pcre-8.37/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1277 - (show annotations)
Mon Mar 11 09:50:29 2013 UTC (6 years, 8 months ago) by zherczeg
Original Path: code/trunk/pcre_jit_compile.c
File MIME type: text/plain
File size: 291889 byte(s)
Error occurred while calculating annotation data.
OP_ONCE support is added to the backtracking control verb chain support in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Debugging switches. Both are left commented out here. */

/* DEBUG_FORCE_UNOPTIMIZED_CBRAS:
     1 - Use unoptimized capturing brackets.
     2 - Enable capture_last_ptr as well (includes option 1). */
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

/* DEBUG_FORCE_CONTROL_HEAD:
     1 - Always have a control head. */
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */

/* Memory for the regex stack that is allocated on the real machine
   stack. Fast, but limited in size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
   of the page size. */
#define STACK_GROWTH_RATE 8192

/* When sljit debugging is enabled, also check that the allocation
   could destroy temporaries. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the JIT code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how code is generated for a capturing bracket
116 with two alternatives. Let A, B, C and D be arbitrary regular expressions, and
117 consider the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 int real_offset_count;
172 int offset_count;
173 int call_limit;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
186 } executable_functions;
187
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
192
/* Singly linked list node pairing a jump (start) with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
198
/* Negative markers that get_framesize may return in place of a real
   frame size when no frame is needed. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
203
/* Identifiers for the commit, prune and skip control types. */
enum control_types {
  type_commit = 0,
  type_prune  = 1,
  type_skip   = 2
};
209
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type for the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the aguments for compile_backtrackingpath. Must be the first member
215 of its descendants. */
216 typedef struct backtrack_common {
217 /* Concatenation stack. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
227 typedef struct assert_backtrack {
228 backtrack_common common;
229 jump_list *condfailed;
230 /* Less than 0 if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
238 typedef struct bracket_backtrack {
239 backtrack_common common;
240 /* Where to coninue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. Less than 0 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
258 typedef struct bracketpos_backtrack {
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
268 typedef struct braminzero_backtrack {
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
273 typedef struct iterator_backtrack {
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls until the function is not created. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 int start;
287 } recurse_entry;
288
289 typedef struct recurse_backtrack {
290 backtrack_common common;
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
294 #define MAX_RANGE_SIZE 6
295
296 typedef struct compiler_common {
297 /* The sljit ceneric compiler. */
298 struct sljit_compiler *compiler;
299 /* First byte code. */
300 pcre_uchar *start;
301 /* Maps private data offset to each opcode. */
302 int *private_data_ptrs;
303 /* Tells whether the capturing bracket is optimized. */
304 pcre_uint8 *optimized_cbracket;
305 /* Starting offset of private data for capturing brackets. */
306 int cbra_ptr;
307 /* Output vector starting point. Must be divisible by 2. */
308 int ovector_start;
309 /* Last known position of the requested byte. */
310 int req_char_ptr;
311 /* Head of the last recursion. */
312 int recursive_head_ptr;
313 /* First inspected character for partial matching. */
314 int start_used_ptr;
315 /* Starting pointer for partial soft matches. */
316 int hit_start;
317 /* End pointer of the first line. */
318 int first_line_end;
319 /* Points to the marked string. */
320 int mark_ptr;
321 /* Recursive control verb management chain. */
322 int control_head_ptr;
323 /* Points to the last matched capture block index. */
324 int capture_last_ptr;
325 /* Points to the starting position of the current match. */
326 int start_ptr;
327
328 /* Flipped and lower case tables. */
329 const pcre_uint8 *fcc;
330 sljit_sw lcc;
331 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
332 int mode;
333 /* \K is in the pattern. */
334 BOOL has_set_som;
335 /* Needs to know the start position anytime. */
336 BOOL needs_start_ptr;
337 /* Currently in recurse or assert. */
338 BOOL local_exit;
339 /* Newline control. */
340 int nltype;
341 int newline;
342 int bsr_nltype;
343 /* Dollar endonly. */
344 int endonly;
345 /* Tables. */
346 sljit_sw ctypes;
347 int digits[2 + MAX_RANGE_SIZE];
348 /* Named capturing brackets. */
349 sljit_uw name_table;
350 sljit_sw name_count;
351 sljit_sw name_entry_size;
352
353 /* Labels and jump lists. */
354 struct sljit_label *partialmatchlabel;
355 struct sljit_label *quit_label;
356 struct sljit_label *forced_quit_label;
357 struct sljit_label *accept_label;
358 stub_list *stubs;
359 recurse_entry *entries;
360 recurse_entry *currententry;
361 jump_list *partialmatch;
362 jump_list *quit;
363 jump_list *forced_quit;
364 jump_list *accept;
365 jump_list *calllimit;
366 jump_list *stackalloc;
367 jump_list *revertframes;
368 jump_list *wordboundary;
369 jump_list *anynewline;
370 jump_list *hspace;
371 jump_list *vspace;
372 jump_list *casefulcmp;
373 jump_list *caselesscmp;
374 jump_list *reset_match;
375 BOOL jscript_compat;
376 #ifdef SUPPORT_UTF
377 BOOL utf;
378 #ifdef SUPPORT_UCP
379 BOOL use_ucp;
380 #endif
381 #ifndef COMPILE_PCRE32
382 jump_list *utfreadchar;
383 #endif
384 #ifdef COMPILE_PCRE8
385 jump_list *utfreadtype8;
386 #endif
387 #endif /* SUPPORT_UTF */
388 #ifdef SUPPORT_UCP
389 jump_list *getucd;
390 #endif
391 } compiler_common;
392
/* Context for byte_sequence_compare. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  /* c and oc have the same layout: one value viewed as an int, as an
     unsigned short, or as code units of the current character width. */
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c, oc;
#endif
} compare_context;
426
427 /* Undefine sljit macros. */
428 #undef CMP
429
430 /* Used for accessing the elements of the stack. */
431 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
432
433 #define TMP1 SLJIT_SCRATCH_REG1
434 #define TMP2 SLJIT_SCRATCH_REG3
435 #define TMP3 SLJIT_TEMPORARY_EREG2
436 #define STR_PTR SLJIT_SAVED_REG1
437 #define STR_END SLJIT_SAVED_REG2
438 #define STACK_TOP SLJIT_SCRATCH_REG2
439 #define STACK_LIMIT SLJIT_SAVED_REG3
440 #define ARGUMENTS SLJIT_SAVED_EREG1
441 #define CALL_COUNT SLJIT_SAVED_EREG2
442 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
443
444 /* Local space layout. */
445 /* These two locals can be used by the current opcode. */
446 #define LOCALS0 (0 * sizeof(sljit_sw))
447 #define LOCALS1 (1 * sizeof(sljit_sw))
448 /* Two local variables for possessive quantifiers (char1 cannot use them). */
449 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
450 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
451 /* Max limit of recursions. */
452 #define CALL_LIMIT (4 * sizeof(sljit_sw))
453 /* The output vector is stored on the stack, and contains pointers
454 to characters. The vector data is divided into two groups: the first
455 group contains the start / end character pointers, and the second is
456 the start pointers when the end of the capturing group has not yet reached. */
457 #define OVECTOR_START (common->ovector_start)
458 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
459 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))
460 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
461
462 #if defined COMPILE_PCRE8
463 #define MOV_UCHAR SLJIT_MOV_UB
464 #define MOVU_UCHAR SLJIT_MOVU_UB
465 #elif defined COMPILE_PCRE16
466 #define MOV_UCHAR SLJIT_MOV_UH
467 #define MOVU_UCHAR SLJIT_MOVU_UH
468 #elif defined COMPILE_PCRE32
469 #define MOV_UCHAR SLJIT_MOV_UI
470 #define MOVU_UCHAR SLJIT_MOVU_UI
471 #else
472 #error Unsupported compiling mode
473 #endif
474
475 /* Shortcuts. */
476 #define DEFINE_COMPILER \
477 struct sljit_compiler *compiler = common->compiler
478 #define OP1(op, dst, dstw, src, srcw) \
479 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
480 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
481 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
482 #define LABEL() \
483 sljit_emit_label(compiler)
484 #define JUMP(type) \
485 sljit_emit_jump(compiler, (type))
486 #define JUMPTO(type, label) \
487 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
488 #define JUMPHERE(jump) \
489 sljit_set_label((jump), sljit_emit_label(compiler))
490 #define SET_LABEL(jump, label) \
491 sljit_set_label((jump), (label))
492 #define CMP(type, src1, src1w, src2, src2w) \
493 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
494 #define CMPTO(type, src1, src1w, src2, src2w, label) \
495 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
496 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
497 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
498 #define GET_LOCAL_BASE(dst, dstw, offset) \
499 sljit_get_local_base(compiler, (dst), (dstw), (offset))
500
501 static pcre_uchar* bracketend(pcre_uchar* cc)
502 {
503 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
504 do cc += GET(cc, 1); while (*cc == OP_ALT);
505 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
506 cc += 1 + LINK_SIZE;
507 return cc;
508 }
509
510 /* Functions which might need modification for all new supported opcodes:
511 next_opcode
512 get_private_data_length
513 set_private_data_ptrs
514 get_framesize
515 init_frame
516 get_private_data_copy_length
517 copy_private_data
518 compile_matchingpath
519 compile_backtrackingpath
520 */
521
522 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
523 {
524 SLJIT_UNUSED_ARG(common);
525 switch(*cc)
526 {
527 case OP_SOD:
528 case OP_SOM:
529 case OP_SET_SOM:
530 case OP_NOT_WORD_BOUNDARY:
531 case OP_WORD_BOUNDARY:
532 case OP_NOT_DIGIT:
533 case OP_DIGIT:
534 case OP_NOT_WHITESPACE:
535 case OP_WHITESPACE:
536 case OP_NOT_WORDCHAR:
537 case OP_WORDCHAR:
538 case OP_ANY:
539 case OP_ALLANY:
540 case OP_NOTPROP:
541 case OP_PROP:
542 case OP_ANYNL:
543 case OP_NOT_HSPACE:
544 case OP_HSPACE:
545 case OP_NOT_VSPACE:
546 case OP_VSPACE:
547 case OP_EXTUNI:
548 case OP_EODN:
549 case OP_EOD:
550 case OP_CIRC:
551 case OP_CIRCM:
552 case OP_DOLL:
553 case OP_DOLLM:
554 case OP_CRSTAR:
555 case OP_CRMINSTAR:
556 case OP_CRPLUS:
557 case OP_CRMINPLUS:
558 case OP_CRQUERY:
559 case OP_CRMINQUERY:
560 case OP_CRRANGE:
561 case OP_CRMINRANGE:
562 case OP_CLASS:
563 case OP_NCLASS:
564 case OP_REF:
565 case OP_REFI:
566 case OP_RECURSE:
567 case OP_CALLOUT:
568 case OP_ALT:
569 case OP_KET:
570 case OP_KETRMAX:
571 case OP_KETRMIN:
572 case OP_KETRPOS:
573 case OP_REVERSE:
574 case OP_ASSERT:
575 case OP_ASSERT_NOT:
576 case OP_ASSERTBACK:
577 case OP_ASSERTBACK_NOT:
578 case OP_ONCE:
579 case OP_ONCE_NC:
580 case OP_BRA:
581 case OP_BRAPOS:
582 case OP_CBRA:
583 case OP_CBRAPOS:
584 case OP_COND:
585 case OP_SBRA:
586 case OP_SBRAPOS:
587 case OP_SCBRA:
588 case OP_SCBRAPOS:
589 case OP_SCOND:
590 case OP_CREF:
591 case OP_NCREF:
592 case OP_RREF:
593 case OP_NRREF:
594 case OP_DEF:
595 case OP_BRAZERO:
596 case OP_BRAMINZERO:
597 case OP_BRAPOSZERO:
598 case OP_PRUNE:
599 case OP_SKIP:
600 case OP_COMMIT:
601 case OP_FAIL:
602 case OP_ACCEPT:
603 case OP_ASSERT_ACCEPT:
604 case OP_CLOSE:
605 case OP_SKIPZERO:
606 return cc + PRIV(OP_lengths)[*cc];
607
608 case OP_CHAR:
609 case OP_CHARI:
610 case OP_NOT:
611 case OP_NOTI:
612 case OP_STAR:
613 case OP_MINSTAR:
614 case OP_PLUS:
615 case OP_MINPLUS:
616 case OP_QUERY:
617 case OP_MINQUERY:
618 case OP_UPTO:
619 case OP_MINUPTO:
620 case OP_EXACT:
621 case OP_POSSTAR:
622 case OP_POSPLUS:
623 case OP_POSQUERY:
624 case OP_POSUPTO:
625 case OP_STARI:
626 case OP_MINSTARI:
627 case OP_PLUSI:
628 case OP_MINPLUSI:
629 case OP_QUERYI:
630 case OP_MINQUERYI:
631 case OP_UPTOI:
632 case OP_MINUPTOI:
633 case OP_EXACTI:
634 case OP_POSSTARI:
635 case OP_POSPLUSI:
636 case OP_POSQUERYI:
637 case OP_POSUPTOI:
638 case OP_NOTSTAR:
639 case OP_NOTMINSTAR:
640 case OP_NOTPLUS:
641 case OP_NOTMINPLUS:
642 case OP_NOTQUERY:
643 case OP_NOTMINQUERY:
644 case OP_NOTUPTO:
645 case OP_NOTMINUPTO:
646 case OP_NOTEXACT:
647 case OP_NOTPOSSTAR:
648 case OP_NOTPOSPLUS:
649 case OP_NOTPOSQUERY:
650 case OP_NOTPOSUPTO:
651 case OP_NOTSTARI:
652 case OP_NOTMINSTARI:
653 case OP_NOTPLUSI:
654 case OP_NOTMINPLUSI:
655 case OP_NOTQUERYI:
656 case OP_NOTMINQUERYI:
657 case OP_NOTUPTOI:
658 case OP_NOTMINUPTOI:
659 case OP_NOTEXACTI:
660 case OP_NOTPOSSTARI:
661 case OP_NOTPOSPLUSI:
662 case OP_NOTPOSQUERYI:
663 case OP_NOTPOSUPTOI:
664 cc += PRIV(OP_lengths)[*cc];
665 #ifdef SUPPORT_UTF
666 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
667 #endif
668 return cc;
669
670 /* Special cases. */
671 case OP_TYPESTAR:
672 case OP_TYPEMINSTAR:
673 case OP_TYPEPLUS:
674 case OP_TYPEMINPLUS:
675 case OP_TYPEQUERY:
676 case OP_TYPEMINQUERY:
677 case OP_TYPEUPTO:
678 case OP_TYPEMINUPTO:
679 case OP_TYPEEXACT:
680 case OP_TYPEPOSSTAR:
681 case OP_TYPEPOSPLUS:
682 case OP_TYPEPOSQUERY:
683 case OP_TYPEPOSUPTO:
684 return cc + PRIV(OP_lengths)[*cc] - 1;
685
686 case OP_ANYBYTE:
687 #ifdef SUPPORT_UTF
688 if (common->utf) return NULL;
689 #endif
690 return cc + 1;
691
692 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
693 case OP_XCLASS:
694 return cc + GET(cc, 1);
695 #endif
696
697 case OP_MARK:
698 case OP_PRUNE_ARG:
699 return cc + 1 + 2 + cc[1];
700
701 default:
702 return NULL;
703 }
704 }
705
/* Groups of case labels shared by get_private_data_length and
   set_private_data_ptrs. Both functions request one private data word for
   the "_1" groups and two words for the "_2A" / "_2B" groups. */

/* Single character iterators, one word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
    case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Single character iterators, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Single character iterators with an IMM2_SIZE operand, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Character type iterators, one word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Character type iterators, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Character type iterators with an IMM2_SIZE operand, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
757
758 static int get_class_iterator_size(pcre_uchar *cc)
759 {
760 switch(*cc)
761 {
762 case OP_CRSTAR:
763 case OP_CRPLUS:
764 return 2;
765
766 case OP_CRMINSTAR:
767 case OP_CRMINPLUS:
768 case OP_CRQUERY:
769 case OP_CRMINQUERY:
770 return 1;
771
772 case OP_CRRANGE:
773 case OP_CRMINRANGE:
774 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
775 return 0;
776 return 2;
777
778 default:
779 return 0;
780 }
781 }
782
783 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
784 {
785 int private_data_length = 0;
786 pcre_uchar *alternative;
787 pcre_uchar *name;
788 pcre_uchar *end = NULL;
789 int space, size, i;
790 pcre_uint32 bracketlen;
791
792 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
793 while (cc < ccend)
794 {
795 space = 0;
796 size = 0;
797 bracketlen = 0;
798 switch(*cc)
799 {
800 case OP_SET_SOM:
801 common->has_set_som = TRUE;
802 cc += 1;
803 break;
804
805 case OP_REF:
806 case OP_REFI:
807 common->optimized_cbracket[GET2(cc, 1)] = 0;
808 cc += 1 + IMM2_SIZE;
809 break;
810
811 case OP_ASSERT:
812 case OP_ASSERT_NOT:
813 case OP_ASSERTBACK:
814 case OP_ASSERTBACK_NOT:
815 case OP_ONCE:
816 case OP_ONCE_NC:
817 case OP_BRAPOS:
818 case OP_SBRA:
819 case OP_SBRAPOS:
820 private_data_length += sizeof(sljit_sw);
821 bracketlen = 1 + LINK_SIZE;
822 break;
823
824 case OP_CBRAPOS:
825 case OP_SCBRAPOS:
826 private_data_length += sizeof(sljit_sw);
827 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
828 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
829 break;
830
831 case OP_COND:
832 case OP_SCOND:
833 /* Only AUTO_CALLOUT can insert this opcode. We do
834 not intend to support this case. */
835 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
836 return -1;
837
838 if (*cc == OP_COND)
839 {
840 /* Might be a hidden SCOND. */
841 alternative = cc + GET(cc, 1);
842 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
843 private_data_length += sizeof(sljit_sw);
844 }
845 else
846 private_data_length += sizeof(sljit_sw);
847 bracketlen = 1 + LINK_SIZE;
848 break;
849
850 case OP_CREF:
851 i = GET2(cc, 1);
852 common->optimized_cbracket[i] = 0;
853 cc += 1 + IMM2_SIZE;
854 break;
855
856 case OP_NCREF:
857 bracketlen = GET2(cc, 1);
858 name = (pcre_uchar *)common->name_table;
859 alternative = name;
860 for (i = 0; i < common->name_count; i++)
861 {
862 if (GET2(name, 0) == bracketlen) break;
863 name += common->name_entry_size;
864 }
865 SLJIT_ASSERT(i != common->name_count);
866
867 for (i = 0; i < common->name_count; i++)
868 {
869 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
870 common->optimized_cbracket[GET2(alternative, 0)] = 0;
871 alternative += common->name_entry_size;
872 }
873 bracketlen = 0;
874 cc += 1 + IMM2_SIZE;
875 break;
876
877 case OP_BRA:
878 bracketlen = 1 + LINK_SIZE;
879 break;
880
881 case OP_CBRA:
882 case OP_SCBRA:
883 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
884 break;
885
886 CASE_ITERATOR_PRIVATE_DATA_1
887 space = 1;
888 size = -2;
889 break;
890
891 CASE_ITERATOR_PRIVATE_DATA_2A
892 space = 2;
893 size = -2;
894 break;
895
896 CASE_ITERATOR_PRIVATE_DATA_2B
897 space = 2;
898 size = -(2 + IMM2_SIZE);
899 break;
900
901 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
902 space = 1;
903 size = 1;
904 break;
905
906 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
907 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
908 space = 2;
909 size = 1;
910 break;
911
912 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
913 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
914 space = 2;
915 size = 1 + IMM2_SIZE;
916 break;
917
918 case OP_CLASS:
919 case OP_NCLASS:
920 size += 1 + 32 / sizeof(pcre_uchar);
921 space = get_class_iterator_size(cc + size);
922 break;
923
924 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
925 case OP_XCLASS:
926 size = GET(cc, 1);
927 space = get_class_iterator_size(cc + size);
928 break;
929 #endif
930
931 case OP_RECURSE:
932 /* Set its value only once. */
933 if (common->recursive_head_ptr == 0)
934 {
935 common->recursive_head_ptr = common->ovector_start;
936 common->ovector_start += sizeof(sljit_sw);
937 }
938 cc += 1 + LINK_SIZE;
939 break;
940
941 case OP_CALLOUT:
942 if (common->capture_last_ptr == 0)
943 {
944 common->capture_last_ptr = common->ovector_start;
945 common->ovector_start += sizeof(sljit_sw);
946 }
947 cc += 2 + 2 * LINK_SIZE;
948 break;
949
950 case OP_PRUNE_ARG:
951 common->needs_start_ptr = TRUE;
952 common->control_head_ptr = 1;
953 /* Fall through. */
954
955 case OP_MARK:
956 if (common->mark_ptr == 0)
957 {
958 common->mark_ptr = common->ovector_start;
959 common->ovector_start += sizeof(sljit_sw);
960 }
961 cc += 1 + 2 + cc[1];
962 break;
963
964 case OP_PRUNE:
965 case OP_SKIP:
966 common->needs_start_ptr = TRUE;
967 /* Fall through. */
968
969 case OP_COMMIT:
970 common->control_head_ptr = 1;
971 cc += 1;
972 break;
973
974 default:
975 cc = next_opcode(common, cc);
976 if (cc == NULL)
977 return -1;
978 break;
979 }
980
981 if (space > 0 && cc >= end)
982 private_data_length += sizeof(sljit_sw) * space;
983
984 if (size != 0)
985 {
986 if (size < 0)
987 {
988 cc += -size;
989 #ifdef SUPPORT_UTF
990 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
991 #endif
992 }
993 else
994 cc += size;
995 }
996
997 if (bracketlen != 0)
998 {
999 if (cc >= end)
1000 {
1001 end = bracketend(cc);
1002 if (end[-1 - LINK_SIZE] == OP_KET)
1003 end = NULL;
1004 }
1005 cc += bracketlen;
1006 }
1007 }
1008 return private_data_length;
1009 }
1010
1011 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
1012 {
1013 pcre_uchar *cc = common->start;
1014 pcre_uchar *alternative;
1015 pcre_uchar *end = NULL;
1016 int space, size, bracketlen;
1017
1018 while (cc < ccend)
1019 {
1020 space = 0;
1021 size = 0;
1022 bracketlen = 0;
1023 switch(*cc)
1024 {
1025 case OP_ASSERT:
1026 case OP_ASSERT_NOT:
1027 case OP_ASSERTBACK:
1028 case OP_ASSERTBACK_NOT:
1029 case OP_ONCE:
1030 case OP_ONCE_NC:
1031 case OP_BRAPOS:
1032 case OP_SBRA:
1033 case OP_SBRAPOS:
1034 case OP_SCOND:
1035 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1036 private_data_ptr += sizeof(sljit_sw);
1037 bracketlen = 1 + LINK_SIZE;
1038 break;
1039
1040 case OP_CBRAPOS:
1041 case OP_SCBRAPOS:
1042 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1043 private_data_ptr += sizeof(sljit_sw);
1044 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1045 break;
1046
1047 case OP_COND:
1048 /* Might be a hidden SCOND. */
1049 alternative = cc + GET(cc, 1);
1050 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1051 {
1052 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1053 private_data_ptr += sizeof(sljit_sw);
1054 }
1055 bracketlen = 1 + LINK_SIZE;
1056 break;
1057
1058 case OP_BRA:
1059 bracketlen = 1 + LINK_SIZE;
1060 break;
1061
1062 case OP_CBRA:
1063 case OP_SCBRA:
1064 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1065 break;
1066
1067 CASE_ITERATOR_PRIVATE_DATA_1
1068 space = 1;
1069 size = -2;
1070 break;
1071
1072 CASE_ITERATOR_PRIVATE_DATA_2A
1073 space = 2;
1074 size = -2;
1075 break;
1076
1077 CASE_ITERATOR_PRIVATE_DATA_2B
1078 space = 2;
1079 size = -(2 + IMM2_SIZE);
1080 break;
1081
1082 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1083 space = 1;
1084 size = 1;
1085 break;
1086
1087 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1088 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1089 space = 2;
1090 size = 1;
1091 break;
1092
1093 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1094 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1095 space = 2;
1096 size = 1 + IMM2_SIZE;
1097 break;
1098
1099 case OP_CLASS:
1100 case OP_NCLASS:
1101 size += 1 + 32 / sizeof(pcre_uchar);
1102 space = get_class_iterator_size(cc + size);
1103 break;
1104
1105 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1106 case OP_XCLASS:
1107 size = GET(cc, 1);
1108 space = get_class_iterator_size(cc + size);
1109 break;
1110 #endif
1111
1112 default:
1113 cc = next_opcode(common, cc);
1114 SLJIT_ASSERT(cc != NULL);
1115 break;
1116 }
1117
1118 if (space > 0 && cc >= end)
1119 {
1120 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1121 private_data_ptr += sizeof(sljit_sw) * space;
1122 }
1123
1124 if (size != 0)
1125 {
1126 if (size < 0)
1127 {
1128 cc += -size;
1129 #ifdef SUPPORT_UTF
1130 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1131 #endif
1132 }
1133 else
1134 cc += size;
1135 }
1136
1137 if (bracketlen > 0)
1138 {
1139 if (cc >= end)
1140 {
1141 end = bracketend(cc);
1142 if (end[-1 - LINK_SIZE] == OP_KET)
1143 end = NULL;
1144 }
1145 cc += bracketlen;
1146 }
1147 }
1148 }
1149
/* Computes the number of stack words needed by the frame of the bracket at
cc. Every opcode between the opening bracket and its final KET is scanned and
the items that init_frame() stores for them are counted: two words for each of
the start-of-match, mark and last-capture values (each of them is counted at
most once) and three words for every capturing bracket.

Returns the word count plus one (for the terminating zero word written by
init_frame()), or returns with a frame_types (always < 0) if no need for
frame: no_frame when an opcode was seen which sets stack_restore, no_stack
otherwise.

*needs_control_head is set to TRUE when OP_MARK, OP_PRUNE_ARG, OP_PRUNE,
OP_SKIP or OP_COMMIT is found while common->control_head_ptr is non-zero (or
always, when DEBUG_FORCE_CONTROL_HEAD is enabled). */
static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive, BOOL* needs_control_head)
{
/* ccend points to the final KET of the bracket. */
pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
int length = 0;
int possessive = 0;
BOOL stack_restore = FALSE;
/* For recursions these two are treated as already counted, so the
start-of-match and the mark never add to the length. */
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;

#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
*needs_control_head = TRUE;
#else
*needs_control_head = FALSE;
#endif

if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
  {
  /* The capturing bracket itself needs 3 words, and the last capture needs
  2 more. The same value is remembered in possessive, so it can be detected
  later whether the body added anything. */
  possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
  /* This is correct regardless of common->capture_last_ptr. */
  capture_last_found = TRUE;
  }

cc = next_opcode(common, cc);
SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    stack_restore = TRUE;
    if (!setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    stack_restore = TRUE;
    if (!setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    /* Skips the opcode, its argument whose length is stored in cc[1], and two more code units. */
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* A recursion is counted as if it changes all three values. */
    stack_restore = TRUE;
    if (common->has_set_som && !setsom_found)
      {
      length += 2;
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      length += 2;
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    stack_restore = TRUE;
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      length += 2;
      capture_last_found = TRUE;
      }
    /* Three words for each capturing bracket. */
    length += 3;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_PRUNE:
    case OP_SKIP:
    case OP_COMMIT:
    if (common->control_head_ptr != 0)
      *needs_control_head = TRUE;
    /* Fall through. */

    default:
    stack_restore = TRUE;
    /* Fall through. */

    /* The opcodes listed below jump directly to their own label, so they
    are skipped without setting stack_restore. Any opcode which has no label
    in this switch goes through the default label above and sets it. */
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_NOT_DIGIT:
    case OP_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
    case OP_ANYBYTE:
    case OP_NOTPROP:
    case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE:
    case OP_HSPACE:
    case OP_NOT_VSPACE:
    case OP_VSPACE:
    case OP_EXTUNI:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CHAR:
    case OP_CHARI:
    case OP_NOT:
    case OP_NOTI:

    case OP_EXACT:
    case OP_POSSTAR:
    case OP_POSPLUS:
    case OP_POSQUERY:
    case OP_POSUPTO:

    case OP_EXACTI:
    case OP_POSSTARI:
    case OP_POSPLUSI:
    case OP_POSQUERYI:
    case OP_POSUPTOI:

    case OP_NOTEXACT:
    case OP_NOTPOSSTAR:
    case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY:
    case OP_NOTPOSUPTO:

    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI:
    case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI:
    case OP_NOTPOSUPTOI:

    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR:
    case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY:
    case OP_TYPEPOSUPTO:

    case OP_CLASS:
    case OP_NCLASS:
    case OP_XCLASS:

    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* Possessive quantifiers can use a special case: nothing was added to the
initial length, so no frame is returned. */
if (SLJIT_UNLIKELY(possessive == length))
  return stack_restore ? no_frame : no_stack;

/* One extra word for the terminating zero of the frame. */
if (length > 0)
  return length + 1;
return stack_restore ? no_frame : no_stack;
}
1328
/* Emits the code which stores the frame of the bracket at cc on the stack,
starting at the stackpos slot. The opcodes between the opening bracket and
its final KET are scanned in the same way as in get_framesize(), and for
every counted item an identifier word followed by the saved value(s) is
written:
  - start-of-match, mark, last capture: the negated offset of the local
    variable, then its current value (2 words, each stored at most once);
  - capturing bracket: OVECTOR(offset), then both ovector values (3 words).
The frame is closed by a zero word. The stacktop argument is only used by
the assertions, which check that the frame ends exactly at that slot. */
static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
{
DEFINE_COMPILER;
pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
/* For recursions the start-of-match and the mark are never stored. */
BOOL setsom_found = recursive;
BOOL setmark_found = recursive;
/* The last capture is a local variable even for recursions. */
BOOL capture_last_found = FALSE;
int offset;

/* >= 1 + shortest item size (2) */
SLJIT_UNUSED_ARG(stacktop);
SLJIT_ASSERT(stackpos >= stacktop + 2);

/* From here stackpos is advanced by sizeof(sljit_sw) after each stored
word (STACK() presumably converts a slot index to a byte offset). */
stackpos = STACK(stackpos);
/* A non-recursive OP_CBRAPOS / OP_SCBRAPOS is not skipped, so the loop
below stores the capture of the opening bracket itself as well. */
if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
  cc = next_opcode(common, cc);
SLJIT_ASSERT(cc != NULL);
while (cc < ccend)
  switch(*cc)
    {
    case OP_SET_SOM:
    SLJIT_ASSERT(common->has_set_som);
    if (!setsom_found)
      {
      /* Identifier: -OVECTOR(0), value: the current OVECTOR(0). */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    cc += 1;
    break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    SLJIT_ASSERT(common->mark_ptr != 0);
    if (!setmark_found)
      {
      /* Identifier: -mark_ptr, value: the current mark. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_RECURSE:
    /* A recursion stores all three values which were not stored before. */
    if (common->has_set_som && !setsom_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setsom_found = TRUE;
      }
    if (common->mark_ptr != 0 && !setmark_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      setmark_found = TRUE;
      }
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
    if (common->capture_last_ptr != 0 && !capture_last_found)
      {
      /* Identifier: -capture_last_ptr, value: the current last capture. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
      stackpos += (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos += (int)sizeof(sljit_sw);
      capture_last_found = TRUE;
      }
    /* The bracket number is doubled, since each bracket owns two ovector
    entries. The identifier is the (non-negated) OVECTOR(offset), which is
    followed by the two current ovector values. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
    stackpos += (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
    stackpos += (int)sizeof(sljit_sw);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
    stackpos += (int)sizeof(sljit_sw);

    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

/* The zero word terminates the frame. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
SLJIT_ASSERT(stackpos == STACK(stacktop));
}
1445
1446 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1447 {
1448 int private_data_length = needs_control_head ? 3 : 2;
1449 int size;
1450 pcre_uchar *alternative;
1451 /* Calculate the sum of the private machine words. */
1452 while (cc < ccend)
1453 {
1454 size = 0;
1455 switch(*cc)
1456 {
1457 case OP_ASSERT:
1458 case OP_ASSERT_NOT:
1459 case OP_ASSERTBACK:
1460 case OP_ASSERTBACK_NOT:
1461 case OP_ONCE:
1462 case OP_ONCE_NC:
1463 case OP_BRAPOS:
1464 case OP_SBRA:
1465 case OP_SBRAPOS:
1466 case OP_SCOND:
1467 private_data_length++;
1468 cc += 1 + LINK_SIZE;
1469 break;
1470
1471 case OP_CBRA:
1472 case OP_SCBRA:
1473 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1474 private_data_length++;
1475 cc += 1 + LINK_SIZE + IMM2_SIZE;
1476 break;
1477
1478 case OP_CBRAPOS:
1479 case OP_SCBRAPOS:
1480 private_data_length += 2;
1481 cc += 1 + LINK_SIZE + IMM2_SIZE;
1482 break;
1483
1484 case OP_COND:
1485 /* Might be a hidden SCOND. */
1486 alternative = cc + GET(cc, 1);
1487 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1488 private_data_length++;
1489 cc += 1 + LINK_SIZE;
1490 break;
1491
1492 CASE_ITERATOR_PRIVATE_DATA_1
1493 if (PRIVATE_DATA(cc))
1494 private_data_length++;
1495 cc += 2;
1496 #ifdef SUPPORT_UTF
1497 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1498 #endif
1499 break;
1500
1501 CASE_ITERATOR_PRIVATE_DATA_2A
1502 if (PRIVATE_DATA(cc))
1503 private_data_length += 2;
1504 cc += 2;
1505 #ifdef SUPPORT_UTF
1506 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1507 #endif
1508 break;
1509
1510 CASE_ITERATOR_PRIVATE_DATA_2B
1511 if (PRIVATE_DATA(cc))
1512 private_data_length += 2;
1513 cc += 2 + IMM2_SIZE;
1514 #ifdef SUPPORT_UTF
1515 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1516 #endif
1517 break;
1518
1519 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1520 if (PRIVATE_DATA(cc))
1521 private_data_length++;
1522 cc += 1;
1523 break;
1524
1525 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1526 if (PRIVATE_DATA(cc))
1527 private_data_length += 2;
1528 cc += 1;
1529 break;
1530
1531 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1532 if (PRIVATE_DATA(cc))
1533 private_data_length += 2;
1534 cc += 1 + IMM2_SIZE;
1535 break;
1536
1537 case OP_CLASS:
1538 case OP_NCLASS:
1539 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1540 case OP_XCLASS:
1541 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1542 #else
1543 size = 1 + 32 / (int)sizeof(pcre_uchar);
1544 #endif
1545 if (PRIVATE_DATA(cc))
1546 private_data_length += get_class_iterator_size(cc + size);
1547 cc += size;
1548 break;
1549
1550 default:
1551 cc = next_opcode(common, cc);
1552 SLJIT_ASSERT(cc != NULL);
1553 break;
1554 }
1555 }
1556 SLJIT_ASSERT(cc == ccend);
1557 return private_data_length;
1558 }
1559
/* Emits the code which copies the private data of the opcodes in the
[cc, ccend) range between the local variables and the stack. When save is
TRUE the locals are written to the stack, otherwise they are loaded back from
it. The stack area starts two slots below stackptr and ends one slot below
stacktop.

When saving, the recursive head (and the control head, if
needs_control_head is set) is stored before the private data. When restoring,
the slots of these heads are skipped.

The words travel through TMP1 and TMP2 alternately, so a memory to memory
copy is never needed: one register is loaded while the other one still holds
the previous word. The opcodes are scanned in the same way as in
get_private_data_copy_length(). */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
DEFINE_COMPILER;
/* Offsets of the (at most two) local variables of the current item. */
int srcw[2];
int count, size;
/* tmp1next selects the register used by the next word, the *empty flags
tell whether a register holds a word which is not yet written out (save) or
not yet stored into a local (restore). */
BOOL tmp1next = TRUE;
BOOL tmp1empty = TRUE;
BOOL tmp2empty = TRUE;
pcre_uchar *alternative;
enum {
  start,
  loop,
  end
} status;

/* The start state only handles the heads, which are saved but not restored. */
status = save ? start : loop;
stackptr = STACK(stackptr - 2);
stacktop = STACK(stacktop - 1);

if (!save)
  {
  /* Skip the slot(s) of the heads, then preload both registers if there is
  anything to restore. */
  stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp1empty = FALSE;
    }
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp2empty = FALSE;
    }
  /* The tmp1next must be TRUE in either way. */
  }

do
  {
  /* Each iteration collects the locals of one item into srcw / count. */
  count = 0;
  switch(status)
    {
    case start:
    SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
    count = 1;
    srcw[0] = common->recursive_head_ptr;
    if (needs_control_head)
      {
      SLJIT_ASSERT(common->control_head_ptr != 0);
      count = 2;
      srcw[1] = common->control_head_ptr;
      }
    status = loop;
    break;

    case loop:
    if (cc >= ccend)
      {
      status = end;
      break;
      }

    switch(*cc)
      {
      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(srcw[0] != 0);
      cc += 1 + LINK_SIZE;
      break;

      case OP_CBRA:
      case OP_SCBRA:
      /* Only the non-optimized capturing brackets have a private word. */
      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
        {
        count = 1;
        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
        }
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_COND:
      /* Might be a hidden SCOND. */
      alternative = cc + GET(cc, 1);
      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(srcw[0] != 0);
        }
      cc += 1 + LINK_SIZE;
      break;

      CASE_ITERATOR_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      /* Skip the remaining code units of the character. */
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        /* The two private words are next to each other. */
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1 + IMM2_SIZE;
      break;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
      case OP_XCLASS:
      size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
      size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
      /* The number of words depends on the iterator which follows the class. */
      if (PRIVATE_DATA(cc))
        switch(get_class_iterator_size(cc + size))
          {
          case 1:
          count = 1;
          srcw[0] = PRIVATE_DATA(cc);
          break;

          case 2:
          count = 2;
          srcw[0] = PRIVATE_DATA(cc);
          srcw[1] = srcw[0] + sizeof(sljit_sw);
          break;

          default:
          SLJIT_ASSERT_STOP();
          break;
          }
      cc += size;
      break;

      default:
      cc = next_opcode(common, cc);
      SLJIT_ASSERT(cc != NULL);
      break;
      }
    break;

    case end:
    SLJIT_ASSERT_STOP();
    break;
    }

  /* Transfer the collected words. They are processed from the highest
  index down, both when saving and when restoring. */
  while (count > 0)
    {
    count--;
    if (save)
      {
      /* Write out the previous content of the selected register (if any),
      then load the next local into it. */
      if (tmp1next)
        {
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
        tmp1empty = FALSE;
        tmp1next = FALSE;
        }
      else
        {
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
        tmp2empty = FALSE;
        tmp1next = TRUE;
        }
      }
    else
      {
      /* Store the preloaded register into the local, then refill it from
      the stack if there is any word left. */
      if (tmp1next)
        {
        SLJIT_ASSERT(!tmp1empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
        tmp1empty = stackptr >= stacktop;
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = FALSE;
        }
      else
        {
        SLJIT_ASSERT(!tmp2empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
        tmp2empty = stackptr >= stacktop;
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = TRUE;
        }
      }
    }
  }
while (status != end);

if (save)
  {
  /* Flush the words which are still in the registers. The register which
  would be used next holds the older word, so it is written first. */
  if (tmp1next)
    {
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  else
    {
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  }
/* The whole range and the whole stack area must be consumed. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
1870
1871 #undef CASE_ITERATOR_PRIVATE_DATA_1
1872 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1873 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1874 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1875 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1876 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1877
1878 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1879 {
1880 return (value & (value - 1)) == 0;
1881 }
1882
1883 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1884 {
1885 while (list)
1886 {
1887 /* sljit_set_label is clever enough to do nothing
1888 if either the jump or the label is NULL. */
1889 SET_LABEL(list->jump, label);
1890 list = list->next;
1891 }
1892 }
1893
1894 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1895 {
1896 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1897 if (list_item)
1898 {
1899 list_item->next = *list;
1900 list_item->jump = jump;
1901 *list = list_item;
1902 }
1903 }
1904
1905 static void add_stub(compiler_common *common, struct sljit_jump *start)
1906 {
1907 DEFINE_COMPILER;
1908 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1909
1910 if (list_item)
1911 {
1912 list_item->start = start;
1913 list_item->quit = LABEL();
1914 list_item->next = common->stubs;
1915 common->stubs = list_item;
1916 }
1917 }
1918
1919 static void flush_stubs(compiler_common *common)
1920 {
1921 DEFINE_COMPILER;
1922 stub_list* list_item = common->stubs;
1923
1924 while (list_item)
1925 {
1926 JUMPHERE(list_item->start);
1927 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1928 JUMPTO(SLJIT_JUMP, list_item->quit);
1929 list_item = list_item->next;
1930 }
1931 common->stubs = NULL;
1932 }
1933
1934 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1935 {
1936 DEFINE_COMPILER;
1937
1938 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1939 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1940 }
1941
1942 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1943 {
1944 /* May destroy all locals and registers except TMP2. */
1945 DEFINE_COMPILER;
1946
1947 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1948 #ifdef DESTROY_REGISTERS
1949 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1950 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1951 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1952 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1953 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1954 #endif
1955 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1956 }
1957
1958 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1959 {
1960 DEFINE_COMPILER;
1961 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1962 }
1963
1964 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1965 {
1966 DEFINE_COMPILER;
1967 struct sljit_label *loop;
1968 int i;
1969
1970 /* At this point we can freely use all temporary registers. */
1971 SLJIT_ASSERT(length > 1);
1972 /* TMP1 returns with begin - 1. */
1973 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1974 if (length < 8)
1975 {
1976 for (i = 1; i < length; i++)
1977 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1978 }
1979 else
1980 {
1981 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
1982 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
1983 loop = LABEL();
1984 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1985 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1986 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1987 }
1988 }
1989
1990 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
1991 {
1992 DEFINE_COMPILER;
1993 struct sljit_label *loop;
1994 int i;
1995
1996 SLJIT_ASSERT(length > 1);
1997 /* OVECTOR(1) contains the "string begin - 1" constant. */
1998 if (length > 2)
1999 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2000 if (length < 8)
2001 {
2002 for (i = 2; i < length; i++)
2003 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
2004 }
2005 else
2006 {
2007 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
2008 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
2009 loop = LABEL();
2010 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
2011 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
2012 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2013 }
2014
2015 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2016 if (common->mark_ptr != 0)
2017 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2018 SLJIT_ASSERT(common->control_head_ptr != 0);
2019 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2020 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2021 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2022 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2023 }
2024
2025 static sljit_sw SLJIT_CALL do_check_control_chain(sljit_sw *current)
2026 {
2027 sljit_sw return_value = 0;
2028
2029 SLJIT_ASSERT(current != NULL);
2030 do
2031 {
2032 switch (current[-2])
2033 {
2034 case type_commit:
2035 /* Commit overwrites all. */
2036 return -1;
2037
2038 case type_prune:
2039 break;
2040
2041 case type_skip:
2042 /* Overwrites prune, but not other skips. */
2043 if (return_value == 0)
2044 return_value = current[-3];
2045 break;
2046
2047 default:
2048 SLJIT_ASSERT_STOP();
2049 break;
2050 }
2051 current = (sljit_sw*)current[-1];
2052 }
2053 while (current != NULL);
2054 return return_value;
2055 }
2056
/* Emits the code which is executed after a successful match: the ovector
locals are converted to offsets relative to the begin of the subject and
copied into the offsets array of the arguments, the mark (if it is used) is
stored into the arguments, and the return value is computed from the highest
capturing bracket which was set. topbracket is the highest bracket number
which is checked for the return value. */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
struct sljit_jump *early_quit;

/* At this point we can freely use all registers. */
/* The old OVECTOR(1) is kept in SLJIT_SAVED_REG3 for the return value
computation below, and it is replaced by the end of the match. */
OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);

OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
/* SLJIT_SCRATCH_REG2 is the number of items to be copied. */
OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
if (common->mark_ptr != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
/* The destination starts one int before the offsets array, because the
store in the loop addresses the item at sizeof(int) from the pointer
(SLJIT_MOVU_SI presumably updates the pointer as well - confirm against the
sljit documentation). */
OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
/* Unlikely, but possible */
early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
loop = LABEL();
/* Pointer minus the begin of the subject. */
OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
/* Copy the integer value to the output buffer */
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* The difference is shifted right by UCHAR_SHIFT in the 16 and 32 bit libraries. */
OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_C_NOT_ZERO, loop);
JUMPHERE(early_quit);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* The ovector is scanned backwards, two words at a time, starting from
  the topbracket pair. SLJIT_SCRATCH_REG2 is decreased for every pair, and
  the loop stops at the first pair whose first word differs from the saved
  value of OVECTOR(1). */
  GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);

  /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
  loop = LABEL();
  OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
  OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
  CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
2106
/* Emits the code which returns with PCRE_ERROR_PARTIAL. If
real_offset_count of the arguments is at least 2, the first two items of the
offsets array are filled before jumping to quit, and the third item is also
filled when the count is at least 3. All values are stored relative to the
begin of the subject. The source of the values depends on the partial
matching mode (hard or soft). */
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
DEFINE_COMPILER;
struct sljit_jump *jump;

/* STR_END is overwritten below through SLJIT_SAVED_REG2. */
SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
/* hit_start must be allocated exactly in the soft partial mode. */
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));

OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
/* Nothing is stored if there is no room for two items. */
CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);

/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));

/* The third item is stored only if there is room for it. Its source is the
start_ptr local in hard partial mode, and the word after hit_start otherwise. */
jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
JUMPHERE(jump);

/* The second item is the end of the subject. The source of the first item
(start_used_ptr in hard partial mode, hit_start otherwise) is loaded before
STR_END is destroyed. */
OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);

/* The first item. */
OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);

JUMPTO(SLJIT_JUMP, quit);
}
2148
2149 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2150 {
2151 /* May destroy TMP1. */
2152 DEFINE_COMPILER;
2153 struct sljit_jump *jump;
2154
2155 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2156 {
2157 /* The value of -1 must be kept for start_used_ptr! */
2158 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2159 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2160 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2161 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2162 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2163 JUMPHERE(jump);
2164 }
2165 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2166 {
2167 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2168 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2169 JUMPHERE(jump);
2170 }
2171 }
2172
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
{
/* Detects if the character has an othercase. The character is read from cc:
the whole character is decoded in UTF mode, otherwise a single code unit is
used. Returns TRUE when the othercase differs from the character. */
unsigned int c;

#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c > 127)
    {
    /* Above 127 the othercase comes from UCD_OTHERCASE, which is only
    available when SUPPORT_UCP is defined. */
#ifdef SUPPORT_UCP
    return c != UCD_OTHERCASE(c);
#else
    return FALSE;
#endif
    }
#ifndef COMPILE_PCRE8
  /* c <= 127 here, so it can be looked up in the fcc table directly. */
  return common->fcc[c] != c;
#endif
  /* In the 8 bit library the execution continues with the table lookup
  at the end of the function. */
  }
else
#endif
  c = *cc;
/* Only the values accepted by MAX_255 are looked up in the fcc table, any
other value has no othercase here. */
return MAX_255(c) ? common->fcc[c] != c : FALSE;
}
2199
2200 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2201 {
2202 /* Returns with the othercase. */
2203 #ifdef SUPPORT_UTF
2204 if (common->utf && c > 127)
2205 {
2206 #ifdef SUPPORT_UCP
2207 return UCD_OTHERCASE(c);
2208 #else
2209 return c;
2210 #endif
2211 }
2212 #endif
2213 return TABLE_GET(c, common->fcc, c);
2214 }
2215
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
{
/* Detects if the character and its othercase has only 1 bit difference.
Returns 0 if they differ in more than one bit. Otherwise the low 8 bits of
the return value contain the differing bit, and the bits above them contain
an index (it looks like this index selects the byte of the encoded character
where the bit is found - confirm against the callers). The character must
have an othercase. */
unsigned int c, oc, bit;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
int n;
#endif

/* Get the character (c) and its othercase (oc), in the same way as
char_othercase() does. */
#ifdef SUPPORT_UTF
if (common->utf)
  {
  GETCHAR(c, cc);
  if (c <= 127)
    oc = common->fcc[c];
  else
    {
#ifdef SUPPORT_UCP
    oc = UCD_OTHERCASE(c);
#else
    oc = c;
#endif
    }
  }
else
  {
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
  }
#else
c = *cc;
oc = TABLE_GET(c, common->fcc, c);
#endif

SLJIT_ASSERT(c != oc);

/* The bits which differ between the two cases. */
bit = c ^ oc;
/* Optimized for English alphabet. */
if (c <= 127 && bit == 0x20)
  return (0 << 8) | 0x20;

/* Since c != oc, they must have at least 1 bit difference. */
if (!is_powerof2(bit))
  return 0;

#if defined COMPILE_PCRE8

#ifdef SUPPORT_UTF
if (common->utf && c > 127)
  {
  /* n starts from the number of extra code units of the character. The
  bit is moved down by 6 bits (and n is decreased) until it gets into the
  lowest 6 bits, so n becomes the index of the code unit which contains the
  difference. */
  n = GET_EXTRALEN(*cc);
  while ((bit & 0x3f) == 0)
    {
    n--;
    bit >>= 6;
    }
  return (n << 8) | bit;
  }
#endif /* SUPPORT_UTF */
return (0 << 8) | bit;

#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32

#ifdef SUPPORT_UTF
if (common->utf && c > 65535)
  {
  /* Characters above 65535: a difference in bit 10 or above is moved down
  by 10 bits and handled by the common return below (index 0 or 1),
  otherwise index 2 or 3 is returned. */
  if (bit >= (1 << 10))
    bit >>= 10;
  else
    return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
  }
#endif /* SUPPORT_UTF */
/* Index 0 is used when the bit is in the low 8 bits, index 1 otherwise. */
return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));

#endif /* COMPILE_PCRE[8|16|32] */
}
2291
/* Emits the partial-match check used when the end of the subject is hit.
     common  compiler state (mode, start_used_ptr, hit_start, partial match
             label / jump list)
     force   when TRUE the STR_PTR comparison is skipped; only valid in a
             partial matching mode (asserted)
   Nothing is emitted in JIT_COMPILE mode. */
2292 static void check_partial(compiler_common *common, BOOL force)
2293 {
2294 /* Checks whether a partial match has occurred. Does not modify registers. */
2295 DEFINE_COMPILER;
2296 struct sljit_jump *jump = NULL;
2297
2298 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2299
2300 if (common->mode == JIT_COMPILE)
2301 return;
2302
/* Guard: the action below is skipped when start_used_ptr >= STR_PTR
   (non-forced), or, for a forced check in soft mode, when start_used_ptr
   still holds -1. A forced check in hard mode has no guard. */
2303 if (!force)
2304 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2305 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2306 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2307
/* Soft mode only records the event by storing 0 into hit_start; hard mode
   leaves through the partial match exit, either directly to the label if
   it already exists or through the partialmatch jump list. */
2308 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2309 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2310 else
2311 {
2312 if (common->partialmatchlabel != NULL)
2313 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2314 else
2315 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2316 }
2317
2318 if (jump != NULL)
2319 JUMPHERE(jump);
2320 }
2321
/* Emits an end-of-subject test.
     common       compiler state
     end_reached  jump list that receives the jumps taken when STR_PTR has
                  reached STR_END
   In JIT_COMPILE mode this is a single compare-and-jump. In the partial
   modes the emitted code falls through when STR_PTR < STR_END; otherwise it
   goes to end_reached if start_used_ptr >= STR_PTR, and else signals the
   partial match: soft mode stores 0 into hit_start and then also goes to
   end_reached, hard mode jumps to the partial match exit. */
2322 static void check_str_end(compiler_common *common, jump_list **end_reached)
2323 {
2324 /* Does not affect registers. Usually used in a tight spot. */
2325 DEFINE_COMPILER;
2326 struct sljit_jump *jump;
2327
2328 if (common->mode == JIT_COMPILE)
2329 {
2330 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2331 return;
2332 }
2333
/* Common case: subject not exhausted, skip everything below. */
2334 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2335 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2336 {
2337 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2339 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2340 }
2341 else
2342 {
2343 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2344 if (common->partialmatchlabel != NULL)
2345 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2346 else
2347 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2348 }
2349 JUMPHERE(jump);
2350 }
2351
/* Emits an end-of-subject test whose failure path is a backtrack.
     common      compiler state
     backtracks  jump list that receives the jumps taken when no character
                 is available at STR_PTR
   In JIT_COMPILE mode this is a single compare-and-jump. In the partial
   modes the emitted code falls through when STR_PTR < STR_END; otherwise it
   backtracks if start_used_ptr >= STR_PTR, and else signals the partial
   match: soft mode stores 0 into hit_start and backtracks, hard mode jumps
   to the partial match exit. */
2352 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2353 {
2354 DEFINE_COMPILER;
2355 struct sljit_jump *jump;
2356
2357 if (common->mode == JIT_COMPILE)
2358 {
2359 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2360 return;
2361 }
2362
2363 /* Partial matching mode. */
2364 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2365 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2366 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2367 {
2368 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2369 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2370 }
2371 else
2372 {
2373 if (common->partialmatchlabel != NULL)
2374 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2375 else
2376 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2377 }
2378 JUMPHERE(jump);
2379 }
2380
/* Emits code that loads the character at STR_PTR into TMP1 and moves
   STR_PTR past it.
     common  compiler state (utf flag, utfreadchar jump list)
   In UTF mode of the 8 and 16 bit builds a first code unit of at least
   0xc0 / 0xd800 is handed to the utfreadchar helper through a fast call;
   smaller values skip the call. The final one-unit advance is emitted on
   both paths. */
2381 static void read_char(compiler_common *common)
2382 {
2383 /* Reads the character into TMP1, updates STR_PTR.
2384 Does not check STR_END. TMP2 Destroyed. */
2385 DEFINE_COMPILER;
2386 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2387 struct sljit_jump *jump;
2388 #endif
2389
2390 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2391 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2392 if (common->utf)
2393 {
2394 #if defined COMPILE_PCRE8
2395 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2396 #elif defined COMPILE_PCRE16
2397 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2398 #endif /* COMPILE_PCRE[8|16] */
2399 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2400 JUMPHERE(jump);
2401 }
2402 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2403 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2404 }
2405
/* Emits code that loads the character at STR_PTR into TMP1 without
   consuming it.
     common  compiler state (utf flag, utfreadchar jump list)
   In UTF mode of the 8 and 16 bit builds a first code unit of at least
   0xc0 / 0xd800 is decoded by the utfreadchar helper; the value the helper
   leaves in TMP2 is then subtracted from STR_PTR to undo the helper's
   advance. */
2406 static void peek_char(compiler_common *common)
2407 {
2408 /* Reads the character into TMP1, keeps STR_PTR.
2409 Does not check STR_END. TMP2 Destroyed. */
2410 DEFINE_COMPILER;
2411 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2412 struct sljit_jump *jump;
2413 #endif
2414
2415 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2416 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2417 if (common->utf)
2418 {
2419 #if defined COMPILE_PCRE8
2420 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2421 #elif defined COMPILE_PCRE16
2422 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2423 #endif /* COMPILE_PCRE[8|16] */
2424 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
/* Restore STR_PTR: the helper moved it forward by the amount it returns in TMP2. */
2425 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2426 JUMPHERE(jump);
2427 }
2428 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2429 }
2430
/* Emits code that consumes one character and loads its ctypes entry into
   TMP1. The first code unit is left in TMP2 (and TMP2 is further modified
   on the UTF-16 path).
     common  compiler state (utf flag, ctypes table address, utfreadtype8
             jump list)
   The ctypes table is only indexed with values up to 255; the 16 and 32
   bit paths produce type 0 for anything larger. */
2431 static void read_char8_type(compiler_common *common)
2432 {
2433 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2434 DEFINE_COMPILER;
2435 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2436 struct sljit_jump *jump;
2437 #endif
2438
2439 #ifdef SUPPORT_UTF
2440 if (common->utf)
2441 {
2442 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2443 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2444 #if defined COMPILE_PCRE8
2445 /* This can be an extra read in some situations, but hopefully
2446 it is needed in most cases. */
2447 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* Lead bytes of 0xc0 and above are finished by the utfreadtype8 helper. */
2448 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2449 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2450 JUMPHERE(jump);
2451 #elif defined COMPILE_PCRE16
/* Type 0 unless the unit is <= 255, in which case the table is read. */
2452 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2453 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2454 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2455 JUMPHERE(jump);
2456 /* Skip low surrogate if necessary. */
/* (unit & 0xfc00) == 0xd800 gives 1, shifted to 2 bytes, added to STR_PTR. */
2457 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2458 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2459 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2460 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2461 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2462 #elif defined COMPILE_PCRE32
2463 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2464 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2465 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2466 JUMPHERE(jump);
2467 #endif /* COMPILE_PCRE[8|16|32] */
2468 return;
2469 }
2470 #endif /* SUPPORT_UTF */
/* Non-UTF path. */
2471 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2472 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2473 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2474 /* The ctypes array contains only 256 values. */
2475 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2476 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2477 #endif
2478 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2479 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2480 JUMPHERE(jump);
2481 #endif
2482 }
2483
/* Emits code that moves STR_PTR back by one character.
     common  compiler state (utf flag)
   Outside UTF mode (and always in the 32 bit build) this is a single
   one-unit subtraction. The UTF paths use TMP1 as scratch. */
2484 static void skip_char_back(compiler_common *common)
2485 {
2486 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2487 DEFINE_COMPILER;
2488 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2489 #if defined COMPILE_PCRE8
2490 struct sljit_label *label;
2491
2492 if (common->utf)
2493 {
/* UTF-8: keep stepping back one byte while the byte just stepped over has
   the form 10xxxxxx (value & 0xc0 == 0x80). */
2494 label = LABEL();
2495 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2496 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2497 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2498 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2499 return;
2500 }
2501 #elif defined COMPILE_PCRE16
2502 if (common->utf)
2503 {
/* UTF-16: step back one unit; if that unit satisfies (unit & 0xfc00) ==
   0xdc00, step back one more unit (1 shifted left by 1 = 2 bytes). */
2504 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2505 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2506 /* Skip low surrogate if necessary. */
2507 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2508 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2509 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2510 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2511 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2512 return;
2513 }
2514 #endif /* COMPILE_PCRE[8|16] */
2515 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2516 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2517 }
2518
/* Emits a newline test for the character held in TMP1.
     common      compiler state (anynewline jump list, newline value)
     nltype      NLTYPE_ANY, NLTYPE_ANYCRLF or NLTYPE_FIXED
     backtracks  jump list that receives the conditional jump
     jumpiftrue  TRUE: the jump is taken when TMP1 is a newline;
                 FALSE: the jump is taken when it is not */
2519 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2520 {
2521 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2522 DEFINE_COMPILER;
2523
2524 if (nltype == NLTYPE_ANY)
2525 {
/* The anynewline helper is fast-called and the jump is chosen from the
   zero flag afterwards (presumably set by the helper - it is not visible
   here). */
2526 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2527 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2528 }
2529 else if (nltype == NLTYPE_ANYCRLF)
2530 {
/* TMP2 = (TMP1 == CR) | (TMP1 == NL); the OR also sets the flag tested by
   the jump. */
2531 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2532 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2533 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2534 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2535 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2536 }
2537 else
2538 {
/* Only a fixed newline below 256 is handled here: one direct compare. */
2539 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2540 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2541 }
2542 }
2543
2544 #ifdef SUPPORT_UTF
2545
2546 #if defined COMPILE_PCRE8
/* Emits the shared UTF-8 decoding helper (entered with a fast call).
     common  compiler state
   On entry TMP1 holds the lead byte and STR_PTR points at it. On return
   TMP1 holds the decoded value, TMP2 holds the number of extra bytes, and
   STR_PTR has been advanced by that same amount. The sequence length is
   chosen by testing bits 0x20 and 0x10 of the lead byte. */
2547 static void do_utfreadchar(compiler_common *common)
2548 {
2549 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2550 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2551 DEFINE_COMPILER;
2552 struct sljit_jump *jump;
2553
2554 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2555 /* Searching for the first zero. */
2556 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2557 jump = JUMP(SLJIT_C_NOT_ZERO);
2558 /* Two byte sequence. */
/* value = ((lead & 0x1f) << 6) | (byte1 & 0x3f) */
2559 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2560 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2561 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2562 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2563 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2564 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2565 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2566 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2567 JUMPHERE(jump);
2568
2569 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2570 jump = JUMP(SLJIT_C_NOT_ZERO);
2571 /* Three byte sequence. */
/* value = ((lead & 0x0f) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f) */
2572 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2573 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2574 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2575 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2576 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2577 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2578 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2579 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2580 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2581 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2582 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2583 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2584 JUMPHERE(jump);
2585
2586 /* Four byte sequence. */
/* value = ((lead & 0x07) << 18) | ((byte1 & 0x3f) << 12)
           | ((byte2 & 0x3f) << 6) | (byte3 & 0x3f) */
2587 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2588 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2589 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2590 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2591 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2592 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2593 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2594 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2595 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2596 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2597 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2598 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2599 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2600 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2601 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2602 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2603 }
2604
/* Emits the shared helper that finishes a ctypes lookup for a multi-byte
   UTF-8 character (entered with a fast call).
     common  compiler state (ctypes table address)
   On entry TMP2 holds the lead byte. The first read below uses offset 0,
   so STR_PTR is expected to point just after the lead byte already. On
   return TMP1 holds the type (0 for every value above 255) and STR_PTR is
   past the whole character. */
2605 static void do_utfreadtype8(compiler_common *common)
2606 {
2607 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2608 of the character (>= 0xc0). Return value in TMP1. */
2609 DEFINE_COMPILER;
2610 struct sljit_jump *jump;
2611 struct sljit_jump *compare;
2612
2613 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2614
2615 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2616 jump = JUMP(SLJIT_C_NOT_ZERO);
2617 /* Two byte sequence. */
/* Decode into TMP2; only a two byte sequence can yield a value <= 255. */
2618 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2619 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2620 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2621 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2622 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2623 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2624 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2625 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2626 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2627
2628 JUMPHERE(compare);
2629 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2630 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2631 JUMPHERE(jump);
2632
2633 /* We only have types for characters less than 256. */
/* Longer sequences: advance STR_PTR by the utf8_table4 entry for the lead
   byte (presumably the count of additional bytes) and return type 0. */
2634 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2635 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2636 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2637 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2638 }
2639
2640 #elif defined COMPILE_PCRE16
2641
/* Emits the shared UTF-16 decoding helper (entered with a fast call).
     common  compiler state
   On entry TMP1 holds the first 16 bit unit (>= 0xd800) and STR_PTR points
   at it. On return TMP1 holds the character value, TMP2 holds the number
   of extra bytes consumed, and STR_PTR has been advanced by that amount.
   A first unit below 0xdc00 is combined with the following unit; any other
   unit is a complete character on its own. */
2642 static void do_utfreadchar(compiler_common *common)
2643 {
2644 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2645 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2646 DEFINE_COMPILER;
2647 struct sljit_jump *jump;
2648
2649 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2650 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2651 /* Single unit character: TMP1 is already the value. The extra length must
still be reported as zero, because peek_char subtracts TMP2 from STR_PTR
after calling this helper. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2652 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2653
2654 JUMPHERE(jump);
2655 /* Combine two 16 bit characters. */
/* value = (((first & 0x3ff) << 10) | (second & 0x3ff)) + 0x10000 */
2656 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2657 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2658 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2659 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2660 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2661 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2662 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2663 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2664 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2665 }
2666
2667 #endif /* COMPILE_PCRE[8|16] */
2668
2669 #endif /* SUPPORT_UTF */
2670
2671 #ifdef SUPPORT_UCP
2672
2673 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2674 #define UCD_BLOCK_MASK 127
2675 #define UCD_BLOCK_SHIFT 7
2676
/* Emits the shared Unicode property lookup helper (entered with a fast
   call).
     common  compiler state
   On entry TMP1 holds the character. The emitted code performs a two-stage
   table walk:
     block  = ucd_stage1[c >> UCD_BLOCK_SHIFT]              (byte read)
     record = ucd_stage2[(block << UCD_BLOCK_SHIFT)
                         + (c & UCD_BLOCK_MASK)]            (16 bit read)
     type   = ucd_records[record].chartype                  (byte read)
   On return TMP1 holds the chartype and TMP2 the record index. */
2677 static void do_getucd(compiler_common *common)
2678 {
2679 /* Search the UCD record for the character comes in TMP1.
2680 Returns chartype in TMP1 and UCD offset in TMP2. */
2681 DEFINE_COMPILER;
2682
/* The shift amounts used below (7 for the block size, 3 for the record
   size) depend on these two values. */
2683 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2684
2685 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2686 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2687 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2688 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2689 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2690 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2691 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
/* Index scaled by 2 (shift 1): 16 bit stage2 entries. */
2692 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2693 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
/* Index scaled by 8 (shift 3): one ucd_record per entry. */
2694 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2695 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2696 }
2697 #endif
2698
/* Emits the entry of the main matching loop together with the code that
   moves STR_PTR to the next start position.
     common     compiler state (utf flag, newline settings, first_line_end)
     hascrorlf  when TRUE, the CR LF skipping code is not generated
     firstline  when TRUE, the end of the first line is searched first and
                stored in first_line_end; this also disables the CR LF
                skipping code
   Returns the label of the "advance to next character" code. The very
   first entry jumps over that code (the "start" jump), so a later jump to
   the returned label moves STR_PTR forward by one character before falling
   into whatever is emitted after this function. */
2699 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2700 {
2701 DEFINE_COMPILER;
2702 struct sljit_label *mainloop;
2703 struct sljit_label *newlinelabel = NULL;
2704 struct sljit_jump *start;
2705 struct sljit_jump *end = NULL;
2706 struct sljit_jump *nl = NULL;
2707 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2708 struct sljit_jump *singlechar;
2709 #endif
2710 jump_list *newline = NULL;
2711 BOOL newlinecheck = FALSE;
2712 BOOL readuchar = FALSE;
2713
/* newlinecheck: the advance code has to test for a two unit newline. */
2714 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2715 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2716 newlinecheck = TRUE;
2717
2718 if (firstline)
2719 {
2720 /* Search for the end of the first line. */
2721 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is used as the scan pointer; keep the real value in TMP3. */
2722 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2723
2724 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2725 {
/* Two unit fixed newline: look at the units at [-1] and [0] after each
   step; first_line_end becomes the address of the first newline unit, or
   STR_PTR - 1 when the end of the subject stopped the scan. */
2726 mainloop = LABEL();
2727 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2728 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2729 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2730 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2731 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2732 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2733 JUMPHERE(end);
2734 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2735 }
2736 else
2737 {
/* Other newline types: first_line_end is stored before each character is
   read, so on a newline hit it addresses the newline itself; when the
   subject ends first, it is set to the final STR_PTR. */
2738 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2739 mainloop = LABEL();
2740 /* Continual stores does not cause data dependency. */
2741 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2742 read_char(common);
2743 check_newlinechar(common, common->nltype, &newline, TRUE);
2744 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2745 JUMPHERE(end);
2746 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2747 set_jumps(newline, LABEL());
2748 }
2749
2750 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2751 }
2752
/* The first entry skips the advance code below. */
2753 start = JUMP(SLJIT_JUMP);
2754
2755 if (newlinecheck)
2756 {
/* Reached from the advance code when the current unit equals the first
   newline unit: step over it and, unless the subject ended, step over the
   next unit too if it equals the second newline unit. */
2757 newlinelabel = LABEL();
2758 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2759 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2760 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2761 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2762 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2763 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2764 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2765 #endif
2766 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2767 nl = JUMP(SLJIT_JUMP);
2768 }
2769
2770 mainloop = LABEL();
2771
2772 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is only loaded when something below needs it. */
2773 #ifdef SUPPORT_UTF
2774 if (common->utf) readuchar = TRUE;
2775 #endif
2776 if (newlinecheck) readuchar = TRUE;
2777
2778 if (readuchar)
2779 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2780
2781 if (newlinecheck)
2782 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2783
2784 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2785 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2786 #if defined COMPILE_PCRE8
2787 if (common->utf)
2788 {
/* UTF-8: for a lead byte >= 0xc0 also skip the number of bytes given by
   utf8_table4. */
2789 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2790 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2791 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2792 JUMPHERE(singlechar);
2793 }
2794 #elif defined COMPILE_PCRE16
2795 if (common->utf)
2796 {
/* UTF-16: skip one more unit when (unit & 0xfc00) == 0xd800. */
2797 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2798 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2799 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2800 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2801 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2802 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2803 JUMPHERE(singlechar);
2804 }
2805 #endif /* COMPILE_PCRE[8|16] */
2806 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2807 JUMPHERE(start);
2808
2809 if (newlinecheck)
2810 {
2811 JUMPHERE(end);
2812 JUMPHERE(nl);
2813 }
2814
2815 return mainloop;
2816 }
2817
2818 #define MAX_N_CHARS 3
2819
/* Tries to emit a fast scan for the first two or three code units that
   every match must start with.
     common     compiler state (start of the compiled pattern, utf flag,
                fcc table, first_line_end)
     firstline  when TRUE the scan is limited by first_line_end instead of
                STR_END
   Returns TRUE when the scan was emitted. Returns FALSE without emitting
   anything when the pattern has alternatives at the top level, when a
   caseless character differs from its other case in more than one bit, or
   when fewer than two code units could be collected.
   chars[] holds pairs: chars[2k] is the value to compare with (already
   OR-ed with the case bit) and chars[2k + 1] is the mask to OR into the
   subject unit first (0 for an exact compare). */
2820 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2821 {
2822 DEFINE_COMPILER;
2823 struct sljit_label *start;
2824 struct sljit_jump *quit;
2825 pcre_uint32 chars[MAX_N_CHARS * 2];
2826 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2827 int location = 0;
2828 pcre_int32 len, c, bit, caseless;
2829 int must_stop;
2830
2831 /* We do not support alternatives now. */
2832 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2833 return FALSE;
2834
/* Walk the opcodes. must_stop: 0 = literal, scanning may continue after
   it; 1 = take the character of this item, then stop; 2 = stop right away
   without taking anything. */
2835 while (TRUE)
2836 {
2837 caseless = 0;
2838 must_stop = 1;
2839 switch(*cc)
2840 {
2841 case OP_CHAR:
2842 must_stop = 0;
2843 cc++;
2844 break;
2845
2846 case OP_CHARI:
2847 caseless = 1;
2848 must_stop = 0;
2849 cc++;
2850 break;
2851
2852 case OP_SOD:
2853 case OP_SOM:
2854 case OP_SET_SOM:
2855 case OP_NOT_WORD_BOUNDARY:
2856 case OP_WORD_BOUNDARY:
2857 case OP_EODN:
2858 case OP_EOD:
2859 case OP_CIRC:
2860 case OP_CIRCM:
2861 case OP_DOLL:
2862 case OP_DOLLM:
2863 /* Zero width assertions. */
2864 cc++;
2865 continue;
2866
2867 case OP_PLUS:
2868 case OP_MINPLUS:
2869 case OP_POSPLUS:
2870 cc++;
2871 break;
2872
2873 case OP_EXACT:
2874 cc += 1 + IMM2_SIZE;
2875 break;
2876
2877 case OP_PLUSI:
2878 case OP_MINPLUSI:
2879 case OP_POSPLUSI:
2880 caseless = 1;
2881 cc++;
2882 break;
2883
2884 case OP_EXACTI:
2885 caseless = 1;
2886 cc += 1 + IMM2_SIZE;
2887 break;
2888
2889 default:
2890 must_stop = 2;
2891 break;
2892 }
2893
2894 if (must_stop == 2)
2895 break;
2896
/* len = number of code units of the character at cc. */
2897 len = 1;
2898 #ifdef SUPPORT_UTF
2899 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2900 #endif
2901
2902 if (caseless && char_has_othercase(common, cc))
2903 {
2904 caseless = char_get_othercase_bit(common, cc);
2905 if (caseless == 0)
2906 return FALSE;
/* Repack the result: low 8 bits = the value "len" has in the copy loop
   below when the affected unit is reached, upper bits = the mask to OR in
   (moved to the high byte of the unit when bit 0x100 was set). */
2907 #ifdef COMPILE_PCRE8
2908 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2909 #else
2910 if ((caseless & 0x100) != 0)
2911 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2912 else
2913 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2914 #endif
2915 }
2916 else
2917 caseless = 0;
2918
/* Copy the code units of this character into chars[]. */
2919 while (len > 0 && location < MAX_N_CHARS * 2)
2920 {
2921 c = *cc;
2922 bit = 0;
2923 if (len == (caseless & 0xff))
2924 {
2925 bit = caseless >> 8;
2926 c |= bit;
2927 }
2928
2929 chars[location] = c;
2930 chars[location + 1] = bit;
2931
2932 len--;
2933 location += 2;
2934 cc++;
2935 }
2936
2937 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2938 break;
2939 }
2940
2941 /* At least two characters are required. */
2942 if (location < 2 * 2)
2943 return FALSE;
2944
/* Lower the limit by (count - 1) units so that the reads at offsets 1 and
   2 below stay inside the limit. With firstline the original STR_END is
   kept in TMP3. */
2945 if (firstline)
2946 {
2947 SLJIT_ASSERT(common->first_line_end != 0);
2948 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2949 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2950 }
2951 else
2952 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2953
/* Scan loop: STR_PTR is advanced before the compares, every mismatch goes
   back to "start", and after a full match STR_PTR is moved back by one
   unit so that it addresses the first matched unit. */
2954 start = LABEL();
2955 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2956
2957 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2958 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2959 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2960 if (chars[1] != 0)
2961 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2962 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* TMP1 is free again: preload the third unit when there is one. */
2963 if (location > 2 * 2)
2964 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2965 if (chars[3] != 0)
2966 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2967 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2968 if (location > 2 * 2)
2969 {
2970 if (chars[5] != 0)
2971 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2972 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2973 }
2974 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2975
2976 JUMPHERE(quit);
2977
/* Restore the real STR_END. */
2978 if (firstline)
2979 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2980 else
2981 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2982 return TRUE;
2983 }
2984
2985 #undef MAX_N_CHARS
2986
/* Emits a scan that advances STR_PTR one code unit at a time until the
   unit at STR_PTR matches the required first character or the limit is
   reached.
     common      compiler state (utf flag, fcc table, first_line_end)
     first_char  the code unit every match must start with
     caseless    when TRUE the other case of first_char is accepted too
     firstline   when TRUE the limit is first_line_end instead of STR_END;
                 the original STR_END is kept in TMP3 and restored at the
                 end */
2987 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2988 {
2989 DEFINE_COMPILER;
2990 struct sljit_label *start;
2991 struct sljit_jump *quit;
2992 struct sljit_jump *found;
2993 pcre_uchar oc, bit;
2994
2995 if (firstline)
2996 {
2997 SLJIT_ASSERT(common->first_line_end != 0);
2998 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2999 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3000 }
3001
3002 start = LABEL();
3003 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3004 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3005
/* oc = other case of first_char, or first_char itself when the compare is
   case sensitive. */
3006 oc = first_char;
3007 if (caseless)
3008 {
3009 oc = TABLE_GET(first_char, common->fcc, first_char);
3010 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3011 if (first_char > 127 && common->utf)
3012 oc = UCD_OTHERCASE(first_char);
3013 #endif
3014 }
/* Three compare shapes: exact compare; single differing bit (OR the bit
   in, then one compare); otherwise two compares whose results are OR-ed. */
3015 if (first_char == oc)
3016 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3017 else
3018 {
3019 bit = first_char ^ oc;
3020 if (is_powerof2(bit))
3021 {
3022 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3023 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3024 }
3025 else
3026 {
3027 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3028 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3029 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3030 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3031 found = JUMP(SLJIT_C_NOT_ZERO);
3032 }
3033 }
3034
/* No match at this position: step one unit forward and retry. */
3035 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3036 JUMPTO(SLJIT_JUMP, start);
3037 JUMPHERE(found);
3038 JUMPHERE(quit);
3039
3040 if (firstline)
3041 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3042 }
3043
/* Emits a scan that moves STR_PTR to the position following the next
   newline, for patterns that can only match at the start of a line.
     common     compiler state (newline settings, first_line_end)
     firstline  when TRUE the limit is first_line_end instead of STR_END;
                the original STR_END is kept in TMP3 for the duration of
                the scan and restored from TMP3 on every exit path
   Nothing is skipped when STR_PTR is at (or before) the "str" field of the
   jit_arguments block. TMP1 and TMP2 are used as scratch. */
3044 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3045 {
3046 DEFINE_COMPILER;
3047 struct sljit_label *loop;
3048 struct sljit_jump *lastchar;
3049 struct sljit_jump *firstchar;
3050 struct sljit_jump *quit;
3051 struct sljit_jump *foundcr = NULL;
3052 struct sljit_jump *notfoundnl;
3053 jump_list *newline = NULL;
3054
3055 if (firstline)
3056 {
3057 SLJIT_ASSERT(common->first_line_end != 0);
3058 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3059 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3060 }
3061
3062 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3063 {
/* Two unit fixed newline. */
3064 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3065 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3066 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3067 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3068 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3069
/* Step back one unit when STR_PTR is at least two units past "begin", so
   that the [-2] read in the loop below stays inside the subject. */
3070 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3071 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3072 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3073 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3074 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3075 #endif
3076 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3077
/* Advance until the two units just before STR_PTR form the newline. */
3078 loop = LABEL();
3079 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3080 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3081 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3082 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3083 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3084 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3085
3086 JUMPHERE(quit);
3087 JUMPHERE(firstchar);
3088 JUMPHERE(lastchar);
3089
/* The saved STR_END lives in TMP3 (see the top of the function); nothing
   in this function stores it in a stack slot. */
3090 if (firstline)
3091 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3092 return;
3093 }
3094
/* Every other newline type. */
3095 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3096 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3097 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
/* Start with the character before STR_PTR. */
3098 skip_char_back(common);
3099
/* Read characters until a newline was consumed or the limit is reached.
   check_newlinechar is asked for the "not a newline" jump, which is bound
   to the loop label. A CR is diverted first when CR LF is possible. */
3100 loop = LABEL();
3101 read_char(common);
3102 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3103 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3104 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3105 check_newlinechar(common, common->nltype, &newline, FALSE);
3106 set_jumps(newline, loop);
3107
3108 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3109 {
/* After a CR: skip one more unit when it is an NL and the limit has not
   been reached. */
3110 quit = JUMP(SLJIT_JUMP);
3111 JUMPHERE(foundcr);
3112 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3113 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3114 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3115 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3116 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3117 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3118 #endif
3119 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3120 JUMPHERE(notfoundnl);
3121 JUMPHERE(quit);
3122 }
3123 JUMPHERE(lastchar);
3124 JUMPHERE(firstchar);
3125
3126 if (firstline)
3127 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3128 }
3129
3130 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3131
3132 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3133 {
3134 DEFINE_COMPILER;
3135 struct sljit_label *start;
3136 struct sljit_jump *quit;
3137 struct sljit_jump *found = NULL;
3138 jump_list *matches = NULL;
3139 pcre_uint8 inverted_start_bits[32];
3140 int i;
3141 #ifndef COMPILE_PCRE8
3142 struct sljit_jump *jump;
3143 #endif
3144
3145 for (i = 0; i < 32; ++i)
3146 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3147
3148 if (firstline)
3149 {
3150 SLJIT_ASSERT(common->first_line_end != 0);
3151 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3152 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3153 }
3154
3155 start = LABEL();
3156 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3157 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3158 #ifdef SUPPORT_UTF
3159 if (common->utf)
3160 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3161 #endif
3162
3163 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3164 {
3165 #ifndef COMPILE_PCRE8
3166 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3167 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3168 JUMPHERE(jump);
3169 #endif
3170 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3171 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3172 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3173 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3174 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3175 found = JUMP(SLJIT_C_NOT_ZERO);
3176 }
3177
3178 #ifdef SUPPORT_UTF
3179 if (common->utf)
3180 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3181 #endif
3182 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3183 #ifdef SUPPORT_UTF
3184 #if defined COMPILE_PCRE8
3185 if (common->utf)
3186 {
3187 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3188 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3189 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3190 }
3191 #elif defined COMPILE_PCRE16
3192 if (common->utf)
3193 {
3194 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3195 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3196 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3197 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3198 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3199 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3200 }
3201 #endif /* COMPILE_PCRE[8|16] */
3202 #endif /* SUPPORT_UTF */
3203 JUMPTO(SLJIT_JUMP, start);
3204 if (found != NULL)
3205 JUMPHERE(found);
3206 if (matches != NULL)
3207 set_jumps(matches, LABEL());
3208 JUMPHERE(quit);
3209
3210 if (firstline)
3211 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3212 }
3213
3214 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3215 {
3216 DEFINE_COMPILER;
3217 struct sljit_label *loop;
3218 struct sljit_jump *toolong;
3219 struct sljit_jump *alreadyfound;
3220 struct sljit_jump *found;
3221 struct sljit_jump *foundoc = NULL;
3222 struct sljit_jump *notfound;
3223 pcre_uint32 oc, bit;
3224
3225 SLJIT_ASSERT(common->req_char_ptr != 0);
3226 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3227 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3228 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3229 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3230
3231 if (has_firstchar)
3232 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3233 else
3234 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3235
3236 loop = LABEL();
3237 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3238
3239 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3240 oc = req_char;
3241 if (caseless)
3242 {
3243 oc = TABLE_GET(req_char, common->fcc, req_char);
3244 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3245 if (req_char > 127 && common->utf)
3246 oc = UCD_OTHERCASE(req_char);
3247 #endif
3248 }
3249 if (req_char == oc)
3250 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3251 else
3252 {
3253 bit = req_char ^ oc;
3254 if (is_powerof2(bit))
3255 {
3256 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3257 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3258 }
3259 else
3260 {
3261 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3262 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3263 }
3264 }
3265 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3266 JUMPTO(SLJIT_JUMP, loop);
3267
3268 JUMPHERE(found);
3269 if (foundoc)
3270 JUMPHERE(foundoc);
3271 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3272 JUMPHERE(alreadyfound);
3273 JUMPHERE(toolong);
3274 return notfound;
3275 }
3276
3277 static void do_revertframes(compiler_common *common)
3278 {
3279 DEFINE_COMPILER;
3280 struct sljit_jump *jump;
3281 struct sljit_label *mainloop;
3282
3283 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3284 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3285 GET_LOCAL_BASE(TMP3, 0, 0);
3286
3287 /* Drop frames until we reach STACK_TOP. */
3288 mainloop = LABEL();
3289 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3290 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3291 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3292
3293 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3294 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3295 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3296 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3297 JUMPTO(SLJIT_JUMP, mainloop);
3298
3299 JUMPHERE(jump);
3300 jump = JUMP(SLJIT_C_SIG_LESS);
3301 /* End of dropping frames. */
3302 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3303
3304 JUMPHERE(jump);
3305 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3306 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3307 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3308 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3309 JUMPTO(SLJIT_JUMP, mainloop);
3310 }
3311
3312 static void check_wordboundary(compiler_common *common)
3313 {
3314 DEFINE_COMPILER;
3315 struct sljit_jump *skipread;
3316 jump_list *skipread_list = NULL;
3317 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3318 struct sljit_jump *jump;
3319 #endif
3320
3321 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3322
3323 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3324 /* Get type of the previous char, and put it to LOCALS1. */
3325 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3326 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3327 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3328 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3329 skip_char_back(common);
3330 check_start_used_ptr(common);
3331 read_char(common);
3332
3333 /* Testing char type. */
3334 #ifdef SUPPORT_UCP
3335 if (common->use_ucp)
3336 {
3337 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3338 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3339 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3340 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3341 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3342 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3343 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3344 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3345 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3346 JUMPHERE(jump);
3347 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3348 }
3349 else
3350 #endif
3351 {
3352 #ifndef COMPILE_PCRE8
3353 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3354 #elif defined SUPPORT_UTF
3355 /* Here LOCALS1 has already been zeroed. */
3356 jump = NULL;
3357 if (common->utf)
3358 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3359 #endif /* COMPILE_PCRE8 */
3360 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3361 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3362 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3363 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3364 #ifndef COMPILE_PCRE8
3365 JUMPHERE(jump);
3366 #elif defined SUPPORT_UTF
3367 if (jump != NULL)
3368 JUMPHERE(jump);
3369 #endif /* COMPILE_PCRE8 */
3370 }
3371 JUMPHERE(skipread);
3372
3373 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3374 check_str_end(common, &skipread_list);
3375 peek_char(common);
3376
3377 /* Testing char type. This is a code duplication. */
3378 #ifdef SUPPORT_UCP
3379 if (common->use_ucp)
3380 {
3381 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3382 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3383 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3384 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3385 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3386 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3387 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3388 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3389 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3390 JUMPHERE(jump);
3391 }
3392 else
3393 #endif
3394 {
3395 #ifndef COMPILE_PCRE8
3396 /* TMP2 may be destroyed by peek_char. */
3397 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3398 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3399 #elif defined SUPPORT_UTF
3400 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3401 jump = NULL;
3402 if (common->utf)
3403 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3404 #endif
3405 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3406 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3407 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3408 #ifndef COMPILE_PCRE8
3409 JUMPHERE(jump);
3410 #elif defined SUPPORT_UTF
3411 if (jump != NULL)
3412 JUMPHERE(jump);
3413 #endif /* COMPILE_PCRE8 */
3414 }
3415 set_jumps(skipread_list, LABEL());
3416
3417 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3418 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3419 }
3420
/*
range format:

ranges[0] = number of bit change positions stored (at most MAX_RANGE_SIZE; -1 means the range list is invalid).
ranges[1] = value of the first bit (0 or 1)
ranges[2] .. ranges[ranges[0] + 1] = positions where the bit changes (the bit at that position differs from the previous one)
*/
3428
3429 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3430 {
3431 DEFINE_COMPILER;
3432 struct sljit_jump *jump;
3433
3434 if (ranges[0] < 0)
3435 return FALSE;
3436
3437 switch(ranges[0])
3438 {
3439 case 1:
3440 if (readch)
3441 read_char(common);
3442 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3443 return TRUE;
3444
3445 case 2:
3446 if (readch)
3447 read_char(common);
3448 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3449 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3450 return TRUE;
3451
3452 case 4:
3453 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3454 {
3455 if (readch)
3456 read_char(common);
3457 if (ranges[1] != 0)
3458 {
3459 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3460 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3461 }
3462 else
3463 {
3464 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3465 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3466 JUMPHERE(jump);
3467 }
3468 return TRUE;
3469 }
3470 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3471 {
3472 if (readch)
3473 read_char(common);
3474 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3475 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3476 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3477 return TRUE;
3478 }
3479 return FALSE;
3480
3481 default:
3482 return FALSE;
3483 }
3484 }
3485
3486 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3487 {
3488 int i, bit, length;
3489 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3490
3491 bit = ctypes[0] & flag;
3492 ranges[0] = -1;
3493 ranges[1] = bit != 0 ? 1 : 0;
3494 length = 0;
3495
3496 for (i = 1; i < 256; i++)
3497 if ((ctypes[i] & flag) != bit)
3498 {
3499 if (length >= MAX_RANGE_SIZE)
3500 return;
3501 ranges[2 + length] = i;
3502 length++;
3503 bit ^= flag;
3504 }
3505
3506 if (bit != 0)
3507 {
3508 if (length >= MAX_RANGE_SIZE)
3509 return;
3510 ranges[2 + length] = 256;
3511 length++;
3512 }
3513 ranges[0] = length;
3514 }
3515
3516 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3517 {
3518 int ranges[2 + MAX_RANGE_SIZE];
3519 pcre_uint8 bit, cbit, all;
3520 int i, byte, length = 0;
3521
3522 bit = bits[0] & 0x1;
3523 ranges[1] = bit;
3524 /* Can be 0 or 255. */
3525 all = -bit;
3526
3527 for (i = 0; i < 256; )
3528 {
3529 byte = i >> 3;
3530 if ((i & 0x7) == 0 && bits[byte] == all)
3531 i += 8;
3532 else
3533 {
3534 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3535 if (cbit != bit)
3536 {
3537 if (length >= MAX_RANGE_SIZE)
3538 return FALSE;
3539 ranges[2 + length] = i;
3540 length++;
3541 bit = cbit;
3542 all = -cbit;
3543 }
3544 i++;
3545 }
3546 }
3547
3548 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3549 {
3550 if (length >= MAX_RANGE_SIZE)
3551 return FALSE;
3552 ranges[2 + length] = 256;
3553 length++;
3554 }
3555 ranges[0] = length;
3556
3557 return check_ranges(common, ranges, backtracks, FALSE);
3558 }
3559
3560 static void check_anynewline(compiler_common *common)
3561 {
3562 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3563 DEFINE_COMPILER;
3564
3565 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3566
3567 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3568 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3569 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3570 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3571 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3572 #ifdef COMPILE_PCRE8
3573 if (common->utf)
3574 {
3575 #endif
3576 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3577 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3578 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3579 #ifdef COMPILE_PCRE8
3580 }
3581 #endif
3582 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3583 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3584 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3585 }
3586
3587 static void check_hspace(compiler_common *common)
3588 {
3589 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3590 DEFINE_COMPILER;
3591
3592 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3593
3594 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3595 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3596 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3597 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3598 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3599 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3600 #ifdef COMPILE_PCRE8
3601 if (common->utf)
3602 {
3603 #endif
3604 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3605 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3606 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3607 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3608 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3609 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3610 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3611 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3612 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3613 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3614 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3615 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3616 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3617 #ifdef COMPILE_PCRE8
3618 }
3619 #endif
3620 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3621 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3622
3623 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3624 }
3625
3626 static void check_vspace(compiler_common *common)
3627 {
3628 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3629 DEFINE_COMPILER;
3630
3631 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3632
3633 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3634 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3635 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3636 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3637 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3638 #ifdef COMPILE_PCRE8
3639 if (common->utf)
3640 {
3641 #endif
3642 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3643 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3644 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3645 #ifdef COMPILE_PCRE8
3646 }
3647 #endif
3648 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3649 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3650
3651 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3652 }
3653
3654 #define CHAR1 STR_END
3655 #define CHAR2 STACK_TOP
3656
3657 static void do_casefulcmp(compiler_common *common)
3658 {
3659 DEFINE_COMPILER;
3660 struct sljit_jump *jump;
3661 struct sljit_label *label;
3662
3663 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3664 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3665 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3666 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3667 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3668 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3669
3670 label = LABEL();
3671 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3672 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3673 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3674 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3675 JUMPTO(SLJIT_C_NOT_ZERO, label);
3676
3677 JUMPHERE(jump);
3678 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3679 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3680 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3681 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3682 }
3683
3684 #define LCC_TABLE STACK_LIMIT
3685
3686 static void do_caselesscmp(compiler_common *common)
3687 {
3688 DEFINE_COMPILER;
3689 struct sljit_jump *jump;
3690 struct sljit_label *label;
3691
3692 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3693 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3694
3695 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3697 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3698 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3699 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3700 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3701
3702 label = LABEL();
3703 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3704 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3705 #ifndef COMPILE_PCRE8
3706 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3707 #endif
3708 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3709 #ifndef COMPILE_PCRE8
3710 JUMPHERE(jump);
3711 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3712 #endif
3713 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3714 #ifndef COMPILE_PCRE8
3715 JUMPHERE(jump);
3716 #endif
3717 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3718 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3719 JUMPTO(SLJIT_C_NOT_ZERO, label);
3720
3721 JUMPHERE(jump);
3722 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3723 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3724 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3725 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3726 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3727 }
3728
3729 #undef LCC_TABLE
3730 #undef CHAR1
3731 #undef CHAR2
3732
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of the character sequence src1..end1 with the text at
args->uchar_ptr, bounded by args->end. This is a plain C helper because the
same work would be ineffective to do at JIT level.

Two characters are accepted as equal when they are identical, when the first
equals the second plus its other_case offset from the UCD record, or when the
first is found in the caseless set selected by the second's UCD record.

Returns:
  the position in the second text just after the compared part when every
    character was accepted,
  NULL when a pair was rejected,
  (pcre_uchar*)1 when the second text ended before the first sequence did. */

static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
  const pcre_uchar *subject = args->uchar_ptr;
  const pcre_uchar *subject_end = args->end;
  const ucd_record *record;
  const pcre_uint32 *set;
  pcre_uint32 wanted, actual;

  while (src1 < end1)
  {
    if (subject >= subject_end)
      return (pcre_uchar*)1;

    GETCHARINC(wanted, src1);
    GETCHARINC(actual, subject);
    record = GET_UCD(actual);

    if (wanted == actual || wanted == actual + record->other_case)
      continue;

    /* The set is scanned in order; passing the point where "wanted" would
    have to be means that it is not a member. */
    set = PRIV(ucd_caseless_sets) + record->caseset;
    while (wanted != *set)
    {
      if (wanted < *set)
        return NULL;
      set++;
    }
  }

  return subject;
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
3765
3766 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3767 compare_context* context, jump_list **backtracks)
3768 {
3769 DEFINE_COMPILER;
3770 unsigned int othercasebit = 0;
3771 pcre_uchar *othercasechar = NULL;
3772 #ifdef SUPPORT_UTF
3773 int utflength;
3774 #endif
3775
3776 if (caseless && char_has_othercase(common, cc))
3777 {
3778 othercasebit = char_get_othercase_bit(common, cc);
3779 SLJIT_ASSERT(othercasebit);
3780 /* Extracting bit difference info. */
3781 #if defined COMPILE_PCRE8
3782 othercasechar = cc + (othercasebit >> 8);
3783 othercasebit &= 0xff;
3784 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3785 /* Note that this code only handles characters in the BMP. If there
3786 ever are characters outside the BMP whose othercase differs in only one
3787 bit from itself (there currently are none), this code will need to be
3788 revised for COMPILE_PCRE32. */
3789 othercasechar = cc + (othercasebit >> 9);
3790 if ((othercasebit & 0x100) != 0)
3791 othercasebit = (othercasebit & 0xff) << 8;
3792 else
3793 othercasebit &= 0xff;
3794 #endif /* COMPILE_PCRE[8|16|32] */
3795 }
3796
3797 if (context->sourcereg == -1)
3798 {
3799 #if defined COMPILE_PCRE8
3800 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3801 if (context->length >= 4)
3802 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3803 else if (context->length >= 2)
3804 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3805 else
3806 #endif
3807 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3808 #elif defined COMPILE_PCRE16
3809 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3810 if (context->length >= 4)
3811 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3812 else
3813 #endif
3814 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3815 #elif defined COMPILE_PCRE32
3816 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3817 #endif /* COMPILE_PCRE[8|16|32] */
3818 context->sourcereg = TMP2;
3819 }
3820
3821 #ifdef SUPPORT_UTF
3822 utflength = 1;
3823 if (common->utf && HAS_EXTRALEN(*cc))
3824 utflength += GET_EXTRALEN(*cc);
3825
3826 do
3827 {
3828 #endif
3829
3830 context->length -= IN_UCHARS(1);
3831 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3832
3833 /* Unaligned read is supported. */
3834 if (othercasebit != 0 && othercasechar == cc)
3835 {
3836 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3837 context->oc.asuchars[context->ucharptr] = othercasebit;
3838 }
3839 else
3840 {
3841 context->c.asuchars[context->ucharptr] = *cc;
3842 context->oc.asuchars[context->ucharptr] = 0;
3843 }
3844 context->ucharptr++;
3845
3846 #if defined COMPILE_PCRE8
3847 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3848 #else
3849 if (context->ucharptr >= 2 || context->length == 0)
3850 #endif
3851 {
3852 if (context->length >= 4)
3853 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3854 else if (context->length >= 2)
3855 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3856 #if defined COMPILE_PCRE8
3857 else if (context->length >= 1)
3858 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3859 #endif /* COMPILE_PCRE8 */
3860 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3861
3862 switch(context->ucharptr)
3863 {
3864 case 4 / sizeof(pcre_uchar):
3865 if (context->oc.asint != 0)
3866 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3867 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3868 break;
3869
3870 case 2 / sizeof(pcre_uchar):
3871 if (context->oc.asushort != 0)
3872 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3873 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3874 break;
3875
3876 #ifdef COMPILE_PCRE8
3877 case 1:
3878 if (context->oc.asbyte != 0)
3879 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3880 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3881 break;
3882 #endif
3883
3884 default:
3885 SLJIT_ASSERT_STOP();
3886 break;
3887 }
3888 context->ucharptr = 0;
3889 }
3890
3891 #else
3892
3893 /* Unaligned read is unsupported or in 32 bit mode. */
3894 if (context->length >= 1)
3895 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3896
3897 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3898
3899 if (othercasebit != 0 && othercasechar == cc)
3900 {
3901 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3902 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3903 }
3904 else
3905 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3906
3907 #endif
3908
3909 cc++;
3910 #ifdef SUPPORT_UTF
3911 utflength--;
3912 }
3913 while (utflength > 0);
3914 #endif
3915
3916 return cc;
3917 }
3918
3919 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3920
/* The two macros below keep a register biased by a known offset so that range
tests can be done with a single unsigned comparison. They emit an SLJIT_SUB or
SLJIT_ADD that moves the register from the current bias to the requested one
(nothing is emitted when the bias is already right) and then record the new
bias. Both expand to more than one statement and evaluate "value" several
times. */

/* Re-biases typereg; the current bias is tracked in typeoffset. */
#define SET_TYPE_OFFSET(value) \
  if ((value) != typeoffset) \
  { \
    if ((value) > typeoffset) \
      OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
    else \
      OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
  } \
  typeoffset = (value);

/* Re-biases TMP1; the current bias is tracked in charoffset. */
#define SET_CHAR_OFFSET(value) \
  if ((value) != charoffset) \
  { \
    if ((value) > charoffset) \
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
    else \
      OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
  } \
  charoffset = (value);
3940
3941 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3942 {
3943 DEFINE_COMPILER;
3944 jump_list *found = NULL;
3945 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3946 pcre_int32 c, charoffset;
3947 const pcre_uint32 *other_cases;
3948 struct sljit_jump *jump = NULL;
3949 pcre_uchar *ccbegin;
3950 int compares, invertcmp, numberofcmps;
3951 #ifdef SUPPORT_UCP
3952 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3953 BOOL charsaved = FALSE;
3954 int typereg = TMP1, scriptreg = TMP1;
3955 pcre_int32 typeoffset;
3956 #endif
3957
3958 /* Although SUPPORT_UTF must be defined, we are
3959 not necessary in utf mode even in 8 bit mode. */
3960 detect_partial_match(common, backtracks);
3961 read_char(common);
3962
3963 if ((*cc++ & XCL_MAP) != 0)
3964 {
3965 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3966 #ifndef COMPILE_PCRE8
3967 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3968 #elif defined SUPPORT_UTF
3969 if (common->utf)
3970 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3971 #endif
3972
3973 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3974 {
3975 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3976 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3977 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3978 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3979 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3980 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3981 }
3982
3983 #ifndef COMPILE_PCRE8
3984 JUMPHERE(jump);
3985 #elif defined SUPPORT_UTF
3986 if (common->utf)
3987 JUMPHERE(jump);
3988 #endif
3989 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3990 #ifdef SUPPORT_UCP
3991 charsaved = TRUE;
3992 #endif
3993 cc += 32 / sizeof(pcre_uchar);
3994 }
3995
3996 /* Scanning the necessary info. */
3997 ccbegin = cc;
3998 compares = 0;
3999 while (*cc != XCL_END)
4000 {
4001 compares++;
4002 if (*cc == XCL_SINGLE)
4003 {
4004 cc += 2;
4005 #ifdef SUPPORT_UTF
4006 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4007 #endif
4008 #ifdef SUPPORT_UCP
4009 needschar = TRUE;
4010 #endif
4011 }
4012 else if (*cc == XCL_RANGE)
4013 {
4014 cc += 2;
4015 #ifdef SUPPORT_UTF
4016 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4017 #endif
4018 cc++;
4019 #ifdef SUPPORT_UTF
4020 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4021 #endif
4022 #ifdef SUPPORT_UCP
4023 needschar = TRUE;
4024 #endif
4025 }
4026 #ifdef SUPPORT_UCP
4027 else
4028 {
4029 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4030 cc++;
4031 switch(*cc)
4032 {
4033 case PT_ANY:
4034 break;
4035
4036 case PT_LAMP:
4037 case PT_GC:
4038 case PT_PC:
4039 case PT_ALNUM:
4040 needstype = TRUE;
4041 break;
4042
4043 case PT_SC:
4044 needsscript = TRUE;
4045 break;
4046
4047 case PT_SPACE:
4048 case PT_PXSPACE:
4049 case PT_WORD:
4050 needstype = TRUE;
4051 needschar = TRUE;
4052 break;
4053
4054 case PT_CLIST:
4055 case PT_UCNC:
4056 needschar = TRUE;
4057 break;
4058
4059 default:
4060 SLJIT_ASSERT_STOP();
4061 break;
4062 }
4063 cc += 2;
4064 }
4065 #endif
4066 }
4067
4068 #ifdef SUPPORT_UCP
4069 /* Simple register allocation. TMP1 is preferred if possible. */
4070 if (needstype || needsscript)
4071 {
4072 if (needschar && !charsaved)
4073 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4074 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4075 if (needschar)
4076 {
4077 if (needstype)
4078 {
4079 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4080 typereg = RETURN_ADDR;
4081 }
4082
4083 if (needsscript)
4084 scriptreg = TMP3;
4085 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4086 }
4087 else if (needstype && needsscript)
4088 scriptreg = TMP3;
4089 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4090
4091 if (needsscript)
4092 {
4093 if (scriptreg == TMP1)
4094 {
4095 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4096 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4097 }
4098 else
4099 {
4100 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4101 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4102 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4103 }
4104 }
4105 }
4106 #endif
4107
4108 /* Generating code. */
4109 cc = ccbegin;
4110 charoffset = 0;
4111 numberofcmps = 0;
4112 #ifdef SUPPORT_UCP
4113 typeoffset = 0;
4114 #endif
4115
4116 while (*cc != XCL_END)
4117 {
4118 compares--;
4119 invertcmp = (compares == 0 && list != backtracks);
4120 jump = NULL;
4121
4122 if (*cc == XCL_SINGLE)
4123 {
4124 cc ++;
4125 #ifdef SUPPORT_UTF
4126 if (common->utf)
4127 {
4128 GETCHARINC(c, cc);
4129 }
4130 else
4131 #endif
4132 c = *cc++;
4133
4134 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4135 {
4136 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4137 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4138 numberofcmps++;
4139 }
4140 else if (numberofcmps > 0)
4141 {
4142 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4143 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4144 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4145 numberofcmps = 0;
4146 }
4147 else
4148 {
4149 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4150 numberofcmps = 0;
4151 }
4152 }
4153 else if (*cc == XCL_RANGE)
4154 {
4155 cc ++;
4156 #ifdef SUPPORT_UTF
4157 if (common->utf)
4158 {
4159 GETCHARINC(c, cc);
4160 }
4161 else
4162 #endif
4163 c = *cc++;
4164 SET_CHAR_OFFSET(c);
4165 #ifdef SUPPORT_UTF
4166 if (common->utf)
4167 {
4168 GETCHARINC(c, cc);
4169 }
4170 else
4171 #endif
4172 c = *cc++;
4173 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4174 {
4175 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4176 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4177 numberofcmps++;
4178 }
4179 else if (numberofcmps > 0)
4180 {
4181 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4182 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4183 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4184 numberofcmps = 0;
4185 }
4186 else
4187 {
4188 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4189 numberofcmps = 0;
4190 }
4191 }
4192 #ifdef SUPPORT_UCP
4193 else
4194 {
4195 if (*cc == XCL_NOTPROP)
4196 invertcmp ^= 0x1;
4197 cc++;
4198 switch(*cc)
4199 {
4200 case PT_ANY:
4201 if (list != backtracks)
4202 {
4203 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4204 continue;
4205 }
4206 else if (cc[-1] == XCL_NOTPROP)
4207 continue;
4208 jump = JUMP(SLJIT_JUMP);
4209 break;
4210
4211 case PT_LAMP:
4212 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4213 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4214 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4215 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4216 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4217 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4218 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4219 break;
4220
4221 case PT_GC:
4222 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4223 SET_TYPE_OFFSET(c);
4224 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4225 break;
4226
4227 case PT_PC:
4228 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4229 break;
4230
4231 case PT_SC:
4232 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4233 break;
4234
4235 case PT_SPACE:
4236 case PT_PXSPACE:
4237 if (*cc == PT_SPACE)
4238 {
4239 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4240 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4241 }
4242 SET_CHAR_OFFSET(9);
4243 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4244 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4245 if (*cc == PT_SPACE)
4246 JUMPHERE(jump);
4247
4248 SET_TYPE_OFFSET(ucp_Zl);
4249 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4250 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4251 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4252 break;
4253
4254 case PT_WORD:
4255 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4256 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4257 /* Fall through. */
4258
4259 case PT_ALNUM:
4260 SET_TYPE_OFFSET(ucp_Ll);
4261 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4262 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4263 SET_TYPE_OFFSET(ucp_Nd);
4264 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4265 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4266 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4267 break;
4268
4269 case PT_CLIST:
4270 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4271
4272 /* At least three characters are required.
4273 Otherwise this case would be handled by the normal code path. */
4274 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4275 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4276
4277 /* Optimizing character pairs, if their difference is power of 2. */
4278 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4279 {
4280 if (charoffset == 0)
4281 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4282 else
4283 {
4284 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4285 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4286 }
4287 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4288 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4289 other_cases += 2;
4290 }
4291 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4292 {
4293 if (charoffset == 0)
4294 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4295 else
4296 {
4297 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4298 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4299 }
4300 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4301 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4302
4303 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4304 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4305
4306 other_cases += 3;
4307 }
4308 else
4309 {
4310 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4311 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4312 }
4313
4314 while (*other_cases != NOTACHAR)
4315 {
4316 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4317 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4318 }
4319 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4320 break;
4321
4322 case PT_UCNC:
4323 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4324 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4325 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4326 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4327 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4328 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4329
4330 SET_CHAR_OFFSET(0xa0);
4331 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4332 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4333 SET_CHAR_OFFSET(0);
4334 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4335 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4336 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4337 break;
4338 }
4339 cc += 2;
4340 }
4341 #endif
4342
4343 if (jump != NULL)
4344 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4345 }
4346
4347 if (found != NULL)
4348 set_jumps(found, LABEL());
4349 }
4350
4351 #undef SET_TYPE_OFFSET
4352 #undef SET_CHAR_OFFSET
4353
4354 #endif
4355
/* Emits the matching-path code for one single-item opcode.

   common     - shared JIT compiler state.
   type       - the opcode to compile (one of the OP_* values handled below).
   cc         - points at the operand data of the opcode (callers in this file
                pass the address just after the opcode itself).
   backtracks - jump list; every jump that must be taken when the item fails
                to match is appended to it.

   Returns the address just past the operand data consumed for this opcode
   (cc itself for opcodes without operands). An opcode that is not handled by
   the switch ends up in SLJIT_ASSERT_STOP(). */
4356 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4357 {
4358 DEFINE_COMPILER;
4359 int length;
4360 unsigned int c, oc, bit;
4361 compare_context context;
4362 struct sljit_jump *jump[4];
4363 jump_list *end_list;
4364 #ifdef SUPPORT_UTF
4365 struct sljit_label *label;
4366 #ifdef SUPPORT_UCP
4367 pcre_uchar propdata[5];
4368 #endif
4369 #endif
4370
4371 switch(type)
4372 {
4373 case OP_SOD:
/* Backtrack unless STR_PTR equals the "begin" field of jit_arguments. */
4374 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4375 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4376 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4377 return cc;
4378
4379 case OP_SOM:
/* Backtrack unless STR_PTR equals the "str" field of jit_arguments. */
4380 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4381 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4382 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4383 return cc;
4384
4385 case OP_NOT_WORD_BOUNDARY:
4386 case OP_WORD_BOUNDARY:
/* The shared wordboundary helper is called; the flag it leaves behind
   selects the backtrack condition for the positive and negated form. */
4387 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4388 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4389 return cc;
4390
4391 case OP_NOT_DIGIT:
4392 case OP_DIGIT:
4393 /* Digits are usually 0-9, so it is worth optimizing them. */
/* digits[0] == -2 makes this code (re)compute the digit ranges first. */
4394 if (common->digits[0] == -2)
4395 get_ctype_ranges(common, ctype_digit, common->digits);
4396 detect_partial_match(common, backtracks);
4397 /* Flip the starting bit in the negative case. */
4398 if (type == OP_NOT_DIGIT)
4399 common->digits[1] ^= 1;
/* Fall back to the ctype table test when check_ranges() declines. */
4400 if (!check_ranges(common, common->digits, backtracks, TRUE))
4401 {
4402 read_char8_type(common);
4403 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4404 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4405 }
/* Undo the flip above so common->digits is left as it was. */
4406 if (type == OP_NOT_DIGIT)
4407 common->digits[1] ^= 1;
4408 return cc;
4409
4410 case OP_NOT_WHITESPACE:
4411 case OP_WHITESPACE:
/* Test the ctype_space bit of the value read_char8_type() leaves in TMP1. */
4412 detect_partial_match(common, backtracks);
4413 read_char8_type(common);
4414 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4415 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4416 return cc;
4417
4418 case OP_NOT_WORDCHAR:
4419 case OP_WORDCHAR:
/* Same scheme as the whitespace case, using the ctype_word bit. */
4420 detect_partial_match(common, backtracks);
4421 read_char8_type(common);
4422 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4423 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4424 return cc;
4425
4426 case OP_ANY:
4427 detect_partial_match(common, backtracks);
4428 read_char(common);
4429 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4430 {
/* Two-unit fixed newline: the character just read is compared with the
   high byte of common->newline; only when it matches is the next code unit
   compared with the low byte. Reaching the end of the subject in between
   (end_list) counts as "not a newline". */
4431 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4432 end_list = NULL;
4433 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4434 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4435 else
4436 check_str_end(common, &end_list);
4437
4438 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4439 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4440 set_jumps(end_list, LABEL());
4441 JUMPHERE(jump[0]);
4442 }
4443 else
4444 check_newlinechar(common, common->nltype, backtracks, TRUE);
4445 return cc;
4446
4447 case OP_ALLANY:
4448 detect_partial_match(common, backtracks);
4449 #ifdef SUPPORT_UTF
4450 if (common->utf)
4451 {
/* Consume the first code unit, then advance STR_PTR past any additional
   code units belonging to the same character. */
4452 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4453 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4454 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4455 #if defined COMPILE_PCRE8
/* Lead bytes >= 0xc0: the extra length is looked up in utf8_table4. */
4456 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4457 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4458 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4459 #elif defined COMPILE_PCRE16
/* (unit & 0xfc00) == 0xd800 yields 1, shifted to 2 bytes, i.e. one more
   16-bit unit is skipped. */
4460 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4461 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4462 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4463 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4464 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4465 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4466 #endif
4467 JUMPHERE(jump[0]);
4468 #endif /* COMPILE_PCRE[8|16] */
4469 return cc;
4470 }
4471 #endif
4472 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4473 return cc;
4474
4475 case OP_ANYBYTE:
/* Always advances by exactly one code unit, regardless of UTF mode. */
4476 detect_partial_match(common, backtracks);
4477 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4478 return cc;
4479
4480 #ifdef SUPPORT_UTF
4481 #ifdef SUPPORT_UCP
4482 case OP_NOTPROP:
4483 case OP_PROP:
/* Build a temporary item list holding a single (NOT)PROP entry terminated
   by XCL_END and reuse compile_xclass_matchingpath(). The leading 0 is
   presumably the xclass flag byte - confirm against that function. */
4484 propdata[0] = 0;
4485 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4486 propdata[2] = cc[0];
4487 propdata[3] = cc[1];
4488 propdata[4] = XCL_END;
4489 compile_xclass_matchingpath(common, propdata, backtracks);
4490 return cc + 2;
4491 #endif
4492 #endif
4493
4494 case OP_ANYNL:
/* A CR is accepted on its own; when the next code unit is NL it is consumed
   as well. Any other character is handed to check_newlinechar() with
   common->bsr_nltype. */
4495 detect_partial_match(common, backtracks);
4496 read_char(common);
4497 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4498 /* We don't need to handle soft partial matching case. */
4499 end_list = NULL;
4500 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4501 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4502 else
4503 check_str_end(common, &end_list);
4504 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4505 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4506 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4507 jump[2] = JUMP(SLJIT_JUMP);
4508 JUMPHERE(jump[0]);
4509 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4510 set_jumps(end_list, LABEL());
4511 JUMPHERE(jump[1]);
4512 JUMPHERE(jump[2]);
4513 return cc;
4514
4515 case OP_NOT_HSPACE:
4516 case OP_HSPACE:
/* The shared hspace helper classifies the character that was just read. */
4517 detect_partial_match(common, backtracks);
4518 read_char(common);
4519 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4520 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4521 return cc;
4522
4523 case OP_NOT_VSPACE:
4524 case OP_VSPACE:
/* Same scheme as the horizontal space case, using the vspace helper. */
4525 detect_partial_match(common, backtracks);
4526 read_char(common);
4527 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4528 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4529 return cc;
4530
4531 #ifdef SUPPORT_UCP
4532 case OP_EXTUNI:
/* Reads the first character and loads its gbprop value. STACK_TOP is saved
   to LOCALS0 and borrowed to hold the gbprop of the previous character. */
4533 detect_partial_match(common, backtracks);
4534 read_char(common);
4535 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4536 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4537 /* Optimize register allocation: use a real register. */
4538 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4539 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4540
/* Loop: remember the position in TMP3, read the next character and keep
   going while bit (1 << its gbprop) is set in ucp_gbtable[previous gbprop]. */
4541 label = LABEL();
4542 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4543 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4544 read_char(common);
4545 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4546 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4547 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4548
4549 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4550 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4551 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4552 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4553 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4554 JUMPTO(SLJIT_C_NOT_ZERO, label);
4555
/* The last character read does not belong to the sequence: step back to the
   position saved in TMP3, then restore the borrowed STACK_TOP. */
4556 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4557 JUMPHERE(jump[0]);
4558 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4559
4560 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4561 {
4562 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4563 /* Since we successfully read a char above, partial matching must occur. */
4564 check_partial(common, TRUE);
4565 JUMPHERE(jump[0]);
4566 }
4567 return cc;
4568 #endif
4569
4570 case OP_EODN:
4571 /* Requires rather complex checks. */
/* jump[0]: STR_PTR is already at STR_END, so all newline checks are skipped. */
4572 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4573 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4574 {
/* Two-unit fixed newline: exactly two code units must remain and they must
   equal the high and low byte of common->newline. */
4575 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4576 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4577 if (common->mode == JIT_COMPILE)
4578 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4579 else
4580 {
4581 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4582 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4583 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4584 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4585 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4586 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4587 check_partial(common, TRUE);
4588 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4589 JUMPHERE(jump[1]);
4590 }
4591 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4592 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4593 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4594 }
4595 else if (common->nltype == NLTYPE_FIXED)
4596 {
/* One-unit fixed newline: exactly one code unit must remain and it must
   equal common->newline. */
4597 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4598 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4599 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4600 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4601 }
4602 else
4603 {
/* Other newline types: a leading CR gets special treatment. With exactly
   two units left the second one must be NL; with fewer than two units left
   (jump[2]) the CR is accepted as is. */
4604 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4605 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4606 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4607 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4608 jump[2] = JUMP(SLJIT_C_GREATER);
4609 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4610 /* Equal. */
4611 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4612 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4613 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4614
/* The first unit is not CR. */
4615 JUMPHERE(jump[1]);
4616 if (common->nltype == NLTYPE_ANYCRLF)
4617 {
4618 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4619 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4620 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4621 }
4622 else
4623 {
/* STR_PTR is parked in LOCALS1 while one character is read; that character
   must end exactly at STR_END and be accepted by the anynewline helper. */
4624 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4625 read_char(common);
4626 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4627 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4628 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4629 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4630 }
4631 JUMPHERE(jump[2]);
4632 JUMPHERE(jump[3]);
4633 }
4634 JUMPHERE(jump[0]);
4635 check_partial(common, FALSE);
4636 return cc;
4637
4638 case OP_EOD:
/* Backtrack while input remains before STR_END. */
4639 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4640 check_partial(common, FALSE);
4641 return cc;
4642
4643 case OP_CIRC:
/* Backtrack when STR_PTR is past "begin" or when the notbol flag is set. */
4644 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4645 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4646 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4647 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4648 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4649 return cc;
4650
4651 case OP_CIRCM:
/* At "begin" only the notbol flag is checked. Anywhere else (jump[1]) the
   position must be before STR_END and be preceded by a newline. */
4652 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4653 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4654 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4655 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4656 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4657 jump[0] = JUMP(SLJIT_JUMP);
4658 JUMPHERE(jump[1]);
4659
4660 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4661 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4662 {
/* Two-unit newline: both preceding units must lie at or after "begin"
   (still held in TMP1) and equal the two newline bytes. */
4663 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4664 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4665 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4666 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4667 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4668 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4669 }
4670 else
4671 {
/* Step back one character and read it again to test it as a newline. */
4672 skip_char_back(common);
4673 read_char(common);
4674 check_newlinechar(common, common->nltype, backtracks, FALSE);
4675 }
4676 JUMPHERE(jump[0]);
4677 return cc;
4678
4679 case OP_DOLL:
/* Backtrack when the noteol flag is set; otherwise behave like OP_EODN, or
   like a plain end-of-subject test when common->endonly is set. */
4680 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4681 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4682 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4683
4684 if (!common->endonly)
4685 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4686 else
4687 {
4688 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4689 check_partial(common, FALSE);
4690 }
4691 return cc;
4692
4693 case OP_DOLLM:
/* At STR_END only the noteol flag is checked. Before STR_END (jump[1]) the
   following character(s) must form a newline. */
4694 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4695 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4696 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4697 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4698 check_partial(common, FALSE);
4699 jump[0] = JUMP(SLJIT_JUMP);
4700 JUMPHERE(jump[1]);
4701
4702 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4703 {
4704 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4705 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4706 if (common->mode == JIT_COMPILE)
4707 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4708 else
4709 {
4710 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4711 /* STR_PTR = STR_END - IN_UCHARS(1) */
4712 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4713 check_partial(common, TRUE);
4714 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4715 JUMPHERE(jump[1]);
4716 }
4717
4718 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4719 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4720 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4721 }
4722 else
4723 {
4724 peek_char(common);
4725 check_newlinechar(common, common->nltype, backtracks, FALSE);
4726 }
4727 JUMPHERE(jump[0]);
4728 return cc;
4729
4730 case OP_CHAR:
4731 case OP_CHARI:
/* length is the number of code units the pattern character occupies. */
4732 length = 1;
4733 #ifdef SUPPORT_UTF
4734 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4735 #endif
/* Fast path, non-partial mode only: advance STR_PTR by the whole length,
   bounds-check once, and let byte_sequence_compare() emit the comparison.
   Caseless characters qualify only if they have no other case or
   char_get_othercase_bit() returns a non-zero value. */
4736 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4737 {
4738 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4739 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4740
4741 context.length = IN_UCHARS(length);
4742 context.sourcereg = -1;
4743 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4744 context.ucharptr = 0;
4745 #endif
4746 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4747 }
/* Generic path: read one character and compare it with c. */
4748 detect_partial_match(common, backtracks);
4749 read_char(common);
4750 #ifdef SUPPORT_UTF
4751 if (common->utf)
4752 {
4753 GETCHAR(c, cc);
4754 }
4755 else
4756 #endif
4757 c = *cc;
4758 if (type == OP_CHAR || !char_has_othercase(common, cc))
4759 {
4760 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4761 return cc + length;
4762 }
4763 oc = char_othercase(common, c);
4764 bit = c ^ oc;
/* When both cases differ in a single bit, setting that bit on the subject
   character allows one comparison to cover both. */
4765 if (is_powerof2(bit))
4766 {
4767 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4768 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4769 return cc + length;
4770 }
/* Otherwise compare with both cases and backtrack if neither matched. */
4771 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4772 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4773 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4774 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4775 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4776 return cc + length;
4777
4778 case OP_NOT:
4779 case OP_NOTI:
4780 detect_partial_match(common, backtracks);
4781 length = 1;
4782 #ifdef SUPPORT_UTF
4783 if (common->utf)
4784 {
4785 #ifdef COMPILE_PCRE8
4786 c = *cc;
4787 if (c < 128)
4788 {
/* The pattern character is below 128: only the first subject byte needs to
   be compared; the rest of the subject character is skipped afterwards. */
4789 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4790 if (type == OP_NOT || !char_has_othercase(common, cc))
4791 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4792 else
4793 {
4794 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4795 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4796 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4797 }
4798 /* Skip the variable-length character. */
4799 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4800 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4801 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4802 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4803 JUMPHERE(jump[0]);
4804 return cc + 1;
4805 }
4806 else
4807 #endif /* COMPILE_PCRE8 */
4808 {
4809 GETCHARLEN(c, cc, length);
4810 read_char(common);
4811 }
4812 }
4813 else
4814 #endif /* SUPPORT_UTF */
4815 {
4816 read_char(common);
4817 c = *cc;
4818 }
4819
/* Negated comparison: backtrack when the subject character equals c (or,
   for the caseless form, its other case). */
4820 if (type == OP_NOT || !char_has_othercase(common, cc))
4821 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4822 else
4823 {
4824 oc = char_othercase(common, c);
4825 bit = c ^ oc;
4826 if (is_powerof2(bit))
4827 {
4828 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4829 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4830 }
4831 else
4832 {
4833 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4834 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4835 }
4836 }
4837 return cc + length;
4838
4839 case OP_CLASS:
4840 case OP_NCLASS:
/* The operand is a 32 byte bitmap. check_class_ranges() is tried first; if
   it declines, the bitmap is tested directly below. */
4841 detect_partial_match(common, backtracks);
4842 read_char(common);
4843 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4844 return cc + 32 / sizeof(pcre_uchar);
4845
4846 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
/* Characters above 255 are outside the bitmap: OP_CLASS backtracks on them,
   OP_NCLASS skips the bitmap test (jump[0]) and accepts them. */
4847 jump[0] = NULL;
4848 #ifdef COMPILE_PCRE8
4849 /* This check only affects 8 bit mode. In other modes, we
4850 always need to compare the value with 255. */
4851 if (common->utf)
4852 #endif /* COMPILE_PCRE8 */
4853 {
4854 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4855 if (type == OP_CLASS)
4856 {
4857 add_jump(compiler, backtracks, jump[0]);
4858 jump[0] = NULL;
4859 }
4860 }
4861 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Bitmap lookup: byte index is c >> 3, bit mask is 1 << (c & 7). */
4862 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4863 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4864 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4865 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4866 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4867 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4868 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4869 if (jump[0] != NULL)
4870 JUMPHERE(jump[0]);
4871 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4872 return cc + 32 / sizeof(pcre_uchar);
4873
4874 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4875 case OP_XCLASS:
/* The class data follows the LINK_SIZE length field. GET(cc, 0) presumably
   holds the item length counted from the opcode, hence the -1 - confirm. */
4876 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4877 return cc + GET(cc, 0) - 1;
4878 #endif
4879
4880 case OP_REVERSE:
/* Moves STR_PTR back by "length" characters and backtracks when that would
   cross the "begin" field of jit_arguments. A zero length emits no code. */
4881 length = GET(cc, 0);
4882 if (length == 0)
4883 return cc + LINK_SIZE;
4884 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4885 #ifdef SUPPORT_UTF
4886 if (common->utf)
4887 {
/* UTF: characters have variable width, so step back one character per
   iteration, with TMP2 as the down counter. */
4888 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4889 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4890 label = LABEL();
4891 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4892 skip_char_back(common);
4893 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4894 JUMPTO(SLJIT_C_NOT_ZERO, label);
4895 }
4896 else
4897 #endif
4898 {
4899 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4900 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4901 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4902 }
4903 check_start_used_ptr(common);
4904 return cc + LINK_SIZE;
4905 }
/* Only reached for an opcode that has no case above. */
4906 SLJIT_ASSERT_STOP();
4907 return cc;
4908 }
4909
4910 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4911 {
4912 /* This function consumes at least one input character. */
4913 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4914 DEFINE_COMPILER;
4915 pcre_uchar *ccbegin = cc;
4916 compare_context context;
4917 int size;
4918
4919 context.length = 0;
4920 do
4921 {
4922 if (cc >= ccend)
4923 break;
4924
4925 if (*cc == OP_CHAR)
4926 {
4927 size = 1;
4928 #ifdef SUPPORT_UTF
4929 if (common->utf && HAS_EXTRALEN(cc[1]))
4930 size += GET_EXTRALEN(cc[1]);
4931 #endif
4932 }
4933 else if (*cc == OP_CHARI)
4934 {
4935 size = 1;
4936 #ifdef SUPPORT_UTF
4937 if (common->utf)
4938 {
4939 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4940 size = 0;
4941 else if (HAS_EXTRALEN(cc[1]))
4942 size += GET_EXTRALEN(cc[1]);
4943 }
4944 else
4945 #endif
4946 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4947 size = 0;
4948 }
4949 else
4950 size = 0;
4951
4952 cc += 1 + size;
4953 context.length += IN_UCHARS(size);
4954 }
4955 while (size > 0 && context.length <= 128);
4956
4957 cc = ccbegin;
4958 if (context.length > 0)
4959 {
4960 /* We have a fixed-length byte sequence. */
4961 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4962 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4963
4964 context.sourcereg = -1;
4965 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4966 context.ucharptr = 0;
4967 #endif
4968 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4969 return cc;
4970 }
4971
4972 /* A non-fixed length character will be checked if length == 0. */
4973 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4974 }
4975
4976 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4977 {
4978 DEFINE_COMPILER;
4979 int offset = GET2(cc, 1) << 1;
4980
4981 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4982 if (!common->jscript_compat)
4983 {
4984 if (backtracks == NULL)
4985 {
4986 /* OVECTOR(1) contains the "string begin - 1" constant. */
4987 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4988 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4989 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4990 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4991 return JUMP(SLJIT_C_NOT_ZERO);
4992 }
4993 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4994 }
4995 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4996 }
4997
4998 /* Forward definitions. */
4999 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
5000 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
5001
/* Allocates a backtrack record of `size` bytes with sljit_alloc_memory(),
zero-fills it, stores `ccstart` in its cc member and links it in as the new
parent->top. The expansion site must have `compiler`, `parent` and
`backtrack` in scope. If the compiler reports an error after the allocation,
the enclosing function returns `error`. Note that `size` is expanded twice. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
  } while (0)
5014
/* Variant of PUSH_BACKTRACK for functions returning void: allocates a
zero-filled backtrack record of `size` bytes, stores `ccstart` in it and
makes it the new parent->top. On a compiler error the enclosing function
simply returns. Requires `compiler`, `parent` and `backtrack` in scope;
`size` is expanded twice. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
  } while (0)
5027
/* Views the local `backtrack` pointer as a pointer to the given record type. */
#define BACKTRACK_AS(type) ((type *)(backtrack))
5029
5030 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5031 {
5032 DEFINE_COMPILER;
5033 int offset = GET2(cc, 1) << 1;
5034 struct sljit_jump *jump = NULL;
5035 struct sljit_jump *partial;
5036 struct sljit_jump *nopartial;
5037
5038 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5039 /* OVECTOR(1) contains the "string begin - 1" constant. */
5040 if (withchecks && !common->jscript_compat)
5041 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5042
5043 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5044 if (common->utf && *cc == OP_REFI)
5045 {
5046 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5047 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5048 if (withchecks)
5049 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5050
5051 /* Needed to save important temporary registers. */
5052 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5053 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5054 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5055 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5056 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5057 if (common->mode == JIT_COMPILE)
5058 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5059 else
5060 {
5061 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5062 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5063 check_partial(common, FALSE);
5064 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5065 JUMPHERE(nopartial);
5066 }
5067 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5068 }
5069 else
5070 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5071 {
5072 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5073 if (withchecks)
5074 jump = JUMP(SLJIT_C_ZERO);
5075
5076 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5077 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5078 if (common->mode == JIT_COMPILE)
5079 add_jump(compiler, backtracks, partial);
5080
5081 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5082 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5083
5084 if (common->mode != JIT_COMPILE)
5085 {
5086 nopartial = JUMP(SLJIT_JUMP);
5087 JUMPHERE(partial);
5088 /* TMP2 -= STR_END - STR_PTR */
5089 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5090 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5091 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5092 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5093 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5094 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5095 JUMPHERE(partial);
5096 check_partial(common, FALSE);
5097 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5098 JUMPHERE(nopartial);
5099 }
5100 }
5101
5102 if (jump != NULL)
5103 {
5104 if (emptyfail)
5105 add_jump(compiler, backtracks, jump);
5106 else
5107 JUMPHERE(jump);
5108 }
5109 return cc + 1 + IMM2_SIZE;
5110 }
5111
5112 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5113 {
5114 DEFINE_COMPILER;
5115 backtrack_common *backtrack;
5116 pcre_uchar type;
5117 struct sljit_label *label;
5118 struct sljit_jump *zerolength;
5119 struct sljit_jump *jump = NULL;
5120 pcre_uchar *ccbegin = cc;
5121 int min = 0, max = 0;
5122 BOOL minimize;
5123
5124 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5125
5126 type = cc[1 + IMM2_SIZE];
5127 minimize = (type & 0x1) != 0;
5128 switch(type)
5129 {
5130 case OP_CRSTAR:
5131 case OP_CRMINSTAR:
5132 min = 0;
5133 max = 0;
5134 cc += 1 + IMM2_SIZE + 1;
5135 break;
5136 case OP_CRPLUS:
5137 case OP_CRMINPLUS:
5138 min = 1;
5139 max = 0;
5140 cc += 1 + IMM2_SIZE + 1;
5141 break;
5142 case OP_CRQUERY:
5143 case OP_CRMINQUERY:
5144 min = 0;
5145 max = 1;
5146 cc += 1 + IMM2_SIZE + 1;
5147 break;
5148 case OP_CRRANGE:
5149 case OP_CRMINRANGE:
5150 min = GET2(cc, 1 + IMM2_SIZE + 1);
5151 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5152 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5153 break;
5154 default:
5155 SLJIT_ASSERT_STOP();
5156 break;
5157 }
5158
5159 if (!minimize)
5160 {
5161 if (min == 0)
5162 {
5163 allocate_stack(common, 2);
5164 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5165 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5166 /* Temporary release of STR_PTR. */
5167 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5168 zerolength = compile_ref_checks(common, ccbegin, NULL);
5169 /* Restore if not zero length. */
5170 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5171 }
5172 else
5173 {
5174 allocate_stack(common, 1);
5175 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5176 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5177 }
5178
5179 if (min > 1 || max > 1)
5180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5181
5182 label = LABEL();
5183 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5184
5185 if (min > 1 || max > 1)
5186 {
5187 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5188 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5189 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5190 if (min > 1)
5191 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5192 if (max > 1)
5193 {
5194 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5195 allocate_stack(common, 1);
5196 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5197 JUMPTO(SLJIT_JUMP, label);
5198 JUMPHERE(jump);
5199 }
5200 }
5201
5202 if (max == 0)
5203 {
5204 /* Includes min > 1 case as well. */
5205 allocate_stack(common, 1);
5206 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5207 JUMPTO(SLJIT_JUMP, label);
5208 }
5209
5210 JUMPHERE(zerolength);
5211 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5212
5213 decrease_call_count(common);
5214 return cc;
5215 }
5216
5217 allocate_stack(common, 2);
5218 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5219 if (type != OP_CRMINSTAR)
5220 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5221
5222 if (min == 0)
5223 {
5224 zerolength = compile_ref_checks(common, ccbegin, NULL);
5225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5226 jump = JUMP(SLJIT_JUMP);
5227 }
5228 else
5229 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5230
5231 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5232 if (max > 0)
5233 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5234
5235 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5236 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5237
5238 if (min > 1)
5239 {
5240 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5241 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5242 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5243 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5244 }
5245 else if (max > 0)
5246 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5247
5248 if (jump != NULL)
5249 JUMPHERE(jump);
5250 JUMPHERE(zerolength);
5251
5252 decrease_call_count(common);
5253 return cc;
5254 }
5255
5256 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5257 {
5258 DEFINE_COMPILER;
5259 backtrack_common *backtrack;
5260 recurse_entry *entry = common->entries;
5261 recurse_entry *prev = NULL;
5262 int start = GET(cc, 1);
5263 pcre_uchar *start_cc;
5264 BOOL needs_control_head;
5265
5266 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5267
5268 /* Inlining simple patterns. */
5269 if (get_framesize(common, common->start + start, TRUE, &needs_control_head) == no_stack)
5270 {
5271 start_cc = common->start + start;
5272 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5273 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5274 return cc + 1 + LINK_SIZE;
5275 }
5276
5277 while (entry != NULL)
5278 {
5279 if (entry->start == start)
5280 break;
5281 prev = entry;
5282 entry = entry->next;
5283 }
5284
5285 if (entry == NULL)
5286 {
5287 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5288 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5289 return NULL;
5290 entry->next = NULL;
5291 entry->entry = NULL;
5292 entry->calls = NULL;
5293 entry->start = start;
5294
5295 if (prev != NULL)
5296 prev->next = entry;
5297 else
5298 common->entries = entry;
5299 }
5300
5301 if (common->has_set_som && common->mark_ptr != 0)
5302 {
5303 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5304 allocate_stack(common, 2);
5305 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5306 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5307 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5308 }
5309 else if (common->has_set_som || common->mark_ptr != 0)
5310 {
5311 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5312 allocate_stack(common, 1);
5313 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5314 }
5315
5316 if (entry->entry == NULL)
5317 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5318 else
5319 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5320 /* Leave if the match is failed. */
5321 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5322 return cc + 1 + LINK_SIZE;
5323 }
5324
5325 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5326 {
5327 const pcre_uchar *begin = arguments->begin;
5328 int *offset_vector = arguments->offsets;
5329 int offset_count = arguments->offset_count;
5330 int i;
5331
5332 if (PUBL(callout) == NULL)
5333 return 0;
5334
5335 callout_block->version = 2;
5336 callout_block->callout_data = arguments->callout_data;
5337
5338 /* Offsets in subject. */
5339 callout_block->subject_length = arguments->end - arguments->begin;
5340 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5341 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5342 #if defined COMPILE_PCRE8
5343 callout_block->subject = (PCRE_SPTR)begin;
5344 #elif defined COMPILE_PCRE16
5345 callout_block->subject = (PCRE_SPTR16)begin;
5346 #elif defined COMPILE_PCRE32
5347 callout_block->subject = (PCRE_SPTR32)begin;
5348 #endif
5349
5350 /* Convert and copy the JIT offset vector to the offset_vector array. */
5351 callout_block->capture_top = 0;
5352 callout_block->offset_vector = offset_vector;
5353 for (i = 2; i < offset_count; i += 2)
5354 {
5355 offset_vector[i] = jit_ovector[i] - begin;
5356 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5357 if (jit_ovector[i] >= begin)
5358 callout_block->capture_top = i;
5359 }
5360
5361 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5362 if (offset_count > 0)
5363 offset_vector[0] = -1;
5364 if (offset_count > 1)
5365 offset_vector[1] = -1;
5366 return (*PUBL(callout))(callout_block);
5367 }
5368
/* Size of the callout block in bytes, rounded up to a multiple of 8. */
#define CALLOUT_ARG_SIZE (((int)sizeof(PUBL(callout_block)) + 7) & ~7)

/* Offset of the callout block member `arg`, lowered by CALLOUT_ARG_SIZE. */
#define CALLOUT_ARG_OFFSET(arg) (SLJIT_OFFSETOF(PUBL(callout_block), arg) - CALLOUT_ARG_SIZE)
5375
5376 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5377 {
5378 DEFINE_COMPILER;
5379 backtrack_common *backtrack;
5380
5381 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5382
5383 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5384
5385 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5386 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5387 SLJIT_ASSERT(common->capture_last_ptr != 0);
5388 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5389 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5390
5391 /* These pointer sized fields temporarly stores internal variables. */
5392 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5393 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5394 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5395
5396 if (common->mark_ptr != 0)
5397 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5398 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5399 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5400 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5401
5402 /* Needed to save important temporary registers. */
5403 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5404 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5405 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5406 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5407 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5408 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5409 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5410
5411 /* Check return value. */
5412 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5413 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5414 if (common->forced_quit_label == NULL)
5415 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5416 else
5417 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5418 return cc + 2 + 2 * LINK_SIZE;
5419 }
5420
/* The callout block helper macros are not needed past this point. */
#undef CALLOUT_ARG_OFFSET
#undef CALLOUT_ARG_SIZE
5423
/* Emits the matching path of an assertion. The opcode must be in the range
OP_ASSERT .. OP_ASSERTBACK_NOT (see the assertion below) and may be preceded
by OP_BRAZERO or OP_BRAMINZERO when the assertion is not a condition.

  common       compiler state; its quit / accept labels and lists and the
               local_exit flag are saved on entry, replaced while the
               alternatives are compiled, and restored before every return
  cc           points at the assertion opcode or at the preceding
               OP_BRAZERO / OP_BRAMINZERO
  backtrack    receives framesize and private_data_ptr, and matchingpath for
               the OP_BRAZERO case
  conditional  when TRUE, failure jumps go to backtrack->condfailed instead
               of backtrack->common.topbacktracks

Returns the code pointer after the end of the last alternative
(cc + 1 + LINK_SIZE), or NULL if the compiler reported an error while an
alternative was being compiled. */
5424 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5425 {
5426 DEFINE_COMPILER;
5427 int framesize;
5428 int extrasize;
5429 BOOL needs_control_head;
5430 int private_data_ptr;
5431 backtrack_common altbacktrack;
5432 pcre_uchar *ccbegin;
5433 pcre_uchar opcode;
5434 pcre_uchar bra = OP_BRA;
5435 jump_list *tmp = NULL;
5436 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5437 jump_list **found;
5438 /* Saving previous accept variables. */
5439 struct sljit_label *save_quit_label = common->quit_label;
5440 struct sljit_label *save_accept_label = common->accept_label;
5441 jump_list *save_quit = common->quit;
5442 jump_list *save_accept = common->accept;
5443 BOOL save_local_exit = common->local_exit;
5444 struct sljit_jump *jump;
5445 struct sljit_jump *brajump = NULL;
5446
/* An optional OP_BRAZERO / OP_BRAMINZERO prefix is remembered in bra and skipped. */
5447 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5448 {
5449 SLJIT_ASSERT(!conditional);
5450 bra = *cc;
5451 cc++;
5452 }
5453 private_data_ptr = PRIVATE_DATA(cc);
5454 SLJIT_ASSERT(private_data_ptr != 0);
5455 framesize = get_framesize(common, cc, FALSE, &needs_control_head);
5456 backtrack->framesize = framesize;
5457 backtrack->private_data_ptr = private_data_ptr;
5458 opcode = *cc;
5459 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Jumps taken when an alternative matches: positive assertions collect them
in the local list tmp, negative ones send them straight to the failure target. */
5460 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin .. cc delimit the current alternative; cc is advanced by GET(cc, 1). */
5461 ccbegin = cc;
5462 cc += GET(cc, 1);
5463
5464 if (bra == OP_BRAMINZERO)
5465 {
5466 /* This is a braminzero backtrack path. */
5467 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5468 free_stack(common, 1);
5469 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5470 }
5471
/* Stack setup. Negative framesize: only STR_PTR (and the previous control
head, if needed) are pushed. Otherwise a frame is built with init_frame() and
the previous private_data_ptr value is saved as well. */
5472 if (framesize < 0)
5473 {
5474 extrasize = needs_control_head ? 2 : 1;
5475 if (framesize == no_frame)
5476 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5477 allocate_stack(common, extrasize);
5478 if (needs_control_head)
5479 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5480 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5481 if (needs_control_head)
5482 {
5483 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5484 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5485 }
5486 }
5487 else
5488 {
5489 extrasize = needs_control_head ? 3 : 2;
5490 allocate_stack(common, framesize + extrasize);
5491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5492 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5493 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5494 if (needs_control_head)
5495 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5497 if (needs_control_head)
5498 {
5499 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5500 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5501 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5502 }
5503 else
5504 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5505 init_frame(common, ccbegin, framesize + extrasize - 1, extrasize, FALSE);
5506 }
5507
/* The alternatives are compiled with their own quit / accept state. */
5508 memset(&altbacktrack, 0, sizeof(backtrack_common));
5509 common->local_exit = TRUE;
5510 common->quit_label = NULL;
5511 common->quit = NULL;
/* One iteration per alternative; the loop ends when *cc is not OP_ALT. */
5512 while (1)
5513 {
5514 common->accept_label = NULL;
5515 common->accept = NULL;
5516 altbacktrack.top = NULL;
5517 altbacktrack.topbacktracks = NULL;
5518
/* Later alternatives start again from the saved subject position. */
5519 if (*ccbegin == OP_ALT)
5520 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5521
5522 altbacktrack.cc = ccbegin;
5523 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5524 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5525 {
/* Restore the saved state before reporting the error. */
5526 common->local_exit = save_local_exit;
5527 common->quit_label = save_quit_label;
5528 common->accept_label = save_accept_label;
5529 common->quit = save_quit;
5530 common->accept = save_accept;
5531 return NULL;
5532 }
/* Accept jumps recorded while compiling this alternative land here. */
5533 common->accept_label = LABEL();
5534 if (common->accept != NULL)
5535 set_jumps(common->accept, common->accept_label);
5536
5537 /* Reset stack. */
5538 if (framesize < 0)
5539 {
5540 if (framesize == no_frame)
5541 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5542 else
5543 free_stack(common, extrasize);
5544 if (needs_control_head)
5545 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5546 }
5547 else
5548 {
5549 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5550 {
5551 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5552 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5553 if (needs_control_head)
5554 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5555 }
5556 else
5557 {
5558 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5559 if (needs_control_head)
5560 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5561 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5562 }
5563 }
5564
5565 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5566 {
5567 /* We know that STR_PTR was stored on the top of the stack. */
5568 if (conditional)
5569 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5570 else if (bra == OP_BRAZERO)
5571 {
5572 if (framesize < 0)
5573 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5574 else
5575 {
5576 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5577 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5578 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5579 }
5580 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5581 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5582 }
5583 else if (framesize >= 0)
5584 {
5585 /* For OP_BRA and OP_BRAMINZERO. */
5586 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5587 }
5588 }
/* This alternative matched: leave through the list selected in found. */
5589 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5590
5591 compile_backtrackingpath(common, altbacktrack.top);
5592 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5593 {
/* Restore the saved state before reporting the error. */
5594 common->local_exit = save_local_exit;
5595 common->quit_label = save_quit_label;
5596 common->accept_label = save_accept_label;
5597 common->quit = save_quit;
5598 common->accept = save_accept;
5599 return NULL;
5600 }
5601 set_jumps(altbacktrack.topbacktracks, LABEL());
5602
5603 if (*cc != OP_ALT)
5604 break;
5605
5606 ccbegin = cc;
5607 cc += GET(cc, 1);
5608 }
5609
5610 /* None of them matched. */
5611 if (common->quit != NULL)
5612 {
/* Quit jumps recorded inside the assertion land here; the fall-through path
skips this stack correction. */
5613 jump = JUMP(SLJIT_JUMP);
5614 set_jumps(common->quit, LABEL());
5615 SLJIT_ASSERT(framesize != no_stack);
5616 if (framesize < 0)
5617 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5618 else
5619 {
5620 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5621 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5622 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5623 }
5624 JUMPHERE(jump);
5625 }
5626
/* Put back the control head value that was stored in STACK(1) during setup. */
5627 if (needs_control_head)
5628 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5629
5630 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5631 {
5632 /* Assert is failed. */
5633 if (conditional || bra == OP_BRAZERO)
5634 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5635
5636 if (framesize < 0)
5637 {
5638 /* The topmost item should be 0. */
5639 if (bra == OP_BRAZERO)
5640 {
5641 if (extrasize == 2)
5642 free_stack(common, 1);
5643 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5644 }
5645 else
5646 free_stack(common, extrasize);
5647 }
5648 else
5649 {
5650 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5651 /* The topmost item should be 0. */
5652 if (bra == OP_BRAZERO)
5653 {
5654 free_stack(common, framesize + extrasize - 1);
5655 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5656 }
5657 else
5658 free_stack(common, framesize + extrasize);
5659 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5660 }
/* For OP_BRAZERO this jump is bound to the matching path label further down;
otherwise it is a failure and joins the target list. */
5661 jump = JUMP(SLJIT_JUMP);
5662 if (bra != OP_BRAZERO)
5663 add_jump(compiler, target, jump);
5664
5665 /* Assert is successful. */
5666 set_jumps(tmp, LABEL());
5667 if (framesize < 0)
5668 {
5669 /* We know that STR_PTR was stored on the top of the stack. */
5670 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5671 /* Keep the STR_PTR on the top of the stack. */
5672 if (bra == OP_BRAZERO)
5673 {
5674 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5675 if (extrasize == 2)
5676 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5677 }
5678 else if (bra == OP_BRAMINZERO)
5679 {
5680 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5681 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5682 }
5683 }
5684 else
5685 {
5686 if (bra == OP_BRA)
5687 {
5688 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5689 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5690 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5691 }
5692 else
5693 {
5694 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5695 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5696 if (extrasize == 2)
5697 {
5698 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5699 if (bra == OP_BRAMINZERO)
5700 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5701 }
5702 else
5703 {
5704 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5705 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5706 }
5707 }
5708 }
5709
5710 if (bra == OP_BRAZERO)
5711 {
5712 backtrack->matchingpath = LABEL();
5713 SET_LABEL(jump, backtrack->matchingpath);
5714 }
5715 else if (bra == OP_BRAMINZERO)
5716 {
5717 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5718 JUMPHERE(brajump);
5719 if (framesize >= 0)
5720 {
5721 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5722 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5723 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5724 }
5725 set_jumps(backtrack->common.topbacktracks, LABEL());
5726 }
5727 }
5728 else
5729 {
5730 /* AssertNot is successful. */
5731 if (framesize < 0)
5732 {
5733 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5734 if (bra != OP_BRA)
5735 {
5736 if (extrasize == 2)
5737 free_stack(common, 1);
5738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5739 }
5740 else
5741 free_stack(common, extrasize);
5742 }
5743 else
5744 {
5745 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5746 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5747 /* The topmost item should be 0. */
5748 if (bra != OP_BRA)
5749 {
5750 free_stack(common, framesize + extrasize - 1);
5751 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5752 }
5753 else
5754 free_stack(common, framesize + extrasize);
5755 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5756 }
5757
5758 if (bra == OP_BRAZERO)
5759 backtrack->matchingpath = LABEL();
5760 else if (bra == OP_BRAMINZERO)
5761 {
5762 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5763 JUMPHERE(brajump);
5764 }
5765
/* With a zero-repeat prefix the jumps collected so far are bound here and
the list is emptied. */
5766 if (bra != OP_BRA)
5767 {
5768 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5769 set_jumps(backtrack->common.topbacktracks, LABEL());
5770 backtrack->common.topbacktracks = NULL;
5771 }
5772 }
5773
/* Restore the quit / accept state saved on entry. */
5774 common->local_exit = save_local_exit;
5775 common->quit_label = save_quit_label;
5776 common->accept_label = save_accept_label;
5777 common->quit = save_quit;
5778 common->accept = save_accept;
5779 return cc + 1 + LINK_SIZE;
5780 }
5781
5782 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5783 {
5784 int condition = FALSE;
5785 pcre_uchar *slotA = name_table;
5786 pcre_uchar *slotB;
5787 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5788 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5789 sljit_sw no_capture;
5790 int i;
5791
5792 locals += refno & 0xff;
5793 refno >>= 8;
5794 no_capture = locals[1];
5795
5796 for (i = 0; i < name_count; i++)
5797 {
5798 if (GET2(slotA, 0) == refno) break;
5799 slotA += name_entry_size;
5800 }
5801
5802 if (i < name_count)
5803 {
5804 /* Found a name for the number - there can be only one; duplicate names
5805 for different numbers are allowed, but not vice versa. First scan down
5806 for duplicates. */
5807
5808 slotB = slotA;
5809 while (slotB > name_table)
5810 {
5811 slotB -= name_entry_size;
5812 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5813 {
5814 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5815 if (condition) break;
5816 }
5817 else break;
5818 }
5819
5820 /* Scan up for duplicates */
5821 if (!condition)
5822 {
5823 slotB = slotA;
5824 for (i++; i < name_count; i++)
5825 {
5826 slotB += name_entry_size;
5827 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5828 {
5829 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5830 if (condition) break;
5831 }
5832 else break;
5833 }
5834 }
5835 }
5836 return condition;
5837 }
5838
5839 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5840 {
5841 int condition = FALSE;
5842 pcre_uchar *slotA = name_table;
5843 pcre_uchar *slotB;
5844 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5845 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5846 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5847 sljit_uw i;
5848
5849 for (i = 0; i < name_count; i++)
5850 {
5851 if (GET2(slotA, 0) == recno) break;
5852 slotA += name_entry_size;
5853 }
5854
5855 if (i < name_count)
5856 {
5857 /* Found a name for the number - there can be only one; duplicate
5858 names for different numbers are allowed, but not vice versa. First
5859 scan down for duplicates. */
5860
5861 slotB = slotA;
5862 while (slotB > name_table)
5863 {
5864 slotB -= name_entry_size;
5865 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5866 {
5867 condition = GET2(slotB, 0) == group_num;
5868 if (condition) break;
5869 }
5870 else break;
5871 }
5872
5873 /* Scan up for duplicates */
5874 if (!condition)
5875 {
5876 slotB = slotA;
5877 for (i++; i < name_count; i++)
5878 {
5879 slotB += name_entry_size;
5880 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5881 {
5882 condition = GET2(slotB, 0) == group_num;
5883 if (condition) break;
5884 }
5885 else break;
5886 }
5887 }
5888 }
5889 return condition;
5890 }
5891
5892 static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
5893 {
5894 DEFINE_COMPILER;
5895 int stacksize;
5896
5897 if (framesize < 0)
5898 {
5899 if (framesize == no_frame)
5900 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5901 else
5902 {
5903 stacksize = needs_control_head ? 1 : 0;
5904 if (ket != OP_KET || has_alternatives)
5905 stacksize++;
5906 free_stack(common, stacksize);
5907 }
5908
5909 if (needs_control_head)
5910 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? sizeof(sljit_sw) : 0);
5911
5912 /* TMP2 which is set here used by OP_KETRMAX below. */
5913 if (ket == OP_KETRMAX)
5914 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5915 else if (ket == OP_KETRMIN)
5916 {
5917 /* Move the STR_PTR to the private_data_ptr. */
5918 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5919 }
5920 }
5921 else
5922 {
5923 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
5924 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
5925 if (needs_control_head)
5926 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), 0);
5927
5928 if (ket == OP_KETRMAX)
5929 {
5930 /* TMP2 which is set here used by OP_KETRMAX below. */
5931 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5932 }
5933 }
5934 if (needs_control_head)
5935 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, TMP1, 0);
5936 }
5937
5938 static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
5939 {
5940 DEFINE_COMPILER;
5941
5942 if (common->capture_last_ptr != 0)
5943 {
5944 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5945 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
5946 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
5947 stacksize++;
5948 }
5949 if (common->optimized_cbracket[offset >> 1] == 0)
5950 {
5951 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5952 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5953 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
5954 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5955 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
5956 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5957 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5958 stacksize += 2;
5959 }
5960 return stacksize;
5961 }
5962
5963 /*
5964 Handling bracketed expressions is probably the most complex part.
5965
5966 Stack layout naming characters:
5967 S - Push the current STR_PTR
5968 0 - Push a 0 (NULL)
5969 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5970 before the next alternative. Not pushed if there are no alternatives.
5971 M - Any values pushed by the current alternative. Can be empty, or anything.
5972 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5973 L - Push the previous local (pointed by localptr) to the stack
5974 () - optional values stored on the stack
5975 ()* - optional, can be stored multiple times
5976
5977 The following list shows the regular expression templates, their PCRE byte codes
5978 and stack layout supported by pcre-sljit.
5979
5980 (?:) OP_BRA | OP_KET A M
5981 () OP_CBRA | OP_KET C M
5982 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5983 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5984 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5985 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5986 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5987 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5988 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5989 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5990 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5991 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5992 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5993 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5994 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5995 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5996 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5997 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5998 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5999 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
6000 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
6001 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
6002
6003
6004 Stack layout naming characters:
6005 A - Push the alternative index (starting from 0) on the stack.
6006 Not pushed if there are no alternatives.
6007 M - Any values pushed by the current alternative. Can be empty, or anything.
6008
6009 The next list shows the possible content of a bracket:
6010 (|) OP_*BRA | OP_ALT ... M A
6011 (?()|) OP_*COND | OP_ALT M A
6012 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
6013 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
6014 Or nothing, if trace is unnecessary
6015 */
6016
/* Generates the matching path of one bracketed group. The group starts at
cc, and may be preceded by OP_BRAZERO or OP_BRAMINZERO. A bracket_backtrack
record is pushed for the group, and its private_data_ptr, u.framesize /
u.condfailed / u.assert members and its alternative_matchingpath,
recursive_matchingpath and zero_matchingpath labels are filled in as needed.
Only the first alternative is compiled here (by compile_matchingpath); the
remaining OP_ALT branches are skipped at the end of the function.

Arguments:
  common   compiler state
  cc       points to the opening opcode (or to OP_BRAZERO / OP_BRAMINZERO)
  parent   the enclosing backtrack record

Returns:   pointer to the byte code which follows the closing ket,
           bracketend(cc) for a condition whose test is OP_DEF (no code
           is generated for it), or NULL if the sljit compiler reported
           an error
*/

6017 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6018 {
6019 DEFINE_COMPILER;
6020 backtrack_common *backtrack;
6021 pcre_uchar opcode;
6022 int private_data_ptr = 0;
6023 int offset = 0;
6024 int stacksize;
6025 pcre_uchar *ccbegin;
6026 pcre_uchar *matchingpath;
6027 pcre_uchar bra = OP_BRA;
6028 pcre_uchar ket;
6029 assert_backtrack *assert;
6030 BOOL has_alternatives;
6031 BOOL needs_control_head = FALSE;
6032 struct sljit_jump *jump;
6033 struct sljit_jump *skip;
6034 struct sljit_label *rmaxlabel = NULL;
6035 struct sljit_jump *braminzerojump = NULL;
6036
6037 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6038
6039 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
6040 {
6041 bra = *cc;
6042 cc++;
/* NOTE(review): this assignment is repeated unconditionally just below. */
6043 opcode = *cc;
6044 }
6045
6046 opcode = *cc;
6047 ccbegin = cc;
6048 matchingpath = ccbegin + 1 + LINK_SIZE;
6049
6050 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
6051 {
6052 /* Drop this bracket_backtrack. */
6053 parent->top = backtrack->prev;
6054 return bracketend(cc);
6055 }
6056
6057 ket = *(bracketend(cc) - 1 - LINK_SIZE);
6058 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
6059 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Follow the link of the opening opcode; *cc is tested below for OP_ALT,
OP_KETRMAX and OP_KETRMIN. */
6060 cc += GET(cc, 1);
6061
6062 has_alternatives = *cc == OP_ALT;
6063 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
6064 {
6065 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
6066 if (*matchingpath == OP_NRREF)
6067 {
/* stacksize is only used as a scratch variable here: it holds the 2-byte
argument of OP_NRREF. */
6068 stacksize = GET2(matchingpath, 1);
6069 if (common->currententry == NULL || stacksize == RREF_ANY)
6070 has_alternatives = FALSE;
6071 else if (common->currententry->start == 0)
6072 has_alternatives = stacksize != 0;
6073 else
6074 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6075 }
6076 }
6077
/* From here on OP_COND with a repeating ket is handled as OP_SCOND, and
OP_ONCE_NC is handled as OP_ONCE. */
6078 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6079 opcode = OP_SCOND;
6080 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
6081 opcode = OP_ONCE;
6082
6083 if (opcode == OP_CBRA || opcode == OP_SCBRA)
6084 {
6085 /* Capturing brackets has a pre-allocated space. */
6086 offset = GET2(ccbegin, 1 + LINK_SIZE);
6087 if (common->optimized_cbracket[offset] == 0)
6088 {
6089 private_data_ptr = OVECTOR_PRIV(offset);
6090 offset <<= 1;
6091 }
6092 else
6093 {
6094 offset <<= 1;
6095 private_data_ptr = OVECTOR(offset);
6096 }
6097 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6098 matchingpath += IMM2_SIZE;
6099 }
6100 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6101 {
6102 /* Other brackets simply allocate the next entry. */
6103 private_data_ptr = PRIVATE_DATA(ccbegin);
6104 SLJIT_ASSERT(private_data_ptr != 0);
6105 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6106 if (opcode == OP_ONCE)
6107 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE, &needs_control_head);
6108 }
6109
6110 /* Instructions before the first alternative. */
6111 stacksize = 0;
6112 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6113 stacksize++;
6114 if (bra == OP_BRAZERO)
6115 stacksize++;
6116
6117 if (stacksize > 0)
6118 allocate_stack(common, stacksize);
6119
6120 stacksize = 0;
6121 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6122 {
6123 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6124 stacksize++;
6125 }
6126
6127 if (bra == OP_BRAZERO)
6128 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6129
6130 if (bra == OP_BRAMINZERO)
6131 {
6132 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6133 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6134 if (ket != OP_KETRMIN)
6135 {
6136 free_stack(common, 1);
6137 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6138 }
6139 else
6140 {
6141 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6142 {
6143 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6144 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6145 /* Nothing stored during the first run. */
6146 skip = JUMP(SLJIT_JUMP);
6147 JUMPHERE(jump);
6148 /* Checking zero-length iteration. */
6149 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6150 {
6151 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6152 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6153 }
6154 else
6155 {
6156 /* Except when the whole stack frame must be saved. */
6157 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6158 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6159 }
6160 JUMPHERE(skip);
6161 }
6162 else
6163 {
6164 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6165 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6166 JUMPHERE(jump);
6167 }
6168 }
6169 }
6170
6171 if (ket == OP_KETRMIN)
6172 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6173
6174 if (ket == OP_KETRMAX)
6175 {
6176 rmaxlabel = LABEL();
6177 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
6178 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
6179 }
6180
6181 /* Handling capturing brackets and alternatives. */
6182 if (opcode == OP_ONCE)
6183 {
6184 stacksize = 0;
6185 if (needs_control_head)
6186 {
6187 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6188 stacksize++;
6189 }
6190
6191 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6192 {
6193 /* Neither capturing brackets nor recursions are found in the block. */
6194 if (ket == OP_KETRMIN)
6195 {
6196 stacksize += 2;
6197 if (!needs_control_head)
6198 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6199 }
6200 else
6201 {
6202 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6203 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6204 if (ket == OP_KETRMAX || has_alternatives)
6205 stacksize++;
6206 }
6207
6208 if (stacksize > 0)
6209 allocate_stack(common, stacksize);
6210
6211 stacksize = 0;
6212 if (needs_control_head)
6213 {
6214 stacksize++;
6215 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6216 }
6217
6218 if (ket == OP_KETRMIN)
6219 {
6220 if (needs_control_head)
6221 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6222 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6223 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
6224 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
6225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6226 }
6227 else if (ket == OP_KETRMAX || has_alternatives)
6228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6229 }
6230 else
6231 {
6232 if (ket != OP_KET || has_alternatives)
6233 stacksize++;
6234
6235 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
6236 allocate_stack(common, stacksize);
6237
6238 if (needs_control_head)
6239 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6240
6241 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6242 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6243
6244 stacksize = needs_control_head ? 1 : 0;
6245 if (ket != OP_KET || has_alternatives)
6246 {
6247 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6248 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6249 stacksize++;
6250 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6251 }
6252 else
6253 {
6254 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6255 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6256 }
6257 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
6258 }
6259 }
6260 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6261 {
6262 /* Saving the previous values. */
6263 if (common->optimized_cbracket[offset >> 1] != 0)
6264 {
6265 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6266 allocate_stack(common, 2);
6267 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6268 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6269 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6270 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6271 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6272 }
6273 else
6274 {
6275 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6276 allocate_stack(common, 1);
6277 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6278 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6279 }
6280 }
6281 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6282 {
6283 /* Saving the previous value. */
6284 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6285 allocate_stack(common, 1);
6286 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6287 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6288 }
6289 else if (has_alternatives)
6290 {
6291 /* Pushing the starting string pointer. */
6292 allocate_stack(common, 1);
6293 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6294 }
6295
6296 /* Generating code for the first alternative. */
6297 if (opcode == OP_COND || opcode == OP_SCOND)
6298 {
6299 if (*matchingpath == OP_CREF)
6300 {
6301 SLJIT_ASSERT(has_alternatives);
6302 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6303 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6304 matchingpath += 1 + IMM2_SIZE;
6305 }
6306 else if (*matchingpath == OP_NCREF)
6307 {
6308 SLJIT_ASSERT(has_alternatives);
/* stacksize is a scratch variable again: the group number of the reference. */
6309 stacksize = GET2(matchingpath, 1);
6310 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6311
/* STACK_TOP is saved in POSSESSIVE1 before the call and reloaded after it.
The name count and the entry size are passed in LOCALS0 and LOCALS1. */
6312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6313 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6314 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
/* The first argument packs the group number above the low 8 bits, which hold
ovector_start / sizeof(sljit_sw); presumably do_searchovector unpacks it -
verify against that function. */
6315 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6316 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6317 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6318 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6319 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6320 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6321
6322 JUMPHERE(jump);
6323 matchingpath += 1 + IMM2_SIZE;
6324 }
6325 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6326 {
6327 /* Never has other case. */
6328 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6329
/* stacksize becomes a compile time truth value of the recursion test. */
6330 stacksize = GET2(matchingpath, 1);
6331 if (common->currententry == NULL)
6332 stacksize = 0;
6333 else if (stacksize == RREF_ANY)
6334 stacksize = 1;
6335 else if (common->currententry->start == 0)
6336 stacksize = stacksize == 0;
6337 else
6338 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6339
6340 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6341 {
6342 SLJIT_ASSERT(!has_alternatives);
6343 if (stacksize != 0)
6344 matchingpath += 1 + IMM2_SIZE;
6345 else
6346 {
6347 if (*cc == OP_ALT)
6348 {
6349 matchingpath = cc + 1 + LINK_SIZE;
6350 cc += GET(cc, 1);
6351 }
6352 else
6353 matchingpath = cc;
6354 }
6355 }
6356 else
6357 {
6358 SLJIT_ASSERT(has_alternatives);
6359
6360 stacksize = GET2(matchingpath, 1);
6361 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6362 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6363 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6364 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6365 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6366 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6367 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6368 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6369 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6370 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6371 matchingpath += 1 + IMM2_SIZE;
6372 }
6373 }
6374 else
6375 {
6376 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6377 /* Similar code as PUSH_BACKTRACK macro. */
6378 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6379 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6380 return NULL;
6381 memset(assert, 0, sizeof(assert_backtrack));
6382 assert->common.cc = matchingpath;
6383 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6384 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6385 }
6386 }
6387
6388 compile_matchingpath(common, matchingpath, cc, backtrack);
6389 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6390 return NULL;
6391
6392 if (opcode == OP_ONCE)
6393 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
6394
/* First pass: count the stack slots needed after the alternative. The
second pass below stores the values in the same order. */
6395 stacksize = 0;
6396 if (ket != OP_KET || bra != OP_BRA)
6397 stacksize++;
6398 if (offset != 0)
6399 {
6400 if (common->capture_last_ptr != 0)
6401 stacksize++;
6402 if (common->optimized_cbracket[offset >> 1] == 0)
6403 stacksize += 2;
6404 }
6405 if (has_alternatives && opcode != OP_ONCE)
6406 stacksize++;
6407
6408 if (stacksize > 0)
6409 allocate_stack(common, stacksize);
6410
6411 stacksize = 0;
6412 if (ket != OP_KET || bra != OP_BRA)
6413 {
6414 if (ket != OP_KET)
6415 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6416 else
6417 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6418 stacksize++;
6419 }
6420
6421 if (offset != 0)
6422 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
6423
6424 if (has_alternatives)
6425 {
6426 if (opcode != OP_ONCE)
6427 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6428 if (ket != OP_KETRMAX)
6429 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6430 }
6431
6432 /* Must be after the matchingpath label. */
6433 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6434 {
6435 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6436 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6437 }
6438
6439 if (ket == OP_KETRMAX)
6440 {
6441 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6442 {
6443 if (has_alternatives)
6444 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6445 /* Checking zero-length iteration. */
6446 if (opcode != OP_ONCE)
6447 {
6448 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6449 /* Drop STR_PTR for greedy plus quantifier. */
6450 if (bra != OP_BRAZERO)
6451 free_stack(common, 1);
6452 }
6453 else
6454 /* TMP2 must contain the starting STR_PTR. */
6455 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6456 }
6457 else
6458 JUMPTO(SLJIT_JUMP, rmaxlabel);
6459 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6460 }
6461
6462 if (bra == OP_BRAZERO)
6463 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6464
6465 if (bra == OP_BRAMINZERO)
6466 {
6467 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6468 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6469 if (braminzerojump != NULL)
6470 {
6471 JUMPHERE(braminzerojump);
6472 /* We need to release the end pointer to perform the
6473 backtrack for the zero-length iteration. When
6474 framesize is < 0, OP_ONCE will do the release itself. */
6475 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6476 {
6477 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6478 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6479 }
6480 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6481 free_stack(common, 1);
6482 }
6483 /* Continue to the normal backtrack. */
6484 }
6485
6486 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6487 decrease_call_count(common);
6488
6489 /* Skip the other alternatives. */
6490 while (*cc == OP_ALT)
6491 cc += GET(cc, 1);
6492 cc += 1 + LINK_SIZE;
6493
6494 /* Temporarily encoding the needs_control_head in framesize. */
6495 if (opcode == OP_ONCE)
6496 BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
6497 return cc;
6498 }
6499
6500 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6501 {
6502 DEFINE_COMPILER;
6503 backtrack_common *backtrack;
6504 pcre_uchar opcode;
6505 int private_data_ptr;
6506 int cbraprivptr = 0;
6507 BOOL needs_control_head;
6508 int framesize;
6509 int stacksize;
6510 int offset = 0;
6511 BOOL zero = FALSE;
6512 pcre_uchar *ccbegin = NULL;
6513 int stack; /* Also contains the offset of control head. */
6514 struct sljit_label *loop = NULL;
6515 struct jump_list *emptymatch = NULL;
6516
6517 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6518 if (*cc == OP_BRAPOSZERO)
6519 {
6520 zero = TRUE;
6521 cc++;
6522 }
6523
6524 opcode = *cc;
6525 private_data_ptr = PRIVATE_DATA(cc);
6526 SLJIT_ASSERT(private_data_ptr != 0);
6527 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6528 switch(opcode)
6529 {
6530 case OP_BRAPOS:
6531 case OP_SBRAPOS:
6532 ccbegin = cc + 1 + LINK_SIZE;
6533 break;
6534
6535 case OP_CBRAPOS:
6536 case OP_SCBRAPOS:
6537 offset = GET2(cc, 1 + LINK_SIZE);
6538 /* This case cannot be optimized in the same way as
6539 normal capturing brackets. */
6540 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6541 cbraprivptr = OVECTOR_PRIV(offset);
6542 offset <<= 1;
6543 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6544 break;
6545
6546 default:
6547 SLJIT_ASSERT_STOP();
6548 break;
6549 }
6550
6551 framesize = get_framesize(common, cc, FALSE, &needs_control_head);
6552 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6553 if (framesize < 0)
6554 {
6555 if (offset != 0)
6556 {
6557 stacksize = 2;
6558 if (common->capture_last_ptr != 0)
6559 stacksize++;
6560 }
6561 else
6562 stacksize = 1;
6563
6564 if (needs_control_head)
6565 stacksize++;
6566 if (!zero)
6567 stacksize++;
6568
6569 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6570 allocate_stack(common, stacksize);
6571 if (framesize == no_frame)
6572 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6573
6574 stack = 0;
6575 if (offset != 0)
6576 {
6577 stack = 2;
6578 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6579 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6580 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6581 if (common->capture_last_ptr != 0)
6582 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6583 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6584 if (needs_control_head)
6585 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6586 if (common->capture_last_ptr != 0)
6587 {
6588 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6589 stack = 3;
6590 }
6591 }
6592 else
6593 {
6594 if (needs_control_head)
6595 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6596 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6597 stack = 1;
6598 }
6599
6600 if (needs_control_head)
6601 stack++;
6602 if (!zero)
6603 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
6604 if (needs_control_head)
6605 {
6606 stack--;
6607 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6608 }
6609 }
6610 else
6611 {
6612 stacksize = framesize + 1;
6613 if (!zero)
6614 stacksize++;
6615 if (needs_control_head)
6616 stacksize++;
6617 if (offset == 0)
6618 stacksize++;
6619 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6620
6621 allocate_stack(common, stacksize);
6622 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6623 if (needs_control_head)
6624 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6625 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6626
6627 stack = 0;
6628 if (!zero)
6629 {
6630 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6631 stack = 1;
6632 }
6633 if (needs_control_head)
6634 {
6635 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6636 stack++;
6637 }
6638 if (offset == 0)
6639 {
6640 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6641 stack++;
6642 }
6643 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6644 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6645 stack -= 1 + (offset == 0);
6646 }
6647
6648 if (offset != 0)
6649 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6650
6651 loop = LABEL();
6652 while (*cc != OP_KETRPOS)
6653 {
6654 backtrack->top = NULL;
6655 backtrack->topbacktracks = NULL;
6656 cc += GET(cc, 1);
6657
6658 compile_matchingpath(common, ccbegin, cc, backtrack);
6659 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6660 return NULL;
6661
6662 if (framesize < 0)
6663 {
6664 if (framesize == no_frame)
6665 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6666
6667 if (offset != 0)
6668 {
6669 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6670 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6672 if (common->capture_last_ptr != 0)
6673 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6674 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6675 }
6676 else
6677 {
6678 if (opcode == OP_SBRAPOS)
6679 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6681 }
6682
6683 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6684 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6685
6686 if (!zero)
6687 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6688 }
6689 else
6690 {
6691 if (offset != 0)
6692 {
6693 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6694 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6695 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6696 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6697 if (common->capture_last_ptr != 0)
6698 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6699 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6700 }
6701 else
6702 {
6703 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6704 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6705 if (opcode == OP_SBRAPOS)
6706 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6707 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6708 }
6709
6710 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6711 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6712
6713 if (!zero)
6714 {
6715 if (framesize < 0)
6716 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6717 else
6718 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6719 }
6720 }
6721
6722 if (needs_control_head)
6723 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
6724
6725 JUMPTO(SLJIT_JUMP, loop);
6726 flush_stubs(common);
6727
6728 compile_backtrackingpath(common, backtrack->top);
6729 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6730 return NULL;
6731 set_jumps(backtrack->topbacktracks, LABEL());
6732
6733 if (framesize < 0)
6734 {
6735 if (offset != 0)
6736 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6737 else
6738 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6739 }
6740 else
6741 {