/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1276 - (show annotations)
Sun Mar 10 17:35:23 2013 UTC (6 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 290939 byte(s)
Error occurred while calculating annotation data.
Next patch for control verb chain. OP_ONCE support is still missing.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2013 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2013
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* 1 - Always have a control head. */
75 /* #define DEBUG_FORCE_CONTROL_HEAD 1 */
76
77 /* Allocate memory for the regex stack on the real machine stack.
78 Fast, but limited size. */
79 #define MACHINE_STACK_SIZE 32768
80
81 /* Growth rate for stack allocated by the OS. Should be a multiple
82 of the page size. */
83 #define STACK_GROWTH_RATE 8192
84
85 /* Enable to check that the allocation could destroy temporaries. */
86 #if defined SLJIT_DEBUG && SLJIT_DEBUG
87 #define DESTROY_REGISTERS 1
88 #endif
89
90 /*
91 Short summary about the backtracking mechanism employed by the jit code generator:
92
93 The code generator follows the recursive nature of the PERL compatible regular
94 expressions. The basic blocks of regular expressions are condition checkers
95 which execute different commands depending on the result of the condition check.
96 The relationship between the operators can be horizontal (concatenation) and
97 vertical (sub-expression) (See struct backtrack_common for more details).
98
99 'ab' - 'a' and 'b' regexps are concatenated
100 'a+' - 'a' is the sub-expression of the '+' operator
101
102 The condition checkers are boolean (true/false) checkers. Machine code is generated
103 for the checker itself and for the actions depending on the result of the checker.
104 The 'true' case is called the matching path (expected path), and the other is called
105 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
106 branches on the matching path.
107
108 Greedy star operator (*) :
109 Matching path: match happens.
110 Backtrack path: match failed.
111 Non-greedy star operator (*?) :
112 Matching path: no need to perform a match.
113 Backtrack path: match is required.
114
115 The following example shows how the code is generated for a capturing bracket
116 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
117 we have the following regular expression:
118
119 A(B|C)D
120
121 The generated code will be the following:
122
123 A matching path
124 '(' matching path (pushing arguments to the stack)
125 B matching path
126 ')' matching path (pushing arguments to the stack)
127 D matching path
128 return with successful match
129
130 D backtrack path
131 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
132 B backtrack path
133 C expected path
134 jump to D matching path
135 C backtrack path
136 A backtrack path
137
138 Notice that the order of the backtrack code paths is the opposite of the fast
139 code paths. In this way the topmost value on the stack always belongs
140 to the current backtrack code path. The backtrack path must check
141 whether there is a next alternative. If so, it needs to jump back to
142 the matching path eventually. Otherwise it needs to clear out its own stack
143 frame and continue the execution on the backtrack code paths.
144 */
145
146 /*
147 Saved stack frames:
148
149 Atomic blocks and asserts require reloading the values of private data
150 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
151 are not necessarily known at compile time, thus we need a dynamic restore
152 mechanism.
153
154 The stack frames are stored in a chain list, and have the following format:
155 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
156
157 Thus we can restore the private data to a particular point in the stack.
158 */
159
/* Argument block for the generated code: the jit_function pointer type
   declared later in this file takes a pointer to this structure as its
   only parameter.
   NOTE(review): str / begin / end presumably describe the subject string
   and offsets / offset_count the caller's output vector - confirm against
   the code that fills this block. */
160 typedef struct jit_arguments {
161 /* Pointers first. */
162 struct sljit_stack *stack;
163 const pcre_uchar *str;
164 const pcre_uchar *begin;
165 const pcre_uchar *end;
166 int *offsets;
167 pcre_uchar *uchar_ptr;
168 pcre_uchar *mark_ptr;
169 void *callout_data;
170 /* Everything else after. */
171 int real_offset_count;
172 int offset_count;
173 int call_limit;
174 pcre_uint8 notbol;
175 pcre_uint8 noteol;
176 pcre_uint8 notempty;
177 pcre_uint8 notempty_atstart;
178 } jit_arguments;
179
/* Holds one code pointer and one code size for each of the
   JIT_NUMBER_OF_COMPILE_MODES compile modes, plus a callback with its
   user data and the top_bracket value.
   NOTE(review): callback / userdata are presumably the user supplied JIT
   stack callback - confirm at the place where they are assigned. */
180 typedef struct executable_functions {
181 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
182 PUBL(jit_callback) callback;
183 void *userdata;
184 pcre_uint32 top_bracket;
185 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
186 } executable_functions;
187
/* Node of a singly linked list of sljit jumps. compiler_common keeps
   several such lists (quit, accept, stackalloc, ...). */
188 typedef struct jump_list {
189 struct sljit_jump *jump;
190 struct jump_list *next;
191 } jump_list;
192
/* Node of the singly linked list stored in compiler_common.stubs; each
   node pairs a jump with a label.
   NOTE(review): presumably 'start' is the jump into the stub and 'quit'
   the label where execution resumes - confirm against the stub emitter. */
193 typedef struct stub_list {
194 struct sljit_jump *start;
195 struct sljit_label *quit;
196 struct stub_list *next;
197 } stub_list;
198
/* Negative sentinel values. get_framesize() is documented to return one
   of these (always < 0) when no frame is needed. */
199 enum frame_types {
200 no_frame = -1,
201 no_stack = -2
202 };
203
/* Numeric identifiers for the commit, prune and skip control types.
   NOTE(review): presumably stored in the control verb chain referenced by
   compiler_common.control_head_ptr - confirm at the point of use. */
204 enum control_types {
205 type_commit = 0,
206 type_prune = 1,
207 type_skip = 2
208 };
209
/* Function pointer type using the SLJIT_CALL convention: takes a pointer
   to a jit_arguments block and returns an int. Presumably the type through
   which the generated code is called - confirm at the call site. */
210 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
211
212 /* The following structure is the key data type for the recursive
213 code generator. It is allocated by compile_matchingpath, and contains
214 the arguments for compile_backtrackingpath. Must be the first member
215 of its descendants. */
/* The specialised *_backtrack structures in this file all start with a
   member of this type named 'common'. */
216 typedef struct backtrack_common {
217 /* Concatenation stack. */
218 struct backtrack_common *prev;
219 jump_list *nextbacktracks;
220 /* Internal stack (for component operators). */
221 struct backtrack_common *top;
222 jump_list *topbacktracks;
223 /* Opcode pointer. */
224 pcre_uchar *cc;
225 } backtrack_common;
226
/* Backtrack record that extends backtrack_common (first member).
   bracket_backtrack.u.assert can point to one of these.
   NOTE(review): judging by the name it is used for assertion opcodes -
   confirm in the assert compiler. */
227 typedef struct assert_backtrack {
228 backtrack_common common;
229 jump_list *condfailed;
230 /* Less than 0 if a frame is not needed. */
231 int framesize;
232 /* Points to our private memory word on the stack. */
233 int private_data_ptr;
234 /* For iterators. */
235 struct sljit_label *matchingpath;
236 } assert_backtrack;
237
/* Backtrack record that extends backtrack_common (first member) with three
   labels, a union and a private data offset. Only one member of the union
   'u' is meaningful at a time; the comments inside it name the opcodes each
   member is meant for. */
238 typedef struct bracket_backtrack {
239 backtrack_common common;
240 /* Where to continue if an alternative is successfully matched. */
241 struct sljit_label *alternative_matchingpath;
242 /* For rmin and rmax iterators. */
243 struct sljit_label *recursive_matchingpath;
244 /* For greedy ? operator. */
245 struct sljit_label *zero_matchingpath;
246 /* Contains the branches of a failed condition. */
247 union {
248 /* Both for OP_COND, OP_SCOND. */
249 jump_list *condfailed;
250 assert_backtrack *assert;
251 /* For OP_ONCE. Less than 0 if not needed. */
252 int framesize;
253 } u;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 } bracket_backtrack;
257
/* Backtrack record that extends backtrack_common (first member).
   NOTE(review): judging by the name it serves the *POS bracket opcodes
   (OP_BRAPOS, OP_CBRAPOS, ...) - confirm in the bracketpos compiler. */
258 typedef struct bracketpos_backtrack {
259 backtrack_common common;
260 /* Points to our private memory word on the stack. */
261 int private_data_ptr;
262 /* Reverting stack is needed. */
263 int framesize;
264 /* Allocated stack size. */
265 int stacksize;
266 } bracketpos_backtrack;
267
/* Backtrack record that extends backtrack_common (first member) with a
   single matching path label.
   NOTE(review): presumably used for OP_BRAMINZERO - confirm at its use. */
268 typedef struct braminzero_backtrack {
269 backtrack_common common;
270 struct sljit_label *matchingpath;
271 } braminzero_backtrack;
272
/* Backtrack record that extends backtrack_common (first member) with the
   label of the next iteration. */
273 typedef struct iterator_backtrack {
274 backtrack_common common;
275 /* Next iteration. */
276 struct sljit_label *matchingpath;
277 } iterator_backtrack;
278
/* Node of a singly linked list; compiler_common.entries and
   compiler_common.currententry point to nodes of this type. Each node
   carries an entry label, a list of pending calls and a start offset. */
279 typedef struct recurse_entry {
280 struct recurse_entry *next;
281 /* Contains the function entry. */
282 struct sljit_label *entry;
283 /* Collects the calls until the function is not created. */
284 jump_list *calls;
285 /* Points to the starting opcode. */
286 int start;
287 } recurse_entry;
288
/* Backtrack record that extends backtrack_common (first member) with one
   flag.
   NOTE(review): inlined_pattern presumably tells whether the recursed
   pattern was emitted inline instead of being called - confirm at its use. */
289 typedef struct recurse_backtrack {
290 backtrack_common common;
291 BOOL inlined_pattern;
292 } recurse_backtrack;
293
/* Used below to size compiler_common.digits (2 + MAX_RANGE_SIZE entries). */
294 #define MAX_RANGE_SIZE 6
295
/* Shared state of one compilation. A pointer to it, named 'common', is the
   first parameter of the helper functions in this file, and several macros
   (DEFINE_COMPILER, OVECTOR, OVECTOR_PRIV, PRIVATE_DATA) expect a variable
   of that name to be in scope.
   The int members whose names end in _ptr, plus hit_start and
   first_line_end, are described by their comments as positions of stored
   values; get_private_data_length() assigns recursive_head_ptr,
   capture_last_ptr and mark_ptr from ovector_start, and treats the value 0
   as "not allocated yet". */
296 typedef struct compiler_common {
297 /* The sljit generic compiler. */
298 struct sljit_compiler *compiler;
299 /* First byte code. */
300 pcre_uchar *start;
301 /* Maps private data offset to each opcode. */
302 int *private_data_ptrs;
303 /* Tells whether the capturing bracket is optimized. */
304 pcre_uint8 *optimized_cbracket;
305 /* Starting offset of private data for capturing brackets. */
306 int cbra_ptr;
307 /* Output vector starting point. Must be divisible by 2. */
308 int ovector_start;
309 /* Last known position of the requested byte. */
310 int req_char_ptr;
311 /* Head of the last recursion. */
312 int recursive_head_ptr;
313 /* First inspected character for partial matching. */
314 int start_used_ptr;
315 /* Starting pointer for partial soft matches. */
316 int hit_start;
317 /* End pointer of the first line. */
318 int first_line_end;
319 /* Points to the marked string. */
320 int mark_ptr;
321 /* Recursive control verb management chain. */
322 int control_head_ptr;
323 /* Points to the last matched capture block index. */
324 int capture_last_ptr;
325 /* Points to the starting position of the current match. */
326 int start_ptr;
327
328 /* Flipped and lower case tables. */
329 const pcre_uint8 *fcc;
330 sljit_sw lcc;
331 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
332 int mode;
333 /* \K is in the pattern. */
334 BOOL has_set_som;
335 /* Needs to know the start position anytime. */
336 BOOL needs_start_ptr;
337 /* Currently in recurse or assert. */
338 BOOL local_exit;
339 /* Newline control. */
340 int nltype;
341 int newline;
342 int bsr_nltype;
343 /* Dollar endonly. */
344 int endonly;
345 /* Tables. */
346 sljit_sw ctypes;
347 int digits[2 + MAX_RANGE_SIZE];
348 /* Named capturing brackets. */
349 sljit_uw name_table;
350 sljit_sw name_count;
351 sljit_sw name_entry_size;
352
353 /* Labels and jump lists. */
354 struct sljit_label *partialmatchlabel;
355 struct sljit_label *quit_label;
356 struct sljit_label *forced_quit_label;
357 struct sljit_label *accept_label;
358 stub_list *stubs;
359 recurse_entry *entries;
360 recurse_entry *currententry;
361 jump_list *partialmatch;
362 jump_list *quit;
363 jump_list *forced_quit;
364 jump_list *accept;
365 jump_list *calllimit;
366 jump_list *stackalloc;
367 jump_list *revertframes;
368 jump_list *wordboundary;
369 jump_list *anynewline;
370 jump_list *hspace;
371 jump_list *vspace;
372 jump_list *casefulcmp;
373 jump_list *caselesscmp;
374 jump_list *reset_match;
375 BOOL jscript_compat;
/* The members below exist only in builds with the matching SUPPORT_UTF,
   SUPPORT_UCP and COMPILE_PCRE* settings. */
376 #ifdef SUPPORT_UTF
377 BOOL utf;
378 #ifdef SUPPORT_UCP
379 BOOL use_ucp;
380 #endif
381 #ifndef COMPILE_PCRE32
382 jump_list *utfreadchar;
383 #endif
384 #ifdef COMPILE_PCRE8
385 jump_list *utfreadtype8;
386 #endif
387 #endif /* SUPPORT_UTF */
388 #ifdef SUPPORT_UCP
389 jump_list *getucd;
390 #endif
391 } compiler_common;
392
393 /* For byte_sequence_compare. */
394
/* length and sourcereg are always present. The remaining members exist
   only when SLJIT_UNALIGNED is set: ucharptr plus two unions, c and oc,
   each overlaying a 32 bit value (asint), a 16 bit value (asushort) and an
   array of character units whose element type and count depend on the
   COMPILE_PCRE8 / 16 / 32 build.
   NOTE(review): presumably c collects the pattern characters and oc their
   other-case variants - confirm in byte_sequence_compare. */
395 typedef struct compare_context {
396 int length;
397 int sourcereg;
398 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
399 int ucharptr;
400 union {
401 sljit_si asint;
402 sljit_uh asushort;
403 #if defined COMPILE_PCRE8
404 sljit_ub asbyte;
405 sljit_ub asuchars[4];
406 #elif defined COMPILE_PCRE16
407 sljit_uh asuchars[2];
408 #elif defined COMPILE_PCRE32
409 sljit_ui asuchars[1];
410 #endif
411 } c;
412 union {
413 sljit_si asint;
414 sljit_uh asushort;
415 #if defined COMPILE_PCRE8
416 sljit_ub asbyte;
417 sljit_ub asuchars[4];
418 #elif defined COMPILE_PCRE16
419 sljit_uh asuchars[2];
420 #elif defined COMPILE_PCRE32
421 sljit_ui asuchars[1];
422 #endif
423 } oc;
424 #endif
425 } compare_context;
426
427 /* Undefine sljit macros. */
/* CMP is redefined further down, among the shortcut macros, as a wrapper
   around sljit_emit_cmp. */
428 #undef CMP
429
430 /* Used for accessing the elements of the stack. */
/* STACK(i) is a negative byte offset: STACK(0) is -sizeof(sljit_sw),
   STACK(1) is -2 * sizeof(sljit_sw), and so on. */
431 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
432
/* Symbolic names for the sljit registers used throughout this file.
   NOTE(review): the roles are inferred from the names only (string
   pointer / end, backtrack stack top / limit, argument block, call
   counter, return address) - confirm against the code that uses them. */
433 #define TMP1 SLJIT_SCRATCH_REG1
434 #define TMP2 SLJIT_SCRATCH_REG3
435 #define TMP3 SLJIT_TEMPORARY_EREG2
436 #define STR_PTR SLJIT_SAVED_REG1
437 #define STR_END SLJIT_SAVED_REG2
438 #define STACK_TOP SLJIT_SCRATCH_REG2
439 #define STACK_LIMIT SLJIT_SAVED_REG3
440 #define ARGUMENTS SLJIT_SAVED_EREG1
441 #define CALL_COUNT SLJIT_SAVED_EREG2
442 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
443
444 /* Local space layout. */
/* The five fixed slots below are byte offsets of consecutive sljit_sw
   words (word 0 to word 4). */
445 /* These two locals can be used by the current opcode. */
446 #define LOCALS0 (0 * sizeof(sljit_sw))
447 #define LOCALS1 (1 * sizeof(sljit_sw))
448 /* Two local variables for possessive quantifiers (char1 cannot use them). */
449 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
450 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
451 /* Max limit of recursions. */
452 #define CALL_LIMIT (4 * sizeof(sljit_sw))
453 /* The output vector is stored on the stack, and contains pointers
454 to characters. The vector data is divided into two groups: the first
455 group contains the start / end character pointers, and the second
456 contains the start pointers of capturing groups whose end has not yet been reached. */
/* All four macros below expand to expressions that read a variable named
   'common' (a compiler_common pointer), which must be in scope where they
   are used. PRIVATE_DATA(cc) indexes private_data_ptrs by the distance of
   cc from common->start. */
457 #define OVECTOR_START (common->ovector_start)
458 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
459 #define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * sizeof(sljit_sw))
460 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
461
/* Selects the sljit move opcodes matching the character unit width of the
   build: the _UB forms for COMPILE_PCRE8, _UH for COMPILE_PCRE16 and _UI
   for COMPILE_PCRE32. Exactly one of the three must be defined, otherwise
   compilation stops with an #error. */
462 #if defined COMPILE_PCRE8
463 #define MOV_UCHAR SLJIT_MOV_UB
464 #define MOVU_UCHAR SLJIT_MOVU_UB
465 #elif defined COMPILE_PCRE16
466 #define MOV_UCHAR SLJIT_MOV_UH
467 #define MOVU_UCHAR SLJIT_MOVU_UH
468 #elif defined COMPILE_PCRE32
469 #define MOV_UCHAR SLJIT_MOV_UI
470 #define MOVU_UCHAR SLJIT_MOVU_UI
471 #else
472 #error Unsupported compiling mode
473 #endif
474
475 /* Shortcuts. */
/* DEFINE_COMPILER declares a local 'compiler' initialised from
   common->compiler, so it needs a variable named 'common' in scope. Every
   other shortcut except SET_LABEL forwards to an sljit_* call that passes
   that local 'compiler', so DEFINE_COMPILER has to appear first in any
   function that uses them. */
476 #define DEFINE_COMPILER \
477 struct sljit_compiler *compiler = common->compiler
478 #define OP1(op, dst, dstw, src, srcw) \
479 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
480 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
481 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
482 #define LABEL() \
483 sljit_emit_label(compiler)
484 #define JUMP(type) \
485 sljit_emit_jump(compiler, (type))
/* JUMPTO emits a jump and binds it to an already existing label. */
486 #define JUMPTO(type, label) \
487 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* JUMPHERE binds an earlier jump to a label emitted at the current point. */
488 #define JUMPHERE(jump) \
489 sljit_set_label((jump), sljit_emit_label(compiler))
490 #define SET_LABEL(jump, label) \
491 sljit_set_label((jump), (label))
/* CMP replaces the sljit macro of the same name, which is #undef'ed
   earlier in this file. */
492 #define CMP(type, src1, src1w, src2, src2w) \
493 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
494 #define CMPTO(type, src1, src1w, src2, src2w, label) \
495 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
496 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
497 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
498 #define GET_LOCAL_BASE(dst, dstw, offset) \
499 sljit_get_local_base(compiler, (dst), (dstw), (offset))
500
501 static pcre_uchar* bracketend(pcre_uchar* cc)
502 {
503 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
504 do cc += GET(cc, 1); while (*cc == OP_ALT);
505 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
506 cc += 1 + LINK_SIZE;
507 return cc;
508 }
509
510 /* Functions which might need modification for every newly supported opcode:
511 next_opcode
512 get_private_data_length
513 set_private_data_ptrs
514 get_framesize
515 init_frame
516 get_private_data_copy_length
517 copy_private_data
518 compile_matchingpath
519 compile_backtrackingpath
520 */
521
/* Returns the address of the opcode that follows the one at cc, or NULL
   when the opcode at cc is not handled here (any opcode missing from the
   switch, or OP_ANYBYTE in UTF mode). get_private_data_length() turns a
   NULL result into a -1 return; set_private_data_ptrs() asserts that it
   never happens. common is only read (common->utf) in SUPPORT_UTF builds. */
522 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
523 {
524 SLJIT_UNUSED_ARG(common);
525 switch(*cc)
526 {
/* Opcodes whose length is exactly the PRIV(OP_lengths) table entry. */
527 case OP_SOD:
528 case OP_SOM:
529 case OP_SET_SOM:
530 case OP_NOT_WORD_BOUNDARY:
531 case OP_WORD_BOUNDARY:
532 case OP_NOT_DIGIT:
533 case OP_DIGIT:
534 case OP_NOT_WHITESPACE:
535 case OP_WHITESPACE:
536 case OP_NOT_WORDCHAR:
537 case OP_WORDCHAR:
538 case OP_ANY:
539 case OP_ALLANY:
540 case OP_NOTPROP:
541 case OP_PROP:
542 case OP_ANYNL:
543 case OP_NOT_HSPACE:
544 case OP_HSPACE:
545 case OP_NOT_VSPACE:
546 case OP_VSPACE:
547 case OP_EXTUNI:
548 case OP_EODN:
549 case OP_EOD:
550 case OP_CIRC:
551 case OP_CIRCM:
552 case OP_DOLL:
553 case OP_DOLLM:
554 case OP_CRSTAR:
555 case OP_CRMINSTAR:
556 case OP_CRPLUS:
557 case OP_CRMINPLUS:
558 case OP_CRQUERY:
559 case OP_CRMINQUERY:
560 case OP_CRRANGE:
561 case OP_CRMINRANGE:
562 case OP_CLASS:
563 case OP_NCLASS:
564 case OP_REF:
565 case OP_REFI:
566 case OP_RECURSE:
567 case OP_CALLOUT:
568 case OP_ALT:
569 case OP_KET:
570 case OP_KETRMAX:
571 case OP_KETRMIN:
572 case OP_KETRPOS:
573 case OP_REVERSE:
574 case OP_ASSERT:
575 case OP_ASSERT_NOT:
576 case OP_ASSERTBACK:
577 case OP_ASSERTBACK_NOT:
578 case OP_ONCE:
579 case OP_ONCE_NC:
580 case OP_BRA:
581 case OP_BRAPOS:
582 case OP_CBRA:
583 case OP_CBRAPOS:
584 case OP_COND:
585 case OP_SBRA:
586 case OP_SBRAPOS:
587 case OP_SCBRA:
588 case OP_SCBRAPOS:
589 case OP_SCOND:
590 case OP_CREF:
591 case OP_NCREF:
592 case OP_RREF:
593 case OP_NRREF:
594 case OP_DEF:
595 case OP_BRAZERO:
596 case OP_BRAMINZERO:
597 case OP_BRAPOSZERO:
598 case OP_PRUNE:
599 case OP_SKIP:
600 case OP_COMMIT:
601 case OP_FAIL:
602 case OP_ACCEPT:
603 case OP_ASSERT_ACCEPT:
604 case OP_CLOSE:
605 case OP_SKIPZERO:
606 return cc + PRIV(OP_lengths)[*cc];
607
/* Opcodes that end with a literal character: the table length, plus in
   UTF mode the extra units reported by GET_EXTRALEN for the unit just
   before the advanced cc. */
608 case OP_CHAR:
609 case OP_CHARI:
610 case OP_NOT:
611 case OP_NOTI:
612 case OP_STAR:
613 case OP_MINSTAR:
614 case OP_PLUS:
615 case OP_MINPLUS:
616 case OP_QUERY:
617 case OP_MINQUERY:
618 case OP_UPTO:
619 case OP_MINUPTO:
620 case OP_EXACT:
621 case OP_POSSTAR:
622 case OP_POSPLUS:
623 case OP_POSQUERY:
624 case OP_POSUPTO:
625 case OP_STARI:
626 case OP_MINSTARI:
627 case OP_PLUSI:
628 case OP_MINPLUSI:
629 case OP_QUERYI:
630 case OP_MINQUERYI:
631 case OP_UPTOI:
632 case OP_MINUPTOI:
633 case OP_EXACTI:
634 case OP_POSSTARI:
635 case OP_POSPLUSI:
636 case OP_POSQUERYI:
637 case OP_POSUPTOI:
638 case OP_NOTSTAR:
639 case OP_NOTMINSTAR:
640 case OP_NOTPLUS:
641 case OP_NOTMINPLUS:
642 case OP_NOTQUERY:
643 case OP_NOTMINQUERY:
644 case OP_NOTUPTO:
645 case OP_NOTMINUPTO:
646 case OP_NOTEXACT:
647 case OP_NOTPOSSTAR:
648 case OP_NOTPOSPLUS:
649 case OP_NOTPOSQUERY:
650 case OP_NOTPOSUPTO:
651 case OP_NOTSTARI:
652 case OP_NOTMINSTARI:
653 case OP_NOTPLUSI:
654 case OP_NOTMINPLUSI:
655 case OP_NOTQUERYI:
656 case OP_NOTMINQUERYI:
657 case OP_NOTUPTOI:
658 case OP_NOTMINUPTOI:
659 case OP_NOTEXACTI:
660 case OP_NOTPOSSTARI:
661 case OP_NOTPOSPLUSI:
662 case OP_NOTPOSQUERYI:
663 case OP_NOTPOSUPTOI:
664 cc += PRIV(OP_lengths)[*cc];
665 #ifdef SUPPORT_UTF
666 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
667 #endif
668 return cc;
669
670 /* Special cases. */
/* The result is one unit short of the table length, so it points at the
   last unit of this item. NOTE(review): that unit is presumably the
   character type opcode being repeated, which the caller then visits as an
   opcode of its own - confirm against the OP_lengths table. */
671 case OP_TYPESTAR:
672 case OP_TYPEMINSTAR:
673 case OP_TYPEPLUS:
674 case OP_TYPEMINPLUS:
675 case OP_TYPEQUERY:
676 case OP_TYPEMINQUERY:
677 case OP_TYPEUPTO:
678 case OP_TYPEMINUPTO:
679 case OP_TYPEEXACT:
680 case OP_TYPEPOSSTAR:
681 case OP_TYPEPOSPLUS:
682 case OP_TYPEPOSQUERY:
683 case OP_TYPEPOSUPTO:
684 return cc + PRIV(OP_lengths)[*cc] - 1;
685
/* Reported as unsupported (NULL) when the pattern is in UTF mode. */
686 case OP_ANYBYTE:
687 #ifdef SUPPORT_UTF
688 if (common->utf) return NULL;
689 #endif
690 return cc + 1;
691
/* Variable length: the total length is stored in the link after the opcode. */
692 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
693 case OP_XCLASS:
694 return cc + GET(cc, 1);
695 #endif
696
/* Variable length: three fixed units plus cc[1] further units.
   NOTE(review): presumably opcode, name length, the name and a terminating
   zero - confirm against the compiler that emits these opcodes. */
697 case OP_MARK:
698 case OP_PRUNE_ARG:
699 return cc + 1 + 2 + cc[1];
700
/* Anything not listed above is treated as unsupported. */
701 default:
702 return NULL;
703 }
704 }
705
/* Groups of case labels shared by get_private_data_length() and
   set_private_data_ptrs(). The suffix reflects what those functions do
   with each group:
     _1  - one private data word (space = 1).
     _2A - two private data words; the item is skipped with size -2 (plain
           iterators) or size 1 (TYPE iterators).
     _2B - two private data words; the skipped size additionally includes
           IMM2_SIZE units.
   For the TYPE groups _2A and _2B the two words are only requested when
   the repeated type is neither OP_ANYNL nor OP_EXTUNI. */
706 #define CASE_ITERATOR_PRIVATE_DATA_1 \
707 case OP_MINSTAR: \
708 case OP_MINPLUS: \
709 case OP_QUERY: \
710 case OP_MINQUERY: \
711 case OP_MINSTARI: \
712 case OP_MINPLUSI: \
713 case OP_QUERYI: \
714 case OP_MINQUERYI: \
715 case OP_NOTMINSTAR: \
716 case OP_NOTMINPLUS: \
717 case OP_NOTQUERY: \
718 case OP_NOTMINQUERY: \
719 case OP_NOTMINSTARI: \
720 case OP_NOTMINPLUSI: \
721 case OP_NOTQUERYI: \
722 case OP_NOTMINQUERYI:
723
724 #define CASE_ITERATOR_PRIVATE_DATA_2A \
725 case OP_STAR: \
726 case OP_PLUS: \
727 case OP_STARI: \
728 case OP_PLUSI: \
729 case OP_NOTSTAR: \
730 case OP_NOTPLUS: \
731 case OP_NOTSTARI: \
732 case OP_NOTPLUSI:
733
734 #define CASE_ITERATOR_PRIVATE_DATA_2B \
735 case OP_UPTO: \
736 case OP_MINUPTO: \
737 case OP_UPTOI: \
738 case OP_MINUPTOI: \
739 case OP_NOTUPTO: \
740 case OP_NOTMINUPTO: \
741 case OP_NOTUPTOI: \
742 case OP_NOTMINUPTOI:
743
744 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
745 case OP_TYPEMINSTAR: \
746 case OP_TYPEMINPLUS: \
747 case OP_TYPEQUERY: \
748 case OP_TYPEMINQUERY:
749
750 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
751 case OP_TYPESTAR: \
752 case OP_TYPEPLUS:
753
754 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
755 case OP_TYPEUPTO: \
756 case OP_TYPEMINUPTO:
757
758 static int get_class_iterator_size(pcre_uchar *cc)
759 {
760 switch(*cc)
761 {
762 case OP_CRSTAR:
763 case OP_CRPLUS:
764 return 2;
765
766 case OP_CRMINSTAR:
767 case OP_CRMINPLUS:
768 case OP_CRQUERY:
769 case OP_CRMINQUERY:
770 return 1;
771
772 case OP_CRRANGE:
773 case OP_CRMINRANGE:
774 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
775 return 0;
776 return 2;
777
778 default:
779 return 0;
780 }
781 }
782
/* Walks the compiled pattern from cc up to ccend and returns the number of
   bytes of private data it requires, or -1 when an unsupported construct
   is found (next_opcode() returned NULL, or an OP_COND / OP_SCOND is
   directly followed by OP_CALLOUT).

   Side effects on common: sets has_set_som and needs_start_ptr, sets
   control_head_ptr to 1 for the control verbs, clears optimized_cbracket[]
   for groups named by OP_REF / OP_REFI / OP_CREF / OP_NCREF and for
   OP_CBRAPOS / OP_SCBRAPOS groups, and reserves one word at ovector_start
   for each of recursive_head_ptr, capture_last_ptr and mark_ptr the first
   time a matching opcode is seen.

   Per-opcode bookkeeping inside the loop:
     space      - private data words wanted by an iterator.
     size       - units still to skip after the switch; a negative value
                  means "skip -size units, then any UTF extra units of the
                  character just skipped".
     bracketlen - length of a bracket header to step over (also borrowed as
                  scratch by OP_NCREF, which resets it to 0).
     end        - end of an enclosing bracket whose closing opcode is not
                  OP_KET, or NULL; iterator space is only counted when
                  cc >= end. */
783 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
784 {
785 int private_data_length = 0;
786 pcre_uchar *alternative;
787 pcre_uchar *name;
788 pcre_uchar *end = NULL;
789 int space, size, i;
790 pcre_uint32 bracketlen;
791
792 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
793 while (cc < ccend)
794 {
795 space = 0;
796 size = 0;
797 bracketlen = 0;
798 switch(*cc)
799 {
800 case OP_SET_SOM:
801 common->has_set_som = TRUE;
802 cc += 1;
803 break;
804
805 case OP_REF:
806 case OP_REFI:
807 common->optimized_cbracket[GET2(cc, 1)] = 0;
808 cc += 1 + IMM2_SIZE;
809 break;
810
/* Each of these brackets gets one private data word. */
811 case OP_ASSERT:
812 case OP_ASSERT_NOT:
813 case OP_ASSERTBACK:
814 case OP_ASSERTBACK_NOT:
815 case OP_ONCE:
816 case OP_ONCE_NC:
817 case OP_BRAPOS:
818 case OP_SBRA:
819 case OP_SBRAPOS:
820 private_data_length += sizeof(sljit_sw);
821 bracketlen = 1 + LINK_SIZE;
822 break;
823
824 case OP_CBRAPOS:
825 case OP_SCBRAPOS:
826 private_data_length += sizeof(sljit_sw);
827 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
828 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
829 break;
830
831 case OP_COND:
832 case OP_SCOND:
833 /* Only AUTO_CALLOUT can insert this opcode. We do
834 not intend to support this case. */
835 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
836 return -1;
837
838 if (*cc == OP_COND)
839 {
840 /* Might be a hidden SCOND. */
841 alternative = cc + GET(cc, 1);
842 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
843 private_data_length += sizeof(sljit_sw);
844 }
845 else
846 private_data_length += sizeof(sljit_sw);
847 bracketlen = 1 + LINK_SIZE;
848 break;
849
850 case OP_CREF:
851 i = GET2(cc, 1);
852 common->optimized_cbracket[i] = 0;
853 cc += 1 + IMM2_SIZE;
854 break;
855
856 case OP_NCREF:
/* bracketlen temporarily holds the referenced group number. The first loop
   finds the name table entry with that number; the second clears
   optimized_cbracket[] for every entry that carries the same name. */
857 bracketlen = GET2(cc, 1);
858 name = (pcre_uchar *)common->name_table;
859 alternative = name;
860 for (i = 0; i < common->name_count; i++)
861 {
862 if (GET2(name, 0) == bracketlen) break;
863 name += common->name_entry_size;
864 }
865 SLJIT_ASSERT(i != common->name_count);
866
867 for (i = 0; i < common->name_count; i++)
868 {
869 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
870 common->optimized_cbracket[GET2(alternative, 0)] = 0;
871 alternative += common->name_entry_size;
872 }
873 bracketlen = 0;
874 cc += 1 + IMM2_SIZE;
875 break;
876
877 case OP_BRA:
878 bracketlen = 1 + LINK_SIZE;
879 break;
880
881 case OP_CBRA:
882 case OP_SCBRA:
883 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
884 break;
885
886 CASE_ITERATOR_PRIVATE_DATA_1
887 space = 1;
888 size = -2;
889 break;
890
891 CASE_ITERATOR_PRIVATE_DATA_2A
892 space = 2;
893 size = -2;
894 break;
895
896 CASE_ITERATOR_PRIVATE_DATA_2B
897 space = 2;
898 size = -(2 + IMM2_SIZE);
899 break;
900
901 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
902 space = 1;
903 size = 1;
904 break;
905
906 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
907 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
908 space = 2;
909 size = 1;
910 break;
911
912 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
913 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
914 space = 2;
915 size = 1 + IMM2_SIZE;
916 break;
917
/* A class is followed by an optional repeat opcode; its need for private
   data is decided by get_class_iterator_size(). size is 0 on entry, so
   "+=" acts as a plain assignment here. */
918 case OP_CLASS:
919 case OP_NCLASS:
920 size += 1 + 32 / sizeof(pcre_uchar);
921 space = get_class_iterator_size(cc + size);
922 break;
923
924 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
925 case OP_XCLASS:
926 size = GET(cc, 1);
927 space = get_class_iterator_size(cc + size);
928 break;
929 #endif
930
931 case OP_RECURSE:
932 /* Set its value only once. */
933 if (common->recursive_head_ptr == 0)
934 {
935 common->recursive_head_ptr = common->ovector_start;
936 common->ovector_start += sizeof(sljit_sw);
937 }
938 cc += 1 + LINK_SIZE;
939 break;
940
941 case OP_CALLOUT:
942 if (common->capture_last_ptr == 0)
943 {
944 common->capture_last_ptr = common->ovector_start;
945 common->ovector_start += sizeof(sljit_sw);
946 }
947 cc += 2 + 2 * LINK_SIZE;
948 break;
949
950 case OP_PRUNE_ARG:
951 common->needs_start_ptr = TRUE;
952 common->control_head_ptr = 1;
953 /* Fall through. */
954
955 case OP_MARK:
956 if (common->mark_ptr == 0)
957 {
958 common->mark_ptr = common->ovector_start;
959 common->ovector_start += sizeof(sljit_sw);
960 }
961 cc += 1 + 2 + cc[1];
962 break;
963
964 case OP_PRUNE:
965 case OP_SKIP:
966 common->needs_start_ptr = TRUE;
967 /* Fall through. */
968
969 case OP_COMMIT:
970 common->control_head_ptr = 1;
971 cc += 1;
972 break;
973
974 default:
975 cc = next_opcode(common, cc);
976 if (cc == NULL)
977 return -1;
978 break;
979 }
980
/* Iterator cases leave cc on the iterator opcode, so this test is made
   before cc is advanced by size below. */
981 if (space > 0 && cc >= end)
982 private_data_length += sizeof(sljit_sw) * space;
983
984 if (size != 0)
985 {
986 if (size < 0)
987 {
988 cc += -size;
989 #ifdef SUPPORT_UTF
990 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
991 #endif
992 }
993 else
994 cc += size;
995 }
996
/* Entering a bracket: when no tracked bracket is active any more
   (cc >= end), remember where this one ends, unless it is closed by a
   plain OP_KET, in which case tracking is switched off (end = NULL). */
997 if (bracketlen != 0)
998 {
999 if (cc >= end)
1000 {
1001 end = bracketend(cc);
1002 if (end[-1 - LINK_SIZE] == OP_KET)
1003 end = NULL;
1004 }
1005 cc += bracketlen;
1006 }
1007 }
1008 return private_data_length;
1009 }
1010
/* Walks the pattern from common->start up to ccend and hands out private
   data offsets, starting at private_data_ptr. Each offset is stored in
   common->private_data_ptrs[], indexed by the distance of the opcode from
   common->start (the slot read by the PRIVATE_DATA macro).

   The walk mirrors get_private_data_length(): the same opcodes consume
   the same number of sljit_sw words, using the same space / size /
   bracketlen / end bookkeeping. Both functions appear in the list of
   functions to revisit when a new opcode is supported. An opcode that
   next_opcode() does not know is only caught by an assertion here. */
1011 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
1012 {
1013 pcre_uchar *cc = common->start;
1014 pcre_uchar *alternative;
1015 pcre_uchar *end = NULL;
1016 int space, size, bracketlen;
1017
1018 while (cc < ccend)
1019 {
1020 space = 0;
1021 size = 0;
1022 bracketlen = 0;
1023 switch(*cc)
1024 {
/* Brackets that always own one private data word. */
1025 case OP_ASSERT:
1026 case OP_ASSERT_NOT:
1027 case OP_ASSERTBACK:
1028 case OP_ASSERTBACK_NOT:
1029 case OP_ONCE:
1030 case OP_ONCE_NC:
1031 case OP_BRAPOS:
1032 case OP_SBRA:
1033 case OP_SBRAPOS:
1034 case OP_SCOND:
1035 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1036 private_data_ptr += sizeof(sljit_sw);
1037 bracketlen = 1 + LINK_SIZE;
1038 break;
1039
1040 case OP_CBRAPOS:
1041 case OP_SCBRAPOS:
1042 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1043 private_data_ptr += sizeof(sljit_sw);
1044 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1045 break;
1046
1047 case OP_COND:
1048 /* Might be a hidden SCOND. */
1049 alternative = cc + GET(cc, 1);
1050 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1051 {
1052 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1053 private_data_ptr += sizeof(sljit_sw);
1054 }
1055 bracketlen = 1 + LINK_SIZE;
1056 break;
1057
1058 case OP_BRA:
1059 bracketlen = 1 + LINK_SIZE;
1060 break;
1061
1062 case OP_CBRA:
1063 case OP_SCBRA:
1064 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1065 break;
1066
1067 CASE_ITERATOR_PRIVATE_DATA_1
1068 space = 1;
1069 size = -2;
1070 break;
1071
1072 CASE_ITERATOR_PRIVATE_DATA_2A
1073 space = 2;
1074 size = -2;
1075 break;
1076
1077 CASE_ITERATOR_PRIVATE_DATA_2B
1078 space = 2;
1079 size = -(2 + IMM2_SIZE);
1080 break;
1081
1082 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1083 space = 1;
1084 size = 1;
1085 break;
1086
1087 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1088 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1089 space = 2;
1090 size = 1;
1091 break;
1092
1093 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1094 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1095 space = 2;
1096 size = 1 + IMM2_SIZE;
1097 break;
1098
/* size is 0 on entry, so "+=" acts as a plain assignment here. */
1099 case OP_CLASS:
1100 case OP_NCLASS:
1101 size += 1 + 32 / sizeof(pcre_uchar);
1102 space = get_class_iterator_size(cc + size);
1103 break;
1104
1105 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1106 case OP_XCLASS:
1107 size = GET(cc, 1);
1108 space = get_class_iterator_size(cc + size);
1109 break;
1110 #endif
1111
1112 default:
1113 cc = next_opcode(common, cc);
1114 SLJIT_ASSERT(cc != NULL);
1115 break;
1116 }
1117
/* Only the iterator and class cases set space, and they leave cc on the
   opcode itself, so the offset is recorded under that opcode's index. */
1118 if (space > 0 && cc >= end)
1119 {
1120 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1121 private_data_ptr += sizeof(sljit_sw) * space;
1122 }
1123
1124 if (size != 0)
1125 {
1126 if (size < 0)
1127 {
1128 cc += -size;
1129 #ifdef SUPPORT_UTF
1130 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1131 #endif
1132 }
1133 else
1134 cc += size;
1135 }
1136
/* Same bracket tracking as in get_private_data_length(): remember the end
   of a bracket that is not closed by a plain OP_KET. */
1137 if (bracketlen > 0)
1138 {
1139 if (cc >= end)
1140 {
1141 end = bracketend(cc);
1142 if (end[-1 - LINK_SIZE] == OP_KET)
1143 end = NULL;
1144 }
1145 cc += bracketlen;
1146 }
1147 }
1148 }
1149
1150 /* Returns with a frame_types (always < 0) if no need for frame. */
1151 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1152 {
1153 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1154 int length = 0;
1155 int possessive = 0;
1156 BOOL stack_restore = FALSE;
1157 BOOL setsom_found = recursive;
1158 BOOL setmark_found = recursive;
1159 /* The last capture is a local variable even for recursions. */
1160 BOOL capture_last_found = FALSE;
1161
1162 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1163 {
1164 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1165 /* This is correct regardless of common->capture_last_ptr. */
1166 capture_last_found = TRUE;
1167 }
1168
1169 cc = next_opcode(common, cc);
1170 SLJIT_ASSERT(cc != NULL);
1171 while (cc < ccend)
1172 switch(*cc)
1173 {
1174 case OP_SET_SOM:
1175 SLJIT_ASSERT(common->has_set_som);
1176 stack_restore = TRUE;
1177 if (!setsom_found)
1178 {
1179 length += 2;
1180 setsom_found = TRUE;
1181 }
1182 cc += 1;
1183 break;
1184
1185 case OP_MARK:
1186 case OP_PRUNE_ARG:
1187 SLJIT_ASSERT(common->mark_ptr != 0);
1188 stack_restore = TRUE;
1189 if (!setmark_found)
1190 {
1191 length += 2;
1192 setmark_found = TRUE;
1193 }
1194 cc += 1 + 2 + cc[1];
1195 break;
1196
1197 case OP_RECURSE:
1198 stack_restore = TRUE;
1199 if (common->has_set_som && !setsom_found)
1200 {
1201 length += 2;
1202 setsom_found = TRUE;
1203 }
1204 if (common->mark_ptr != 0 && !setmark_found)
1205 {
1206 length += 2;
1207 setmark_found = TRUE;
1208 }
1209 if (common->capture_last_ptr != 0 && !capture_last_found)
1210 {
1211 length += 2;
1212 capture_last_found = TRUE;
1213 }
1214 cc += 1 + LINK_SIZE;
1215 break;
1216
1217 case OP_CBRA:
1218 case OP_CBRAPOS:
1219 case OP_SCBRA:
1220 case OP_SCBRAPOS:
1221 stack_restore = TRUE;
1222 if (common->capture_last_ptr != 0 && !capture_last_found)
1223 {
1224 length += 2;
1225 capture_last_found = TRUE;
1226 }
1227 length += 3;
1228 cc += 1 + LINK_SIZE + IMM2_SIZE;
1229 break;
1230
1231 default:
1232 stack_restore = TRUE;
1233 /* Fall through. */
1234
1235 case OP_NOT_WORD_BOUNDARY:
1236 case OP_WORD_BOUNDARY:
1237 case OP_NOT_DIGIT:
1238 case OP_DIGIT:
1239 case OP_NOT_WHITESPACE:
1240 case OP_WHITESPACE:
1241 case OP_NOT_WORDCHAR:
1242 case OP_WORDCHAR:
1243 case OP_ANY:
1244 case OP_ALLANY:
1245 case OP_ANYBYTE:
1246 case OP_NOTPROP:
1247 case OP_PROP:
1248 case OP_ANYNL:
1249 case OP_NOT_HSPACE:
1250 case OP_HSPACE:
1251 case OP_NOT_VSPACE:
1252 case OP_VSPACE:
1253 case OP_EXTUNI:
1254 case OP_EODN:
1255 case OP_EOD:
1256 case OP_CIRC:
1257 case OP_CIRCM:
1258 case OP_DOLL:
1259 case OP_DOLLM:
1260 case OP_CHAR:
1261 case OP_CHARI:
1262 case OP_NOT:
1263 case OP_NOTI:
1264
1265 case OP_EXACT:
1266 case OP_POSSTAR:
1267 case OP_POSPLUS:
1268 case OP_POSQUERY:
1269 case OP_POSUPTO:
1270
1271 case OP_EXACTI:
1272 case OP_POSSTARI:
1273 case OP_POSPLUSI:
1274 case OP_POSQUERYI:
1275 case OP_POSUPTOI:
1276
1277 case OP_NOTEXACT:
1278 case OP_NOTPOSSTAR:
1279 case OP_NOTPOSPLUS:
1280 case OP_NOTPOSQUERY:
1281 case OP_NOTPOSUPTO:
1282
1283 case OP_NOTEXACTI:
1284 case OP_NOTPOSSTARI:
1285 case OP_NOTPOSPLUSI:
1286 case OP_NOTPOSQUERYI:
1287 case OP_NOTPOSUPTOI:
1288
1289 case OP_TYPEEXACT:
1290 case OP_TYPEPOSSTAR:
1291 case OP_TYPEPOSPLUS:
1292 case OP_TYPEPOSQUERY:
1293 case OP_TYPEPOSUPTO:
1294
1295 case OP_CLASS:
1296 case OP_NCLASS:
1297 case OP_XCLASS:
1298
1299 cc = next_opcode(common, cc);
1300 SLJIT_ASSERT(cc != NULL);
1301 break;
1302 }
1303
1304 /* Possessive quantifiers can use a special case. */
1305 if (SLJIT_UNLIKELY(possessive == length))
1306 return stack_restore ? no_frame : no_stack;
1307
1308 if (length > 0)
1309 return length + 1;
1310 return stack_restore ? no_frame : no_stack;
1311 }
1312
1313 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1314 {
1315 DEFINE_COMPILER;
1316 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1317 BOOL setsom_found = recursive;
1318 BOOL setmark_found = recursive;
1319 /* The last capture is a local variable even for recursions. */
1320 BOOL capture_last_found = FALSE;
1321 int offset;
1322
1323 /* >= 1 + shortest item size (2) */
1324 SLJIT_UNUSED_ARG(stacktop);
1325 SLJIT_ASSERT(stackpos >= stacktop + 2);
1326
1327 stackpos = STACK(stackpos);
1328 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1329 cc = next_opcode(common, cc);
1330 SLJIT_ASSERT(cc != NULL);
1331 while (cc < ccend)
1332 switch(*cc)
1333 {
1334 case OP_SET_SOM:
1335 SLJIT_ASSERT(common->has_set_som);
1336 if (!setsom_found)
1337 {
1338 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1339 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1340 stackpos += (int)sizeof(sljit_sw);
1341 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1342 stackpos += (int)sizeof(sljit_sw);
1343 setsom_found = TRUE;
1344 }
1345 cc += 1;
1346 break;
1347
1348 case OP_MARK:
1349 case OP_PRUNE_ARG:
1350 SLJIT_ASSERT(common->mark_ptr != 0);
1351 if (!setmark_found)
1352 {
1353 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1354 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1355 stackpos += (int)sizeof(sljit_sw);
1356 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1357 stackpos += (int)sizeof(sljit_sw);
1358 setmark_found = TRUE;
1359 }
1360 cc += 1 + 2 + cc[1];
1361 break;
1362
1363 case OP_RECURSE:
1364 if (common->has_set_som && !setsom_found)
1365 {
1366 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1367 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1368 stackpos += (int)sizeof(sljit_sw);
1369 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1370 stackpos += (int)sizeof(sljit_sw);
1371 setsom_found = TRUE;
1372 }
1373 if (common->mark_ptr != 0 && !setmark_found)
1374 {
1375 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1377 stackpos += (int)sizeof(sljit_sw);
1378 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1379 stackpos += (int)sizeof(sljit_sw);
1380 setmark_found = TRUE;
1381 }
1382 if (common->capture_last_ptr != 0 && !capture_last_found)
1383 {
1384 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1385 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1386 stackpos += (int)sizeof(sljit_sw);
1387 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1388 stackpos += (int)sizeof(sljit_sw);
1389 capture_last_found = TRUE;
1390 }
1391 cc += 1 + LINK_SIZE;
1392 break;
1393
1394 case OP_CBRA:
1395 case OP_CBRAPOS:
1396 case OP_SCBRA:
1397 case OP_SCBRAPOS:
1398 if (common->capture_last_ptr != 0 && !capture_last_found)
1399 {
1400 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1401 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1402 stackpos += (int)sizeof(sljit_sw);
1403 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1404 stackpos += (int)sizeof(sljit_sw);
1405 capture_last_found = TRUE;
1406 }
1407 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1408 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1409 stackpos += (int)sizeof(sljit_sw);
1410 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1411 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1412 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1413 stackpos += (int)sizeof(sljit_sw);
1414 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1415 stackpos += (int)sizeof(sljit_sw);
1416
1417 cc += 1 + LINK_SIZE + IMM2_SIZE;
1418 break;
1419
1420 default:
1421 cc = next_opcode(common, cc);
1422 SLJIT_ASSERT(cc != NULL);
1423 break;
1424 }
1425
1426 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1427 SLJIT_ASSERT(stackpos == STACK(stacktop));
1428 }
1429
1430 static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
1431 {
1432 int private_data_length = needs_control_head ? 3 : 2;
1433 int size;
1434 pcre_uchar *alternative;
1435 /* Calculate the sum of the private machine words. */
1436 while (cc < ccend)
1437 {
1438 size = 0;
1439 switch(*cc)
1440 {
1441 case OP_ASSERT:
1442 case OP_ASSERT_NOT:
1443 case OP_ASSERTBACK:
1444 case OP_ASSERTBACK_NOT:
1445 case OP_ONCE:
1446 case OP_ONCE_NC:
1447 case OP_BRAPOS:
1448 case OP_SBRA:
1449 case OP_SBRAPOS:
1450 case OP_SCOND:
1451 private_data_length++;
1452 cc += 1 + LINK_SIZE;
1453 break;
1454
1455 case OP_CBRA:
1456 case OP_SCBRA:
1457 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1458 private_data_length++;
1459 cc += 1 + LINK_SIZE + IMM2_SIZE;
1460 break;
1461
1462 case OP_CBRAPOS:
1463 case OP_SCBRAPOS:
1464 private_data_length += 2;
1465 cc += 1 + LINK_SIZE + IMM2_SIZE;
1466 break;
1467
1468 case OP_COND:
1469 /* Might be a hidden SCOND. */
1470 alternative = cc + GET(cc, 1);
1471 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1472 private_data_length++;
1473 cc += 1 + LINK_SIZE;
1474 break;
1475
1476 CASE_ITERATOR_PRIVATE_DATA_1
1477 if (PRIVATE_DATA(cc))
1478 private_data_length++;
1479 cc += 2;
1480 #ifdef SUPPORT_UTF
1481 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1482 #endif
1483 break;
1484
1485 CASE_ITERATOR_PRIVATE_DATA_2A
1486 if (PRIVATE_DATA(cc))
1487 private_data_length += 2;
1488 cc += 2;
1489 #ifdef SUPPORT_UTF
1490 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1491 #endif
1492 break;
1493
1494 CASE_ITERATOR_PRIVATE_DATA_2B
1495 if (PRIVATE_DATA(cc))
1496 private_data_length += 2;
1497 cc += 2 + IMM2_SIZE;
1498 #ifdef SUPPORT_UTF
1499 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1500 #endif
1501 break;
1502
1503 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1504 if (PRIVATE_DATA(cc))
1505 private_data_length++;
1506 cc += 1;
1507 break;
1508
1509 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1510 if (PRIVATE_DATA(cc))
1511 private_data_length += 2;
1512 cc += 1;
1513 break;
1514
1515 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1516 if (PRIVATE_DATA(cc))
1517 private_data_length += 2;
1518 cc += 1 + IMM2_SIZE;
1519 break;
1520
1521 case OP_CLASS:
1522 case OP_NCLASS:
1523 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1524 case OP_XCLASS:
1525 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1526 #else
1527 size = 1 + 32 / (int)sizeof(pcre_uchar);
1528 #endif
1529 if (PRIVATE_DATA(cc))
1530 private_data_length += get_class_iterator_size(cc + size);
1531 cc += size;
1532 break;
1533
1534 default:
1535 cc = next_opcode(common, cc);
1536 SLJIT_ASSERT(cc != NULL);
1537 break;
1538 }
1539 }
1540 SLJIT_ASSERT(cc == ccend);
1541 return private_data_length;
1542 }
1543
1544 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1545 BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
1546 {
1547 DEFINE_COMPILER;
1548 int srcw[2];
1549 int count, size;
1550 BOOL tmp1next = TRUE;
1551 BOOL tmp1empty = TRUE;
1552 BOOL tmp2empty = TRUE;
1553 pcre_uchar *alternative;
1554 enum {
1555 start,
1556 loop,
1557 end
1558 } status;
1559
1560 status = save ? start : loop;
1561 stackptr = STACK(stackptr - 2);
1562 stacktop = STACK(stacktop - 1);
1563
1564 if (!save)
1565 {
1566 stackptr += (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
1567 if (stackptr < stacktop)
1568 {
1569 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1570 stackptr += sizeof(sljit_sw);
1571 tmp1empty = FALSE;
1572 }
1573 if (stackptr < stacktop)
1574 {
1575 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1576 stackptr += sizeof(sljit_sw);
1577 tmp2empty = FALSE;
1578 }
1579 /* The tmp1next must be TRUE in either way. */
1580 }
1581
1582 do
1583 {
1584 count = 0;
1585 switch(status)
1586 {
1587 case start:
1588 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1589 count = 1;
1590 srcw[0] = common->recursive_head_ptr;
1591 if (needs_control_head)
1592 {
1593 SLJIT_ASSERT(common->control_head_ptr != 0);
1594 count = 2;
1595 srcw[1] = common->control_head_ptr;
1596 }
1597 status = loop;
1598 break;
1599
1600 case loop:
1601 if (cc >= ccend)
1602 {
1603 status = end;
1604 break;
1605 }
1606
1607 switch(*cc)
1608 {
1609 case OP_ASSERT:
1610 case OP_ASSERT_NOT:
1611 case OP_ASSERTBACK:
1612 case OP_ASSERTBACK_NOT:
1613 case OP_ONCE:
1614 case OP_ONCE_NC:
1615 case OP_BRAPOS:
1616 case OP_SBRA:
1617 case OP_SBRAPOS:
1618 case OP_SCOND:
1619 count = 1;
1620 srcw[0] = PRIVATE_DATA(cc);
1621 SLJIT_ASSERT(srcw[0] != 0);
1622 cc += 1 + LINK_SIZE;
1623 break;
1624
1625 case OP_CBRA:
1626 case OP_SCBRA:
1627 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1628 {
1629 count = 1;
1630 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1631 }
1632 cc += 1 + LINK_SIZE + IMM2_SIZE;
1633 break;
1634
1635 case OP_CBRAPOS:
1636 case OP_SCBRAPOS:
1637 count = 2;
1638 srcw[0] = PRIVATE_DATA(cc);
1639 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1640 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1641 cc += 1 + LINK_SIZE + IMM2_SIZE;
1642 break;
1643
1644 case OP_COND:
1645 /* Might be a hidden SCOND. */
1646 alternative = cc + GET(cc, 1);
1647 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1648 {
1649 count = 1;
1650 srcw[0] = PRIVATE_DATA(cc);
1651 SLJIT_ASSERT(srcw[0] != 0);
1652 }
1653 cc += 1 + LINK_SIZE;
1654 break;
1655
1656 CASE_ITERATOR_PRIVATE_DATA_1
1657 if (PRIVATE_DATA(cc))
1658 {
1659 count = 1;
1660 srcw[0] = PRIVATE_DATA(cc);
1661 }
1662 cc += 2;
1663 #ifdef SUPPORT_UTF
1664 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1665 #endif
1666 break;
1667
1668 CASE_ITERATOR_PRIVATE_DATA_2A
1669 if (PRIVATE_DATA(cc))
1670 {
1671 count = 2;
1672 srcw[0] = PRIVATE_DATA(cc);
1673 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1674 }
1675 cc += 2;
1676 #ifdef SUPPORT_UTF
1677 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1678 #endif
1679 break;
1680
1681 CASE_ITERATOR_PRIVATE_DATA_2B
1682 if (PRIVATE_DATA(cc))
1683 {
1684 count = 2;
1685 srcw[0] = PRIVATE_DATA(cc);
1686 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1687 }
1688 cc += 2 + IMM2_SIZE;
1689 #ifdef SUPPORT_UTF
1690 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1691 #endif
1692 break;
1693
1694 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1695 if (PRIVATE_DATA(cc))
1696 {
1697 count = 1;
1698 srcw[0] = PRIVATE_DATA(cc);
1699 }
1700 cc += 1;
1701 break;
1702
1703 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1704 if (PRIVATE_DATA(cc))
1705 {
1706 count = 2;
1707 srcw[0] = PRIVATE_DATA(cc);
1708 srcw[1] = srcw[0] + sizeof(sljit_sw);
1709 }
1710 cc += 1;
1711 break;
1712
1713 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1714 if (PRIVATE_DATA(cc))
1715 {
1716 count = 2;
1717 srcw[0] = PRIVATE_DATA(cc);
1718 srcw[1] = srcw[0] + sizeof(sljit_sw);
1719 }
1720 cc += 1 + IMM2_SIZE;
1721 break;
1722
1723 case OP_CLASS:
1724 case OP_NCLASS:
1725 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1726 case OP_XCLASS:
1727 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1728 #else
1729 size = 1 + 32 / (int)sizeof(pcre_uchar);
1730 #endif
1731 if (PRIVATE_DATA(cc))
1732 switch(get_class_iterator_size(cc + size))
1733 {
1734 case 1:
1735 count = 1;
1736 srcw[0] = PRIVATE_DATA(cc);
1737 break;
1738
1739 case 2:
1740 count = 2;
1741 srcw[0] = PRIVATE_DATA(cc);
1742 srcw[1] = srcw[0] + sizeof(sljit_sw);
1743 break;
1744
1745 default:
1746 SLJIT_ASSERT_STOP();
1747 break;
1748 }
1749 cc += size;
1750 break;
1751
1752 default:
1753 cc = next_opcode(common, cc);
1754 SLJIT_ASSERT(cc != NULL);
1755 break;
1756 }
1757 break;
1758
1759 case end:
1760 SLJIT_ASSERT_STOP();
1761 break;
1762 }
1763
1764 while (count > 0)
1765 {
1766 count--;
1767 if (save)
1768 {
1769 if (tmp1next)
1770 {
1771 if (!tmp1empty)
1772 {
1773 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1774 stackptr += sizeof(sljit_sw);
1775 }
1776 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1777 tmp1empty = FALSE;
1778 tmp1next = FALSE;
1779 }
1780 else
1781 {
1782 if (!tmp2empty)
1783 {
1784 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1785 stackptr += sizeof(sljit_sw);
1786 }
1787 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1788 tmp2empty = FALSE;
1789 tmp1next = TRUE;
1790 }
1791 }
1792 else
1793 {
1794 if (tmp1next)
1795 {
1796 SLJIT_ASSERT(!tmp1empty);
1797 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1798 tmp1empty = stackptr >= stacktop;
1799 if (!tmp1empty)
1800 {
1801 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1802 stackptr += sizeof(sljit_sw);
1803 }
1804 tmp1next = FALSE;
1805 }
1806 else
1807 {
1808 SLJIT_ASSERT(!tmp2empty);
1809 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1810 tmp2empty = stackptr >= stacktop;
1811 if (!tmp2empty)
1812 {
1813 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1814 stackptr += sizeof(sljit_sw);
1815 }
1816 tmp1next = TRUE;
1817 }
1818 }
1819 }
1820 }
1821 while (status != end);
1822
1823 if (save)
1824 {
1825 if (tmp1next)
1826 {
1827 if (!tmp1empty)
1828 {
1829 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1830 stackptr += sizeof(sljit_sw);
1831 }
1832 if (!tmp2empty)
1833 {
1834 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1835 stackptr += sizeof(sljit_sw);
1836 }
1837 }
1838 else
1839 {
1840 if (!tmp2empty)
1841 {
1842 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1843 stackptr += sizeof(sljit_sw);
1844 }
1845 if (!tmp1empty)
1846 {
1847 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1848 stackptr += sizeof(sljit_sw);
1849 }
1850 }
1851 }
1852 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1853 }
1854
1855 #undef CASE_ITERATOR_PRIVATE_DATA_1
1856 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1857 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1858 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1859 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1860 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1861
1862 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1863 {
1864 return (value & (value - 1)) == 0;
1865 }
1866
1867 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1868 {
1869 while (list)
1870 {
1871 /* sljit_set_label is clever enough to do nothing
1872 if either the jump or the label is NULL. */
1873 SET_LABEL(list->jump, label);
1874 list = list->next;
1875 }
1876 }
1877
1878 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1879 {
1880 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1881 if (list_item)
1882 {
1883 list_item->next = *list;
1884 list_item->jump = jump;
1885 *list = list_item;
1886 }
1887 }
1888
1889 static void add_stub(compiler_common *common, struct sljit_jump *start)
1890 {
1891 DEFINE_COMPILER;
1892 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1893
1894 if (list_item)
1895 {
1896 list_item->start = start;
1897 list_item->quit = LABEL();
1898 list_item->next = common->stubs;
1899 common->stubs = list_item;
1900 }
1901 }
1902
1903 static void flush_stubs(compiler_common *common)
1904 {
1905 DEFINE_COMPILER;
1906 stub_list* list_item = common->stubs;
1907
1908 while (list_item)
1909 {
1910 JUMPHERE(list_item->start);
1911 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1912 JUMPTO(SLJIT_JUMP, list_item->quit);
1913 list_item = list_item->next;
1914 }
1915 common->stubs = NULL;
1916 }
1917
1918 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1919 {
1920 DEFINE_COMPILER;
1921
1922 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1923 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1924 }
1925
1926 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1927 {
1928 /* May destroy all locals and registers except TMP2. */
1929 DEFINE_COMPILER;
1930
1931 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1932 #ifdef DESTROY_REGISTERS
1933 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1934 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1935 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1936 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1937 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1938 #endif
1939 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1940 }
1941
1942 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1943 {
1944 DEFINE_COMPILER;
1945 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1946 }
1947
1948 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1949 {
1950 DEFINE_COMPILER;
1951 struct sljit_label *loop;
1952 int i;
1953
1954 /* At this point we can freely use all temporary registers. */
1955 SLJIT_ASSERT(length > 1);
1956 /* TMP1 returns with begin - 1. */
1957 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1958 if (length < 8)
1959 {
1960 for (i = 1; i < length; i++)
1961 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1962 }
1963 else
1964 {
1965 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START);
1966 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length - 1);
1967 loop = LABEL();
1968 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1969 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1970 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1971 }
1972 }
1973
1974 static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
1975 {
1976 DEFINE_COMPILER;
1977 struct sljit_label *loop;
1978 int i;
1979
1980 SLJIT_ASSERT(length > 1);
1981 /* OVECTOR(1) contains the "string begin - 1" constant. */
1982 if (length > 2)
1983 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1984 if (length < 8)
1985 {
1986 for (i = 2; i < length; i++)
1987 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), TMP1, 0);
1988 }
1989 else
1990 {
1991 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
1992 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
1993 loop = LABEL();
1994 OP1(SLJIT_MOVU, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
1995 OP2(SLJIT_SUB | SLJIT_SET_E, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
1996 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1997 }
1998
1999 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
2000 if (common->mark_ptr != 0)
2001 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, SLJIT_IMM, 0);
2002 SLJIT_ASSERT(common->control_head_ptr != 0);
2003 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
2004 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
2005 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_ptr);
2006 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, base));
2007 }
2008
2009 static sljit_sw SLJIT_CALL do_check_control_chain(sljit_sw *current)
2010 {
2011 sljit_sw return_value = 0;
2012
2013 SLJIT_ASSERT(current != NULL);
2014 do
2015 {
2016 switch (current[-2])
2017 {
2018 case type_commit:
2019 /* Commit overwrites all. */
2020 return -1;
2021
2022 case type_prune:
2023 break;
2024
2025 case type_skip:
2026 /* Overwrites prune, but not other skips. */
2027 if (return_value == 0)
2028 return_value = current[-3];
2029 break;
2030
2031 default:
2032 SLJIT_ASSERT_STOP();
2033 break;
2034 }
2035 current = (sljit_sw*)current[-1];
2036 }
2037 while (current != NULL);
2038 return return_value;
2039 }
2040
2041 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
2042 {
2043 DEFINE_COMPILER;
2044 struct sljit_label *loop;
2045 struct sljit_jump *early_quit;
2046
2047 /* At this point we can freely use all registers. */
2048 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
2049 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
2050
2051 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
2052 if (common->mark_ptr != 0)
2053 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
2054 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
2055 if (common->mark_ptr != 0)
2056 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
2057 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
2058 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
2059 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
2060 /* Unlikely, but possible */
2061 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
2062 loop = LABEL();
2063 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
2064 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
2065 /* Copy the integer value to the output buffer */
2066 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2067 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2068 #endif
2069 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
2070 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2071 JUMPTO(SLJIT_C_NOT_ZERO, loop);
2072 JUMPHERE(early_quit);
2073
2074 /* Calculate the return value, which is the maximum ovector value. */
2075 if (topbracket > 1)
2076 {
2077 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
2078 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
2079
2080 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
2081 loop = LABEL();
2082 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
2083 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
2084 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
2085 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
2086 }
2087 else
2088 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
2089 }
2090
2091 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
2092 {
2093 DEFINE_COMPILER;
2094 struct sljit_jump *jump;
2095
2096 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
2097 SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
2098 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
2099
2100 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
2101 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
2102 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
2103 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
2104
2105 /* Store match begin and end. */
2106 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
2107 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
2108
2109 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
2110 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_SAVED_REG1, 0);
2111 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2112 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2113 #endif
2114 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
2115 JUMPHERE(jump);
2116
2117 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
2118 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
2119 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2120 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2121 #endif
2122 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2123
2124 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2125 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2126 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2127 #endif
2128 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2129
2130 JUMPTO(SLJIT_JUMP, quit);
2131 }
2132
2133 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2134 {
2135 /* May destroy TMP1. */
2136 DEFINE_COMPILER;
2137 struct sljit_jump *jump;
2138
2139 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2140 {
2141 /* The value of -1 must be kept for start_used_ptr! */
2142 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2143 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2144 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2145 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2146 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2147 JUMPHERE(jump);
2148 }
2149 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2150 {
2151 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2152 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2153 JUMPHERE(jump);
2154 }
2155 }
2156
2157 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2158 {
2159 /* Detects if the character has an othercase. */
2160 unsigned int c;
2161
2162 #ifdef SUPPORT_UTF
2163 if (common->utf)
2164 {
2165 GETCHAR(c, cc);
2166 if (c > 127)
2167 {
2168 #ifdef SUPPORT_UCP
2169 return c != UCD_OTHERCASE(c);
2170 #else
2171 return FALSE;
2172 #endif
2173 }
2174 #ifndef COMPILE_PCRE8
2175 return common->fcc[c] != c;
2176 #endif
2177 }
2178 else
2179 #endif
2180 c = *cc;
2181 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2182 }
2183
2184 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2185 {
2186 /* Returns with the othercase. */
2187 #ifdef SUPPORT_UTF
2188 if (common->utf && c > 127)
2189 {
2190 #ifdef SUPPORT_UCP
2191 return UCD_OTHERCASE(c);
2192 #else
2193 return c;
2194 #endif
2195 }
2196 #endif
2197 return TABLE_GET(c, common->fcc, c);
2198 }
2199
2200 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2201 {
2202 /* Detects if the character and its othercase has only 1 bit difference. */
2203 unsigned int c, oc, bit;
2204 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2205 int n;
2206 #endif
2207
2208 #ifdef SUPPORT_UTF
2209 if (common->utf)
2210 {
2211 GETCHAR(c, cc);
2212 if (c <= 127)
2213 oc = common->fcc[c];
2214 else
2215 {
2216 #ifdef SUPPORT_UCP
2217 oc = UCD_OTHERCASE(c);
2218 #else
2219 oc = c;
2220 #endif
2221 }
2222 }
2223 else
2224 {
2225 c = *cc;
2226 oc = TABLE_GET(c, common->fcc, c);
2227 }
2228 #else
2229 c = *cc;
2230 oc = TABLE_GET(c, common->fcc, c);
2231 #endif
2232
2233 SLJIT_ASSERT(c != oc);
2234
2235 bit = c ^ oc;
2236 /* Optimized for English alphabet. */
2237 if (c <= 127 && bit == 0x20)
2238 return (0 << 8) | 0x20;
2239
2240 /* Since c != oc, they must have at least 1 bit difference. */
2241 if (!is_powerof2(bit))
2242 return 0;
2243
2244 #if defined COMPILE_PCRE8
2245
2246 #ifdef SUPPORT_UTF
2247 if (common->utf && c > 127)
2248 {
2249 n = GET_EXTRALEN(*cc);
2250 while ((bit & 0x3f) == 0)
2251 {
2252 n--;
2253 bit >>= 6;
2254 }
2255 return (n << 8) | bit;
2256 }
2257 #endif /* SUPPORT_UTF */
2258 return (0 << 8) | bit;
2259
2260 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2261
2262 #ifdef SUPPORT_UTF
2263 if (common->utf && c > 65535)
2264 {
2265 if (bit >= (1 << 10))
2266 bit >>= 10;
2267 else
2268 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2269 }
2270 #endif /* SUPPORT_UTF */
2271 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2272
2273 #endif /* COMPILE_PCRE[8|16|32] */
2274 }
2275
2276 static void check_partial(compiler_common *common, BOOL force)
2277 {
2278 /* Checks whether a partial matching is occured. Does not modify registers. */
2279 DEFINE_COMPILER;
2280 struct sljit_jump *jump = NULL;
2281
2282 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2283
2284 if (common->mode == JIT_COMPILE)
2285 return;
2286
2287 if (!force)
2288 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2289 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2290 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2291
2292 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2293 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2294 else
2295 {
2296 if (common->partialmatchlabel != NULL)
2297 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2298 else
2299 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2300 }
2301
2302 if (jump != NULL)
2303 JUMPHERE(jump);
2304 }
2305
2306 static void check_str_end(compiler_common *common, jump_list **end_reached)
2307 {
2308 /* Does not affect registers. Usually used in a tight spot. */
2309 DEFINE_COMPILER;
2310 struct sljit_jump *jump;
2311
2312 if (common->mode == JIT_COMPILE)
2313 {
2314 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2315 return;
2316 }
2317
2318 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2319 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2320 {
2321 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2322 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2323 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
2324 }
2325 else
2326 {
2327 add_jump(compiler, end_reached, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2328 if (common->partialmatchlabel != NULL)
2329 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2330 else
2331 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2332 }
2333 JUMPHERE(jump);
2334 }
2335
2336 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2337 {
2338 DEFINE_COMPILER;
2339 struct sljit_jump *jump;
2340
2341 if (common->mode == JIT_COMPILE)
2342 {
2343 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2344 return;
2345 }
2346
2347 /* Partial matching mode. */
2348 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2349 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2350 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2351 {
2352 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, 0);
2353 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2354 }
2355 else
2356 {
2357 if (common->partialmatchlabel != NULL)
2358 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2359 else
2360 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2361 }
2362 JUMPHERE(jump);
2363 }
2364
2365 static void read_char(compiler_common *common)
2366 {
2367 /* Reads the character into TMP1, updates STR_PTR.
2368 Does not check STR_END. TMP2 Destroyed. */
2369 DEFINE_COMPILER;
2370 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2371 struct sljit_jump *jump;
2372 #endif
2373
2374 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2375 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2376 if (common->utf)
2377 {
2378 #if defined COMPILE_PCRE8
2379 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2380 #elif defined COMPILE_PCRE16
2381 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2382 #endif /* COMPILE_PCRE[8|16] */
2383 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2384 JUMPHERE(jump);
2385 }
2386 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2387 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2388 }
2389
2390 static void peek_char(compiler_common *common)
2391 {
2392 /* Reads the character into TMP1, keeps STR_PTR.
2393 Does not check STR_END. TMP2 Destroyed. */
2394 DEFINE_COMPILER;
2395 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2396 struct sljit_jump *jump;
2397 #endif
2398
2399 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2400 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2401 if (common->utf)
2402 {
2403 #if defined COMPILE_PCRE8
2404 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2405 #elif defined COMPILE_PCRE16
2406 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2407 #endif /* COMPILE_PCRE[8|16] */
2408 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2409 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2410 JUMPHERE(jump);
2411 }
2412 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2413 }
2414
2415 static void read_char8_type(compiler_common *common)
2416 {
2417 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2418 DEFINE_COMPILER;
2419 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2420 struct sljit_jump *jump;
2421 #endif
2422
2423 #ifdef SUPPORT_UTF
2424 if (common->utf)
2425 {
2426 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2427 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2428 #if defined COMPILE_PCRE8
2429 /* This can be an extra read in some situations, but hopefully
2430 it is needed in most cases. */
2431 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2432 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2433 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2434 JUMPHERE(jump);
2435 #elif defined COMPILE_PCRE16
2436 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2437 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2438 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2439 JUMPHERE(jump);
2440 /* Skip low surrogate if necessary. */
2441 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2442 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2443 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2444 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2445 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2446 #elif defined COMPILE_PCRE32
2447 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2448 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2449 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2450 JUMPHERE(jump);
2451 #endif /* COMPILE_PCRE[8|16|32] */
2452 return;
2453 }
2454 #endif /* SUPPORT_UTF */
2455 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2456 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2457 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2458 /* The ctypes array contains only 256 values. */
2459 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2460 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2461 #endif
2462 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2463 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2464 JUMPHERE(jump);
2465 #endif
2466 }
2467
2468 static void skip_char_back(compiler_common *common)
2469 {
2470 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2471 DEFINE_COMPILER;
2472 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2473 #if defined COMPILE_PCRE8
2474 struct sljit_label *label;
2475
2476 if (common->utf)
2477 {
2478 label = LABEL();
2479 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2480 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2481 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2482 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2483 return;
2484 }
2485 #elif defined COMPILE_PCRE16
2486 if (common->utf)
2487 {
2488 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2489 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2490 /* Skip low surrogate if necessary. */
2491 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2492 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2493 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2494 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2495 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2496 return;
2497 }
2498 #endif /* COMPILE_PCRE[8|16] */
2499 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2500 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2501 }
2502
2503 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2504 {
2505 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2506 DEFINE_COMPILER;
2507
2508 if (nltype == NLTYPE_ANY)
2509 {
2510 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2511 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2512 }
2513 else if (nltype == NLTYPE_ANYCRLF)
2514 {
2515 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2516 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2517 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2518 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2519 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2520 }
2521 else
2522 {
2523 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2524 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2525 }
2526 }
2527
2528 #ifdef SUPPORT_UTF
2529
2530 #if defined COMPILE_PCRE8
2531 static void do_utfreadchar(compiler_common *common)
2532 {
2533 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2534 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2535 DEFINE_COMPILER;
2536 struct sljit_jump *jump;
2537
2538 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2539 /* Searching for the first zero. */
2540 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2541 jump = JUMP(SLJIT_C_NOT_ZERO);
2542 /* Two byte sequence. */
2543 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2544 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2545 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2546 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2547 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2548 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2549 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2550 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2551 JUMPHERE(jump);
2552
2553 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2554 jump = JUMP(SLJIT_C_NOT_ZERO);
2555 /* Three byte sequence. */
2556 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2557 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2558 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2559 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2560 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2561 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2562 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2563 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2564 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2565 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2566 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2567 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2568 JUMPHERE(jump);
2569
2570 /* Four byte sequence. */
2571 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2572 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2573 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2574 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2575 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2576 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2577 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2578 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2579 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2580 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2581 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2582 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2583 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2584 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2585 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2586 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2587 }
2588
2589 static void do_utfreadtype8(compiler_common *common)
2590 {
2591 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2592 of the character (>= 0xc0). Return value in TMP1. */
2593 DEFINE_COMPILER;
2594 struct sljit_jump *jump;
2595 struct sljit_jump *compare;
2596
2597 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2598
2599 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2600 jump = JUMP(SLJIT_C_NOT_ZERO);
2601 /* Two byte sequence. */
2602 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2603 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2604 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2605 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2606 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2607 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2608 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2609 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2610 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2611
2612 JUMPHERE(compare);
2613 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2614 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2615 JUMPHERE(jump);
2616
2617 /* We only have types for characters less than 256. */
2618 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2619 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2620 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2621 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2622 }
2623
2624 #elif defined COMPILE_PCRE16
2625
2626 static void do_utfreadchar(compiler_common *common)
2627 {
2628 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2629 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2630 DEFINE_COMPILER;
2631 struct sljit_jump *jump;
2632
2633 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2634 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2635 /* Do nothing, only return. */
2636 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2637
2638 JUMPHERE(jump);
2639 /* Combine two 16 bit characters. */
2640 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2641 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2642 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2643 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2644 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2645 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2646 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2647 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2648 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2649 }
2650
2651 #endif /* COMPILE_PCRE[8|16] */
2652
2653 #endif /* SUPPORT_UTF */
2654
2655 #ifdef SUPPORT_UCP
2656
2657 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2658 #define UCD_BLOCK_MASK 127
2659 #define UCD_BLOCK_SHIFT 7
2660
2661 static void do_getucd(compiler_common *common)
2662 {
2663 /* Search the UCD record for the character comes in TMP1.
2664 Returns chartype in TMP1 and UCD offset in TMP2. */
2665 DEFINE_COMPILER;
2666
2667 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2668
2669 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2670 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2671 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2672 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2673 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2674 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2675 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2676 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2677 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2678 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2679 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2680 }
2681 #endif
2682
2683 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2684 {
2685 DEFINE_COMPILER;
2686 struct sljit_label *mainloop;
2687 struct sljit_label *newlinelabel = NULL;
2688 struct sljit_jump *start;
2689 struct sljit_jump *end = NULL;
2690 struct sljit_jump *nl = NULL;
2691 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2692 struct sljit_jump *singlechar;
2693 #endif
2694 jump_list *newline = NULL;
2695 BOOL newlinecheck = FALSE;
2696 BOOL readuchar = FALSE;
2697
2698 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2699 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2700 newlinecheck = TRUE;
2701
2702 if (firstline)
2703 {
2704 /* Search for the end of the first line. */
2705 SLJIT_ASSERT(common->first_line_end != 0);
2706 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2707
2708 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2709 {
2710 mainloop = LABEL();
2711 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2712 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2713 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2714 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2715 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2716 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2717 JUMPHERE(end);
2718 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2719 }
2720 else
2721 {
2722 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2723 mainloop = LABEL();
2724 /* Continual stores does not cause data dependency. */
2725 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2726 read_char(common);
2727 check_newlinechar(common, common->nltype, &newline, TRUE);
2728 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2729 JUMPHERE(end);
2730 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2731 set_jumps(newline, LABEL());
2732 }
2733
2734 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2735 }
2736
2737 start = JUMP(SLJIT_JUMP);
2738
2739 if (newlinecheck)
2740 {
2741 newlinelabel = LABEL();
2742 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2743 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2744 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2745 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2746 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2747 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2748 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2749 #endif
2750 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2751 nl = JUMP(SLJIT_JUMP);
2752 }
2753
2754 mainloop = LABEL();
2755
2756 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2757 #ifdef SUPPORT_UTF
2758 if (common->utf) readuchar = TRUE;
2759 #endif
2760 if (newlinecheck) readuchar = TRUE;
2761
2762 if (readuchar)
2763 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2764
2765 if (newlinecheck)
2766 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2767
2768 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2769 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2770 #if defined COMPILE_PCRE8
2771 if (common->utf)
2772 {
2773 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2774 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2775 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2776 JUMPHERE(singlechar);
2777 }
2778 #elif defined COMPILE_PCRE16
2779 if (common->utf)
2780 {
2781 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2782 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2783 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2784 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2785 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2786 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2787 JUMPHERE(singlechar);
2788 }
2789 #endif /* COMPILE_PCRE[8|16] */
2790 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2791 JUMPHERE(start);
2792
2793 if (newlinecheck)
2794 {
2795 JUMPHERE(end);
2796 JUMPHERE(nl);
2797 }
2798
2799 return mainloop;
2800 }
2801
2802 #define MAX_N_CHARS 3
2803
2804 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2805 {
2806 DEFINE_COMPILER;
2807 struct sljit_label *start;
2808 struct sljit_jump *quit;
2809 pcre_uint32 chars[MAX_N_CHARS * 2];
2810 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2811 int location = 0;
2812 pcre_int32 len, c, bit, caseless;
2813 int must_stop;
2814
2815 /* We do not support alternatives now. */
2816 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2817 return FALSE;
2818
2819 while (TRUE)
2820 {
2821 caseless = 0;
2822 must_stop = 1;
2823 switch(*cc)
2824 {
2825 case OP_CHAR:
2826 must_stop = 0;
2827 cc++;
2828 break;
2829
2830 case OP_CHARI:
2831 caseless = 1;
2832 must_stop = 0;
2833 cc++;
2834 break;
2835
2836 case OP_SOD:
2837 case OP_SOM:
2838 case OP_SET_SOM:
2839 case OP_NOT_WORD_BOUNDARY:
2840 case OP_WORD_BOUNDARY:
2841 case OP_EODN:
2842 case OP_EOD:
2843 case OP_CIRC:
2844 case OP_CIRCM:
2845 case OP_DOLL:
2846 case OP_DOLLM:
2847 /* Zero width assertions. */
2848 cc++;
2849 continue;
2850
2851 case OP_PLUS:
2852 case OP_MINPLUS:
2853 case OP_POSPLUS:
2854 cc++;
2855 break;
2856
2857 case OP_EXACT:
2858 cc += 1 + IMM2_SIZE;
2859 break;
2860
2861 case OP_PLUSI:
2862 case OP_MINPLUSI:
2863 case OP_POSPLUSI:
2864 caseless = 1;
2865 cc++;
2866 break;
2867
2868 case OP_EXACTI:
2869 caseless = 1;
2870 cc += 1 + IMM2_SIZE;
2871 break;
2872
2873 default:
2874 must_stop = 2;
2875 break;
2876 }
2877
2878 if (must_stop == 2)
2879 break;
2880
2881 len = 1;
2882 #ifdef SUPPORT_UTF
2883 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2884 #endif
2885
2886 if (caseless && char_has_othercase(common, cc))
2887 {
2888 caseless = char_get_othercase_bit(common, cc);
2889 if (caseless == 0)
2890 return FALSE;
2891 #ifdef COMPILE_PCRE8
2892 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2893 #else
2894 if ((caseless & 0x100) != 0)
2895 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2896 else
2897 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2898 #endif
2899 }
2900 else
2901 caseless = 0;
2902
2903 while (len > 0 && location < MAX_N_CHARS * 2)
2904 {
2905 c = *cc;
2906 bit = 0;
2907 if (len == (caseless & 0xff))
2908 {
2909 bit = caseless >> 8;
2910 c |= bit;
2911 }
2912
2913 chars[location] = c;
2914 chars[location + 1] = bit;
2915
2916 len--;
2917 location += 2;
2918 cc++;
2919 }
2920
2921 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2922 break;
2923 }
2924
2925 /* At least two characters are required. */
2926 if (location < 2 * 2)
2927 return FALSE;
2928
2929 if (firstline)
2930 {
2931 SLJIT_ASSERT(common->first_line_end != 0);
2932 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2933 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2934 }
2935 else
2936 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2937
2938 start = LABEL();
2939 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2940
2941 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2942 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2943 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2944 if (chars[1] != 0)
2945 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2946 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2947 if (location > 2 * 2)
2948 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2949 if (chars[3] != 0)
2950 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2951 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2952 if (location > 2 * 2)
2953 {
2954 if (chars[5] != 0)
2955 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2956 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2957 }
2958 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2959
2960 JUMPHERE(quit);
2961
2962 if (firstline)
2963 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2964 else
2965 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2966 return TRUE;
2967 }
2968
2969 #undef MAX_N_CHARS
2970
2971 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2972 {
2973 DEFINE_COMPILER;
2974 struct sljit_label *start;
2975 struct sljit_jump *quit;
2976 struct sljit_jump *found;
2977 pcre_uchar oc, bit;
2978
2979 if (firstline)
2980 {
2981 SLJIT_ASSERT(common->first_line_end != 0);
2982 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2983 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2984 }
2985
2986 start = LABEL();
2987 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2988 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2989
2990 oc = first_char;
2991 if (caseless)
2992 {
2993 oc = TABLE_GET(first_char, common->fcc, first_char);
2994 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2995 if (first_char > 127 && common->utf)
2996 oc = UCD_OTHERCASE(first_char);
2997 #endif
2998 }
2999 if (first_char == oc)
3000 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
3001 else
3002 {
3003 bit = first_char ^ oc;
3004 if (is_powerof2(bit))
3005 {
3006 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
3007 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
3008 }
3009 else
3010 {
3011 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
3012 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3013 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
3014 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3015 found = JUMP(SLJIT_C_NOT_ZERO);
3016 }
3017 }
3018
3019 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3020 JUMPTO(SLJIT_JUMP, start);
3021 JUMPHERE(found);
3022 JUMPHERE(quit);
3023
3024 if (firstline)
3025 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3026 }
3027
3028 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
3029 {
3030 DEFINE_COMPILER;
3031 struct sljit_label *loop;
3032 struct sljit_jump *lastchar;
3033 struct sljit_jump *firstchar;
3034 struct sljit_jump *quit;
3035 struct sljit_jump *foundcr = NULL;
3036 struct sljit_jump *notfoundnl;
3037 jump_list *newline = NULL;
3038
3039 if (firstline)
3040 {
3041 SLJIT_ASSERT(common->first_line_end != 0);
3042 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
3043 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3044 }
3045
3046 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
3047 {
3048 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3049 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3050 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3051 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3052 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3053
3054 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
3055 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
3056 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
3057 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3058 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
3059 #endif
3060 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3061
3062 loop = LABEL();
3063 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3064 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3065 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3066 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3067 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
3068 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
3069
3070 JUMPHERE(quit);
3071 JUMPHERE(firstchar);
3072 JUMPHERE(lastchar);
3073
3074 if (firstline)
3075 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
3076 return;
3077 }
3078
3079 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3080 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
3081 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
3082 skip_char_back(common);
3083
3084 loop = LABEL();
3085 read_char(common);
3086 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3087 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3088 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
3089 check_newlinechar(common, common->nltype, &newline, FALSE);
3090 set_jumps(newline, loop);
3091
3092 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
3093 {
3094 quit = JUMP(SLJIT_JUMP);
3095 JUMPHERE(foundcr);
3096 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3097 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3098 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
3099 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3100 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3101 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
3102 #endif
3103 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3104 JUMPHERE(notfoundnl);
3105 JUMPHERE(quit);
3106 }
3107 JUMPHERE(lastchar);
3108 JUMPHERE(firstchar);
3109
3110 if (firstline)
3111 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
3112 }
3113
3114 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
3115
/* Emits a scan loop that advances STR_PTR until it points at a character whose
bit is set in the 32 byte bitmap found at the address start_bits, or until
STR_PTR reaches STR_END.

Arguments:
  common      compiler state
  start_bits  address of the 32 byte (256 bit) start bitmap, passed as an integer
  firstline   when TRUE, STR_END is temporarily replaced by the value stored at
              common->first_line_end; the original STR_END is kept in
              RETURN_ADDR and restored before leaving

The emitted code overwrites TMP1, may overwrite TMP2, and uses TMP3 as a
scratch copy of the character when common->utf is set. */
3116 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
3117 {
3118 DEFINE_COMPILER;
3119 struct sljit_label *start;
3120 struct sljit_jump *quit;
3121 struct sljit_jump *found = NULL;
3122 jump_list *matches = NULL;
3123 pcre_uint8 inverted_start_bits[32];
3124 int i;
3125 #ifndef COMPILE_PCRE8
3126 struct sljit_jump *jump;
3127 #endif
3128
/* check_class_ranges() is given the complement of the bitmap, so the jumps it
collects in 'matches' are the ones taken for characters that ARE start
characters. */
3129 for (i = 0; i < 32; ++i)
3130 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3131
3132 if (firstline)
3133 {
3134 SLJIT_ASSERT(common->first_line_end != 0);
3135 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3136 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3137 }
3138
/* Loop head: leave when the end is reached, otherwise load the next code unit. */
3139 start = LABEL();
3140 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3141 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3142 #ifdef SUPPORT_UTF
/* Keep a copy: the tests below may modify TMP1, and the UTF skip needs it. */
3143 if (common->utf)
3144 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3145 #endif
3146
/* Prefer a short range comparison; fall back to a bitmap lookup when the
bitmap cannot be expressed as a short range list. */
3147 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3148 {
3149 #ifndef COMPILE_PCRE8
/* The bitmap only has 256 bits: every value that is not below 255 is looked up as 255. */
3150 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3151 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3152 JUMPHERE(jump);
3153 #endif
/* Byte index is (char >> 3), bit mask is 1 << (char & 7). */
3154 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3155 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3156 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3157 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3158 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3159 found = JUMP(SLJIT_C_NOT_ZERO);
3160 }
3161
/* Not a start character: step over it and try the next one. */
3162 #ifdef SUPPORT_UTF
3163 if (common->utf)
3164 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3165 #endif
3166 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3167 #ifdef SUPPORT_UTF
3168 #if defined COMPILE_PCRE8
3169 if (common->utf)
3170 {
/* Bytes of 0xc0 and above: add the utf8_table4 entry indexed by (byte - 0xc0),
presumably the number of additional bytes of the character. */
3171 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3172 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3173 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3174 }
3175 #elif defined COMPILE_PCRE16
3176 if (common->utf)
3177 {
/* When (unit & 0xfc00) == 0xd800 the flag (0 or 1) is shifted left by one and
added, i.e. one more 16 bit code unit is skipped. */
3178 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3179 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3180 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3181 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3182 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3183 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3184 }
3185 #endif /* COMPILE_PCRE[8|16] */
3186 #endif /* SUPPORT_UTF */
3187 JUMPTO(SLJIT_JUMP, start);
/* Common exit: reached on a match (either test) or at the end of the subject. */
3188 if (found != NULL)
3189 JUMPHERE(found);
3190 if (matches != NULL)
3191 set_jumps(matches, LABEL());
3192 JUMPHERE(quit);
3193
3194 if (firstline)
3195 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3196 }
3197
/* Emits code that checks whether the required character occurs in the subject
at or after STR_PTR, caching the position where it was found in the local slot
common->req_char_ptr.

Arguments:
  common        compiler state (common->req_char_ptr must be non-zero)
  req_char      the required code unit
  caseless      when TRUE the other case of req_char is accepted as well
  has_firstchar when TRUE the scan starts one code unit after STR_PTR

Returns the jump that is taken when the scan reaches STR_END without finding
the character; the caller has to give this jump a target.

The search is skipped entirely when STR_PTR + REQ_BYTE_MAX is below STR_END, or
when STR_PTR is below the cached position. TMP1 and TMP2 are overwritten. */
3198 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3199 {
3200 DEFINE_COMPILER;
3201 struct sljit_label *loop;
3202 struct sljit_jump *toolong;
3203 struct sljit_jump *alreadyfound;
3204 struct sljit_jump *found;
3205 struct sljit_jump *foundoc = NULL;
3206 struct sljit_jump *notfound;
3207 pcre_uint32 oc, bit;
3208
3209 SLJIT_ASSERT(common->req_char_ptr != 0);
/* TMP2 = cached position of the last successful search. */
3210 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3211 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3212 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3213 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3214
/* TMP1 is the scan cursor. */
3215 if (has_firstchar)
3216 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3217 else
3218 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3219
3220 loop = LABEL();
3221 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3222
3223 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Determine the other case (oc) at compile time; oc == req_char means the
character is caseless or caseless matching was not requested. */
3224 oc = req_char;
3225 if (caseless)
3226 {
3227 oc = TABLE_GET(req_char, common->fcc, req_char);
3228 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3229 if (req_char > 127 && common->utf)
3230 oc = UCD_OTHERCASE(req_char);
3231 #endif
3232 }
3233 if (req_char == oc)
3234 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3235 else
3236 {
3237 bit = req_char ^ oc;
3238 if (is_powerof2(bit))
3239 {
/* The two cases differ in a single bit: force that bit on and compare once. */
3240 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3241 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3242 }
3243 else
3244 {
3245 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3246 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3247 }
3248 }
3249 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3250 JUMPTO(SLJIT_JUMP, loop);
3251
/* Found: remember the position (TMP1 still points at the matching code unit). */
3252 JUMPHERE(found);
3253 if (foundoc)
3254 JUMPHERE(foundoc);
3255 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3256 JUMPHERE(alreadyfound);
3257 JUMPHERE(toolong);
3258 return notfound;
3259 }
3260
/* Emits a shared subroutine (entered by a fast call, return address kept in
RETURN_ADDR) that walks the records starting at STACK_TOP and copies the saved
words back to locations relative to the local base (held in TMP3).

Each record starts with one word, loaded into TMP2:
  > 0  an offset from the local base; the following two words are written to
       that location and to the word after it, and the cursor advances by
       three words;
  < 0  a negated offset; only the following word is written back, and the
       cursor advances by two words;
  = 0  end marker, the subroutine returns.

TMP1 (the cursor), TMP2 and TMP3 are overwritten; STACK_TOP is only read. */
3261 static void do_revertframes(compiler_common *common)
3262 {
3263 DEFINE_COMPILER;
3264 struct sljit_jump *jump;
3265 struct sljit_label *mainloop;
3266
3267 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3268 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3269 GET_LOCAL_BASE(TMP3, 0, 0);
3270
3271 /* Drop frames until we reach STACK_TOP. */
3272 mainloop = LABEL();
3273 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Compare the record header with zero (signed flags are reused below). */
3274 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3275 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3276
/* Positive header: restore two words. */
3277 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3278 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3279 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3280 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3281 JUMPTO(SLJIT_JUMP, mainloop);
3282
/* Header <= 0: the flags of the comparison above are still valid here. */
3283 JUMPHERE(jump);
3284 jump = JUMP(SLJIT_C_SIG_LESS);
3285 /* End of dropping frames. */
3286 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3287
/* Negative header: restore a single word. */
3288 JUMPHERE(jump);
3289 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3290 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3291 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3292 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3293 JUMPTO(SLJIT_JUMP, mainloop);
3294 }
3295
/* Emits the shared word boundary subroutine (entered by a fast call; the
return address is kept in the LOCALS0 slot).

The code computes a 0/1 "is a word character" value for the character before
STR_PTR (stored in LOCALS1, 0 when STR_PTR is at or before the 'begin' field
of the jit_arguments) and for the character at STR_PTR (in TMP2, 0 when the
end of the subject is detected by check_str_end). The final XOR only sets the
flags: the result is zero when both values are equal, i.e. when the position
is not a word boundary.

With common->use_ucp a character counts as a word character when it is an
underscore, or when the value left in TMP1 after the getucd call lies in
ucp_Ll..ucp_Lu or ucp_Nd..ucp_No (NOTE(review): this assumes getucd returns the
character type in TMP1 - confirm against its definition). Otherwise bit 4
(ctype_word, asserted to be 0x10) of the common->ctypes table entry is used,
and characters above 255 are never word characters. */
3296 static void check_wordboundary(compiler_common *common)
3297 {
3298 DEFINE_COMPILER;
3299 struct sljit_jump *skipread;
3300 jump_list *skipread_list = NULL;
3301 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3302 struct sljit_jump *jump;
3303 #endif
3304
3305 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3306
3307 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3308 /* Get type of the previous char, and put it to LOCALS1. */
3309 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3310 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3311 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
/* No previous character at the start of the subject: LOCALS1 stays 0. */
3312 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3313 skip_char_back(common);
3314 check_start_used_ptr(common);
3315 read_char(common);
3316
3317 /* Testing char type. */
3318 #ifdef SUPPORT_UCP
3319 if (common->use_ucp)
3320 {
/* TMP2 is preset to 1 so that the underscore shortcut yields "word". */
3321 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3322 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3323 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two unsigned range tests: ucp_Ll..ucp_Lu, then ucp_Nd..ucp_No. */
3324 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3325 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3326 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3327 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3328 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3329 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3330 JUMPHERE(jump);
3331 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3332 }
3333 else
3334 #endif
3335 {
3336 #ifndef COMPILE_PCRE8
3337 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3338 #elif defined SUPPORT_UTF
3339 /* Here LOCALS1 has already been zeroed. */
3340 jump = NULL;
3341 if (common->utf)
3342 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3343 #endif /* COMPILE_PCRE8 */
/* Table lookup: (ctypes[char] >> 4) & 1 is the ctype_word bit. */
3344 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3345 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3346 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3347 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3348 #ifndef COMPILE_PCRE8
3349 JUMPHERE(jump);
3350 #elif defined SUPPORT_UTF
3351 if (jump != NULL)
3352 JUMPHERE(jump);
3353 #endif /* COMPILE_PCRE8 */
3354 }
3355 JUMPHERE(skipread);
3356
/* Now the character at STR_PTR; TMP2 defaults to 0 for the end of subject case. */
3357 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3358 check_str_end(common, &skipread_list);
3359 peek_char(common);
3360
3361 /* Testing char type. This is a code duplication. */
3362 #ifdef SUPPORT_UCP
3363 if (common->use_ucp)
3364 {
3365 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3366 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3367 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3368 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3369 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3370 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3371 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3372 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3373 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3374 JUMPHERE(jump);
3375 }
3376 else
3377 #endif
3378 {
3379 #ifndef COMPILE_PCRE8
3380 /* TMP2 may be destroyed by peek_char. */
3381 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3382 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3383 #elif defined SUPPORT_UTF
3384 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3385 jump = NULL;
3386 if (common->utf)
3387 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3388 #endif
3389 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3390 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3391 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3392 #ifndef COMPILE_PCRE8
3393 JUMPHERE(jump);
3394 #elif defined SUPPORT_UTF
3395 if (jump != NULL)
3396 JUMPHERE(jump);
3397 #endif /* COMPILE_PCRE8 */
3398 }
3399 set_jumps(skipread_list, LABEL());
3400
/* Flags only: zero result means both sides have the same word-ness. */
3401 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3402 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3403 }
3404
3405 /*
3406 range format:
3407
3408 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3409 ranges[1] = first bit (0 or 1)
3410 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3411 */
3412
3413 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3414 {
3415 DEFINE_COMPILER;
3416 struct sljit_jump *jump;
3417
3418 if (ranges[0] < 0)
3419 return FALSE;
3420
3421 switch(ranges[0])
3422 {
3423 case 1:
3424 if (readch)
3425 read_char(common);
3426 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3427 return TRUE;
3428
3429 case 2:
3430 if (readch)
3431 read_char(common);
3432 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3433 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3434 return TRUE;
3435
3436 case 4:
3437 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3438 {
3439 if (readch)
3440 read_char(common);
3441 if (ranges[1] != 0)
3442 {
3443 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3444 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3445 }
3446 else
3447 {
3448 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3449 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3450 JUMPHERE(jump);
3451 }
3452 return TRUE;
3453 }
3454 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3455 {
3456 if (readch)
3457 read_char(common);
3458 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3459 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3460 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3461 return TRUE;
3462 }
3463 return FALSE;
3464
3465 default:
3466 return FALSE;
3467 }
3468 }
3469
3470 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3471 {
3472 int i, bit, length;
3473 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3474
3475 bit = ctypes[0] & flag;
3476 ranges[0] = -1;
3477 ranges[1] = bit != 0 ? 1 : 0;
3478 length = 0;
3479
3480 for (i = 1; i < 256; i++)
3481 if ((ctypes[i] & flag) != bit)
3482 {
3483 if (length >= MAX_RANGE_SIZE)
3484 return;
3485 ranges[2 + length] = i;
3486 length++;
3487 bit ^= flag;
3488 }
3489
3490 if (bit != 0)
3491 {
3492 if (length >= MAX_RANGE_SIZE)
3493 return;
3494 ranges[2 + length] = 256;
3495 length++;
3496 }
3497 ranges[0] = length;
3498 }
3499
3500 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3501 {
3502 int ranges[2 + MAX_RANGE_SIZE];
3503 pcre_uint8 bit, cbit, all;
3504 int i, byte, length = 0;
3505
3506 bit = bits[0] & 0x1;
3507 ranges[1] = bit;
3508 /* Can be 0 or 255. */
3509 all = -bit;
3510
3511 for (i = 0; i < 256; )
3512 {
3513 byte = i >> 3;
3514 if ((i & 0x7) == 0 && bits[byte] == all)
3515 i += 8;
3516 else
3517 {
3518 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3519 if (cbit != bit)
3520 {
3521 if (length >= MAX_RANGE_SIZE)
3522 return FALSE;
3523 ranges[2 + length] = i;
3524 length++;
3525 bit = cbit;
3526 all = -cbit;
3527 }
3528 i++;
3529 }
3530 }
3531
3532 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3533 {
3534 if (length >= MAX_RANGE_SIZE)
3535 return FALSE;
3536 ranges[2 + length] = 256;
3537 length++;
3538 }
3539 ranges[0] = length;
3540
3541 return check_ranges(common, ranges, backtracks, FALSE);
3542 }
3543
/* Emits the shared "any newline" test subroutine (fast call, return address
in RETURN_ADDR). The emitted code accepts 0x0a..0x0d and 0x85, and additionally
0x2028 and 0x2029 in the 16/32 bit libraries or when common->utf is set in the
8 bit library. The result is accumulated in TMP2 and the last operation sets
the flags from it (non-zero means newline). TMP1 is modified as well. */
3544 static void check_anynewline(compiler_common *common)
3545 {
3546 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3547 DEFINE_COMPILER;
3548
3549 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3550
/* From here on TMP1 holds (char - 0x0a); 0x0a..0x0d becomes an unsigned <= 3 test. */
3551 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3552 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3553 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3554 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3555 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3556 #ifdef COMPILE_PCRE8
3557 if (common->utf)
3558 {
3559 #endif
/* Setting the lowest bit maps 0x2028 onto 0x2029, so one compare covers both. */
3560 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3561 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3562 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3563 #ifdef COMPILE_PCRE8
3564 }
3565 #endif
3566 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison and set the flags for the caller. */
3567 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3568 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3569 }
3570
/* Emits the shared horizontal space test subroutine (fast call, return
address in RETURN_ADDR). The emitted code accepts 0x09, 0x20 and 0xa0, and
additionally 0x1680, 0x180e, 0x2000..0x200a, 0x202f, 0x205f and 0x3000 in the
16/32 bit libraries or when common->utf is set in the 8 bit library. The
result is accumulated in TMP2 and the last operation sets the flags from it
(non-zero means horizontal space). */
3571 static void check_hspace(compiler_common *common)
3572 {
3573 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
3574 DEFINE_COMPILER;
3575
3576 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3577
3578 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3579 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3580 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3581 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3582 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3583 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3584 #ifdef COMPILE_PCRE8
3585 if (common->utf)
3586 {
3587 #endif
3588 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3589 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3590 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3591 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3592 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 holds (char - 0x2000); 0x2000..0x200a is an unsigned <= test. */
3593 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3594 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3595 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3596 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3597 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3598 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3599 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3600 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3601 #ifdef COMPILE_PCRE8
3602 }
3603 #endif
3604 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison and set the flags for the caller. */
3605 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3606
3607 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3608 }
3609
/* Emits the shared vertical space test subroutine (fast call, return address
in RETURN_ADDR). The emitted code accepts 0x0a..0x0d and 0x85, and additionally
0x2028 and 0x2029 in the 16/32 bit libraries or when common->utf is set in the
8 bit library. The result is accumulated in TMP2 and the last operation sets
the flags from it (non-zero means vertical space). TMP1 is modified as well. */
3610 static void check_vspace(compiler_common *common)
3611 {
3612 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
3613 DEFINE_COMPILER;
3614
3615 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3616
/* From here on TMP1 holds (char - 0x0a); 0x0a..0x0d becomes an unsigned <= 3 test. */
3617 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3618 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3619 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3620 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3621 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3622 #ifdef COMPILE_PCRE8
3623 if (common->utf)
3624 {
3625 #endif
/* Setting the lowest bit maps 0x2028 onto 0x2029, so one compare covers both. */
3626 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3627 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3628 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3629 #ifdef COMPILE_PCRE8
3630 }
3631 #endif
3632 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last comparison and set the flags for the caller. */
3633 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3634
3635 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3636 }
3637
3638 #define CHAR1 STR_END
3639 #define CHAR2 STACK_TOP
3640
/* Emits the shared case sensitive compare subroutine (fast call, return
address in RETURN_ADDR).

Inputs of the emitted code, as far as this function shows: TMP1 points to the
first sequence, STR_PTR minus TMP2 is the start of the second sequence, and
TMP2 is the remaining length, decreased by IN_UCHARS(1) per compared code
unit. The loop ends on the first differing code unit or when TMP2 reaches
zero. NOTE(review): the caller presumably inspects TMP2 afterwards to tell the
two outcomes apart - confirm at the call site.

CHAR1 and CHAR2 are aliases of STR_END and STACK_TOP; their values are saved
in TMP3 and in the LOCALS0 slot and restored before returning. */
3641 static void do_casefulcmp(compiler_common *common)
3642 {
3643 DEFINE_COMPILER;
3644 struct sljit_jump *jump;
3645 struct sljit_label *label;
3646
3647 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3648 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3649 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3650 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one code unit because the loads below use
the updating MOVU form with an offset of one code unit. */
3651 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3652 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3653
3654 label = LABEL();
3655 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3656 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3657 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3658 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3659 JUMPTO(SLJIT_C_NOT_ZERO, label);
3660
/* Common exit: undo the initial one code unit adjustment of STR_PTR and
restore the borrowed registers. */
3661 JUMPHERE(jump);
3662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3663 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3664 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3665 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3666 }
3667
3668 #define LCC_TABLE STACK_LIMIT
3669
/* Emits the shared caseless compare subroutine (fast call, return address in
RETURN_ADDR). It has the same structure as do_casefulcmp, but both code units
are translated through the common->lcc table before they are compared. In the
16/32 bit libraries code units above 255 are compared untranslated.

LCC_TABLE is an alias of STACK_LIMIT and holds the table address during the
loop; CHAR1 and CHAR2 are aliases of STR_END and STACK_TOP. The three values
are saved in TMP3, LOCALS0 and LOCALS1 and restored before returning. TMP2 is
the remaining length, decreased by IN_UCHARS(1) per compared code unit. */
3670 static void do_caselesscmp(compiler_common *common)
3671 {
3672 DEFINE_COMPILER;
3673 struct sljit_jump *jump;
3674 struct sljit_label *label;
3675
3676 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3677 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3678
3679 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3680 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3681 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3682 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one code unit because the loads below use
the updating MOVU form with an offset of one code unit. */
3683 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3684 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3685
3686 label = LABEL();
3687 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3688 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is skipped for values above 255 (not 8 bit libraries only). */
3689 #ifndef COMPILE_PCRE8
3690 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3691 #endif
3692 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3693 #ifndef COMPILE_PCRE8
3694 JUMPHERE(jump);
3695 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3696 #endif
3697 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3698 #ifndef COMPILE_PCRE8
3699 JUMPHERE(jump);
3700 #endif
3701 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3702 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3703 JUMPTO(SLJIT_C_NOT_ZERO, label);
3704
/* Common exit: undo the initial adjustment and restore the borrowed registers. */
3705 JUMPHERE(jump);
3706 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3707 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3708 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3709 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3710 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3711 }
3712
3713 #undef LCC_TABLE
3714 #undef CHAR1
3715 #undef CHAR2
3716
3717 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3718
3719 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3720 {
3721 /* This function would be ineffective to do in JIT level. */
3722 pcre_uint32 c1, c2;
3723 const pcre_uchar *src2 = args->uchar_ptr;
3724 const pcre_uchar *end2 = args->end;
3725 const ucd_record *ur;
3726 const pcre_uint32 *pp;
3727
3728 while (src1 < end1)
3729 {
3730 if (src2 >= end2)
3731 return (pcre_uchar*)1;
3732 GETCHARINC(c1, src1);
3733 GETCHARINC(c2, src2);
3734 ur = GET_UCD(c2);
3735 if (c1 != c2 && c1 != c2 + ur->other_case)
3736 {
3737 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3738 for (;;)
3739 {
3740 if (c1 < *pp) return NULL;
3741 if (c1 == *pp++) break;
3742 }
3743 }
3744 }
3745 return src2;
3746 }
3747
3748 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3749
/* Emits the code that compares one pattern character, starting at cc, with
the subject. When common->utf is set and the first code unit has extra units,
all code units of the character are processed.

Arguments:
  common      compiler state
  caseless    when TRUE and the character has an other case, the differing
              bit (from char_get_othercase_bit) is OR-ed into the subject
              value before it is compared with (pattern unit | bit)
  cc          the pattern character
  context     compare state shared by consecutive calls: 'length' is the
              distance in bytes of the next unread subject unit from STR_PTR
              (loads use STR_PTR - length), 'sourcereg' is the register that
              receives the next load (-1 before the first call), and
              'ucharptr' with the 'c' / 'oc' unions collect units for a
              combined compare when unaligned reads are available
  backtracks  receives the jumps taken on a mismatch

Returns cc advanced past the processed code units. TMP1 and TMP2 are used
alternately: while one is compared the other already holds the next load. */
3750 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3751 compare_context* context, jump_list **backtracks)
3752 {
3753 DEFINE_COMPILER;
3754 unsigned int othercasebit = 0;
3755 pcre_uchar *othercasechar = NULL;
3756 #ifdef SUPPORT_UTF
3757 int utflength;
3758 #endif
3759
3760 if (caseless && char_has_othercase(common, cc))
3761 {
3762 othercasebit = char_get_othercase_bit(common, cc);
3763 SLJIT_ASSERT(othercasebit);
3764 /* Extracting bit difference info. */
/* The upper part of the value selects the code unit that carries the
differing bit, the low byte is the bit mask itself. */
3765 #if defined COMPILE_PCRE8
3766 othercasechar = cc + (othercasebit >> 8);
3767 othercasebit &= 0xff;
3768 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3769 /* Note that this code only handles characters in the BMP. If there
3770 ever are characters outside the BMP whose othercase differs in only one
3771 bit from itself (there currently are none), this code will need to be
3772 revised for COMPILE_PCRE32. */
3773 othercasechar = cc + (othercasebit >> 9);
3774 if ((othercasebit & 0x100) != 0)
3775 othercasebit = (othercasebit & 0xff) << 8;
3776 else
3777 othercasebit &= 0xff;
3778 #endif /* COMPILE_PCRE[8|16|32] */
3779 }
3780
/* First call for this context: preload the first unit(s) into TMP1 and let
the following loads go to TMP2. */
3781 if (context->sourcereg == -1)
3782 {
3783 #if defined COMPILE_PCRE8
3784 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3785 if (context->length >= 4)
3786 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3787 else if (context->length >= 2)
3788 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3789 else
3790 #endif
3791 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3792 #elif defined COMPILE_PCRE16
3793 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3794 if (context->length >= 4)
3795 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3796 else
3797 #endif
3798 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3799 #elif defined COMPILE_PCRE32
3800 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3801 #endif /* COMPILE_PCRE[8|16|32] */
3802 context->sourcereg = TMP2;
3803 }
3804
3805 #ifdef SUPPORT_UTF
3806 utflength = 1;
3807 if (common->utf && HAS_EXTRALEN(*cc))
3808 utflength += GET_EXTRALEN(*cc);
3809
3810 do
3811 {
3812 #endif
3813
3814 context->length -= IN_UCHARS(1);
3815 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3816
3817 /* Unaligned read is supported. */
/* Collect the expected value (c) and the case mask (oc) of this unit. */
3818 if (othercasebit != 0 && othercasechar == cc)
3819 {
3820 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3821 context->oc.asuchars[context->ucharptr] = othercasebit;
3822 }
3823 else
3824 {
3825 context->c.asuchars[context->ucharptr] = *cc;
3826 context->oc.asuchars[context->ucharptr] = 0;
3827 }
3828 context->ucharptr++;
3829
/* Emit a combined compare once a full group has been collected or nothing
more can be added to it. */
3830 #if defined COMPILE_PCRE8
3831 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3832 #else
3833 if (context->ucharptr >= 2 || context->length == 0)
3834 #endif
3835 {
/* Start loading the next group into the idle register, then switch to the
register that holds the group compared now. */
3836 if (context->length >= 4)
3837 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3838 else if (context->length >= 2)
3839 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3840 #if defined COMPILE_PCRE8
3841 else if (context->length >= 1)
3842 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3843 #endif /* COMPILE_PCRE8 */
3844 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3845
3846 switch(context->ucharptr)
3847 {
3848 case 4 / sizeof(pcre_uchar):
3849 if (context->oc.asint != 0)
3850 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3851 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3852 break;
3853
3854 case 2 / sizeof(pcre_uchar):
3855 if (context->oc.asushort != 0)
3856 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3857 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3858 break;
3859
3860 #ifdef COMPILE_PCRE8
3861 case 1:
3862 if (context->oc.asbyte != 0)
3863 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3864 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3865 break;
3866 #endif
3867
3868 default:
3869 SLJIT_ASSERT_STOP();
3870 break;
3871 }
3872 context->ucharptr = 0;
3873 }
3874
3875 #else
3876
3877 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit per compare: load the next unit into the idle register, then
compare the unit that was loaded previously. */
3878 if (context->length >= 1)
3879 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3880
3881 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3882
3883 if (othercasebit != 0 && othercasechar == cc)
3884 {
3885 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3886 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3887 }
3888 else
3889 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3890
3891 #endif
3892
3893 cc++;
3894 #ifdef SUPPORT_UTF
3895 utflength--;
3896 }
3897 while (utflength > 0);
3898 #endif
3899
3900 return cc;
3901 }
3902
3903 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3904
/* Rebases the value held in 'typereg' so that it equals (original - value):
the emitted SUB/ADD only applies the difference to the bias currently recorded
in 'typeoffset', which is then updated. Both names must exist in the scope
where the macro is used. The expansion is an if statement followed by an
assignment without a terminating semicolon, so it must be used as a complete
statement followed by ';' and never as the unbraced body of an if/else/loop. */
3905 #define SET_TYPE_OFFSET(value) \
3906 if ((value) != typeoffset) \
3907 { \
3908 if ((value) > typeoffset) \
3909 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3910 else \
3911 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3912 } \
3913 typeoffset = (value);
3914
/* Rebases the character held in TMP1 so that it equals (original - value):
the emitted SUB/ADD only applies the difference to the bias currently recorded
in 'charoffset', which is then updated. 'charoffset' must exist in the scope
where the macro is used. The expansion is an if statement followed by an
assignment without a terminating semicolon, so it must be used as a complete
statement followed by ';' and never as the unbraced body of an if/else/loop. */
3915 #define SET_CHAR_OFFSET(value) \
3916 if ((value) != charoffset) \
3917 { \
3918 if ((value) > charoffset) \
3919 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
3920 else \
3921 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
3922 } \
3923 typeoffset = (value);
3924
3925 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3926 {
3927 DEFINE_COMPILER;
3928 jump_list *found = NULL;
3929 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3930 pcre_int32 c, charoffset;
3931 const pcre_uint32 *other_cases;
3932 struct sljit_jump *jump = NULL;
3933 pcre_uchar *ccbegin;
3934 int compares, invertcmp, numberofcmps;
3935 #ifdef SUPPORT_UCP
3936 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3937 BOOL charsaved = FALSE;
3938 int typereg = TMP1, scriptreg = TMP1;
3939 pcre_int32 typeoffset;
3940 #endif
3941
3942 /* Although SUPPORT_UTF must be defined, we are
3943 not necessary in utf mode even in 8 bit mode. */
3944 detect_partial_match(common, backtracks);
3945 read_char(common);
3946
3947 if ((*cc++ & XCL_MAP) != 0)
3948 {
3949 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3950 #ifndef COMPILE_PCRE8
3951 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3952 #elif defined SUPPORT_UTF
3953 if (common->utf)
3954 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3955 #endif
3956
3957 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3958 {
3959 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3960 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3961 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3962 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3963 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3964 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3965 }
3966
3967 #ifndef COMPILE_PCRE8
3968 JUMPHERE(jump);
3969 #elif defined SUPPORT_UTF
3970 if (common->utf)
3971 JUMPHERE(jump);
3972 #endif
3973 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3974 #ifdef SUPPORT_UCP
3975 charsaved = TRUE;
3976 #endif
3977 cc += 32 / sizeof(pcre_uchar);
3978 }
3979
3980 /* Scanning the necessary info. */
3981 ccbegin = cc;
3982 compares = 0;
3983 while (*cc != XCL_END)
3984 {
3985 compares++;
3986 if (*cc == XCL_SINGLE)
3987 {
3988 cc += 2;
3989 #ifdef SUPPORT_UTF
3990 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3991 #endif
3992 #ifdef SUPPORT_UCP
3993 needschar = TRUE;
3994 #endif
3995 }
3996 else if (*cc == XCL_RANGE)
3997 {
3998 cc += 2;
3999 #ifdef SUPPORT_UTF
4000 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4001 #endif
4002 cc++;
4003 #ifdef SUPPORT_UTF
4004 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
4005 #endif
4006 #ifdef SUPPORT_UCP
4007 needschar = TRUE;
4008 #endif
4009 }
4010 #ifdef SUPPORT_UCP
4011 else
4012 {
4013 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
4014 cc++;
4015 switch(*cc)
4016 {
4017 case PT_ANY:
4018 break;
4019
4020 case PT_LAMP:
4021 case PT_GC:
4022 case PT_PC:
4023 case PT_ALNUM:
4024 needstype = TRUE;
4025 break;
4026
4027 case PT_SC:
4028 needsscript = TRUE;
4029 break;
4030
4031 case PT_SPACE:
4032 case PT_PXSPACE:
4033 case PT_WORD:
4034 needstype = TRUE;
4035 needschar = TRUE;
4036 break;
4037
4038 case PT_CLIST:
4039 case PT_UCNC:
4040 needschar = TRUE;
4041 break;
4042
4043 default:
4044 SLJIT_ASSERT_STOP();
4045 break;
4046 }
4047 cc += 2;
4048 }
4049 #endif
4050 }
4051
4052 #ifdef SUPPORT_UCP
4053 /* Simple register allocation. TMP1 is preferred if possible. */
4054 if (needstype || needsscript)
4055 {
4056 if (needschar && !charsaved)
4057 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
4058 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4059 if (needschar)
4060 {
4061 if (needstype)
4062 {
4063 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
4064 typereg = RETURN_ADDR;
4065 }
4066
4067 if (needsscript)
4068 scriptreg = TMP3;
4069 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
4070 }
4071 else if (needstype && needsscript)
4072 scriptreg = TMP3;
4073 /* In all other cases only one of them was specified, and that can goes to TMP1. */
4074
4075 if (needsscript)
4076 {
4077 if (scriptreg == TMP1)
4078 {
4079 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4080 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
4081 }
4082 else
4083 {
4084 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
4085 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
4086 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
4087 }
4088 }
4089 }
4090 #endif
4091
4092 /* Generating code. */
4093 cc = ccbegin;
4094 charoffset = 0;
4095 numberofcmps = 0;
4096 #ifdef SUPPORT_UCP
4097 typeoffset = 0;
4098 #endif
4099
4100 while (*cc != XCL_END)
4101 {
4102 compares--;
4103 invertcmp = (compares == 0 && list != backtracks);
4104 jump = NULL;
4105
4106 if (*cc == XCL_SINGLE)
4107 {
4108 cc ++;
4109 #ifdef SUPPORT_UTF
4110 if (common->utf)
4111 {
4112 GETCHARINC(c, cc);
4113 }
4114 else
4115 #endif
4116 c = *cc++;
4117
4118 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4119 {
4120 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4121 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4122 numberofcmps++;
4123 }
4124 else if (numberofcmps > 0)
4125 {
4126 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4127 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4128 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4129 numberofcmps = 0;
4130 }
4131 else
4132 {
4133 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4134 numberofcmps = 0;
4135 }
4136 }
4137 else if (*cc == XCL_RANGE)
4138 {
4139 cc ++;
4140 #ifdef SUPPORT_UTF
4141 if (common->utf)
4142 {
4143 GETCHARINC(c, cc);
4144 }
4145 else
4146 #endif
4147 c = *cc++;
4148 SET_CHAR_OFFSET(c);
4149 #ifdef SUPPORT_UTF
4150 if (common->utf)
4151 {
4152 GETCHARINC(c, cc);
4153 }
4154 else
4155 #endif
4156 c = *cc++;
4157 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4158 {
4159 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4160 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4161 numberofcmps++;
4162 }
4163 else if (numberofcmps > 0)
4164 {
4165 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4166 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4167 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4168 numberofcmps = 0;
4169 }
4170 else
4171 {
4172 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4173 numberofcmps = 0;
4174 }
4175 }
4176 #ifdef SUPPORT_UCP
4177 else
4178 {
4179 if (*cc == XCL_NOTPROP)
4180 invertcmp ^= 0x1;
4181 cc++;
4182 switch(*cc)
4183 {
4184 case PT_ANY:
4185 if (list != backtracks)
4186 {
4187 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4188 continue;
4189 }
4190 else if (cc[-1] == XCL_NOTPROP)
4191 continue;
4192 jump = JUMP(SLJIT_JUMP);
4193 break;
4194
4195 case PT_LAMP:
4196 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4197 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4198 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4199 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4200 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4201 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4202 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4203 break;
4204
4205 case PT_GC:
4206 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4207 SET_TYPE_OFFSET(c);
4208 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4209 break;
4210
4211 case PT_PC:
4212 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4213 break;
4214
4215 case PT_SC:
4216 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4217 break;
4218
4219 case PT_SPACE:
4220 case PT_PXSPACE:
4221 if (*cc == PT_SPACE)
4222 {
4223 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4224 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4225 }
4226 SET_CHAR_OFFSET(9);
4227 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4228 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4229 if (*cc == PT_SPACE)
4230 JUMPHERE(jump);
4231
4232 SET_TYPE_OFFSET(ucp_Zl);
4233 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4234 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4235 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4236 break;
4237
4238 case PT_WORD:
4239 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4240 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4241 /* Fall through. */
4242
4243 case PT_ALNUM:
4244 SET_TYPE_OFFSET(ucp_Ll);
4245 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4246 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4247 SET_TYPE_OFFSET(ucp_Nd);
4248 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4249 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4250 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4251 break;
4252
4253 case PT_CLIST:
4254 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4255
4256 /* At least three characters are required.
4257 Otherwise this case would be handled by the normal code path. */
4258 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4259 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4260
4261 /* Optimizing character pairs, if their difference is power of 2. */
4262 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4263 {
4264 if (charoffset == 0)
4265 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4266 else
4267 {
4268 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4269 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4270 }
4271 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4272 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4273 other_cases += 2;
4274 }
4275 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4276 {
4277 if (charoffset == 0)
4278 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4279 else
4280 {
4281 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4282 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4283 }
4284 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4285 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4286
4287 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4288 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4289
4290 other_cases += 3;
4291 }
4292 else
4293 {
4294 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4295 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4296 }
4297
4298 while (*other_cases != NOTACHAR)
4299 {
4300 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4301 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4302 }
4303 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4304 break;
4305
4306 case PT_UCNC:
4307 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_DOLLAR_SIGN - charoffset);
4308 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4309 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_COMMERCIAL_AT - charoffset);
4310 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4311 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_GRAVE_ACCENT - charoffset);
4312 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4313
4314 SET_CHAR_OFFSET(0xa0);
4315 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd7ff - charoffset);
4316 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4317 SET_CHAR_OFFSET(0);
4318 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
4319 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_GREATER_EQUAL);
4320 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4321 break;
4322 }
4323 cc += 2;
4324 }
4325 #endif
4326
4327 if (jump != NULL)
4328 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4329 }
4330
4331 if (found != NULL)
4332 set_jumps(found, LABEL());
4333 }
4334
/* Retire the offset helper macros used by the code above so that they do not
   leak into the rest of the file. */
4335 #undef SET_TYPE_OFFSET
4336 #undef SET_CHAR_OFFSET
4337
4338 #endif
4339
/* Emits the matching-path machine code for one single-character or zero-width
   opcode.

   common     - shared JIT compiler state (mode, newline settings, helper lists)
   type       - the opcode to compile; selects one of the cases below
   cc         - points at the operand bytes of the opcode (just past the opcode)
   backtracks - list that receives every jump which is taken when the item
                fails to match

   Returns the pattern pointer after the operands consumed by this opcode.
   Leaving the switch without returning means the opcode is not handled here
   and ends in SLJIT_ASSERT_STOP(). */
4340 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4341 {
4342 DEFINE_COMPILER;
4343 int length;
4344 unsigned int c, oc, bit;
4345 compare_context context;
4346 struct sljit_jump *jump[4];
4347 jump_list *end_list;
4348 #ifdef SUPPORT_UTF
4349 struct sljit_label *label;
4350 #ifdef SUPPORT_UCP
4351 pcre_uchar propdata[5];
4352 #endif
4353 #endif
4354
4355 switch(type)
4356 {
/* Start of subject: fail unless STR_PTR equals jit_arguments.begin. */
4357 case OP_SOD:
4358 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4359 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4360 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4361 return cc;
4362
/* Start of match: fail unless STR_PTR equals jit_arguments.str. */
4363 case OP_SOM:
4364 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4365 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4366 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4367 return cc;
4368
/* The shared wordboundary helper is fast-called; the zero flag it leaves
   behind decides which of the two opcodes backtracks. */
4369 case OP_NOT_WORD_BOUNDARY:
4370 case OP_WORD_BOUNDARY:
4371 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4372 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4373 return cc;
4374
4375 case OP_NOT_DIGIT:
4376 case OP_DIGIT:
4377 /* Digits are usually 0-9, so it is worth optimizing them. */
/* NOTE(review): -2 in digits[0] presumably marks the range table as not yet
   computed - confirm against the initialisation of common->digits. */
4378 if (common->digits[0] == -2)
4379 get_ctype_ranges(common, ctype_digit, common->digits);
4380 detect_partial_match(common, backtracks);
4381 /* Flip the starting bit in the negative case. */
4382 if (type == OP_NOT_DIGIT)
4383 common->digits[1] ^= 1;
/* When check_ranges() reports FALSE, fall back to testing the ctype_digit
   bit of the character type read by read_char8_type(). */
4384 if (!check_ranges(common, common->digits, backtracks, TRUE))
4385 {
4386 read_char8_type(common);
4387 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4388 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4389 }
/* Undo the flip above so that the shared table is left unchanged. */
4390 if (type == OP_NOT_DIGIT)
4391 common->digits[1] ^= 1;
4392 return cc;
4393
/* Test the ctype_space bit of the character type. */
4394 case OP_NOT_WHITESPACE:
4395 case OP_WHITESPACE:
4396 detect_partial_match(common, backtracks);
4397 read_char8_type(common);
4398 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4399 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4400 return cc;
4401
/* Test the ctype_word bit of the character type. */
4402 case OP_NOT_WORDCHAR:
4403 case OP_WORDCHAR:
4404 detect_partial_match(common, backtracks);
4405 read_char8_type(common);
4406 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4407 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4408 return cc;
4409
/* Any character except a newline. */
4410 case OP_ANY:
4411 detect_partial_match(common, backtracks);
4412 read_char(common);
/* A fixed newline wider than 255 is a two-unit sequence: bits 8-15 of
   common->newline hold the first unit and bits 0-7 the second one. The item
   fails only when both units are present at the current position. */
4413 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4414 {
4415 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4416 end_list = NULL;
4417 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4418 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4419 else
4420 check_str_end(common, &end_list);
4421
4422 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4423 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4424 set_jumps(end_list, LABEL());
4425 JUMPHERE(jump[0]);
4426 }
4427 else
4428 check_newlinechar(common, common->nltype, backtracks, TRUE);
4429 return cc;
4430
/* Any character at all: only the subject pointer has to be advanced. */
4431 case OP_ALLANY:
4432 detect_partial_match(common, backtracks);
4433 #ifdef SUPPORT_UTF
4434 if (common->utf)
4435 {
/* Step over the first code unit, then over the remaining units of a
   multi-unit character. */
4436 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4437 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4438 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4439 #if defined COMPILE_PCRE8
/* Units of 0xc0 and above index utf8_table4 to get the extra length. */
4440 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4441 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4442 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4443 #elif defined COMPILE_PCRE16
/* (unit & 0xfc00) == 0xd800 yields 1, which is shifted left by one to
   advance STR_PTR by 2 more. */
4444 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4445 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4446 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4447 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4448 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4449 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4450 #endif
4451 JUMPHERE(jump[0]);
4452 #endif /* COMPILE_PCRE[8|16] */
4453 return cc;
4454 }
4455 #endif
4456 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4457 return cc;
4458
/* Exactly one code unit, regardless of UTF mode. */
4459 case OP_ANYBYTE:
4460 detect_partial_match(common, backtracks);
4461 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4462 return cc;
4463
4464 #ifdef SUPPORT_UTF
4465 #ifdef SUPPORT_UCP
/* A single property test is wrapped into a temporary extended-class item list
   (one XCL_PROP / XCL_NOTPROP entry followed by XCL_END) and handed to the
   xclass compiler. The two operand bytes cc[0] and cc[1] are consumed.
   NOTE(review): propdata[0] is presumably the xclass flags byte - confirm. */
4466 case OP_NOTPROP:
4467 case OP_PROP:
4468 propdata[0] = 0;
4469 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4470 propdata[2] = cc[0];
4471 propdata[3] = cc[1];
4472 propdata[4] = XCL_END;
4473 compile_xclass_matchingpath(common, propdata, backtracks);
4474 return cc + 2;
4475 #endif
4476 #endif
4477
/* A CR may be followed by an NL, which is then consumed as well; any other
   character is checked with the bsr_nltype newline rules. */
4478 case OP_ANYNL:
4479 detect_partial_match(common, backtracks);
4480 read_char(common);
4481 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4482 /* We don't need to handle soft partial matching case. */
4483 end_list = NULL;
4484 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4485 add_jump(compiler, &end_list, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4486 else
4487 check_str_end(common, &end_list);
4488 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4489 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4490 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4491 jump[2] = JUMP(SLJIT_JUMP);
4492 JUMPHERE(jump[0]);
4493 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4494 set_jumps(end_list, LABEL());
4495 JUMPHERE(jump[1]);
4496 JUMPHERE(jump[2]);
4497 return cc;
4498
/* The shared hspace helper is fast-called; its zero flag selects the outcome. */
4499 case OP_NOT_HSPACE:
4500 case OP_HSPACE:
4501 detect_partial_match(common, backtracks);
4502 read_char(common);
4503 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4504 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4505 return cc;
4506
/* Same scheme as above, using the shared vspace helper. */
4507 case OP_NOT_VSPACE:
4508 case OP_VSPACE:
4509 detect_partial_match(common, backtracks);
4510 read_char(common);
4511 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4512 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4513 return cc;
4514
4515 #ifdef SUPPORT_UCP
/* Consumes one character and then keeps consuming while ucp_gbtable, indexed
   by the gbprop of the previous character, has the bit of the next
   character's gbprop set. */
4516 case OP_EXTUNI:
4517 detect_partial_match(common, backtracks);
4518 read_char(common);
4519 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4520 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4521 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved to LOCALS0 and then holds the gbprop of the previous
   character; it is restored after the loop. */
4522 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4523 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4524
4525 label = LABEL();
4526 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* TMP3 remembers the position before the next character so that STR_PTR can
   be rewound when the loop is left through the bit test. */
4527 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4528 read_char(common);
4529 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4530 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4531 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4532
/* TMP1 = ucp_gbtable[previous gbprop] (32 bit entries), TMP2 = 1 << current
   gbprop; the current gbprop becomes the previous one for the next round. */
4533 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4534 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4535 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4536 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4537 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4538 JUMPTO(SLJIT_C_NOT_ZERO, label);
4539
4540 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4541 JUMPHERE(jump[0]);
4542 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4543
4544 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4545 {
4546 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4547 /* Since we successfully read a char above, partial matching must occur. */
4548 check_partial(common, TRUE);
4549 JUMPHERE(jump[0]);
4550 }
4551 return cc;
4552 #endif
4553
/* End of subject, or a newline that is the last thing in the subject. */
4554 case OP_EODN:
4555 /* Requires rather complex checks. */
/* jump[0] is taken when STR_PTR is already at (or past) STR_END. */
4556 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4557 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4558 {
/* Two-unit fixed newline: both units must sit exactly at the end. */
4559 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4560 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4561 if (common->mode == JIT_COMPILE)
4562 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4563 else
4564 {
/* Partial modes: when STR_PTR + 2 is beyond STR_END and the unit at STR_PTR
   equals the first newline unit, check_partial() is invoked before
   backtracking; every other mismatch backtracks directly. */
4565 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4566 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4567 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4568 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4569 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4570 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4571 check_partial(common, TRUE);
4572 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4573 JUMPHERE(jump[1]);
4574 }
4575 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4576 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4577 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4578 }
4579 else if (common->nltype == NLTYPE_FIXED)
4580 {
/* One-unit fixed newline: it must be the very last unit. */
4581 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4582 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4583 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4584 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4585 }
4586 else
4587 {
/* Remaining newline types: a CR is handled first. jump[2] is taken when
   fewer than two units remain, jump[3] when the CR is followed by an NL that
   ends the subject. */
4588 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4589 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4590 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4591 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4592 jump[2] = JUMP(SLJIT_C_GREATER);
4593 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4594 /* Equal. */
4595 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4596 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4597 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4598
4599 JUMPHERE(jump[1]);
4600 if (common->nltype == NLTYPE_ANYCRLF)
4601 {
4602 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4603 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4604 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4605 }
4606 else
4607 {
/* STR_PTR is saved in LOCALS1 while one character is read; that character
   has to end the subject and has to be accepted by the anynewline helper. */
4608 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4609 read_char(common);
4610 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4611 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4612 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4613 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4614 }
4615 JUMPHERE(jump[2]);
4616 JUMPHERE(jump[3]);
4617 }
4618 JUMPHERE(jump[0]);
4619 check_partial(common, FALSE);
4620 return cc;
4621
/* Strict end of subject. */
4622 case OP_EOD:
4623 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4624 check_partial(common, FALSE);
4625 return cc;
4626
/* Circumflex without multiline: STR_PTR must not be beyond
   jit_arguments.begin and the notbol flag must be clear. */
4627 case OP_CIRC:
4628 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4629 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4630 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4631 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4632 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4633 return cc;
4634
/* Multiline circumflex: either the same test as OP_CIRC, or (after jump[1])
   a position before STR_END that is directly preceded by a newline. */
4635 case OP_CIRCM:
4636 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4637 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4638 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4639 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4640 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4641 jump[0] = JUMP(SLJIT_JUMP);
4642 JUMPHERE(jump[1]);
4643
4644 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4645 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4646 {
/* TMP1 still holds jit_arguments.begin: make sure two units exist before
   STR_PTR, then compare them with the two newline units. */
4647 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4648 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4649 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4650 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4651 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4652 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4653 }
4654 else
4655 {
/* Step back one character and read it again for the newline check. */
4656 skip_char_back(common);
4657 read_char(common);
4658 check_newlinechar(common, common->nltype, backtracks, FALSE);
4659 }
4660 JUMPHERE(jump[0]);
4661 return cc;
4662
/* Dollar without multiline: the noteol flag must be clear; the position test
   is either the OP_EODN code (compiled recursively) or, when common->endonly
   is set, a plain end-of-subject test. */
4663 case OP_DOLL:
4664 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4665 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4666 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4667
4668 if (!common->endonly)
4669 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4670 else
4671 {
4672 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4673 check_partial(common, FALSE);
4674 }
4675 return cc;
4676
/* Multiline dollar: at the end of the subject the noteol flag decides;
   before the end (jump[1]) a newline must start at STR_PTR. */
4677 case OP_DOLLM:
4678 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4679 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4680 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4681 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4682 check_partial(common, FALSE);
4683 jump[0] = JUMP(SLJIT_JUMP);
4684 JUMPHERE(jump[1]);
4685
4686 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4687 {
4688 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4689 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4690 if (common->mode == JIT_COMPILE)
4691 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4692 else
4693 {
/* Partial modes: only one unit is left; when it equals the first newline
   unit, check_partial() is invoked before backtracking. */
4694 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4695 /* STR_PTR = STR_END - IN_UCHARS(1) */
4696 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4697 check_partial(common, TRUE);
4698 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4699 JUMPHERE(jump[1]);
4700 }
4701
4702 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4703 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4704 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4705 }
4706 else
4707 {
4708 peek_char(common);
4709 check_newlinechar(common, common->nltype, backtracks, FALSE);
4710 }
4711 JUMPHERE(jump[0]);
4712 return cc;
4713
/* A single literal character, case sensitive or caseless. */
4714 case OP_CHAR:
4715 case OP_CHARI:
4716 length = 1;
4717 #ifdef SUPPORT_UTF
4718 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4719 #endif
/* Fast path (normal mode only): advance STR_PTR by the whole length first
   and compare the units with byte_sequence_compare(). For OP_CHARI it is
   taken only when the character has no other case or
   char_get_othercase_bit() returns a non-zero value. */
4720 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4721 {
4722 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4723 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4724
4725 context.length = IN_UCHARS(length);
4726 context.sourcereg = -1;
4727 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4728 context.ucharptr = 0;
4729 #endif
4730 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4731 }
/* Generic path: read the subject character and compare it as a value. */
4732 detect_partial_match(common, backtracks);
4733 read_char(common);
4734 #ifdef SUPPORT_UTF
4735 if (common->utf)
4736 {
4737 GETCHAR(c, cc);
4738 }
4739 else
4740 #endif
4741 c = *cc;
4742 if (type == OP_CHAR || !char_has_othercase(common, cc))
4743 {
4744 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4745 return cc + length;
4746 }
4747 oc = char_othercase(common, c);
4748 bit = c ^ oc;
/* When the two cases differ in a single bit, force that bit on and do one
   comparison. */
4749 if (is_powerof2(bit))
4750 {
4751 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4752 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4753 return cc + length;
4754 }
/* Otherwise OR the two equality results and fail when neither matched. */
4755 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4756 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4757 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4758 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4759 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4760 return cc + length;
4761
/* Any character except the given one (optionally caseless). */
4762 case OP_NOT:
4763 case OP_NOTI:
4764 detect_partial_match(common, backtracks);
4765 length = 1;
4766 #ifdef SUPPORT_UTF
4767 if (common->utf)
4768 {
4769 #ifdef COMPILE_PCRE8
4770 c = *cc;
/* The excluded character is below 128: the first subject byte can be compared
   directly, and the rest of the subject character is skipped afterwards. */
4771 if (c < 128)
4772 {
4773 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4774 if (type == OP_NOT || !char_has_othercase(common, cc))
4775 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4776 else
4777 {
4778 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4779 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4780 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4781 }
4782 /* Skip the variable-length character. */
4783 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4784 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4785 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4786 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4787 JUMPHERE(jump[0]);
4788 return cc + 1;
4789 }
4790 else
4791 #endif /* COMPILE_PCRE8 */
4792 {
4793 GETCHARLEN(c, cc, length);
4794 read_char(common);
4795 }
4796 }
4797 else
4798 #endif /* SUPPORT_UTF */
4799 {
4800 read_char(common);
4801 c = *cc;
4802 }
4803
/* Here TMP1 holds the subject character and c the excluded one. */
4804 if (type == OP_NOT || !char_has_othercase(common, cc))
4805 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4806 else
4807 {
4808 oc = char_othercase(common, c);
4809 bit = c ^ oc;
4810 if (is_powerof2(bit))
4811 {
4812 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4813 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4814 }
4815 else
4816 {
4817 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4818 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4819 }
4820 }
4821 return cc + length;
4822
/* Character class given as a 32 byte bitmap located at cc. */
4823 case OP_CLASS:
4824 case OP_NCLASS:
4825 detect_partial_match(common, backtracks);
4826 read_char(common);
/* Nothing more to emit when check_class_ranges() reports success. */
4827 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4828 return cc + 32 / sizeof(pcre_uchar);
4829
4830 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4831 jump[0] = NULL;
4832 #ifdef COMPILE_PCRE8
4833 /* This check only affects 8 bit mode. In other modes, we
4834 always need to compare the value with 255. */
4835 if (common->utf)
4836 #endif /* COMPILE_PCRE8 */
4837 {
/* Characters above 255 are outside the bitmap: OP_CLASS fails for them,
   while OP_NCLASS skips the bitmap test through jump[0]. */
4838 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4839 if (type == OP_CLASS)
4840 {
4841 add_jump(compiler, backtracks, jump[0]);
4842 jump[0] = NULL;
4843 }
4844 }
4845 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Test bit (c & 7) of bitmap byte (c >> 3). */
4846 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4847 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4848 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4849 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4850 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4851 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4852 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4853 if (jump[0] != NULL)
4854 JUMPHERE(jump[0]);
4855 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4856 return cc + 32 / sizeof(pcre_uchar);
4857
4858 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended class: the item list follows the LINK_SIZE length field, and
   GET(cc, 0) supplies the distance used to step over the whole item. */
4859 case OP_XCLASS:
4860 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4861 return cc + GET(cc, 0) - 1;
4862 #endif
4863
/* Move the subject pointer back by GET(cc, 0) characters; fail when that
   would go before jit_arguments.begin. */
4864 case OP_REVERSE:
4865 length = GET(cc, 0);
4866 if (length == 0)
4867 return cc + LINK_SIZE;
4868 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4869 #ifdef SUPPORT_UTF
4870 if (common->utf)
4871 {
/* UTF mode: step back one character at a time, counting down in TMP2. */
4872 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4873 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4874 label = LABEL();
4875 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4876 skip_char_back(common);
4877 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4878 JUMPTO(SLJIT_C_NOT_ZERO, label);
4879 }
4880 else
4881 #endif
4882 {
4883 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4884 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4885 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4886 }
4887 check_start_used_ptr(common);
4888 return cc + LINK_SIZE;
4889 }
/* Reached only for an opcode that has no case above. */
4890 SLJIT_ASSERT_STOP();
4891 return cc;
4892 }
4893
4894 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4895 {
4896 /* This function consumes at least one input character. */
4897 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4898 DEFINE_COMPILER;
4899 pcre_uchar *ccbegin = cc;
4900 compare_context context;
4901 int size;
4902
4903 context.length = 0;
4904 do
4905 {
4906 if (cc >= ccend)
4907 break;
4908
4909 if (*cc == OP_CHAR)
4910 {
4911 size = 1;
4912 #ifdef SUPPORT_UTF
4913 if (common->utf && HAS_EXTRALEN(cc[1]))
4914 size += GET_EXTRALEN(cc[1]);
4915 #endif
4916 }
4917 else if (*cc == OP_CHARI)
4918 {
4919 size = 1;
4920 #ifdef SUPPORT_UTF
4921 if (common->utf)
4922 {
4923 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4924 size = 0;
4925 else if (HAS_EXTRALEN(cc[1]))
4926 size += GET_EXTRALEN(cc[1]);
4927 }
4928 else
4929 #endif
4930 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4931 size = 0;
4932 }
4933 else
4934 size = 0;
4935
4936 cc += 1 + size;
4937 context.length += IN_UCHARS(size);
4938 }
4939 while (size > 0 && context.length <= 128);
4940
4941 cc = ccbegin;
4942 if (context.length > 0)
4943 {
4944 /* We have a fixed-length byte sequence. */
4945 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4946 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4947
4948 context.sourcereg = -1;
4949 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4950 context.ucharptr = 0;
4951 #endif
4952 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4953 return cc;
4954 }
4955
4956 /* A non-fixed length character will be checked if length == 0. */
4957 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4958 }
4959
4960 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4961 {
4962 DEFINE_COMPILER;
4963 int offset = GET2(cc, 1) << 1;
4964
4965 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4966 if (!common->jscript_compat)
4967 {
4968 if (backtracks == NULL)
4969 {
4970 /* OVECTOR(1) contains the "string begin - 1" constant. */
4971 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4972 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4973 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4974 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4975 return JUMP(SLJIT_C_NOT_ZERO);
4976 }
4977 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4978 }
4979 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4980 }
4981
4982 /* Forward definitions. */
4983 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4984 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4985
/* PUSH_BACKTRACK(size, ccstart, error)

Allocates a backtrack record of "size" bytes with sljit_alloc_memory(), clears
it, links it in front of parent->top and makes it the new parent->top. The
record remembers "ccstart" in its cc field.

The expansion site must provide three names: "compiler" (the sljit compiler),
"parent" (the owning backtrack record) and "backtrack" (receives the new
record). If the compiler reports an error after the allocation, the enclosing
function returns "error" immediately.

Every macro argument is parenthesised at each use so that any expression can be
passed safely. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return (error); \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Same as PUSH_BACKTRACK, for use inside functions returning void: on a
compiler error the enclosing function simply returns. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)

/* Views the in-scope "backtrack" pointer as a pointer to the given record
type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
5013
5014 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
5015 {
5016 DEFINE_COMPILER;
5017 int offset = GET2(cc, 1) << 1;
5018 struct sljit_jump *jump = NULL;
5019 struct sljit_jump *partial;
5020 struct sljit_jump *nopartial;
5021
5022 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5023 /* OVECTOR(1) contains the "string begin - 1" constant. */
5024 if (withchecks && !common->jscript_compat)
5025 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5026
5027 #if defined SUPPORT_UTF && defined SUPPORT_UCP
5028 if (common->utf && *cc == OP_REFI)
5029 {
5030 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
5031 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5032 if (withchecks)
5033 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
5034
5035 /* Needed to save important temporary registers. */
5036 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5037 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
5038 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
5039 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
5040 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5041 if (common->mode == JIT_COMPILE)
5042 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
5043 else
5044 {
5045 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
5046 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
5047 check_partial(common, FALSE);
5048 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5049 JUMPHERE(nopartial);
5050 }
5051 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
5052 }
5053 else
5054 #endif /* SUPPORT_UTF && SUPPORT_UCP */
5055 {
5056 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
5057 if (withchecks)
5058 jump = JUMP(SLJIT_C_ZERO);
5059
5060 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5061 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
5062 if (common->mode == JIT_COMPILE)
5063 add_jump(compiler, backtracks, partial);
5064
5065 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5066 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5067
5068 if (common->mode != JIT_COMPILE)
5069 {
5070 nopartial = JUMP(SLJIT_JUMP);
5071 JUMPHERE(partial);
5072 /* TMP2 -= STR_END - STR_PTR */
5073 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
5074 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
5075 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
5076 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
5077 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
5078 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
5079 JUMPHERE(partial);
5080 check_partial(common, FALSE);
5081 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
5082 JUMPHERE(nopartial);
5083 }
5084 }
5085
5086 if (jump != NULL)
5087 {
5088 if (emptyfail)
5089 add_jump(compiler, backtracks, jump);
5090 else
5091 JUMPHERE(jump);
5092 }
5093 return cc + 1 + IMM2_SIZE;
5094 }
5095
5096 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5097 {
5098 DEFINE_COMPILER;
5099 backtrack_common *backtrack;
5100 pcre_uchar type;
5101 struct sljit_label *label;
5102 struct sljit_jump *zerolength;
5103 struct sljit_jump *jump = NULL;
5104 pcre_uchar *ccbegin = cc;
5105 int min = 0, max = 0;
5106 BOOL minimize;
5107
5108 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
5109
5110 type = cc[1 + IMM2_SIZE];
5111 minimize = (type & 0x1) != 0;
5112 switch(type)
5113 {
5114 case OP_CRSTAR:
5115 case OP_CRMINSTAR:
5116 min = 0;
5117 max = 0;
5118 cc += 1 + IMM2_SIZE + 1;
5119 break;
5120 case OP_CRPLUS:
5121 case OP_CRMINPLUS:
5122 min = 1;
5123 max = 0;
5124 cc += 1 + IMM2_SIZE + 1;
5125 break;
5126 case OP_CRQUERY:
5127 case OP_CRMINQUERY:
5128 min = 0;
5129 max = 1;
5130 cc += 1 + IMM2_SIZE + 1;
5131 break;
5132 case OP_CRRANGE:
5133 case OP_CRMINRANGE:
5134 min = GET2(cc, 1 + IMM2_SIZE + 1);
5135 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
5136 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
5137 break;
5138 default:
5139 SLJIT_ASSERT_STOP();
5140 break;
5141 }
5142
5143 if (!minimize)
5144 {
5145 if (min == 0)
5146 {
5147 allocate_stack(common, 2);
5148 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5149 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5150 /* Temporary release of STR_PTR. */
5151 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5152 zerolength = compile_ref_checks(common, ccbegin, NULL);
5153 /* Restore if not zero length. */
5154 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5155 }
5156 else
5157 {
5158 allocate_stack(common, 1);
5159 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5160 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5161 }
5162
5163 if (min > 1 || max > 1)
5164 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5165
5166 label = LABEL();
5167 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5168
5169 if (min > 1 || max > 1)
5170 {
5171 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5172 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5173 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5174 if (min > 1)
5175 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5176 if (max > 1)
5177 {
5178 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5179 allocate_stack(common, 1);
5180 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5181 JUMPTO(SLJIT_JUMP, label);
5182 JUMPHERE(jump);
5183 }
5184 }
5185
5186 if (max == 0)
5187 {
5188 /* Includes min > 1 case as well. */
5189 allocate_stack(common, 1);
5190 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5191 JUMPTO(SLJIT_JUMP, label);
5192 }
5193
5194 JUMPHERE(zerolength);
5195 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5196
5197 decrease_call_count(common);
5198 return cc;
5199 }
5200
5201 allocate_stack(common, 2);
5202 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5203 if (type != OP_CRMINSTAR)
5204 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5205
5206 if (min == 0)
5207 {
5208 zerolength = compile_ref_checks(common, ccbegin, NULL);
5209 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5210 jump = JUMP(SLJIT_JUMP);
5211 }
5212 else
5213 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5214
5215 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5216 if (max > 0)
5217 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5218
5219 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5220 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5221
5222 if (min > 1)
5223 {
5224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5225 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5227 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5228 }
5229 else if (max > 0)
5230 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5231
5232 if (jump != NULL)
5233 JUMPHERE(jump);
5234 JUMPHERE(zerolength);
5235
5236 decrease_call_count(common);
5237 return cc;
5238 }
5239
5240 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5241 {
5242 DEFINE_COMPILER;
5243 backtrack_common *backtrack;
5244 recurse_entry *entry = common->entries;
5245 recurse_entry *prev = NULL;
5246 int start = GET(cc, 1);
5247 pcre_uchar *start_cc;
5248
5249 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5250
5251 /* Inlining simple patterns. */
5252 if (get_framesize(common, common->start + start, TRUE) == no_stack)
5253 {
5254 start_cc = common->start + start;
5255 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5256 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5257 return cc + 1 + LINK_SIZE;
5258 }
5259
5260 while (entry != NULL)
5261 {
5262 if (entry->start == start)
5263 break;
5264 prev = entry;
5265 entry = entry->next;
5266 }
5267
5268 if (entry == NULL)
5269 {
5270 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5271 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5272 return NULL;
5273 entry->next = NULL;
5274 entry->entry = NULL;
5275 entry->calls = NULL;
5276 entry->start = start;
5277
5278 if (prev != NULL)
5279 prev->next = entry;
5280 else
5281 common->entries = entry;
5282 }
5283
5284 if (common->has_set_som && common->mark_ptr != 0)
5285 {
5286 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5287 allocate_stack(common, 2);
5288 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5289 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5290 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5291 }
5292 else if (common->has_set_som || common->mark_ptr != 0)
5293 {
5294 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5295 allocate_stack(common, 1);
5296 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5297 }
5298
5299 if (entry->entry == NULL)
5300 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5301 else
5302 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5303 /* Leave if the match is failed. */
5304 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5305 return cc + 1 + LINK_SIZE;
5306 }
5307
5308 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5309 {
5310 const pcre_uchar *begin = arguments->begin;
5311 int *offset_vector = arguments->offsets;
5312 int offset_count = arguments->offset_count;
5313 int i;
5314
5315 if (PUBL(callout) == NULL)
5316 return 0;
5317
5318 callout_block->version = 2;
5319 callout_block->callout_data = arguments->callout_data;
5320
5321 /* Offsets in subject. */
5322 callout_block->subject_length = arguments->end - arguments->begin;
5323 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5324 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5325 #if defined COMPILE_PCRE8
5326 callout_block->subject = (PCRE_SPTR)begin;
5327 #elif defined COMPILE_PCRE16
5328 callout_block->subject = (PCRE_SPTR16)begin;
5329 #elif defined COMPILE_PCRE32
5330 callout_block->subject = (PCRE_SPTR32)begin;
5331 #endif
5332
5333 /* Convert and copy the JIT offset vector to the offset_vector array. */
5334 callout_block->capture_top = 0;
5335 callout_block->offset_vector = offset_vector;
5336 for (i = 2; i < offset_count; i += 2)
5337 {
5338 offset_vector[i] = jit_ovector[i] - begin;
5339 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5340 if (jit_ovector[i] >= begin)
5341 callout_block->capture_top = i;
5342 }
5343
5344 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5345 if (offset_count > 0)
5346 offset_vector[0] = -1;
5347 if (offset_count > 1)
5348 offset_vector[1] = -1;
5349 return (*PUBL(callout))(callout_block);
5350 }
5351
5352 /* Aligning to 8 byte. */
5353 #define CALLOUT_ARG_SIZE \
5354 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5355
5356 #define CALLOUT_ARG_OFFSET(arg) \
5357 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5358
5359 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5360 {
5361 DEFINE_COMPILER;
5362 backtrack_common *backtrack;
5363
5364 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5365
5366 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5367
5368 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5369 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5370 SLJIT_ASSERT(common->capture_last_ptr != 0);
5371 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5372 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5373
5374 /* These pointer sized fields temporarly stores internal variables. */
5375 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5376 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5377 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5378
5379 if (common->mark_ptr != 0)
5380 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5381 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5382 OP1(SLJIT_MOV_SI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5383 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5384
5385 /* Needed to save important temporary registers. */
5386 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5387 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5388 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5389 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5390 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5391 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5392 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5393
5394 /* Check return value. */
5395 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5396 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5397 if (common->forced_quit_label == NULL)
5398 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5399 else
5400 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5401 return cc + 2 + 2 * LINK_SIZE;
5402 }
5403
5404 #undef CALLOUT_ARG_SIZE
5405 #undef CALLOUT_ARG_OFFSET
5406
/* Emits the matching path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO or
OP_BRAMINZERO.

  cc           points at the assertion opcode, or at the preceding
               OP_BRAZERO / OP_BRAMINZERO
  backtrack    record of this assertion; framesize, private_data_ptr and (for
               OP_BRAZERO) matchingpath are filled in here
  conditional  TRUE when the assertion is used as a condition: failures are
               then collected in backtrack->condfailed instead of
               backtrack->common.topbacktracks

The quit/accept labels and lists and the local_exit flag of "common" are saved
on entry, replaced while the assertion body is compiled, and put back before
every return.

Returns the address just past the item that ends the last alternative, or NULL
if the compiler entered an error state. */
5407 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5408 {
5409 DEFINE_COMPILER;
5410 int framesize;
5411 int extrasize;
5412 BOOL needs_control_head = common->control_head_ptr != 0;
5413 int private_data_ptr;
5414 backtrack_common altbacktrack;
5415 pcre_uchar *ccbegin;
5416 pcre_uchar opcode;
5417 pcre_uchar bra = OP_BRA;
5418 jump_list *tmp = NULL;
5419 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5420 jump_list **found;
5421 /* Saving previous accept variables. */
5422 struct sljit_label *save_quit_label = common->quit_label;
5423 struct sljit_label *save_accept_label = common->accept_label;
5424 jump_list *save_quit = common->quit;
5425 jump_list *save_accept = common->accept;
5426 BOOL save_local_exit = common->local_exit;
5427 struct sljit_jump *jump;
5428 struct sljit_jump *brajump = NULL;
5429
5430 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5431 {
5432 SLJIT_ASSERT(!conditional);
5433 bra = *cc;
5434 cc++;
5435 }
5436 private_data_ptr = PRIVATE_DATA(cc);
5437 SLJIT_ASSERT(private_data_ptr != 0);
5438 framesize = get_framesize(common, cc, FALSE);
5439 backtrack->framesize = framesize;
5440 backtrack->private_data_ptr = private_data_ptr;
5441 opcode = *cc;
5442 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A positive assertion collects the jumps of its matching alternatives in the
local list "tmp" (bound at "Assert is successful" below); for a negative
assertion a matching alternative jumps straight to "target". */
5443 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5444 ccbegin = cc;
5445 cc += GET(cc, 1);
5446
5447 if (bra == OP_BRAMINZERO)
5448 {
5449 /* This is a braminzero backtrack path. */
5450 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5451 free_stack(common, 1);
5452 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5453 }
5454
/* Reserve the stack slots of the assertion. STACK(0) always receives STR_PTR.
When a control head is in use, its current value is saved on the stack and the
head is reset to 0. With a frame (framesize >= 0) the previous value stored at
private_data_ptr is pushed as well and init_frame() is called. */
5455 if (framesize < 0)
5456 {
5457 extrasize = needs_control_head ? 2 : 1;
5458 if (framesize == no_frame)
5459 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5460 allocate_stack(common, extrasize);
5461 if (needs_control_head)
5462 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5463 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5464 if (needs_control_head)
5465 {
5466 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5468 }
5469 }
5470 else
5471 {
5472 extrasize = needs_control_head ? 3 : 2;
5473 allocate_stack(common, framesize + extrasize);
5474 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5475 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5476 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5477 if (needs_control_head)
5478 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
5479 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5480 if (needs_control_head)
5481 {
5482 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5483 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5484 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_IMM, 0);
5485 }
5486 else
5487 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5488 init_frame(common, ccbegin, framesize + extrasize - 1, extrasize, FALSE);
5489 }
5490
/* The assertion body is compiled with its own quit and accept targets. */
5491 memset(&altbacktrack, 0, sizeof(backtrack_common));
5492 common->local_exit = TRUE;
5493 common->quit_label = NULL;
5494 common->quit = NULL;
/* One iteration per alternative; the loop ends after the first alternative
that is not followed by OP_ALT. */
5495 while (1)
5496 {
5497 common->accept_label = NULL;
5498 common->accept = NULL;
5499 altbacktrack.top = NULL;
5500 altbacktrack.topbacktracks = NULL;
5501
/* Every alternative after the first starts again from the saved STR_PTR. */
5502 if (*ccbegin == OP_ALT)
5503 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5504
5505 altbacktrack.cc = ccbegin;
5506 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5507 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5508 {
5509 common->local_exit = save_local_exit;
5510 common->quit_label = save_quit_label;
5511 common->accept_label = save_accept_label;
5512 common->quit = save_quit;
5513 common->accept = save_accept;
5514 return NULL;
5515 }
/* Code from here to the jump to "found" runs when this alternative matched;
accept jumps collected while compiling the alternative land here too. */
5516 common->accept_label = LABEL();
5517 if (common->accept != NULL)
5518 set_jumps(common->accept, common->accept_label);
5519
5520 /* Reset stack. */
5521 if (framesize < 0)
5522 {
5523 if (framesize == no_frame)
5524 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5525 else
5526 free_stack(common, extrasize);
5527 if (needs_control_head)
5528 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5529 }
5530 else
5531 {
5532 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5533 {
5534 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5535 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5536 if (needs_control_head)
5537 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), 0);
5538 }
5539 else
5540 {
5541 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5542 if (needs_control_head)
5543 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5544 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5545 }
5546 }
5547
/* For a negative assertion a matching alternative means failure; the jump
added to "found" below goes to "target" in that case. */
5548 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5549 {
5550 /* We know that STR_PTR was stored on the top of the stack. */
5551 if (conditional)
5552 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? sizeof(sljit_sw) : 0);
5553 else if (bra == OP_BRAZERO)
5554 {
5555 if (framesize < 0)
5556 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5557 else
5558 {
5559 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5560 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + extrasize - 1) * sizeof(sljit_sw));
5561 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5562 }
5563 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5564 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5565 }
5566 else if (framesize >= 0)
5567 {
5568 /* For OP_BRA and OP_BRAMINZERO. */
5569 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5570 }
5571 }
5572 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5573
/* Backtracking code of this alternative. Its remaining failure jumps are
bound to the label emitted right after it, where the next alternative or the
"none matched" code follows. */
5574 compile_backtrackingpath(common, altbacktrack.top);
5575 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5576 {
5577 common->local_exit = save_local_exit;
5578 common->quit_label = save_quit_label;
5579 common->accept_label = save_accept_label;
5580 common->quit = save_quit;
5581 common->accept = save_accept;
5582 return NULL;
5583 }
5584 set_jumps(altbacktrack.topbacktracks, LABEL());
5585
5586 if (*cc != OP_ALT)
5587 break;
5588
5589 ccbegin = cc;
5590 cc += GET(cc, 1);
5591 }
5592
5593 /* None of them matched. */
/* Jumps collected in common->quit while compiling the body land here: the
stack is brought back to the layout created on entry, and the normal
fall-through path jumps over this fix-up. */
5594 if (common->quit != NULL)
5595 {
5596 jump = JUMP(SLJIT_JUMP);
5597 set_jumps(common->quit, LABEL());
5598 SLJIT_ASSERT(framesize != no_stack);
5599 if (framesize < 0)
5600 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
5601 else
5602 {
5603 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5604 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5605 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
5606 }
5607 JUMPHERE(jump);
5608 }
5609
/* Put back the control head value that was saved in STACK(1) on entry. */
5610 if (needs_control_head)
5611 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
5612
5613 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5614 {
5615 /* Assert is failed. */
5616 if (conditional || bra == OP_BRAZERO)
5617 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5618
5619 if (framesize < 0)
5620 {
5621 /* The topmost item should be 0. */
5622 if (bra == OP_BRAZERO)
5623 {
5624 if (extrasize == 2)
5625 free_stack(common, 1);
5626 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5627 }
5628 else
5629 free_stack(common, extrasize);
5630 }
5631 else
5632 {
5633 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5634 /* The topmost item should be 0. */
5635 if (bra == OP_BRAZERO)
5636 {
5637 free_stack(common, framesize + extrasize - 1);
5638 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5639 }
5640 else
5641 free_stack(common, framesize + extrasize);
5642 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5643 }
/* Without OP_BRAZERO the failure is reported through "target"; with
OP_BRAZERO this jump is bound to the matching path label further below. */
5644 jump = JUMP(SLJIT_JUMP);
5645 if (bra != OP_BRAZERO)
5646 add_jump(compiler, target, jump);
5647
5648 /* Assert is successful. */
5649 set_jumps(tmp, LABEL());
5650 if (framesize < 0)
5651 {
5652 /* We know that STR_PTR was stored on the top of the stack. */
5653 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 1) * sizeof(sljit_sw));
5654 /* Keep the STR_PTR on the top of the stack. */
5655 if (bra == OP_BRAZERO)
5656 {
5657 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5658 if (extrasize == 2)
5659 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5660 }
5661 else if (bra == OP_BRAMINZERO)
5662 {
5663 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5664 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5665 }
5666 }
5667 else
5668 {
5669 if (bra == OP_BRA)
5670 {
5671 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5672 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5673 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (extrasize - 2) * sizeof(sljit_sw));
5674 }
5675 else
5676 {
5677 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5678 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5679 if (extrasize == 2)
5680 {
5681 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5682 if (bra == OP_BRAMINZERO)
5683 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5684 }
5685 else
5686 {
5687 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5688 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5689 }
5690 }
5691 }
5692
5693 if (bra == OP_BRAZERO)
5694 {
5695 backtrack->matchingpath = LABEL();
5696 SET_LABEL(jump, backtrack->matchingpath);
5697 }
5698 else if (bra == OP_BRAMINZERO)
5699 {
/* NOTE(review): backtrack->matchingpath is not assigned in this function for
OP_BRAMINZERO, so it is presumably set by the caller beforehand - confirm. */
5700 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5701 JUMPHERE(brajump);
5702 if (framesize >= 0)
5703 {
5704 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5705 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5706 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5707 }
5708 set_jumps(backtrack->common.topbacktracks, LABEL());
5709 }
5710 }
5711 else
5712 {
5713 /* AssertNot is successful. */
5714 if (framesize < 0)
5715 {
5716 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5717 if (bra != OP_BRA)
5718 {
5719 if (extrasize == 2)
5720 free_stack(common, 1);
5721 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5722 }
5723 else
5724 free_stack(common, extrasize);
5725 }
5726 else
5727 {
5728 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5729 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
5730 /* The topmost item should be 0. */
5731 if (bra != OP_BRA)
5732 {
5733 free_stack(common, framesize + extrasize - 1);
5734 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5735 }
5736 else
5737 free_stack(common, framesize + extrasize);
5738 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5739 }
5740
5741 if (bra == OP_BRAZERO)
5742 backtrack->matchingpath = LABEL();
5743 else if (bra == OP_BRAMINZERO)
5744 {
5745 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5746 JUMPHERE(brajump);
5747 }
5748
/* With a zero-repeat prefix the failure jumps of the negative assertion are
bound here, so matching continues, and the list is emptied. */
5749 if (bra != OP_BRA)
5750 {
5751 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5752 set_jumps(backtrack->common.topbacktracks, LABEL());
5753 backtrack->common.topbacktracks = NULL;
5754 }
5755 }
5756
/* Put back the caller's quit/accept state saved on entry. */
5757 common->local_exit = save_local_exit;
5758 common->quit_label = save_quit_label;
5759 common->accept_label = save_accept_label;
5760 common->quit = save_quit;
5761 common->accept = save_accept;
5762 return cc + 1 + LINK_SIZE;
5763 }
5764
5765 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5766 {
5767 int condition = FALSE;
5768 pcre_uchar *slotA = name_table;
5769 pcre_uchar *slotB;
5770 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5771 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5772 sljit_sw no_capture;
5773 int i;
5774
5775 locals += refno & 0xff;
5776 refno >>= 8;
5777 no_capture = locals[1];
5778
5779 for (i = 0; i < name_count; i++)
5780 {
5781 if (GET2(slotA, 0) == refno) break;
5782 slotA += name_entry_size;
5783 }
5784
5785 if (i < name_count)
5786 {
5787 /* Found a name for the number - there can be only one; duplicate names
5788 for different numbers are allowed, but not vice versa. First scan down
5789 for duplicates. */
5790
5791 slotB = slotA;
5792 while (slotB > name_table)
5793 {
5794 slotB -= name_entry_size;
5795 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5796 {
5797 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5798 if (condition) break;
5799 }
5800 else break;
5801 }
5802
5803 /* Scan up for duplicates */
5804 if (!condition)
5805 {
5806 slotB = slotA;
5807 for (i++; i < name_count; i++)
5808 {
5809 slotB += name_entry_size;
5810 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5811 {
5812 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5813 if (condition) break;
5814 }
5815 else break;
5816 }
5817 }
5818 }
5819 return condition;
5820 }
5821
5822 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5823 {
5824 int condition = FALSE;
5825 pcre_uchar *slotA = name_table;
5826 pcre_uchar *slotB;
5827 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5828 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5829 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5830 sljit_uw i;
5831
5832 for (i = 0; i < name_count; i++)
5833 {
5834 if (GET2(slotA, 0) == recno) break;
5835 slotA += name_entry_size;
5836 }
5837
5838 if (i < name_count)
5839 {
5840 /* Found a name for the number - there can be only one; duplicate
5841 names for different numbers are allowed, but not vice versa. First
5842 scan down for duplicates. */
5843
5844 slotB = slotA;
5845 while (slotB > name_table)
5846 {
5847 slotB -= name_entry_size;
5848 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5849 {
5850 condition = GET2(slotB, 0) == group_num;
5851 if (condition) break;
5852 }
5853 else break;
5854 }
5855
5856 /* Scan up for duplicates */
5857 if (!condition)
5858 {
5859 slotB = slotA;
5860 for (i++; i < name_count; i++)
5861 {
5862 slotB += name_entry_size;
5863 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5864 {
5865 condition = GET2(slotB, 0) == group_num;
5866 if (condition) break;
5867 }
5868 else break;
5869 }
5870 }
5871 }
5872 return condition;
5873 }
5874
5875 /*
5876 Handling bracketed expressions is probably the most complex part.
5877
5878 Stack layout naming characters:
5879 S - Push the current STR_PTR
5880 0 - Push a 0 (NULL)
5881 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5882 before the next alternative. Not pushed if there are no alternatives.
5883 M - Any values pushed by the current alternative. Can be empty, or anything.
5884 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5885 L - Push the previous local (pointed by localptr) to the stack
5886 () - optional values stored on the stack
5887 ()* - optional, can be stored multiple times
5888
5889 The following list shows the regular expression templates, their PCRE byte codes
5890 and stack layout supported by pcre-sljit.
5891
5892 (?:) OP_BRA | OP_KET A M
5893 () OP_CBRA | OP_KET C M
5894 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5895 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5896 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5897 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5898 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5899 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5900 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5901 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5902 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5903 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5904 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5905 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5906 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5907 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5908 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5909 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5910 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5911 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5912 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5913 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5914
5915
5916 Stack layout naming characters:
5917 A - Push the alternative index (starting from 0) on the stack.
5918 Not pushed if there are no alternatives.
5919 M - Any values pushed by the current alternative. Can be empty, or anything.
5920
5921 The next list shows the possible content of a bracket:
5922 (|) OP_*BRA | OP_ALT ... M A
5923 (?()|) OP_*COND | OP_ALT M A
5924 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5925 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5926 Or nothing, if trace is unnecessary
5927 */
5928
/* Generates the matching path for a bracketed group whose first opcode is at
cc. The group may be preceded by OP_BRAZERO or OP_BRAMINZERO.

common - shared compiler state
cc     - start of the group in the compiled pattern
parent - backtrack record of the enclosing construct; a bracket_backtrack is
         registered through PUSH_BACKTRACK

Returns the address just past the closing KET (remaining OP_ALT alternatives
are skipped), bracketend(cc) for a condition group whose condition is OP_DEF
(no code is generated for it), or NULL when the compiler reports an error. */
5929 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5930 {
5931 DEFINE_COMPILER;
5932 backtrack_common *backtrack;
5933 pcre_uchar opcode;
5934 int private_data_ptr = 0;
5935 int offset = 0;
5936 int stacksize;
5937 pcre_uchar *ccbegin;
5938 pcre_uchar *matchingpath;
5939 pcre_uchar bra = OP_BRA;
5940 pcre_uchar ket;
5941 assert_backtrack *assert;
5942 BOOL has_alternatives;
5943 struct sljit_jump *jump;
5944 struct sljit_jump *skip;
5945 struct sljit_label *rmaxlabel = NULL;
5946 struct sljit_jump *braminzerojump = NULL;
5947
5948 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5949
5950 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5951 {
5952 bra = *cc;
5953 cc++;
/* NOTE(review): this assignment is redundant, opcode is set again
unconditionally right after this block. */
5954 opcode = *cc;
5955 }
5956
5957 opcode = *cc;
5958 ccbegin = cc;
5959 matchingpath = ccbegin + 1 + LINK_SIZE;
5960
5961 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5962 {
5963 /* Drop this bracket_backtrack. */
5964 parent->top = backtrack->prev;
5965 return bracketend(cc);
5966 }
5967
5968 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5969 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5970 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5971 cc += GET(cc, 1);
5972
/* cc now points to the end of the first alternative. For condition groups
the kind of condition decides whether the alternative index is tracked;
stacksize is only used as a scratch variable here. */
5973 has_alternatives = *cc == OP_ALT;
5974 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5975 {
5976 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5977 if (*matchingpath == OP_NRREF)
5978 {
5979 stacksize = GET2(matchingpath, 1);
5980 if (common->currententry == NULL || stacksize == RREF_ANY)
5981 has_alternatives = FALSE;
5982 else if (common->currententry->start == 0)
5983 has_alternatives = stacksize != 0;
5984 else
5985 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5986 }
5987 }
5988
/* From here on a repeated OP_COND is handled as OP_SCOND and OP_ONCE_NC is
handled as OP_ONCE. */
5989 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5990 opcode = OP_SCOND;
5991 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5992 opcode = OP_ONCE;
5993
5994 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5995 {
5996 /* Capturing brackets has a pre-allocated space. */
5997 offset = GET2(ccbegin, 1 + LINK_SIZE);
5998 if (common->optimized_cbracket[offset] == 0)
5999 {
6000 private_data_ptr = OVECTOR_PRIV(offset);
6001 offset <<= 1;
6002 }
6003 else
6004 {
6005 offset <<= 1;
6006 private_data_ptr = OVECTOR(offset);
6007 }
6008 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6009 matchingpath += IMM2_SIZE;
6010 }
6011 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
6012 {
6013 /* Other brackets simply allocate the next entry. */
6014 private_data_ptr = PRIVATE_DATA(ccbegin);
6015 SLJIT_ASSERT(private_data_ptr != 0);
6016 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
6017 if (opcode == OP_ONCE)
6018 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
6019 }
6020
6021 /* Instructions before the first alternative. */
/* First count the slots: one that is initialized to 0 for repeated groups
(except OP_KETRMIN after OP_BRAMINZERO) and one holding STR_PTR for
OP_BRAZERO. Then allocate them at once and fill them in the same order. */
6022 stacksize = 0;
6023 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6024 stacksize++;
6025 if (bra == OP_BRAZERO)
6026 stacksize++;
6027
6028 if (stacksize > 0)
6029 allocate_stack(common, stacksize);
6030
6031 stacksize = 0;
6032 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
6033 {
6034 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6035 stacksize++;
6036 }
6037
6038 if (bra == OP_BRAZERO)
6039 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6040
6041 if (bra == OP_BRAMINZERO)
6042 {
6043 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
6044 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6045 if (ket != OP_KETRMIN)
6046 {
6047 free_stack(common, 1);
6048 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6049 }
6050 else
6051 {
6052 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6053 {
6054 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6055 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6056 /* Nothing stored during the first run. */
6057 skip = JUMP(SLJIT_JUMP);
6058 JUMPHERE(jump);
6059 /* Checking zero-length iteration. */
6060 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6061 {
6062 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
6063 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6064 }
6065 else
6066 {
6067 /* Except when the whole stack frame must be saved. */
6068 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6069 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6070 }
6071 JUMPHERE(skip);
6072 }
6073 else
6074 {
6075 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
6076 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
6077 JUMPHERE(jump);
6078 }
6079 }
6080 }
6081
/* Record the labels at which a repeated group is re-entered. */
6082 if (ket == OP_KETRMIN)
6083 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6084
6085 if (ket == OP_KETRMAX)
6086 {
6087 rmaxlabel = LABEL();
6088 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
6089 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
6090 }
6091
6092 /* Handling capturing brackets and alternatives. */
6093 if (opcode == OP_ONCE)
6094 {
6095 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6096 {
6097 /* Neither capturing brackets nor recursions are found in the block. */
6098 if (ket == OP_KETRMIN)
6099 {
6100 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6101 allocate_stack(common, 2);
6102 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6103 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6104 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
6105 }
6106 else if (ket == OP_KETRMAX || has_alternatives)
6107 {
6108 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6109 allocate_stack(common, 1);
6110 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6111 }
6112 else
6113 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6114 }
6115 else
6116 {
/* A frame is saved with init_frame. The previous private_data_ptr value is
kept on the stack, and STR_PTR is stored as well when the group repeats or
has alternatives. */
6117 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
6118 {
6119 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
6120 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6121 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + 2) * sizeof(sljit_sw));
6122 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6123 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6124 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
6125 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
6126 }
6127 else
6128 {
6129 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
6130 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6131 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
6132 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6133 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6134 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
6135 }
6136 }
6137 }
6138 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
6139 {
6140 /* Saving the previous values. */
6141 if (common->optimized_cbracket[offset >> 1] != 0)
6142 {
6143 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
6144 allocate_stack(common, 2);
6145 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6146 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
6147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6148 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6149 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6150 }
6151 else
6152 {
6153 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6154 allocate_stack(common, 1);
6155 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6156 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6157 }
6158 }
6159 else if (opcode == OP_SBRA || opcode == OP_SCOND)
6160 {
6161 /* Saving the previous value. */
6162 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6163 allocate_stack(common, 1);
6164 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
6165 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6166 }
6167 else if (has_alternatives)
6168 {
6169 /* Pushing the starting string pointer. */
6170 allocate_stack(common, 1);
6171 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6172 }
6173
6174 /* Generating code for the first alternative. */
/* For condition groups the condition itself is compiled first; a failed
condition jumps through the u.condfailed list. */
6175 if (opcode == OP_COND || opcode == OP_SCOND)
6176 {
6177 if (*matchingpath == OP_CREF)
6178 {
6179 SLJIT_ASSERT(has_alternatives);
6180 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
6181 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6182 matchingpath += 1 + IMM2_SIZE;
6183 }
6184 else if (*matchingpath == OP_NCREF)
6185 {
6186 SLJIT_ASSERT(has_alternatives);
6187 stacksize = GET2(matchingpath, 1);
6188 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6189
/* Call do_searchovector with three arguments; STACK_TOP is kept in
POSSESSIVE1 across the call and restored afterwards. */
6190 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6191 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6192 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6193 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
6194 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6195 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6196 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
6197 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6198 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6199
6200 JUMPHERE(jump);
6201 matchingpath += 1 + IMM2_SIZE;
6202 }
6203 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
6204 {
6205 /* Never has other case. */
6206 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6207
/* stacksize is reused as a boolean: non-zero when the condition is known
to be true at compile time. */
6208 stacksize = GET2(matchingpath, 1);
6209 if (common->currententry == NULL)
6210 stacksize = 0;
6211 else if (stacksize == RREF_ANY)
6212 stacksize = 1;
6213 else if (common->currententry->start == 0)
6214 stacksize = stacksize == 0;
6215 else
6216 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6217
6218 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6219 {
6220 SLJIT_ASSERT(!has_alternatives);
6221 if (stacksize != 0)
6222 matchingpath += 1 + IMM2_SIZE;
6223 else
6224 {
6225 if (*cc == OP_ALT)
6226 {
6227 matchingpath = cc + 1 + LINK_SIZE;
6228 cc += GET(cc, 1);
6229 }
6230 else
6231 matchingpath = cc;
6232 }
6233 }
6234 else
6235 {
6236 SLJIT_ASSERT(has_alternatives);
6237
/* The result must be decided at run time: call do_searchgroups with the
number of the current recursion entry passed in POSSESSIVE0. */
6238 stacksize = GET2(matchingpath, 1);
6239 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6240 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6241 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6242 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6243 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6244 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6245 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6246 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6247 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6248 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6249 matchingpath += 1 + IMM2_SIZE;
6250 }
6251 }
6252 else
6253 {
6254 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6255 /* Similar code as PUSH_BACKTRACK macro. */
6256 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6257 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6258 return NULL;
6259 memset(assert, 0, sizeof(assert_backtrack));
6260 assert->common.cc = matchingpath;
6261 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6262 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6263 }
6264 }
6265
6266 compile_matchingpath(common, matchingpath, cc, backtrack);
6267 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6268 return NULL;
6269
/* After the body of an OP_ONCE group STACK_TOP is reloaded from the value
kept at private_data_ptr. */
6270 if (opcode == OP_ONCE)
6271 {
6272 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6273 {
6274 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6275 /* TMP2 which is set here used by OP_KETRMAX below. */
6276 if (ket == OP_KETRMAX)
6277 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6278 else if (ket == OP_KETRMIN)
6279 {
6280 /* Move the STR_PTR to the private_data_ptr. */
6281 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6282 }
6283 }
6284 else
6285 {
6286 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
6287 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw));
6288 if (ket == OP_KETRMAX)
6289 {
6290 /* TMP2 which is set here used by OP_KETRMAX below. */
6291 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6292 }
6293 }
6294 }
6295
/* Count the slots pushed after the alternative has matched, allocate them
in one step, then fill them in the same order as they were counted. */
6296 stacksize = 0;
6297 if (ket != OP_KET || bra != OP_BRA)
6298 stacksize++;
6299 if (offset != 0)
6300 {
6301 if (common->capture_last_ptr != 0)
6302 stacksize++;
6303 if (common->optimized_cbracket[offset >> 1] == 0)
6304 stacksize += 2;
6305 }
6306 if (has_alternatives && opcode != OP_ONCE)
6307 stacksize++;
6308
6309 if (stacksize > 0)
6310 allocate_stack(common, stacksize);
6311
6312 stacksize = 0;
6313 if (ket != OP_KET || bra != OP_BRA)
6314 {
6315 if (ket != OP_KET)
6316 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6317 else
6318 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6319 stacksize++;
6320 }
6321
6322 if (offset != 0)
6323 {
6324 if (common->capture_last_ptr != 0)
6325 {
6326 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6327 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6328 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0);
6329 stacksize++;
6330 }
6331 if (common->optimized_cbracket[offset >> 1] == 0)
6332 {
6333 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6334 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6335 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6336 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6337 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6339 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6340 stacksize += 2;
6341 }
6342 }
6343
6344 if (has_alternatives)
6345 {
6346 if (opcode != OP_ONCE)
6347 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6348 if (ket != OP_KETRMAX)
6349 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6350 }
6351
6352 /* Must be after the matchingpath label. */
6353 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6354 {
6355 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6356 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6357 }
6358
6359 if (ket == OP_KETRMAX)
6360 {
6361 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6362 {
6363 if (has_alternatives)
6364 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6365 /* Checking zero-length iteration. */
6366 if (opcode != OP_ONCE)
6367 {
6368 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6369 /* Drop STR_PTR for greedy plus quantifier. */
6370 if (bra != OP_BRAZERO)
6371 free_stack(common, 1);
6372 }
6373 else
6374 /* TMP2 must contain the starting STR_PTR. */
6375 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6376 }
6377 else
6378 JUMPTO(SLJIT_JUMP, rmaxlabel);
6379 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6380 }
6381
6382 if (bra == OP_BRAZERO)
6383 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6384
6385 if (bra == OP_BRAMINZERO)
6386 {
6387 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6388 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6389 if (braminzerojump != NULL)
6390 {
6391 JUMPHERE(braminzerojump);
6392 /* We need to release the end pointer to perform the
6393 backtrack for the zero-length iteration. When
6394 framesize is < 0, OP_ONCE will do the release itself. */
6395 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6396 {
6397 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6398 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6399 }
6400 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6401 free_stack(common, 1);
6402 }
6403 /* Continue to the normal backtrack. */
6404 }
6405
6406 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6407 decrease_call_count(common);
6408
6409 /* Skip the other alternatives. */
6410 while (*cc == OP_ALT)
6411 cc += GET(cc, 1);
6412 cc += 1 + LINK_SIZE;
6413 return cc;
6414 }
6415
/* Generates the matching path for a group opened by OP_BRAPOS, OP_SBRAPOS,
OP_CBRAPOS or OP_SCBRAPOS (optionally preceded by OP_BRAPOSZERO) and closed
by OP_KETRPOS.

common - shared compiler state
cc     - start of the group in the compiled pattern
parent - backtrack record of the enclosing construct; a bracketpos_backtrack
         is registered through PUSH_BACKTRACK

Every alternative is compiled together with its own backtracking path inside
the loop below. Returns the address just past OP_KETRPOS, or NULL when the
compiler reports an error. */
6416 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6417 {
6418 DEFINE_COMPILER;
6419 backtrack_common *backtrack;
6420 pcre_uchar opcode;
6421 int private_data_ptr;
6422 int cbraprivptr = 0;
6423 BOOL needs_control_head = common->control_head_ptr != 0;
6424 int framesize;
6425 int stacksize;
6426 int offset = 0;
6427 BOOL zero = FALSE;
6428 pcre_uchar *ccbegin = NULL;
6429 int stack; /* Also contains the offset of control head. */
6430 struct sljit_label *loop = NULL;
6431 struct jump_list *emptymatch = NULL;
6432
6433 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6434 if (*cc == OP_BRAPOSZERO)
6435 {
6436 zero = TRUE;
6437 cc++;
6438 }
6439
6440 opcode = *cc;
6441 private_data_ptr = PRIVATE_DATA(cc);
6442 SLJIT_ASSERT(private_data_ptr != 0);
6443 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
/* ccbegin is set to the first opcode of the group body; capturing variants
also carry a group number after the link. */
6444 switch(opcode)
6445 {
6446 case OP_BRAPOS:
6447 case OP_SBRAPOS:
6448 ccbegin = cc + 1 + LINK_SIZE;
6449 break;
6450
6451 case OP_CBRAPOS:
6452 case OP_SCBRAPOS:
6453 offset = GET2(cc, 1 + LINK_SIZE);
6454 /* This case cannot be optimized in the same was as
6455 normal capturing brackets. */
6456 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6457 cbraprivptr = OVECTOR_PRIV(offset);
6458 offset <<= 1;
6459 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6460 break;
6461
6462 default:
6463 SLJIT_ASSERT_STOP();
6464 break;
6465 }
6466
6467 framesize = get_framesize(common, cc, FALSE);
6468 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6469 if (framesize < 0)
6470 {
/* No frame is saved in this branch. The slots hold either the saved OVECTOR
pair (plus the capture_last value when it is tracked) or STR_PTR, then the
control head when needed, then a flag slot when the group is not preceded
by OP_BRAPOSZERO. */
6471 if (offset != 0)
6472 {
6473 stacksize = 2;
6474 if (common->capture_last_ptr != 0)
6475 stacksize++;
6476 }
6477 else
6478 stacksize = 1;
6479
6480 if (needs_control_head)
6481 stacksize++;
6482 if (!zero)
6483 stacksize++;
6484
6485 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6486 allocate_stack(common, stacksize);
6487 if (framesize == no_frame)
6488 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6489
6490 stack = 0;
6491 if (offset != 0)
6492 {
6493 stack = 2;
6494 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6495 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6496 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6497 if (common->capture_last_ptr != 0)
6498 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6499 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6500 if (needs_control_head)
6501 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6502 if (common->capture_last_ptr != 0)
6503 {
6504 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6505 stack = 3;
6506 }
6507 }
6508 else
6509 {
6510 if (needs_control_head)
6511 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6512 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6513 stack = 1;
6514 }
6515
/* The flag slot is initialized to 1. It is cleared once an alternative has
matched and is tested after the loop. When the control head is stored,
stack is left holding the index of its slot. */
6516 if (needs_control_head)
6517 stack++;
6518 if (!zero)
6519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
6520 if (needs_control_head)
6521 {
6522 stack--;
6523 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6524 }
6525 }
6526 else
6527 {
/* A frame of framesize entries is saved with init_frame. In front of it
come the optional flag slot, the optional control head, STR_PTR for
non-capturing groups and the previous private_data_ptr value. */
6528 stacksize = framesize + 1;
6529 if (!zero)
6530 stacksize++;
6531 if (needs_control_head)
6532 stacksize++;
6533 if (offset == 0)
6534 stacksize++;
6535 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6536
6537 allocate_stack(common, stacksize);
6538 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6539 if (needs_control_head)
6540 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr);
6541 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6542
6543 stack = 0;
6544 if (!zero)
6545 {
6546 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6547 stack = 1;
6548 }
6549 if (needs_control_head)
6550 {
6551 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
6552 stack++;
6553 }
6554 if (offset == 0)
6555 {
6556 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6557 stack++;
6558 }
6559 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6560 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6561 stack -= 1 + (offset == 0);
6562 }
6563
6564 if (offset != 0)
6565 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6566
/* One pass of this C loop per alternative: the matching path is emitted,
followed by a jump back to the loop label, and then the backtracking path
of that alternative. */
6567 loop = LABEL();
6568 while (*cc != OP_KETRPOS)
6569 {
6570 backtrack->top = NULL;
6571 backtrack->topbacktracks = NULL;
6572 cc += GET(cc, 1);
6573
6574 compile_matchingpath(common, ccbegin, cc, backtrack);
6575 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6576 return NULL;
6577
6578 if (framesize < 0)
6579 {
6580 if (framesize == no_frame)
6581 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6582
6583 if (offset != 0)
6584 {
6585 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6586 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6587 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6588 if (common->capture_last_ptr != 0)
6589 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6590 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6591 }
6592 else
6593 {
6594 if (opcode == OP_SBRAPOS)
6595 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6596 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6597 }
6598
/* TMP1 holds the string position saved before this iteration; when it
equals STR_PTR the iteration matched nothing and the loop is left. */
6599 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6600 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6601
6602 if (!zero)
6603 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6604 }
6605 else
6606 {
6607 if (offset != 0)
6608 {
6609 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6610 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6611 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6612 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6613 if (common->capture_last_ptr != 0)
6614 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6615 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6616 }
6617 else
6618 {
6619 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6620 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6621 if (opcode == OP_SBRAPOS)
6622 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6623 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6624 }
6625
6626 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6627 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6628
6629 if (!zero)
6630 {
/* NOTE(review): this code is inside the else branch of the outer
framesize < 0 test, so the condition below is never true here and only
the STACK(0) store is emitted. */
6631 if (framesize < 0)
6632 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6633 else
6634 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6635 }
6636 }
6637
6638 if (needs_control_head)
6639 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
6640
6641 JUMPTO(SLJIT_JUMP, loop);
6642 flush_stubs(common);
6643
6644 compile_backtrackingpath(common, backtrack->top);
6645 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6646 return NULL;
6647 set_jumps(backtrack->topbacktracks, LABEL());
6648
/* The alternative failed: reload STR_PTR from the value saved at the end of
the last successful iteration (or at group entry). */
6649 if (framesize < 0)
6650 {
6651 if (offset != 0)
6652 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6653 else
6654 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6655 }
6656 else
6657 {
6658 if (offset != 0)
6659 {
6660 /* Last alternative. */
6661 if (*cc == OP_KETRPOS)
6662 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6663 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6664 }
6665 else
6666 {
6667 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6668 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6669 }
6670 }
6671
6672 if (*cc == OP_KETRPOS)
6673 break;
6674 ccbegin = cc + 1 + LINK_SIZE;
6675 }
6676
6677 /* We don't have to restore the control head in case of a failed match. */
6678
/* When the group is not preceded by OP_BRAPOSZERO, a flag slot that is still
non-zero means no iteration matched, so a jump is added to topbacktracks. */
6679 backtrack->topbacktracks = NULL;
6680 if (!zero)
6681 {
6682 if (framesize < 0)
6683 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6684 else /* TMP2 is set to [private_data_ptr] above. */
6685 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
6686 }
6687
6688 /* None of them matched. */
6689 set_jumps(emptymatch, LABEL());
6690 decrease_call_count(common);
6691 return cc + 1 + LINK_SIZE;
6692 }
6693
6694 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6695 {
6696 int class_len;
6697
6698 *opcode = *cc;
6699 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6700 {
6701 cc++;
6702 *type = OP_CHAR;
6703 }
6704 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6705 {
6706 cc++;
6707 *type = OP_CHARI;
6708 *opcode -= OP_STARI - OP_STAR;
6709 }
6710 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6711 {
6712 cc++;
6713 *type = OP_NOT;
6714 *opcode -= OP_NOTSTAR - OP_STAR;
6715 }
6716 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6717 {
6718 cc++;
6719 *type = OP_NOTI;
6720 *opcode -= OP_NOTSTARI - OP_STAR;
6721 }
6722 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6723 {
6724 cc++;
6725 *opcode -= OP_TYPESTAR - OP_STAR;
6726 *type = 0;
6727 }
6728 else
6729 {
6730 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6731 *type = *opcode;
6732 cc++;
6733 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6734 *opcode = cc[class_len - 1];
6735 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6736 {
6737 *opcode -= OP_CRSTAR - OP_STAR;
6738 if (end != NULL)
6739 *end = cc + class_len;
6740 }
6741 else
6742 { </