/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1244 - (show annotations)
Tue Feb 5 12:03:43 2013 UTC (6 years, 10 months ago) by zherczeg
File MIME type: text/plain
File size: 265411 byte(s)
Error occurred while calculating annotation data.
Optimizing fast_forward_start_bits in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Allocate memory for the regex stack on the real machine stack.
69 Fast, but limited size. */
70 #define MACHINE_STACK_SIZE 32768
71
72 /* Growth rate for stack allocated by the OS. Should be the multiply
73 of page size. */
74 #define STACK_GROWTH_RATE 8192
75
76 /* Enable to check that the allocation could destroy temporaries. */
77 #if defined SLJIT_DEBUG && SLJIT_DEBUG
78 #define DESTROY_REGISTERS 1
79 #endif
80
/*
Short summary about the backtracking mechanism employed by the jit code generator:

The code generator follows the recursive nature of the Perl compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).

  'ab' - 'a' and 'b' regexps are concatenated
  'a+' - 'a' is the sub-expression of the '+' operator

The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.

 Greedy star operator (*) :
   Matching path: match happens.
   Backtrack path: match failed.
 Non-greedy star operator (*?) :
   Matching path: no need to perform a match.
   Backtrack path: match is required.

The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:

   A(B|C)D

The generated code will be the following:

 A matching path
 '(' matching path (pushing arguments to the stack)
 B matching path
 ')' matching path (pushing arguments to the stack)
 D matching path
 return with successful match

 D backtrack path
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
 B backtrack path
 C expected path
 jump to D matching path
 C backtrack path
 A backtrack path

 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
*/
136
/*
Saved stack frames:

Atomic blocks and asserts require reloading the values of private data
when backtracking is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.

The stack frames are stored in a linked list, and have the following format:
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]

Thus we can restore the private data to a particular point in the stack.
*/
150
151 typedef struct jit_arguments {
152 /* Pointers first. */
153 struct sljit_stack *stack;
154 const pcre_uchar *str;
155 const pcre_uchar *begin;
156 const pcre_uchar *end;
157 int *offsets;
158 pcre_uchar *uchar_ptr;
159 pcre_uchar *mark_ptr;
160 /* Everything else after. */
161 int offsetcount;
162 int calllimit;
163 pcre_uint8 notbol;
164 pcre_uint8 noteol;
165 pcre_uint8 notempty;
166 pcre_uint8 notempty_atstart;
167 } jit_arguments;
168
169 typedef struct executable_functions {
170 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
171 PUBL(jit_callback) callback;
172 void *userdata;
173 pcre_uint32 top_bracket;
174 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
175 } executable_functions;
176
/* Singly linked list node carrying one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
181
/* Kinds of stub; stack_alloc is the only one defined. */
enum stub_types { stack_alloc };
183
184 typedef struct stub_list {
185 enum stub_types type;
186 int data;
187 struct sljit_jump *start;
188 struct sljit_label *quit;
189 struct stub_list *next;
190 } stub_list;
191
192 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
193
194 /* The following structure is the key data type for the recursive
195 code generator. It is allocated by compile_matchingpath, and contains
196 the aguments for compile_backtrackingpath. Must be the first member
197 of its descendants. */
198 typedef struct backtrack_common {
199 /* Concatenation stack. */
200 struct backtrack_common *prev;
201 jump_list *nextbacktracks;
202 /* Internal stack (for component operators). */
203 struct backtrack_common *top;
204 jump_list *topbacktracks;
205 /* Opcode pointer. */
206 pcre_uchar *cc;
207 } backtrack_common;
208
209 typedef struct assert_backtrack {
210 backtrack_common common;
211 jump_list *condfailed;
212 /* Less than 0 (-1) if a frame is not needed. */
213 int framesize;
214 /* Points to our private memory word on the stack. */
215 int private_data_ptr;
216 /* For iterators. */
217 struct sljit_label *matchingpath;
218 } assert_backtrack;
219
220 typedef struct bracket_backtrack {
221 backtrack_common common;
222 /* Where to coninue if an alternative is successfully matched. */
223 struct sljit_label *alternative_matchingpath;
224 /* For rmin and rmax iterators. */
225 struct sljit_label *recursive_matchingpath;
226 /* For greedy ? operator. */
227 struct sljit_label *zero_matchingpath;
228 /* Contains the branches of a failed condition. */
229 union {
230 /* Both for OP_COND, OP_SCOND. */
231 jump_list *condfailed;
232 assert_backtrack *assert;
233 /* For OP_ONCE. -1 if not needed. */
234 int framesize;
235 } u;
236 /* Points to our private memory word on the stack. */
237 int private_data_ptr;
238 } bracket_backtrack;
239
240 typedef struct bracketpos_backtrack {
241 backtrack_common common;
242 /* Points to our private memory word on the stack. */
243 int private_data_ptr;
244 /* Reverting stack is needed. */
245 int framesize;
246 /* Allocated stack size. */
247 int stacksize;
248 } bracketpos_backtrack;
249
250 typedef struct braminzero_backtrack {
251 backtrack_common common;
252 struct sljit_label *matchingpath;
253 } braminzero_backtrack;
254
255 typedef struct iterator_backtrack {
256 backtrack_common common;
257 /* Next iteration. */
258 struct sljit_label *matchingpath;
259 } iterator_backtrack;
260
261 typedef struct recurse_entry {
262 struct recurse_entry *next;
263 /* Contains the function entry. */
264 struct sljit_label *entry;
265 /* Collects the calls until the function is not created. */
266 jump_list *calls;
267 /* Points to the starting opcode. */
268 int start;
269 } recurse_entry;
270
271 typedef struct recurse_backtrack {
272 backtrack_common common;
273 } recurse_backtrack;
274
275 #define MAX_RANGE_SIZE 6
276
277 typedef struct compiler_common {
278 struct sljit_compiler *compiler;
279 pcre_uchar *start;
280
281 /* Maps private data offset to each opcode. */
282 int *private_data_ptrs;
283 /* Tells whether the capturing bracket is optimized. */
284 pcre_uint8 *optimized_cbracket;
285 /* Starting offset of private data for capturing brackets. */
286 int cbraptr;
287 /* OVector starting point. Must be divisible by 2. */
288 int ovector_start;
289 /* Last known position of the requested byte. */
290 int req_char_ptr;
291 /* Head of the last recursion. */
292 int recursive_head;
293 /* First inspected character for partial matching. */
294 int start_used_ptr;
295 /* Starting pointer for partial soft matches. */
296 int hit_start;
297 /* End pointer of the first line. */
298 int first_line_end;
299 /* Points to the marked string. */
300 int mark_ptr;
301
302 /* Flipped and lower case tables. */
303 const pcre_uint8 *fcc;
304 sljit_sw lcc;
305 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
306 int mode;
307 /* Newline control. */
308 int nltype;
309 int newline;
310 int bsr_nltype;
311 /* Dollar endonly. */
312 int endonly;
313 BOOL has_set_som;
314 /* Tables. */
315 sljit_sw ctypes;
316 int digits[2 + MAX_RANGE_SIZE];
317 /* Named capturing brackets. */
318 sljit_uw name_table;
319 sljit_sw name_count;
320 sljit_sw name_entry_size;
321
322 /* Labels and jump lists. */
323 struct sljit_label *partialmatchlabel;
324 struct sljit_label *quitlabel;
325 struct sljit_label *acceptlabel;
326 stub_list *stubs;
327 recurse_entry *entries;
328 recurse_entry *currententry;
329 jump_list *partialmatch;
330 jump_list *quit;
331 jump_list *accept;
332 jump_list *calllimit;
333 jump_list *stackalloc;
334 jump_list *revertframes;
335 jump_list *wordboundary;
336 jump_list *anynewline;
337 jump_list *hspace;
338 jump_list *vspace;
339 jump_list *casefulcmp;
340 jump_list *caselesscmp;
341 BOOL jscript_compat;
342 #ifdef SUPPORT_UTF
343 BOOL utf;
344 #ifdef SUPPORT_UCP
345 BOOL use_ucp;
346 #endif
347 #ifndef COMPILE_PCRE32
348 jump_list *utfreadchar;
349 #endif
350 #ifdef COMPILE_PCRE8
351 jump_list *utfreadtype8;
352 #endif
353 #endif /* SUPPORT_UTF */
354 #ifdef SUPPORT_UCP
355 jump_list *getucd;
356 #endif
357 } compiler_common;
358
/* For byte_sequence_compare. */

/* The ucharptr member and the c / oc unions exist only when
SLJIT_UNALIGNED is set. Both unions overlay an int, a 16 bit value and
an array of character units whose element type and count depend on the
compiled character width. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
392
/* Tag values for saved stack frames. init_frame stores frame_setstrbegin
and frame_setmark as immediates in front of the saved value; the frame
format described earlier in this file ends its entry list with a 0. */
enum {
  frame_end = 0,
  frame_setstrbegin = -1,
  frame_setmark = -2
};
398
/* Undefine sljit macros. */
#undef CMP

/* Used for accessing the elements of the stack. STACK(0) is one word
below the base address, STACK(1) two words below, and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Register assignment. */
#define TMP1          SLJIT_SCRATCH_REG1
#define TMP2          SLJIT_SCRATCH_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_SCRATCH_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1

/* Local space layout. */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define CALL_LIMIT       (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet reached. */
/* The four macros below expand to expressions using a variable named
"common", so they only work where one is in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
433
434 #if defined COMPILE_PCRE8
435 #define MOV_UCHAR SLJIT_MOV_UB
436 #define MOVU_UCHAR SLJIT_MOVU_UB
437 #elif defined COMPILE_PCRE16
438 #define MOV_UCHAR SLJIT_MOV_UH
439 #define MOVU_UCHAR SLJIT_MOVU_UH
440 #elif defined COMPILE_PCRE32
441 #define MOV_UCHAR SLJIT_MOV_UI
442 #define MOVU_UCHAR SLJIT_MOVU_UI
443 #else
444 #error Unsupported compiling mode
445 #endif
446
/* Shortcuts. */
/* DEFINE_COMPILER declares a local "compiler" taken from common->compiler.
Every other macro here forwards to the sljit call of the matching name and
passes that local as the first argument, so DEFINE_COMPILER must appear
first in any function using them. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an existing jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
470
471 static pcre_uchar* bracketend(pcre_uchar* cc)
472 {
473 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
474 do cc += GET(cc, 1); while (*cc == OP_ALT);
475 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
476 cc += 1 + LINK_SIZE;
477 return cc;
478 }
479
/* Functions that might need modification for every newly supported opcode:
 next_opcode
 get_private_data_length
 set_private_data_ptrs
 get_framesize
 init_frame
 get_private_data_length_for_copy
 copy_private_data
 compile_matchingpath
 compile_backtrackingpath
*/
491
492 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
493 {
494 SLJIT_UNUSED_ARG(common);
495 switch(*cc)
496 {
497 case OP_SOD:
498 case OP_SOM:
499 case OP_SET_SOM:
500 case OP_NOT_WORD_BOUNDARY:
501 case OP_WORD_BOUNDARY:
502 case OP_NOT_DIGIT:
503 case OP_DIGIT:
504 case OP_NOT_WHITESPACE:
505 case OP_WHITESPACE:
506 case OP_NOT_WORDCHAR:
507 case OP_WORDCHAR:
508 case OP_ANY:
509 case OP_ALLANY:
510 case OP_ANYNL:
511 case OP_NOT_HSPACE:
512 case OP_HSPACE:
513 case OP_NOT_VSPACE:
514 case OP_VSPACE:
515 case OP_EXTUNI:
516 case OP_EODN:
517 case OP_EOD:
518 case OP_CIRC:
519 case OP_CIRCM:
520 case OP_DOLL:
521 case OP_DOLLM:
522 case OP_TYPESTAR:
523 case OP_TYPEMINSTAR:
524 case OP_TYPEPLUS:
525 case OP_TYPEMINPLUS:
526 case OP_TYPEQUERY:
527 case OP_TYPEMINQUERY:
528 case OP_TYPEPOSSTAR:
529 case OP_TYPEPOSPLUS:
530 case OP_TYPEPOSQUERY:
531 case OP_CRSTAR:
532 case OP_CRMINSTAR:
533 case OP_CRPLUS:
534 case OP_CRMINPLUS:
535 case OP_CRQUERY:
536 case OP_CRMINQUERY:
537 case OP_DEF:
538 case OP_BRAZERO:
539 case OP_BRAMINZERO:
540 case OP_BRAPOSZERO:
541 case OP_COMMIT:
542 case OP_FAIL:
543 case OP_ACCEPT:
544 case OP_ASSERT_ACCEPT:
545 case OP_SKIPZERO:
546 return cc + 1;
547
548 case OP_ANYBYTE:
549 #ifdef SUPPORT_UTF
550 if (common->utf) return NULL;
551 #endif
552 return cc + 1;
553
554 case OP_CHAR:
555 case OP_CHARI:
556 case OP_NOT:
557 case OP_NOTI:
558 case OP_STAR:
559 case OP_MINSTAR:
560 case OP_PLUS:
561 case OP_MINPLUS:
562 case OP_QUERY:
563 case OP_MINQUERY:
564 case OP_POSSTAR:
565 case OP_POSPLUS:
566 case OP_POSQUERY:
567 case OP_STARI:
568 case OP_MINSTARI:
569 case OP_PLUSI:
570 case OP_MINPLUSI:
571 case OP_QUERYI:
572 case OP_MINQUERYI:
573 case OP_POSSTARI:
574 case OP_POSPLUSI:
575 case OP_POSQUERYI:
576 case OP_NOTSTAR:
577 case OP_NOTMINSTAR:
578 case OP_NOTPLUS:
579 case OP_NOTMINPLUS:
580 case OP_NOTQUERY:
581 case OP_NOTMINQUERY:
582 case OP_NOTPOSSTAR:
583 case OP_NOTPOSPLUS:
584 case OP_NOTPOSQUERY:
585 case OP_NOTSTARI:
586 case OP_NOTMINSTARI:
587 case OP_NOTPLUSI:
588 case OP_NOTMINPLUSI:
589 case OP_NOTQUERYI:
590 case OP_NOTMINQUERYI:
591 case OP_NOTPOSSTARI:
592 case OP_NOTPOSPLUSI:
593 case OP_NOTPOSQUERYI:
594 cc += 2;
595 #ifdef SUPPORT_UTF
596 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
597 #endif
598 return cc;
599
600 case OP_UPTO:
601 case OP_MINUPTO:
602 case OP_EXACT:
603 case OP_POSUPTO:
604 case OP_UPTOI:
605 case OP_MINUPTOI:
606 case OP_EXACTI:
607 case OP_POSUPTOI:
608 case OP_NOTUPTO:
609 case OP_NOTMINUPTO:
610 case OP_NOTEXACT:
611 case OP_NOTPOSUPTO:
612 case OP_NOTUPTOI:
613 case OP_NOTMINUPTOI:
614 case OP_NOTEXACTI:
615 case OP_NOTPOSUPTOI:
616 cc += 2 + IMM2_SIZE;
617 #ifdef SUPPORT_UTF
618 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
619 #endif
620 return cc;
621
622 case OP_NOTPROP:
623 case OP_PROP:
624 return cc + 1 + 2;
625
626 case OP_TYPEUPTO:
627 case OP_TYPEMINUPTO:
628 case OP_TYPEEXACT:
629 case OP_TYPEPOSUPTO:
630 case OP_REF:
631 case OP_REFI:
632 case OP_CREF:
633 case OP_NCREF:
634 case OP_RREF:
635 case OP_NRREF:
636 case OP_CLOSE:
637 cc += 1 + IMM2_SIZE;
638 return cc;
639
640 case OP_CRRANGE:
641 case OP_CRMINRANGE:
642 return cc + 1 + 2 * IMM2_SIZE;
643
644 case OP_CLASS:
645 case OP_NCLASS:
646 return cc + 1 + 32 / sizeof(pcre_uchar);
647
648 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
649 case OP_XCLASS:
650 return cc + GET(cc, 1);
651 #endif
652
653 case OP_RECURSE:
654 case OP_ASSERT:
655 case OP_ASSERT_NOT:
656 case OP_ASSERTBACK:
657 case OP_ASSERTBACK_NOT:
658 case OP_REVERSE:
659 case OP_ONCE:
660 case OP_ONCE_NC:
661 case OP_BRA:
662 case OP_BRAPOS:
663 case OP_COND:
664 case OP_SBRA:
665 case OP_SBRAPOS:
666 case OP_SCOND:
667 case OP_ALT:
668 case OP_KET:
669 case OP_KETRMAX:
670 case OP_KETRMIN:
671 case OP_KETRPOS:
672 return cc + 1 + LINK_SIZE;
673
674 case OP_CBRA:
675 case OP_CBRAPOS:
676 case OP_SCBRA:
677 case OP_SCBRAPOS:
678 return cc + 1 + LINK_SIZE + IMM2_SIZE;
679
680 case OP_MARK:
681 return cc + 1 + 2 + cc[1];
682
683 default:
684 return NULL;
685 }
686 }
687
/* Groups of case labels shared by the switch statements of
get_private_data_length and set_private_data_ptrs. In both functions the
_1 groups set space to 1, while the _2A and _2B groups set it to 2
(the TYPE variants of _2A / _2B only when the repeated item is neither
OP_ANYNL nor OP_EXTUNI). The _2B groups carry an extra IMM2 value. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
739
740 static int get_class_iterator_size(pcre_uchar *cc)
741 {
742 switch(*cc)
743 {
744 case OP_CRSTAR:
745 case OP_CRPLUS:
746 return 2;
747
748 case OP_CRMINSTAR:
749 case OP_CRMINPLUS:
750 case OP_CRQUERY:
751 case OP_CRMINQUERY:
752 return 1;
753
754 case OP_CRRANGE:
755 case OP_CRMINRANGE:
756 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
757 return 0;
758 return 2;
759
760 default:
761 return 0;
762 }
763 }
764
765 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
766 {
767 int private_data_length = 0;
768 pcre_uchar *alternative;
769 pcre_uchar *name;
770 pcre_uchar *end = NULL;
771 int space, size, i;
772 pcre_uint32 bracketlen;
773
774 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
775 while (cc < ccend)
776 {
777 space = 0;
778 size = 0;
779 bracketlen = 0;
780 switch(*cc)
781 {
782 case OP_SET_SOM:
783 common->has_set_som = TRUE;
784 cc += 1;
785 break;
786
787 case OP_REF:
788 case OP_REFI:
789 common->optimized_cbracket[GET2(cc, 1)] = 0;
790 cc += 1 + IMM2_SIZE;
791 break;
792
793 case OP_ASSERT:
794 case OP_ASSERT_NOT:
795 case OP_ASSERTBACK:
796 case OP_ASSERTBACK_NOT:
797 case OP_ONCE:
798 case OP_ONCE_NC:
799 case OP_BRAPOS:
800 case OP_SBRA:
801 case OP_SBRAPOS:
802 private_data_length += sizeof(sljit_sw);
803 bracketlen = 1 + LINK_SIZE;
804 break;
805
806 case OP_CBRAPOS:
807 case OP_SCBRAPOS:
808 private_data_length += sizeof(sljit_sw);
809 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
810 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
811 break;
812
813 case OP_COND:
814 case OP_SCOND:
815 bracketlen = cc[1 + LINK_SIZE];
816 if (bracketlen == OP_CREF)
817 {
818 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
819 common->optimized_cbracket[bracketlen] = 0;
820 }
821 else if (bracketlen == OP_NCREF)
822 {
823 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
824 name = (pcre_uchar *)common->name_table;
825 alternative = name;
826 for (i = 0; i < common->name_count; i++)
827 {
828 if (GET2(name, 0) == bracketlen) break;
829 name += common->name_entry_size;
830 }
831 SLJIT_ASSERT(i != common->name_count);
832
833 for (i = 0; i < common->name_count; i++)
834 {
835 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
836 common->optimized_cbracket[GET2(alternative, 0)] = 0;
837 alternative += common->name_entry_size;
838 }
839 }
840
841 if (*cc == OP_COND)
842 {
843 /* Might be a hidden SCOND. */
844 alternative = cc + GET(cc, 1);
845 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
846 private_data_length += sizeof(sljit_sw);
847 }
848 else
849 private_data_length += sizeof(sljit_sw);
850 bracketlen = 1 + LINK_SIZE;
851 break;
852
853 case OP_BRA:
854 bracketlen = 1 + LINK_SIZE;
855 break;
856
857 case OP_CBRA:
858 case OP_SCBRA:
859 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
860 break;
861
862 CASE_ITERATOR_PRIVATE_DATA_1
863 space = 1;
864 size = -2;
865 break;
866
867 CASE_ITERATOR_PRIVATE_DATA_2A
868 space = 2;
869 size = -2;
870 break;
871
872 CASE_ITERATOR_PRIVATE_DATA_2B
873 space = 2;
874 size = -(2 + IMM2_SIZE);
875 break;
876
877 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
878 space = 1;
879 size = 1;
880 break;
881
882 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
883 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
884 space = 2;
885 size = 1;
886 break;
887
888 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
889 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
890 space = 2;
891 size = 1 + IMM2_SIZE;
892 break;
893
894 case OP_CLASS:
895 case OP_NCLASS:
896 size += 1 + 32 / sizeof(pcre_uchar);
897 space = get_class_iterator_size(cc + size);
898 break;
899
900 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
901 case OP_XCLASS:
902 size = GET(cc, 1);
903 space = get_class_iterator_size(cc + size);
904 break;
905 #endif
906
907 case OP_RECURSE:
908 /* Set its value only once. */
909 if (common->recursive_head == 0)
910 {
911 common->recursive_head = common->ovector_start;
912 common->ovector_start += sizeof(sljit_sw);
913 }
914 cc += 1 + LINK_SIZE;
915 break;
916
917 case OP_MARK:
918 if (common->mark_ptr == 0)
919 {
920 common->mark_ptr = common->ovector_start;
921 common->ovector_start += sizeof(sljit_sw);
922 }
923 cc += 1 + 2 + cc[1];
924 break;
925
926 default:
927 cc = next_opcode(common, cc);
928 if (cc == NULL)
929 return -1;
930 break;
931 }
932
933 if (space > 0 && cc >= end)
934 private_data_length += sizeof(sljit_sw) * space;
935
936 if (size != 0)
937 {
938 if (size < 0)
939 {
940 cc += -size;
941 #ifdef SUPPORT_UTF
942 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
943 #endif
944 }
945 else
946 cc += size;
947 }
948
949 if (bracketlen != 0)
950 {
951 if (cc >= end)
952 {
953 end = bracketend(cc);
954 if (end[-1 - LINK_SIZE] == OP_KET)
955 end = NULL;
956 }
957 cc += bracketlen;
958 }
959 }
960 return private_data_length;
961 }
962
963 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
964 {
965 pcre_uchar *cc = common->start;
966 pcre_uchar *alternative;
967 pcre_uchar *end = NULL;
968 int space, size, bracketlen;
969
970 while (cc < ccend)
971 {
972 space = 0;
973 size = 0;
974 bracketlen = 0;
975 switch(*cc)
976 {
977 case OP_ASSERT:
978 case OP_ASSERT_NOT:
979 case OP_ASSERTBACK:
980 case OP_ASSERTBACK_NOT:
981 case OP_ONCE:
982 case OP_ONCE_NC:
983 case OP_BRAPOS:
984 case OP_SBRA:
985 case OP_SBRAPOS:
986 case OP_SCOND:
987 common->private_data_ptrs[cc - common->start] = private_data_ptr;
988 private_data_ptr += sizeof(sljit_sw);
989 bracketlen = 1 + LINK_SIZE;
990 break;
991
992 case OP_CBRAPOS:
993 case OP_SCBRAPOS:
994 common->private_data_ptrs[cc - common->start] = private_data_ptr;
995 private_data_ptr += sizeof(sljit_sw);
996 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
997 break;
998
999 case OP_COND:
1000 /* Might be a hidden SCOND. */
1001 alternative = cc + GET(cc, 1);
1002 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1003 {
1004 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1005 private_data_ptr += sizeof(sljit_sw);
1006 }
1007 bracketlen = 1 + LINK_SIZE;
1008 break;
1009
1010 case OP_BRA:
1011 bracketlen = 1 + LINK_SIZE;
1012 break;
1013
1014 case OP_CBRA:
1015 case OP_SCBRA:
1016 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1017 break;
1018
1019 CASE_ITERATOR_PRIVATE_DATA_1
1020 space = 1;
1021 size = -2;
1022 break;
1023
1024 CASE_ITERATOR_PRIVATE_DATA_2A
1025 space = 2;
1026 size = -2;
1027 break;
1028
1029 CASE_ITERATOR_PRIVATE_DATA_2B
1030 space = 2;
1031 size = -(2 + IMM2_SIZE);
1032 break;
1033
1034 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1035 space = 1;
1036 size = 1;
1037 break;
1038
1039 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1040 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1041 space = 2;
1042 size = 1;
1043 break;
1044
1045 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1046 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1047 space = 2;
1048 size = 1 + IMM2_SIZE;
1049 break;
1050
1051 case OP_CLASS:
1052 case OP_NCLASS:
1053 size += 1 + 32 / sizeof(pcre_uchar);
1054 space = get_class_iterator_size(cc + size);
1055 break;
1056
1057 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1058 case OP_XCLASS:
1059 size = GET(cc, 1);
1060 space = get_class_iterator_size(cc + size);
1061 break;
1062 #endif
1063
1064 default:
1065 cc = next_opcode(common, cc);
1066 SLJIT_ASSERT(cc != NULL);
1067 break;
1068 }
1069
1070 if (space > 0 && cc >= end)
1071 {
1072 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1073 private_data_ptr += sizeof(sljit_sw) * space;
1074 }
1075
1076 if (size != 0)
1077 {
1078 if (size < 0)
1079 {
1080 cc += -size;
1081 #ifdef SUPPORT_UTF
1082 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1083 #endif
1084 }
1085 else
1086 cc += size;
1087 }
1088
1089 if (bracketlen > 0)
1090 {
1091 if (cc >= end)
1092 {
1093 end = bracketend(cc);
1094 if (end[-1 - LINK_SIZE] == OP_KET)
1095 end = NULL;
1096 }
1097 cc += bracketlen;
1098 }
1099 }
1100 }
1101
1102 /* Returns with -1 if no need for frame. */
1103 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1104 {
1105 pcre_uchar *ccend = bracketend(cc);
1106 int length = 0;
1107 BOOL possessive = FALSE;
1108 BOOL setsom_found = recursive;
1109 BOOL setmark_found = recursive;
1110
1111 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1112 {
1113 length = 3;
1114 possessive = TRUE;
1115 }
1116
1117 cc = next_opcode(common, cc);
1118 SLJIT_ASSERT(cc != NULL);
1119 while (cc < ccend)
1120 switch(*cc)
1121 {
1122 case OP_SET_SOM:
1123 SLJIT_ASSERT(common->has_set_som);
1124 if (!setsom_found)
1125 {
1126 length += 2;
1127 setsom_found = TRUE;
1128 }
1129 cc += 1;
1130 break;
1131
1132 case OP_MARK:
1133 SLJIT_ASSERT(common->mark_ptr != 0);
1134 if (!setmark_found)
1135 {
1136 length += 2;
1137 setmark_found = TRUE;
1138 }
1139 cc += 1 + 2 + cc[1];
1140 break;
1141
1142 case OP_RECURSE:
1143 if (common->has_set_som && !setsom_found)
1144 {
1145 length += 2;
1146 setsom_found = TRUE;
1147 }
1148 if (common->mark_ptr != 0 && !setmark_found)
1149 {
1150 length += 2;
1151 setmark_found = TRUE;
1152 }
1153 cc += 1 + LINK_SIZE;
1154 break;
1155
1156 case OP_CBRA:
1157 case OP_CBRAPOS:
1158 case OP_SCBRA:
1159 case OP_SCBRAPOS:
1160 length += 3;
1161 cc += 1 + LINK_SIZE + IMM2_SIZE;
1162 break;
1163
1164 default:
1165 cc = next_opcode(common, cc);
1166 SLJIT_ASSERT(cc != NULL);
1167 break;
1168 }
1169
1170 /* Possessive quantifiers can use a special case. */
1171 if (SLJIT_UNLIKELY(possessive) && length == 3)
1172 return -1;
1173
1174 if (length > 0)
1175 return length + 1;
1176 return -1;
1177 }
1178
1179 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1180 {
1181 DEFINE_COMPILER;
1182 pcre_uchar *ccend = bracketend(cc);
1183 BOOL setsom_found = recursive;
1184 BOOL setmark_found = recursive;
1185 int offset;
1186
1187 /* >= 1 + shortest item size (2) */
1188 SLJIT_UNUSED_ARG(stacktop);
1189 SLJIT_ASSERT(stackpos >= stacktop + 2);
1190
1191 stackpos = STACK(stackpos);
1192 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1193 cc = next_opcode(common, cc);
1194 SLJIT_ASSERT(cc != NULL);
1195 while (cc < ccend)
1196 switch(*cc)
1197 {
1198 case OP_SET_SOM:
1199 SLJIT_ASSERT(common->has_set_som);
1200 if (!setsom_found)
1201 {
1202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1204 stackpos += (int)sizeof(sljit_sw);
1205 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1206 stackpos += (int)sizeof(sljit_sw);
1207 setsom_found = TRUE;
1208 }
1209 cc += 1;
1210 break;
1211
1212 case OP_MARK:
1213 SLJIT_ASSERT(common->mark_ptr != 0);
1214 if (!setmark_found)
1215 {
1216 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1217 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1218 stackpos += (int)sizeof(sljit_sw);
1219 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1220 stackpos += (int)sizeof(sljit_sw);
1221 setmark_found = TRUE;
1222 }
1223 cc += 1 + 2 + cc[1];
1224 break;
1225
1226 case OP_RECURSE:
1227 if (common->has_set_som && !setsom_found)
1228 {
1229 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1231 stackpos += (int)sizeof(sljit_sw);
1232 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1233 stackpos += (int)sizeof(sljit_sw);
1234 setsom_found = TRUE;
1235 }
1236 if (common->mark_ptr != 0 && !setmark_found)
1237 {
1238 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1239 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1240 stackpos += (int)sizeof(sljit_sw);
1241 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1242 stackpos += (int)sizeof(sljit_sw);
1243 setmark_found = TRUE;
1244 }
1245 cc += 1 + LINK_SIZE;
1246 break;
1247
1248 case OP_CBRA:
1249 case OP_CBRAPOS:
1250 case OP_SCBRA:
1251 case OP_SCBRAPOS:
1252 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1253 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1254 stackpos += (int)sizeof(sljit_sw);
1255 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1256 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1258 stackpos += (int)sizeof(sljit_sw);
1259 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1260 stackpos += (int)sizeof(sljit_sw);
1261
1262 cc += 1 + LINK_SIZE + IMM2_SIZE;
1263 break;
1264
1265 default:
1266 cc = next_opcode(common, cc);
1267 SLJIT_ASSERT(cc != NULL);
1268 break;
1269 }
1270
1271 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1272 SLJIT_ASSERT(stackpos == STACK(stacktop));
1273 }
1274
1275 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1276 {
1277 int private_data_length = 2;
1278 int size;
1279 pcre_uchar *alternative;
1280 /* Calculate the sum of the private machine words. */
1281 while (cc < ccend)
1282 {
1283 size = 0;
1284 switch(*cc)
1285 {
1286 case OP_ASSERT:
1287 case OP_ASSERT_NOT:
1288 case OP_ASSERTBACK:
1289 case OP_ASSERTBACK_NOT:
1290 case OP_ONCE:
1291 case OP_ONCE_NC:
1292 case OP_BRAPOS:
1293 case OP_SBRA:
1294 case OP_SBRAPOS:
1295 case OP_SCOND:
1296 private_data_length++;
1297 cc += 1 + LINK_SIZE;
1298 break;
1299
1300 case OP_CBRA:
1301 case OP_SCBRA:
1302 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1303 private_data_length++;
1304 cc += 1 + LINK_SIZE + IMM2_SIZE;
1305 break;
1306
1307 case OP_CBRAPOS:
1308 case OP_SCBRAPOS:
1309 private_data_length += 2;
1310 cc += 1 + LINK_SIZE + IMM2_SIZE;
1311 break;
1312
1313 case OP_COND:
1314 /* Might be a hidden SCOND. */
1315 alternative = cc + GET(cc, 1);
1316 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1317 private_data_length++;
1318 cc += 1 + LINK_SIZE;
1319 break;
1320
1321 CASE_ITERATOR_PRIVATE_DATA_1
1322 if (PRIVATE_DATA(cc))
1323 private_data_length++;
1324 cc += 2;
1325 #ifdef SUPPORT_UTF
1326 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1327 #endif
1328 break;
1329
1330 CASE_ITERATOR_PRIVATE_DATA_2A
1331 if (PRIVATE_DATA(cc))
1332 private_data_length += 2;
1333 cc += 2;
1334 #ifdef SUPPORT_UTF
1335 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1336 #endif
1337 break;
1338
1339 CASE_ITERATOR_PRIVATE_DATA_2B
1340 if (PRIVATE_DATA(cc))
1341 private_data_length += 2;
1342 cc += 2 + IMM2_SIZE;
1343 #ifdef SUPPORT_UTF
1344 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1345 #endif
1346 break;
1347
1348 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1349 if (PRIVATE_DATA(cc))
1350 private_data_length++;
1351 cc += 1;
1352 break;
1353
1354 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1355 if (PRIVATE_DATA(cc))
1356 private_data_length += 2;
1357 cc += 1;
1358 break;
1359
1360 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1361 if (PRIVATE_DATA(cc))
1362 private_data_length += 2;
1363 cc += 1 + IMM2_SIZE;
1364 break;
1365
1366 case OP_CLASS:
1367 case OP_NCLASS:
1368 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1369 case OP_XCLASS:
1370 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1371 #else
1372 size = 1 + 32 / (int)sizeof(pcre_uchar);
1373 #endif
1374 if (PRIVATE_DATA(cc))
1375 private_data_length += get_class_iterator_size(cc + size);
1376 cc += size;
1377 break;
1378
1379 default:
1380 cc = next_opcode(common, cc);
1381 SLJIT_ASSERT(cc != NULL);
1382 break;
1383 }
1384 }
1385 SLJIT_ASSERT(cc == ccend);
1386 return private_data_length;
1387 }
1388
/* Emits the code which copies the private data words of the byte code range
[cc, ccend) between the local area (addressed through SLJIT_LOCALS_REG) and
the machine stack (addressed through STACK_TOP).

  save == TRUE:  locals are stored to the stack; the first stored word is
                 common->recursive_head.
  save == FALSE: the stack words are loaded back to the locals; the first
                 stack word is skipped (recursive_head is not restored).

stackptr and stacktop are stack slot indexes; they are converted to byte
offsets with STACK(). The values travel through TMP1 and TMP2, which are used
alternately as staging registers. The set of opcodes visited here mirrors
get_private_data_length_for_copy(). */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop)
{
DEFINE_COMPILER;
/* Local offsets (at most two) collected for the current opcode. */
int srcw[2];
int count, size;
/* tmp1next tells which staging register is used by the next transfer;
tmpNempty tells whether the register currently holds no pending value. */
BOOL tmp1next = TRUE;
BOOL tmp1empty = TRUE;
BOOL tmp2empty = TRUE;
pcre_uchar *alternative;
enum {
  start,
  loop,
  end
} status;

/* The extra 'start' step (recursive_head) is only performed when saving. */
status = save ? start : loop;
stackptr = STACK(stackptr - 2);
stacktop = STACK(stacktop - 1);

if (!save)
  {
  /* Skip the first stack word, then preload up to two values so that both
  staging registers are filled before the first store to a local. */
  stackptr += sizeof(sljit_sw);
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp1empty = FALSE;
    }
  if (stackptr < stacktop)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
    stackptr += sizeof(sljit_sw);
    tmp2empty = FALSE;
    }
  /* The tmp1next must be TRUE in either way. */
  }

while (status != end)
  {
  /* Phase 1: determine the local offsets (srcw[0..count-1]) which belong
  to the next item. count stays zero for opcodes without private data. */
  count = 0;
  switch(status)
    {
    case start:
    SLJIT_ASSERT(save && common->recursive_head != 0);
    count = 1;
    srcw[0] = common->recursive_head;
    status = loop;
    break;

    case loop:
    if (cc >= ccend)
      {
      status = end;
      break;
      }

    switch(*cc)
      {
      case OP_ASSERT:
      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      case OP_ONCE:
      case OP_ONCE_NC:
      case OP_BRAPOS:
      case OP_SBRA:
      case OP_SBRAPOS:
      case OP_SCOND:
      count = 1;
      srcw[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(srcw[0] != 0);
      cc += 1 + LINK_SIZE;
      break;

      case OP_CBRA:
      case OP_SCBRA:
      /* Only non-optimized capturing brackets own a private ovector word. */
      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
        {
        count = 1;
        srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
        }
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_CBRAPOS:
      case OP_SCBRAPOS:
      /* Both the private data word and the private ovector word. */
      count = 2;
      srcw[0] = PRIVATE_DATA(cc);
      srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

      case OP_COND:
      /* Might be a hidden SCOND. */
      alternative = cc + GET(cc, 1);
      if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(srcw[0] != 0);
        }
      cc += 1 + LINK_SIZE;
      break;

      /* Character iterators: one or two words, only if private data was
      assigned to them. In UTF mode the extra code units of the character
      are skipped as well. */
      CASE_ITERATOR_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      CASE_ITERATOR_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
        }
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

      /* Character type iterators: no character follows the opcode. */
      CASE_ITERATOR_TYPE_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        {
        count = 1;
        srcw[0] = PRIVATE_DATA(cc);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1;
      break;

      CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        {
        count = 2;
        srcw[0] = PRIVATE_DATA(cc);
        srcw[1] = srcw[0] + sizeof(sljit_sw);
        }
      cc += 1 + IMM2_SIZE;
      break;

      case OP_CLASS:
      case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
      case OP_XCLASS:
      size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
      size = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
      /* The number of words depends on the iterator following the class. */
      if (PRIVATE_DATA(cc))
        switch(get_class_iterator_size(cc + size))
          {
          case 1:
          count = 1;
          srcw[0] = PRIVATE_DATA(cc);
          break;

          case 2:
          count = 2;
          srcw[0] = PRIVATE_DATA(cc);
          srcw[1] = srcw[0] + sizeof(sljit_sw);
          break;

          default:
          SLJIT_ASSERT_STOP();
          break;
          }
      cc += size;
      break;

      default:
      cc = next_opcode(common, cc);
      SLJIT_ASSERT(cc != NULL);
      break;
      }
    break;

    case end:
    SLJIT_ASSERT_STOP();
    break;
    }

  /* Phase 2: transfer the collected words. The entries are processed from
  srcw[count - 1] down to srcw[0], alternating between TMP1 and TMP2. */
  while (count > 0)
    {
    count--;
    if (save)
      {
      /* Saving: write the previous content of the staging register to the
      stack (if any), then load the next local into it. */
      if (tmp1next)
        {
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
        tmp1empty = FALSE;
        tmp1next = FALSE;
        }
      else
        {
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
          stackptr += sizeof(sljit_sw);
          }
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
        tmp2empty = FALSE;
        tmp1next = TRUE;
        }
      }
    else
      {
      /* Restoring: store the staged value to the local, then refill the
      register from the stack unless the stack area is exhausted. */
      if (tmp1next)
        {
        SLJIT_ASSERT(!tmp1empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
        tmp1empty = stackptr >= stacktop;
        if (!tmp1empty)
          {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = FALSE;
        }
      else
        {
        SLJIT_ASSERT(!tmp2empty);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
        tmp2empty = stackptr >= stacktop;
        if (!tmp2empty)
          {
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
          }
        tmp1next = TRUE;
        }
      }
    }
  }

if (save)
  {
  /* Flush the values still pending in the staging registers. The register
  which would be used next holds the older value, so it is written first. */
  if (tmp1next)
    {
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  else
    {
    if (!tmp2empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
      stackptr += sizeof(sljit_sw);
      }
    if (!tmp1empty)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
      stackptr += sizeof(sljit_sw);
      }
    }
  }
/* The whole range was processed, the stack area was used up exactly, and
(when restoring) no loaded value was left unconsumed. */
SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
}
1692
1693 #undef CASE_ITERATOR_PRIVATE_DATA_1
1694 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1695 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1696 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1697 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1698 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1699
1700 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1701 {
1702 return (value & (value - 1)) == 0;
1703 }
1704
1705 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1706 {
1707 while (list)
1708 {
1709 /* sljit_set_label is clever enough to do nothing
1710 if either the jump or the label is NULL. */
1711 sljit_set_label(list->jump, label);
1712 list = list->next;
1713 }
1714 }
1715
1716 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1717 {
1718 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1719 if (list_item)
1720 {
1721 list_item->next = *list;
1722 list_item->jump = jump;
1723 *list = list_item;
1724 }
1725 }
1726
1727 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1728 {
1729 DEFINE_COMPILER;
1730 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1731
1732 if (list_item)
1733 {
1734 list_item->type = type;
1735 list_item->data = data;
1736 list_item->start = start;
1737 list_item->quit = LABEL();
1738 list_item->next = common->stubs;
1739 common->stubs = list_item;
1740 }
1741 }
1742
1743 static void flush_stubs(compiler_common *common)
1744 {
1745 DEFINE_COMPILER;
1746 stub_list* list_item = common->stubs;
1747
1748 while (list_item)
1749 {
1750 JUMPHERE(list_item->start);
1751 switch(list_item->type)
1752 {
1753 case stack_alloc:
1754 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1755 break;
1756 }
1757 JUMPTO(SLJIT_JUMP, list_item->quit);
1758 list_item = list_item->next;
1759 }
1760 common->stubs = NULL;
1761 }
1762
1763 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1764 {
1765 DEFINE_COMPILER;
1766
1767 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1768 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1769 }
1770
1771 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1772 {
1773 /* May destroy all locals and registers except TMP2. */
1774 DEFINE_COMPILER;
1775
1776 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1777 #ifdef DESTROY_REGISTERS
1778 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1779 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1780 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1781 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1783 #endif
1784 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1785 }
1786
1787 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1788 {
1789 DEFINE_COMPILER;
1790 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1791 }
1792
1793 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1794 {
1795 DEFINE_COMPILER;
1796 struct sljit_label *loop;
1797 int i;
1798 /* At this point we can freely use all temporary registers. */
1799 /* TMP1 returns with begin - 1. */
1800 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1801 if (length < 8)
1802 {
1803 for (i = 0; i < length; i++)
1804 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1805 }
1806 else
1807 {
1808 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START - sizeof(sljit_sw));
1809 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length);
1810 loop = LABEL();
1811 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1812 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1813 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1814 }
1815 }
1816
1817 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1818 {
1819 DEFINE_COMPILER;
1820 struct sljit_label *loop;
1821 struct sljit_jump *earlyexit;
1822
1823 /* At this point we can freely use all registers. */
1824 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1825 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1826
1827 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
1828 if (common->mark_ptr != 0)
1829 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1830 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1831 if (common->mark_ptr != 0)
1832 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
1833 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1834 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1835 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1836 /* Unlikely, but possible */
1837 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
1838 loop = LABEL();
1839 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
1840 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
1841 /* Copy the integer value to the output buffer */
1842 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1843 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1844 #endif
1845 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1846 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1847 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1848 JUMPHERE(earlyexit);
1849
1850 /* Calculate the return value, which is the maximum ovector value. */
1851 if (topbracket > 1)
1852 {
1853 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
1854 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
1855
1856 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1857 loop = LABEL();
1858 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
1859 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1860 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1861 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
1862 }
1863 else
1864 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1865 }
1866
1867 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1868 {
1869 DEFINE_COMPILER;
1870
1871 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1872 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1873
1874 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
1875 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1876 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1877 CMPTO(SLJIT_C_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
1878
1879 /* Store match begin and end. */
1880 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1881 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1882 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1883 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1884 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1885 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1886 #endif
1887 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1888
1889 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
1890 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1891 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
1892 #endif
1893 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
1894
1895 JUMPTO(SLJIT_JUMP, quit);
1896 }
1897
1898 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1899 {
1900 /* May destroy TMP1. */
1901 DEFINE_COMPILER;
1902 struct sljit_jump *jump;
1903
1904 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1905 {
1906 /* The value of -1 must be kept for start_used_ptr! */
1907 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1908 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1909 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1910 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1911 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1912 JUMPHERE(jump);
1913 }
1914 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1915 {
1916 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1917 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1918 JUMPHERE(jump);
1919 }
1920 }
1921
1922 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1923 {
1924 /* Detects if the character has an othercase. */
1925 unsigned int c;
1926
1927 #ifdef SUPPORT_UTF
1928 if (common->utf)
1929 {
1930 GETCHAR(c, cc);
1931 if (c > 127)
1932 {
1933 #ifdef SUPPORT_UCP
1934 return c != UCD_OTHERCASE(c);
1935 #else
1936 return FALSE;
1937 #endif
1938 }
1939 #ifndef COMPILE_PCRE8
1940 return common->fcc[c] != c;
1941 #endif
1942 }
1943 else
1944 #endif
1945 c = *cc;
1946 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1947 }
1948
1949 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1950 {
1951 /* Returns with the othercase. */
1952 #ifdef SUPPORT_UTF
1953 if (common->utf && c > 127)
1954 {
1955 #ifdef SUPPORT_UCP
1956 return UCD_OTHERCASE(c);
1957 #else
1958 return c;
1959 #endif
1960 }
1961 #endif
1962 return TABLE_GET(c, common->fcc, c);
1963 }
1964
1965 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1966 {
1967 /* Detects if the character and its othercase has only 1 bit difference. */
1968 unsigned int c, oc, bit;
1969 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1970 int n;
1971 #endif
1972
1973 #ifdef SUPPORT_UTF
1974 if (common->utf)
1975 {
1976 GETCHAR(c, cc);
1977 if (c <= 127)
1978 oc = common->fcc[c];
1979 else
1980 {
1981 #ifdef SUPPORT_UCP
1982 oc = UCD_OTHERCASE(c);
1983 #else
1984 oc = c;
1985 #endif
1986 }
1987 }
1988 else
1989 {
1990 c = *cc;
1991 oc = TABLE_GET(c, common->fcc, c);
1992 }
1993 #else
1994 c = *cc;
1995 oc = TABLE_GET(c, common->fcc, c);
1996 #endif
1997
1998 SLJIT_ASSERT(c != oc);
1999
2000 bit = c ^ oc;
2001 /* Optimized for English alphabet. */
2002 if (c <= 127 && bit == 0x20)
2003 return (0 << 8) | 0x20;
2004
2005 /* Since c != oc, they must have at least 1 bit difference. */
2006 if (!is_powerof2(bit))
2007 return 0;
2008
2009 #if defined COMPILE_PCRE8
2010
2011 #ifdef SUPPORT_UTF
2012 if (common->utf && c > 127)
2013 {
2014 n = GET_EXTRALEN(*cc);
2015 while ((bit & 0x3f) == 0)
2016 {
2017 n--;
2018 bit >>= 6;
2019 }
2020 return (n << 8) | bit;
2021 }
2022 #endif /* SUPPORT_UTF */
2023 return (0 << 8) | bit;
2024
2025 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2026
2027 #ifdef SUPPORT_UTF
2028 if (common->utf && c > 65535)
2029 {
2030 if (bit >= (1 << 10))
2031 bit >>= 10;
2032 else
2033 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2034 }
2035 #endif /* SUPPORT_UTF */
2036 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2037
2038 #endif /* COMPILE_PCRE[8|16|32] */
2039 }
2040
2041 static void check_partial(compiler_common *common, BOOL force)
2042 {
2043 /* Checks whether a partial matching is occured. Does not modify registers. */
2044 DEFINE_COMPILER;
2045 struct sljit_jump *jump = NULL;
2046
2047 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2048
2049 if (common->mode == JIT_COMPILE)
2050 return;
2051
2052 if (!force)
2053 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2054 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2055 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2056
2057 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2059 else
2060 {
2061 if (common->partialmatchlabel != NULL)
2062 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2063 else
2064 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2065 }
2066
2067 if (jump != NULL)
2068 JUMPHERE(jump);
2069 }
2070
2071 static struct sljit_jump *check_str_end(compiler_common *common)
2072 {
2073 /* Does not affect registers. Usually used in a tight spot. */
2074 DEFINE_COMPILER;
2075 struct sljit_jump *jump;
2076 struct sljit_jump *nohit;
2077 struct sljit_jump *return_value;
2078
2079 if (common->mode == JIT_COMPILE)
2080 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2081
2082 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2083 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2084 {
2085 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2086 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2087 JUMPHERE(nohit);
2088 return_value = JUMP(SLJIT_JUMP);
2089 }
2090 else
2091 {
2092 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2093 if (common->partialmatchlabel != NULL)
2094 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2095 else
2096 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2097 }
2098 JUMPHERE(jump);
2099 return return_value;
2100 }
2101
2102 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2103 {
2104 DEFINE_COMPILER;
2105 struct sljit_jump *jump;
2106
2107 if (common->mode == JIT_COMPILE)
2108 {
2109 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2110 return;
2111 }
2112
2113 /* Partial matching mode. */
2114 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2115 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2116 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2117 {
2118 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2119 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2120 }
2121 else
2122 {
2123 if (common->partialmatchlabel != NULL)
2124 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2125 else
2126 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2127 }
2128 JUMPHERE(jump);
2129 }
2130
2131 static void read_char(compiler_common *common)
2132 {
2133 /* Reads the character into TMP1, updates STR_PTR.
2134 Does not check STR_END. TMP2 Destroyed. */
2135 DEFINE_COMPILER;
2136 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2137 struct sljit_jump *jump;
2138 #endif
2139
2140 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2141 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2142 if (common->utf)
2143 {
2144 #if defined COMPILE_PCRE8
2145 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2146 #elif defined COMPILE_PCRE16
2147 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2148 #endif /* COMPILE_PCRE[8|16] */
2149 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2150 JUMPHERE(jump);
2151 }
2152 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2153 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2154 }
2155
2156 static void peek_char(compiler_common *common)
2157 {
2158 /* Reads the character into TMP1, keeps STR_PTR.
2159 Does not check STR_END. TMP2 Destroyed. */
2160 DEFINE_COMPILER;
2161 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2162 struct sljit_jump *jump;
2163 #endif
2164
2165 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2166 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2167 if (common->utf)
2168 {
2169 #if defined COMPILE_PCRE8
2170 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2171 #elif defined COMPILE_PCRE16
2172 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2173 #endif /* COMPILE_PCRE[8|16] */
2174 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2175 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2176 JUMPHERE(jump);
2177 }
2178 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2179 }
2180
2181 static void read_char8_type(compiler_common *common)
2182 {
2183 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2184 DEFINE_COMPILER;
2185 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2186 struct sljit_jump *jump;
2187 #endif
2188
2189 #ifdef SUPPORT_UTF
2190 if (common->utf)
2191 {
2192 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2193 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2194 #if defined COMPILE_PCRE8
2195 /* This can be an extra read in some situations, but hopefully
2196 it is needed in most cases. */
2197 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2198 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2199 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2200 JUMPHERE(jump);
2201 #elif defined COMPILE_PCRE16
2202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2203 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2204 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2205 JUMPHERE(jump);
2206 /* Skip low surrogate if necessary. */
2207 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2208 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2209 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2210 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2211 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2212 #elif defined COMPILE_PCRE32
2213 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2214 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2215 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2216 JUMPHERE(jump);
2217 #endif /* COMPILE_PCRE[8|16|32] */
2218 return;
2219 }
2220 #endif /* SUPPORT_UTF */
2221 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2222 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2223 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2224 /* The ctypes array contains only 256 values. */
2225 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2226 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2227 #endif
2228 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2229 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2230 JUMPHERE(jump);
2231 #endif
2232 }
2233
2234 static void skip_char_back(compiler_common *common)
2235 {
2236 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2237 DEFINE_COMPILER;
2238 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2239 #if defined COMPILE_PCRE8
2240 struct sljit_label *label;
2241
2242 if (common->utf)
2243 {
2244 label = LABEL();
2245 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2246 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2247 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2248 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2249 return;
2250 }
2251 #elif defined COMPILE_PCRE16
2252 if (common->utf)
2253 {
2254 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2255 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2256 /* Skip low surrogate if necessary. */
2257 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2258 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2259 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2260 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2261 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2262 return;
2263 }
2264 #endif /* COMPILE_PCRE[8|16] */
2265 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2266 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2267 }
2268
2269 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2270 {
2271 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2272 DEFINE_COMPILER;
2273
2274 if (nltype == NLTYPE_ANY)
2275 {
2276 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2277 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2278 }
2279 else if (nltype == NLTYPE_ANYCRLF)
2280 {
2281 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2282 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2283 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2284 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2285 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2286 }
2287 else
2288 {
2289 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2290 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2291 }
2292 }
2293
2294 #ifdef SUPPORT_UTF
2295
2296 #if defined COMPILE_PCRE8
2297 static void do_utfreadchar(compiler_common *common)
2298 {
2299 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2300 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2301 DEFINE_COMPILER;
2302 struct sljit_jump *jump;
2303
2304 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2305 /* Searching for the first zero. */
2306 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2307 jump = JUMP(SLJIT_C_NOT_ZERO);
2308 /* Two byte sequence. */
2309 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2310 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2311 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2312 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2313 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2314 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2315 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2316 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2317 JUMPHERE(jump);
2318
2319 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2320 jump = JUMP(SLJIT_C_NOT_ZERO);
2321 /* Three byte sequence. */
2322 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2323 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2324 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2325 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2326 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2327 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2328 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2329 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2330 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2331 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2332 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2333 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2334 JUMPHERE(jump);
2335
2336 /* Four byte sequence. */
2337 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2338 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2339 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2340 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2341 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2342 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2343 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2344 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2345 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2346 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2347 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2348 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2349 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2350 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2351 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2352 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2353 }
2354
2355 static void do_utfreadtype8(compiler_common *common)
2356 {
2357 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2358 of the character (>= 0xc0). Return value in TMP1. */
2359 DEFINE_COMPILER;
2360 struct sljit_jump *jump;
2361 struct sljit_jump *compare;
2362
2363 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2364
2365 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2366 jump = JUMP(SLJIT_C_NOT_ZERO);
2367 /* Two byte sequence. */
2368 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2369 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2370 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2371 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2372 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2373 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2374 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2375 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2376 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2377
2378 JUMPHERE(compare);
2379 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2380 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2381 JUMPHERE(jump);
2382
2383 /* We only have types for characters less than 256. */
2384 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2385 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2386 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2387 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2388 }
2389
2390 #elif defined COMPILE_PCRE16
2391
2392 static void do_utfreadchar(compiler_common *common)
2393 {
2394 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2395 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2396 DEFINE_COMPILER;
2397 struct sljit_jump *jump;
2398
2399 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2400 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2401 /* Do nothing, only return. */
2402 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2403
2404 JUMPHERE(jump);
2405 /* Combine two 16 bit characters. */
2406 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2407 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2408 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2409 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2410 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2411 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2412 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2413 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2414 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2415 }
2416
2417 #endif /* COMPILE_PCRE[8|16] */
2418
2419 #endif /* SUPPORT_UTF */
2420
2421 #ifdef SUPPORT_UCP
2422
2423 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2424 #define UCD_BLOCK_MASK 127
2425 #define UCD_BLOCK_SHIFT 7
2426
2427 static void do_getucd(compiler_common *common)
2428 {
2429 /* Search the UCD record for the character comes in TMP1.
2430 Returns chartype in TMP1 and UCD offset in TMP2. */
2431 DEFINE_COMPILER;
2432
2433 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2434
2435 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2436 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2437 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2438 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2439 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2440 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2441 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2442 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2443 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2444 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2445 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2446 }
2447 #endif
2448
2449 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2450 {
2451 DEFINE_COMPILER;
2452 struct sljit_label *mainloop;
2453 struct sljit_label *newlinelabel = NULL;
2454 struct sljit_jump *start;
2455 struct sljit_jump *end = NULL;
2456 struct sljit_jump *nl = NULL;
2457 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2458 struct sljit_jump *singlechar;
2459 #endif
2460 jump_list *newline = NULL;
2461 BOOL newlinecheck = FALSE;
2462 BOOL readuchar = FALSE;
2463
2464 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2465 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2466 newlinecheck = TRUE;
2467
2468 if (firstline)
2469 {
2470 /* Search for the end of the first line. */
2471 SLJIT_ASSERT(common->first_line_end != 0);
2472 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2473
2474 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2475 {
2476 mainloop = LABEL();
2477 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2478 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2479 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2480 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2481 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2482 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2483 JUMPHERE(end);
2484 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2485 }
2486 else
2487 {
2488 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2489 mainloop = LABEL();
2490 /* Continual stores does not cause data dependency. */
2491 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2492 read_char(common);
2493 check_newlinechar(common, common->nltype, &newline, TRUE);
2494 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2495 JUMPHERE(end);
2496 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2497 set_jumps(newline, LABEL());
2498 }
2499
2500 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2501 }
2502
2503 start = JUMP(SLJIT_JUMP);
2504
2505 if (newlinecheck)
2506 {
2507 newlinelabel = LABEL();
2508 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2509 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2510 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2511 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2512 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2513 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2514 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2515 #endif
2516 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2517 nl = JUMP(SLJIT_JUMP);
2518 }
2519
2520 mainloop = LABEL();
2521
2522 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2523 #ifdef SUPPORT_UTF
2524 if (common->utf) readuchar = TRUE;
2525 #endif
2526 if (newlinecheck) readuchar = TRUE;
2527
2528 if (readuchar)
2529 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2530
2531 if (newlinecheck)
2532 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2533
2534 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2535 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2536 #if defined COMPILE_PCRE8
2537 if (common->utf)
2538 {
2539 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2540 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2541 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2542 JUMPHERE(singlechar);
2543 }
2544 #elif defined COMPILE_PCRE16
2545 if (common->utf)
2546 {
2547 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2548 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2549 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2550 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2551 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2552 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2553 JUMPHERE(singlechar);
2554 }
2555 #endif /* COMPILE_PCRE[8|16] */
2556 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2557 JUMPHERE(start);
2558
2559 if (newlinecheck)
2560 {
2561 JUMPHERE(end);
2562 JUMPHERE(nl);
2563 }
2564
2565 return mainloop;
2566 }
2567
2568 #define MAX_N_CHARS 3
2569
2570 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2571 {
2572 DEFINE_COMPILER;
2573 struct sljit_label *start;
2574 struct sljit_jump *quit;
2575 pcre_uint32 chars[MAX_N_CHARS * 2];
2576 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2577 int location = 0;
2578 pcre_int32 len, c, bit, caseless;
2579 int must_stop;
2580
2581 /* We do not support alternatives now. */
2582 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2583 return FALSE;
2584
2585 while (TRUE)
2586 {
2587 caseless = 0;
2588 must_stop = 1;
2589 switch(*cc)
2590 {
2591 case OP_CHAR:
2592 must_stop = 0;
2593 cc++;
2594 break;
2595
2596 case OP_CHARI:
2597 caseless = 1;
2598 must_stop = 0;
2599 cc++;
2600 break;
2601
2602 case OP_SOD:
2603 case OP_SOM:
2604 case OP_SET_SOM:
2605 case OP_NOT_WORD_BOUNDARY:
2606 case OP_WORD_BOUNDARY:
2607 case OP_EODN:
2608 case OP_EOD:
2609 case OP_CIRC:
2610 case OP_CIRCM:
2611 case OP_DOLL:
2612 case OP_DOLLM:
2613 /* Zero width assertions. */
2614 cc++;
2615 continue;
2616
2617 case OP_PLUS:
2618 case OP_MINPLUS:
2619 case OP_POSPLUS:
2620 cc++;
2621 break;
2622
2623 case OP_EXACT:
2624 cc += 1 + IMM2_SIZE;
2625 break;
2626
2627 case OP_PLUSI:
2628 case OP_MINPLUSI:
2629 case OP_POSPLUSI:
2630 caseless = 1;
2631 cc++;
2632 break;
2633
2634 case OP_EXACTI:
2635 caseless = 1;
2636 cc += 1 + IMM2_SIZE;
2637 break;
2638
2639 default:
2640 must_stop = 2;
2641 break;
2642 }
2643
2644 if (must_stop == 2)
2645 break;
2646
2647 len = 1;
2648 #ifdef SUPPORT_UTF
2649 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2650 #endif
2651
2652 if (caseless && char_has_othercase(common, cc))
2653 {
2654 caseless = char_get_othercase_bit(common, cc);
2655 if (caseless == 0)
2656 return FALSE;
2657 #ifdef COMPILE_PCRE8
2658 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2659 #else
2660 if ((caseless & 0x100) != 0)
2661 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2662 else
2663 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2664 #endif
2665 }
2666 else
2667 caseless = 0;
2668
2669 while (len > 0 && location < MAX_N_CHARS * 2)
2670 {
2671 c = *cc;
2672 bit = 0;
2673 if (len == (caseless & 0xff))
2674 {
2675 bit = caseless >> 8;
2676 c |= bit;
2677 }
2678
2679 chars[location] = c;
2680 chars[location + 1] = bit;
2681
2682 len--;
2683 location += 2;
2684 cc++;
2685 }
2686
2687 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2688 break;
2689 }
2690
2691 /* At least two characters are required. */
2692 if (location < 2 * 2)
2693 return FALSE;
2694
2695 if (firstline)
2696 {
2697 SLJIT_ASSERT(common->first_line_end != 0);
2698 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2699 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2700 }
2701 else
2702 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2703
2704 start = LABEL();
2705 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2706
2707 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2708 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2709 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2710 if (chars[1] != 0)
2711 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2712 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2713 if (location > 2 * 2)
2714 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2715 if (chars[3] != 0)
2716 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2717 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2718 if (location > 2 * 2)
2719 {
2720 if (chars[5] != 0)
2721 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2722 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2723 }
2724 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2725
2726 JUMPHERE(quit);
2727
2728 if (firstline)
2729 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2730 else
2731 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2732 return TRUE;
2733 }
2734
2735 #undef MAX_N_CHARS
2736
2737 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2738 {
2739 DEFINE_COMPILER;
2740 struct sljit_label *start;
2741 struct sljit_jump *quit;
2742 struct sljit_jump *found;
2743 pcre_uchar oc, bit;
2744
2745 if (firstline)
2746 {
2747 SLJIT_ASSERT(common->first_line_end != 0);
2748 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2749 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2750 }
2751
2752 start = LABEL();
2753 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2754 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2755
2756 oc = first_char;
2757 if (caseless)
2758 {
2759 oc = TABLE_GET(first_char, common->fcc, first_char);
2760 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2761 if (first_char > 127 && common->utf)
2762 oc = UCD_OTHERCASE(first_char);
2763 #endif
2764 }
2765 if (first_char == oc)
2766 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2767 else
2768 {
2769 bit = first_char ^ oc;
2770 if (is_powerof2(bit))
2771 {
2772 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2773 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2774 }
2775 else
2776 {
2777 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2778 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2779 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2780 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2781 found = JUMP(SLJIT_C_NOT_ZERO);
2782 }
2783 }
2784
2785 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2786 JUMPTO(SLJIT_JUMP, start);
2787 JUMPHERE(found);
2788 JUMPHERE(quit);
2789
2790 if (firstline)
2791 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2792 }
2793
2794 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2795 {
2796 DEFINE_COMPILER;
2797 struct sljit_label *loop;
2798 struct sljit_jump *lastchar;
2799 struct sljit_jump *firstchar;
2800 struct sljit_jump *quit;
2801 struct sljit_jump *foundcr = NULL;
2802 struct sljit_jump *notfoundnl;
2803 jump_list *newline = NULL;
2804
2805 if (firstline)
2806 {
2807 SLJIT_ASSERT(common->first_line_end != 0);
2808 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2809 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2810 }
2811
2812 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2813 {
2814 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2815 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2816 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2817 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2818 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2819
2820 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2821 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2822 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
2823 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2824 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
2825 #endif
2826 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2827
2828 loop = LABEL();
2829 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2830 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2831 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2832 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2833 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2834 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2835
2836 JUMPHERE(quit);
2837 JUMPHERE(firstchar);
2838 JUMPHERE(lastchar);
2839
2840 if (firstline)
2841 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2842 return;
2843 }
2844
2845 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2846 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2847 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2848 skip_char_back(common);
2849
2850 loop = LABEL();
2851 read_char(common);
2852 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2853 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2854 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2855 check_newlinechar(common, common->nltype, &newline, FALSE);
2856 set_jumps(newline, loop);
2857
2858 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2859 {
2860 quit = JUMP(SLJIT_JUMP);
2861 JUMPHERE(foundcr);
2862 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2863 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2864 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2865 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2866 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2867 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2868 #endif
2869 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2870 JUMPHERE(notfoundnl);
2871 JUMPHERE(quit);
2872 }
2873 JUMPHERE(lastchar);
2874 JUMPHERE(firstchar);
2875
2876 if (firstline)
2877 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2878 }
2879
2880 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
2881
2882 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2883 {
2884 DEFINE_COMPILER;
2885 struct sljit_label *start;
2886 struct sljit_jump *quit;
2887 struct sljit_jump *found = NULL;
2888 jump_list *matches = NULL;
2889 pcre_uint8 inverted_start_bits[32];
2890 int i;
2891 #ifndef COMPILE_PCRE8
2892 struct sljit_jump *jump;
2893 #endif
2894
2895 for (i = 0; i < 32; ++i)
2896 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
2897
2898 if (firstline)
2899 {
2900 SLJIT_ASSERT(common->first_line_end != 0);
2901 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
2902 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2903 }
2904
2905 start = LABEL();
2906 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2907 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2908 #ifdef SUPPORT_UTF
2909 if (common->utf)
2910 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2911 #endif
2912
2913 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
2914 {
2915 #ifndef COMPILE_PCRE8
2916 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2917 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2918 JUMPHERE(jump);
2919 #endif
2920 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2921 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2922 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2923 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2924 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2925 found = JUMP(SLJIT_C_NOT_ZERO);
2926 }
2927
2928 #ifdef SUPPORT_UTF
2929 if (common->utf)
2930 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2931 #endif
2932 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2933 #ifdef SUPPORT_UTF
2934 #if defined COMPILE_PCRE8
2935 if (common->utf)
2936 {
2937 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2938 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2939 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2940 }
2941 #elif defined COMPILE_PCRE16
2942 if (common->utf)
2943 {
2944 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2945 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2946 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2947 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2948 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2949 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2950 }
2951 #endif /* COMPILE_PCRE[8|16] */
2952 #endif /* SUPPORT_UTF */
2953 JUMPTO(SLJIT_JUMP, start);
2954 if (found != NULL)
2955 JUMPHERE(found);
2956 if (matches != NULL)
2957 set_jumps(matches, LABEL());
2958 JUMPHERE(quit);
2959
2960 if (firstline)
2961 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
2962 }
2963
2964 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2965 {
2966 DEFINE_COMPILER;
2967 struct sljit_label *loop;
2968 struct sljit_jump *toolong;
2969 struct sljit_jump *alreadyfound;
2970 struct sljit_jump *found;
2971 struct sljit_jump *foundoc = NULL;
2972 struct sljit_jump *notfound;
2973 pcre_uint32 oc, bit;
2974
2975 SLJIT_ASSERT(common->req_char_ptr != 0);
2976 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2977 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2978 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2979 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2980
2981 if (has_firstchar)
2982 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2983 else
2984 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2985
2986 loop = LABEL();
2987 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2988
2989 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2990 oc = req_char;
2991 if (caseless)
2992 {
2993 oc = TABLE_GET(req_char, common->fcc, req_char);
2994 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2995 if (req_char > 127 && common->utf)
2996 oc = UCD_OTHERCASE(req_char);
2997 #endif
2998 }
2999 if (req_char == oc)
3000 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3001 else
3002 {
3003 bit = req_char ^ oc;
3004 if (is_powerof2(bit))
3005 {
3006 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3007 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3008 }
3009 else
3010 {
3011 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3012 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3013 }
3014 }
3015 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3016 JUMPTO(SLJIT_JUMP, loop);
3017
3018 JUMPHERE(found);
3019 if (foundoc)
3020 JUMPHERE(foundoc);
3021 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3022 JUMPHERE(alreadyfound);
3023 JUMPHERE(toolong);
3024 return notfound;
3025 }
3026
3027 static void do_revertframes(compiler_common *common)
3028 {
3029 DEFINE_COMPILER;
3030 struct sljit_jump *jump;
3031 struct sljit_label *mainloop;
3032
3033 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3034 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3035 GET_LOCAL_BASE(TMP3, 0, 0);
3036
3037 /* Drop frames until we reach STACK_TOP. */
3038 mainloop = LABEL();
3039 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3040 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3041 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3042 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3043 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3044 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3045 JUMPTO(SLJIT_JUMP, mainloop);
3046
3047 JUMPHERE(jump);
3048 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3049 /* End of dropping frames. */
3050 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3051
3052 JUMPHERE(jump);
3053 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
3054 /* Set string begin. */
3055 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3056 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3057 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3058 JUMPTO(SLJIT_JUMP, mainloop);
3059
3060 JUMPHERE(jump);
3061 if (common->mark_ptr != 0)
3062 {
3063 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3064 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3065 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3066 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3067 JUMPTO(SLJIT_JUMP, mainloop);
3068
3069 JUMPHERE(jump);
3070 }
3071
3072 /* Unknown command. */
3073 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3074 JUMPTO(SLJIT_JUMP, mainloop);
3075 }
3076
3077 static void check_wordboundary(compiler_common *common)
3078 {
3079 DEFINE_COMPILER;
3080 struct sljit_jump *skipread;
3081 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3082 struct sljit_jump *jump;
3083 #endif
3084
3085 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3086
3087 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3088 /* Get type of the previous char, and put it to LOCALS1. */
3089 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3090 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3091 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3092 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3093 skip_char_back(common);
3094 check_start_used_ptr(common);
3095 read_char(common);
3096
3097 /* Testing char type. */
3098 #ifdef SUPPORT_UCP
3099 if (common->use_ucp)
3100 {
3101 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3102 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3103 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3104 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3105 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3106 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3107 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3108 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3109 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3110 JUMPHERE(jump);
3111 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3112 }
3113 else
3114 #endif
3115 {
3116 #ifndef COMPILE_PCRE8
3117 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3118 #elif defined SUPPORT_UTF
3119 /* Here LOCALS1 has already been zeroed. */
3120 jump = NULL;
3121 if (common->utf)
3122 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3123 #endif /* COMPILE_PCRE8 */
3124 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3125 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3126 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3127 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3128 #ifndef COMPILE_PCRE8
3129 JUMPHERE(jump);
3130 #elif defined SUPPORT_UTF
3131 if (jump != NULL)
3132 JUMPHERE(jump);
3133 #endif /* COMPILE_PCRE8 */
3134 }
3135 JUMPHERE(skipread);
3136
3137 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3138 skipread = check_str_end(common);
3139 peek_char(common);
3140
3141 /* Testing char type. This is a code duplication. */
3142 #ifdef SUPPORT_UCP
3143 if (common->use_ucp)
3144 {
3145 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3146 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3147 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3148 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3149 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3150 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3151 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3152 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3153 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3154 JUMPHERE(jump);
3155 }
3156 else
3157 #endif
3158 {
3159 #ifndef COMPILE_PCRE8
3160 /* TMP2 may be destroyed by peek_char. */
3161 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3162 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3163 #elif defined SUPPORT_UTF
3164 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3165 jump = NULL;
3166 if (common->utf)
3167 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3168 #endif
3169 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3170 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3171 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3172 #ifndef COMPILE_PCRE8
3173 JUMPHERE(jump);
3174 #elif defined SUPPORT_UTF
3175 if (jump != NULL)
3176 JUMPHERE(jump);
3177 #endif /* COMPILE_PCRE8 */
3178 }
3179 JUMPHERE(skipread);
3180
3181 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3182 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3183 }
3184
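/*
Range format:

ranges[0] = number of bit change positions stored in the array
            (at most MAX_RANGE_SIZE; -1 means the range list is invalid).
ranges[1] = bit of the first character, i.e. of character 0 (0 or 1).
ranges[2] ... ranges[ranges[0] + 1] = positions of the bit changes, in
            increasing order: each entry is the first character whose bit
            differs from the bit of the character before it.
*/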
3192
3193 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3194 {
3195 DEFINE_COMPILER;
3196 struct sljit_jump *jump;
3197
3198 if (ranges[0] < 0)
3199 return FALSE;
3200
3201 switch(ranges[0])
3202 {
3203 case 1:
3204 if (readch)
3205 read_char(common);
3206 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3207 return TRUE;
3208
3209 case 2:
3210 if (readch)
3211 read_char(common);
3212 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3213 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3214 return TRUE;
3215
3216 case 4:
3217 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3218 {
3219 if (readch)
3220 read_char(common);
3221 if (ranges[1] != 0)
3222 {
3223 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3224 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3225 }
3226 else
3227 {
3228 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3229 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3230 JUMPHERE(jump);
3231 }
3232 return TRUE;
3233 }
3234 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3235 {
3236 if (readch)
3237 read_char(common);
3238 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3239 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3240 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3241 return TRUE;
3242 }
3243 return FALSE;
3244
3245 default:
3246 return FALSE;
3247 }
3248 }
3249
3250 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3251 {
3252 int i, bit, length;
3253 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3254
3255 bit = ctypes[0] & flag;
3256 ranges[0] = -1;
3257 ranges[1] = bit != 0 ? 1 : 0;
3258 length = 0;
3259
3260 for (i = 1; i < 256; i++)
3261 if ((ctypes[i] & flag) != bit)
3262 {
3263 if (length >= MAX_RANGE_SIZE)
3264 return;
3265 ranges[2 + length] = i;
3266 length++;
3267 bit ^= flag;
3268 }
3269
3270 if (bit != 0)
3271 {
3272 if (length >= MAX_RANGE_SIZE)
3273 return;
3274 ranges[2 + length] = 256;
3275 length++;
3276 }
3277 ranges[0] = length;
3278 }
3279
3280 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3281 {
3282 int ranges[2 + MAX_RANGE_SIZE];
3283 pcre_uint8 bit, cbit, all;
3284 int i, byte, length = 0;
3285
3286 bit = bits[0] & 0x1;
3287 ranges[1] = bit;
3288 /* Can be 0 or 255. */
3289 all = -bit;
3290
3291 for (i = 0; i < 256; )
3292 {
3293 byte = i >> 3;
3294 if ((i & 0x7) == 0 && bits[byte] == all)
3295 i += 8;
3296 else
3297 {
3298 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3299 if (cbit != bit)
3300 {
3301 if (length >= MAX_RANGE_SIZE)
3302 return FALSE;
3303 ranges[2 + length] = i;
3304 length++;
3305 bit = cbit;
3306 all = -cbit;
3307 }
3308 i++;
3309 }
3310 }
3311
3312 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3313 {
3314 if (length >= MAX_RANGE_SIZE)
3315 return FALSE;
3316 ranges[2 + length] = 256;
3317 length++;
3318 }
3319 ranges[0] = length;
3320
3321 return check_ranges(common, ranges, backtracks, FALSE);
3322 }
3323
3324 static void check_anynewline(compiler_common *common)
3325 {
3326 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3327 DEFINE_COMPILER;
3328
3329 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3330
3331 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3332 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3333 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3334 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3335 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3336 #ifdef COMPILE_PCRE8
3337 if (common->utf)
3338 {
3339 #endif
3340 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3341 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3342 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3343 #ifdef COMPILE_PCRE8
3344 }
3345 #endif
3346 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3347 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3348 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3349 }
3350
3351 static void check_hspace(compiler_common *common)
3352 {
3353 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3354 DEFINE_COMPILER;
3355
3356 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3357
3358 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3359 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3360 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3361 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3362 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3363 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3364 #ifdef COMPILE_PCRE8
3365 if (common->utf)
3366 {
3367 #endif
3368 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3369 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3370 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3371 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3372 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3373 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3374 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3375 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3376 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3377 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3378 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3379 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3380 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3381 #ifdef COMPILE_PCRE8
3382 }
3383 #endif
3384 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3385 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3386
3387 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3388 }
3389
3390 static void check_vspace(compiler_common *common)
3391 {
3392 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3393 DEFINE_COMPILER;
3394
3395 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3396
3397 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3398 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3399 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3400 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3401 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3402 #ifdef COMPILE_PCRE8
3403 if (common->utf)
3404 {
3405 #endif
3406 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3407 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3408 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3409 #ifdef COMPILE_PCRE8
3410 }
3411 #endif
3412 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3413 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3414
3415 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3416 }
3417
3418 #define CHAR1 STR_END
3419 #define CHAR2 STACK_TOP
3420
3421 static void do_casefulcmp(compiler_common *common)
3422 {
3423 DEFINE_COMPILER;
3424 struct sljit_jump *jump;
3425 struct sljit_label *label;
3426
3427 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3428 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3429 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3430 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3431 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3432 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3433
3434 label = LABEL();
3435 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3436 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3437 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3438 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3439 JUMPTO(SLJIT_C_NOT_ZERO, label);
3440
3441 JUMPHERE(jump);
3442 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3443 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3444 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3445 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3446 }
3447
3448 #define LCC_TABLE STACK_LIMIT
3449
3450 static void do_caselesscmp(compiler_common *common)
3451 {
3452 DEFINE_COMPILER;
3453 struct sljit_jump *jump;
3454 struct sljit_label *label;
3455
3456 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3457 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3458
3459 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3460 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3461 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3462 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3463 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3464 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3465
3466 label = LABEL();
3467 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3468 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3469 #ifndef COMPILE_PCRE8
3470 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3471 #endif
3472 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3473 #ifndef COMPILE_PCRE8
3474 JUMPHERE(jump);
3475 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3476 #endif
3477 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3478 #ifndef COMPILE_PCRE8
3479 JUMPHERE(jump);
3480 #endif
3481 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3482 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3483 JUMPTO(SLJIT_C_NOT_ZERO, label);
3484
3485 JUMPHERE(jump);
3486 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3487 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3488 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3489 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3490 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3491 }
3492
3493 #undef LCC_TABLE
3494 #undef CHAR1
3495 #undef CHAR2
3496
3497 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3498
3499 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3500 {
3501 /* This function would be ineffective to do in JIT level. */
3502 pcre_uint32 c1, c2;
3503 const pcre_uchar *src2 = args->uchar_ptr;
3504 const pcre_uchar *end2 = args->end;
3505 const ucd_record *ur;
3506 const pcre_uint32 *pp;
3507
3508 while (src1 < end1)
3509 {
3510 if (src2 >= end2)
3511 return (pcre_uchar*)1;
3512 GETCHARINC(c1, src1);
3513 GETCHARINC(c2, src2);
3514 ur = GET_UCD(c2);
3515 if (c1 != c2 && c1 != c2 + ur->other_case)
3516 {
3517 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3518 for (;;)
3519 {
3520 if (c1 < *pp) return NULL;
3521 if (c1 == *pp++) break;
3522 }
3523 }
3524 }
3525 return src2;
3526 }
3527
3528 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3529
3530 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3531 compare_context* context, jump_list **backtracks)
3532 {
3533 DEFINE_COMPILER;
3534 unsigned int othercasebit = 0;
3535 pcre_uchar *othercasechar = NULL;
3536 #ifdef SUPPORT_UTF
3537 int utflength;
3538 #endif
3539
3540 if (caseless && char_has_othercase(common, cc))
3541 {
3542 othercasebit = char_get_othercase_bit(common, cc);
3543 SLJIT_ASSERT(othercasebit);
3544 /* Extracting bit difference info. */
3545 #if defined COMPILE_PCRE8
3546 othercasechar = cc + (othercasebit >> 8);
3547 othercasebit &= 0xff;
3548 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3549 /* Note that this code only handles characters in the BMP. If there
3550 ever are characters outside the BMP whose othercase differs in only one
3551 bit from itself (there currently are none), this code will need to be
3552 revised for COMPILE_PCRE32. */
3553 othercasechar = cc + (othercasebit >> 9);
3554 if ((othercasebit & 0x100) != 0)
3555 othercasebit = (othercasebit & 0xff) << 8;
3556 else
3557 othercasebit &= 0xff;
3558 #endif /* COMPILE_PCRE[8|16|32] */
3559 }
3560
3561 if (context->sourcereg == -1)
3562 {
3563 #if defined COMPILE_PCRE8
3564 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3565 if (context->length >= 4)
3566 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3567 else if (context->length >= 2)
3568 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3569 else
3570 #endif
3571 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3572 #elif defined COMPILE_PCRE16
3573 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3574 if (context->length >= 4)
3575 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3576 else
3577 #endif
3578 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3579 #elif defined COMPILE_PCRE32
3580 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3581 #endif /* COMPILE_PCRE[8|16|32] */
3582 context->sourcereg = TMP2;
3583 }
3584
3585 #ifdef SUPPORT_UTF
3586 utflength = 1;
3587 if (common->utf && HAS_EXTRALEN(*cc))
3588 utflength += GET_EXTRALEN(*cc);
3589
3590 do
3591 {
3592 #endif
3593
3594 context->length -= IN_UCHARS(1);
3595 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3596
3597 /* Unaligned read is supported. */
3598 if (othercasebit != 0 && othercasechar == cc)
3599 {
3600 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3601 context->oc.asuchars[context->ucharptr] = othercasebit;
3602 }
3603 else
3604 {
3605 context->c.asuchars[context->ucharptr] = *cc;
3606 context->oc.asuchars[context->ucharptr] = 0;
3607 }
3608 context->ucharptr++;
3609
3610 #if defined COMPILE_PCRE8
3611 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3612 #else
3613 if (context->ucharptr >= 2 || context->length == 0)
3614 #endif
3615 {
3616 if (context->length >= 4)
3617 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3618 else if (context->length >= 2)
3619 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3620 #if defined COMPILE_PCRE8
3621 else if (context->length >= 1)
3622 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3623 #endif /* COMPILE_PCRE8 */
3624 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3625
3626 switch(context->ucharptr)
3627 {
3628 case 4 / sizeof(pcre_uchar):
3629 if (context->oc.asint != 0)
3630 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3631 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3632 break;
3633
3634 case 2 / sizeof(pcre_uchar):
3635 if (context->oc.asushort != 0)
3636 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3637 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3638 break;
3639
3640 #ifdef COMPILE_PCRE8
3641 case 1:
3642 if (context->oc.asbyte != 0)
3643 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3644 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3645 break;
3646 #endif
3647
3648 default:
3649 SLJIT_ASSERT_STOP();
3650 break;
3651 }
3652 context->ucharptr = 0;
3653 }
3654
3655 #else
3656
3657 /* Unaligned read is unsupported or in 32 bit mode. */
3658 if (context->length >= 1)
3659 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3660
3661 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3662
3663 if (othercasebit != 0 && othercasechar == cc)
3664 {
3665 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3666 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3667 }
3668 else
3669 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3670
3671 #endif
3672
3673 cc++;
3674 #ifdef SUPPORT_UTF
3675 utflength--;
3676 }
3677 while (utflength > 0);
3678 #endif
3679
3680 return cc;
3681 }
3682
3683 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3684
/* Rebases the register which holds the character type. The register always
contains "type - typeoffset"; when the requested offset differs from the
current one, a single SUB or ADD is emitted and the compile time offset is
updated. Both macros are wrapped in do { } while (0), so they behave as one
statement (for example in an unbraced if / else). Note that the argument is
evaluated more than once. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

/* The same for the character itself, which is kept in TMP1 as
"character - charoffset". */

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
3704
3705 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3706 {
3707 DEFINE_COMPILER;
3708 jump_list *found = NULL;
3709 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3710 pcre_int32 c, charoffset;
3711 const pcre_uint32 *other_cases;
3712 struct sljit_jump *jump = NULL;
3713 pcre_uchar *ccbegin;
3714 int compares, invertcmp, numberofcmps;
3715 #ifdef SUPPORT_UCP
3716 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3717 BOOL charsaved = FALSE;
3718 int typereg = TMP1, scriptreg = TMP1;
3719 pcre_int32 typeoffset;
3720 #endif
3721
3722 /* Although SUPPORT_UTF must be defined, we are
3723 not necessary in utf mode even in 8 bit mode. */
3724 detect_partial_match(common, backtracks);
3725 read_char(common);
3726
3727 if ((*cc++ & XCL_MAP) != 0)
3728 {
3729 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3730 #ifndef COMPILE_PCRE8
3731 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3732 #elif defined SUPPORT_UTF
3733 if (common->utf)
3734 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3735 #endif
3736
3737 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3738 {
3739 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3740 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3741 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3742 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3743 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3744 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3745 }
3746
3747 #ifndef COMPILE_PCRE8
3748 JUMPHERE(jump);
3749 #elif defined SUPPORT_UTF
3750 if (common->utf)
3751 JUMPHERE(jump);
3752 #endif
3753 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3754 #ifdef SUPPORT_UCP
3755 charsaved = TRUE;
3756 #endif
3757 cc += 32 / sizeof(pcre_uchar);
3758 }
3759
3760 /* Scanning the necessary info. */
3761 ccbegin = cc;
3762 compares = 0;
3763 while (*cc != XCL_END)
3764 {
3765 compares++;
3766 if (*cc == XCL_SINGLE)
3767 {
3768 cc += 2;
3769 #ifdef SUPPORT_UTF
3770 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3771 #endif
3772 #ifdef SUPPORT_UCP
3773 needschar = TRUE;
3774 #endif
3775 }
3776 else if (*cc == XCL_RANGE)
3777 {
3778 cc += 2;
3779 #ifdef SUPPORT_UTF
3780 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3781 #endif
3782 cc++;
3783 #ifdef SUPPORT_UTF
3784 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3785 #endif
3786 #ifdef SUPPORT_UCP
3787 needschar = TRUE;
3788 #endif
3789 }
3790 #ifdef SUPPORT_UCP
3791 else
3792 {
3793 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3794 cc++;
3795 switch(*cc)
3796 {
3797 case PT_ANY:
3798 break;
3799
3800 case PT_LAMP:
3801 case PT_GC:
3802 case PT_PC:
3803 case PT_ALNUM:
3804 needstype = TRUE;
3805 break;
3806
3807 case PT_SC:
3808 needsscript = TRUE;
3809 break;
3810
3811 case PT_SPACE:
3812 case PT_PXSPACE:
3813 case PT_WORD:
3814 needstype = TRUE;
3815 needschar = TRUE;
3816 break;
3817
3818 case PT_CLIST:
3819 needschar = TRUE;
3820 break;
3821
3822 default:
3823 SLJIT_ASSERT_STOP();
3824 break;
3825 }
3826 cc += 2;
3827 }
3828 #endif
3829 }
3830
3831 #ifdef SUPPORT_UCP
3832 /* Simple register allocation. TMP1 is preferred if possible. */
3833 if (needstype || needsscript)
3834 {
3835 if (needschar && !charsaved)
3836 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3837 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3838 if (needschar)
3839 {
3840 if (needstype)
3841 {
3842 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3843 typereg = RETURN_ADDR;
3844 }
3845
3846 if (needsscript)
3847 scriptreg = TMP3;
3848 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3849 }
3850 else if (needstype && needsscript)
3851 scriptreg = TMP3;
3852 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3853
3854 if (needsscript)
3855 {
3856 if (scriptreg == TMP1)
3857 {
3858 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3859 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3860 }
3861 else
3862 {
3863 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3864 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3865 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3866 }
3867 }
3868 }
3869 #endif
3870
3871 /* Generating code. */
3872 cc = ccbegin;
3873 charoffset = 0;
3874 numberofcmps = 0;
3875 #ifdef SUPPORT_UCP
3876 typeoffset = 0;
3877 #endif
3878
3879 while (*cc != XCL_END)
3880 {
3881 compares--;
3882 invertcmp = (compares == 0 && list != backtracks);
3883 jump = NULL;
3884
3885 if (*cc == XCL_SINGLE)
3886 {
3887 cc ++;
3888 #ifdef SUPPORT_UTF
3889 if (common->utf)
3890 {
3891 GETCHARINC(c, cc);
3892 }
3893 else
3894 #endif
3895 c = *cc++;
3896
3897 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3898 {
3899 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3900 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
3901 numberofcmps++;
3902 }
3903 else if (numberofcmps > 0)
3904 {
3905 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3906 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3907 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3908 numberofcmps = 0;
3909 }
3910 else
3911 {
3912 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3913 numberofcmps = 0;
3914 }
3915 }
3916 else if (*cc == XCL_RANGE)
3917 {
3918 cc ++;
3919 #ifdef SUPPORT_UTF
3920 if (common->utf)
3921 {
3922 GETCHARINC(c, cc);
3923 }
3924 else
3925 #endif
3926 c = *cc++;
3927 SET_CHAR_OFFSET(c);
3928 #ifdef SUPPORT_UTF
3929 if (common->utf)
3930 {
3931 GETCHARINC(c, cc);
3932 }
3933 else
3934 #endif
3935 c = *cc++;
3936 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3937 {
3938 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3939 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
3940 numberofcmps++;
3941 }
3942 else if (numberofcmps > 0)
3943 {
3944 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3945 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3946 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3947 numberofcmps = 0;
3948 }
3949 else
3950 {
3951 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3952 numberofcmps = 0;
3953 }
3954 }
3955 #ifdef SUPPORT_UCP
3956 else
3957 {
3958 if (*cc == XCL_NOTPROP)
3959 invertcmp ^= 0x1;
3960 cc++;
3961 switch(*cc)
3962 {
3963 case PT_ANY:
3964 if (list != backtracks)
3965 {
3966 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3967 continue;
3968 }
3969 else if (cc[-1] == XCL_NOTPROP)
3970 continue;
3971 jump = JUMP(SLJIT_JUMP);
3972 break;
3973
3974 case PT_LAMP:
3975 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3976 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3977 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3978 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3979 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3980 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3981 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3982 break;
3983
3984 case PT_GC:
3985 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3986 SET_TYPE_OFFSET(c);
3987 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3988 break;
3989
3990 case PT_PC:
3991 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3992 break;
3993
3994 case PT_SC:
3995 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3996 break;
3997
3998 case PT_SPACE:
3999 case PT_PXSPACE:
4000 if (*cc == PT_SPACE)
4001 {
4002 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4003 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4004 }
4005 SET_CHAR_OFFSET(9);
4006 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4007 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4008 if (*cc == PT_SPACE)
4009 JUMPHERE(jump);
4010
4011 SET_TYPE_OFFSET(ucp_Zl);
4012 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4013 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4014 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4015 break;
4016
4017 case PT_WORD:
4018 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4019 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4020 /* ... fall through */
4021
4022 case PT_ALNUM:
4023 SET_TYPE_OFFSET(ucp_Ll);
4024 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4025 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4026 SET_TYPE_OFFSET(ucp_Nd);
4027 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4028 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4029 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4030 break;
4031
4032 case PT_CLIST:
4033 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4034
4035 /* At least three characters are required.
4036 Otherwise this case would be handled by the normal code path. */
4037 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4038 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4039
4040 /* Optimizing character pairs, if their difference is power of 2. */
4041 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4042 {
4043 if (charoffset == 0)
4044 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4045 else
4046 {
4047 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4048 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4049 }
4050 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4051 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4052 other_cases += 2;
4053 }
4054 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4055 {
4056 if (charoffset == 0)
4057 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4058 else
4059 {
4060 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4061 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4062 }
4063 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4064 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4065
4066 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4067 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4068
4069 other_cases += 3;
4070 }
4071 else
4072 {
4073 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4074 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4075 }
4076
4077 while (*other_cases != NOTACHAR)
4078 {
4079 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4080 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4081 }
4082 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4083 break;
4084 }
4085 cc += 2;
4086 }
4087 #endif
4088
4089 if (jump != NULL)
4090 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4091 }
4092
4093 if (found != NULL)
4094 set_jumps(found, LABEL());
4095 }
4096
4097 #undef SET_TYPE_OFFSET
4098 #undef SET_CHAR_OFFSET
4099
4100 #endif
4101
/* Emits the matching-path code for one single-character or assertion opcode.

   common      compiler state (newline settings, utf flag, mode, and the
               shared helper jump lists such as wordboundary, hspace,
               vspace, getucd and anynewline used below)
   type        the opcode to compile
   cc          points to the data that follows the opcode
   backtracks  jump list that collects every "no match" exit

   Returns the pattern position after the data consumed by the opcode
   (cc itself for the opcodes that have no operands here). An opcode
   that is not handled by the switch reaches SLJIT_ASSERT_STOP(). */
4102 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4103 {
4104 DEFINE_COMPILER;
4105 int length;
4106 unsigned int c, oc, bit;
4107 compare_context context;
4108 struct sljit_jump *jump[4];
4109 #ifdef SUPPORT_UTF
4110 struct sljit_label *label;
4111 #ifdef SUPPORT_UCP
4112 pcre_uchar propdata[5];
4113 #endif
4114 #endif
4115
4116 switch(type)
4117 {
/* Backtrack unless STR_PTR equals jit_arguments.begin. */
4118 case OP_SOD:
4119 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4120 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4121 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4122 return cc;
4123
/* Backtrack unless STR_PTR equals jit_arguments.str. */
4124 case OP_SOM:
4125 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4126 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4127 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4128 return cc;
4129
/* Both variants call the shared wordboundary routine; only the flag
   condition that leads to a backtrack differs. */
4130 case OP_NOT_WORD_BOUNDARY:
4131 case OP_WORD_BOUNDARY:
4132 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4133 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4134 return cc;
4135
4136 case OP_NOT_DIGIT:
4137 case OP_DIGIT:
4138 /* Digits are usually 0-9, so it is worth to optimize them. */
/* A first element of -2 marks common->digits as not yet computed. */
4139 if (common->digits[0] == -2)
4140 get_ctype_ranges(common, ctype_digit, common->digits);
4141 detect_partial_match(common, backtracks);
4142 /* Flip the starting bit in the negative case. */
4143 if (type == OP_NOT_DIGIT)
4144 common->digits[1] ^= 1;
/* When check_ranges() reports that it generated nothing, fall back to
   testing the ctype_digit bit. NOTE(review): read_char8_type() presumably
   leaves the character type flags in TMP1 - confirm. */
4145 if (!check_ranges(common, common->digits, backtracks, TRUE))
4146 {
4147 read_char8_type(common);
4148 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4149 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4150 }
/* Undo the flip above, leaving common->digits as it was. */
4151 if (type == OP_NOT_DIGIT)
4152 common->digits[1] ^= 1;
4153 return cc;
4154
/* Test the ctype_space bit of the current character. */
4155 case OP_NOT_WHITESPACE:
4156 case OP_WHITESPACE:
4157 detect_partial_match(common, backtracks);
4158 read_char8_type(common);
4159 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4160 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4161 return cc;
4162
/* Test the ctype_word bit of the current character. */
4163 case OP_NOT_WORDCHAR:
4164 case OP_WORDCHAR:
4165 detect_partial_match(common, backtracks);
4166 read_char8_type(common);
4167 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4168 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4169 return cc;
4170
4171 case OP_ANY:
4172 detect_partial_match(common, backtracks);
4173 read_char(common);
/* Fixed two-unit newline (value above 255): fail only when the character
   just read is the first unit and the following one is the second unit.
   Reaching the subject end after the first unit is not a failure here. */
4174 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4175 {
4176 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4177 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4178 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4179 else
4180 jump[1] = check_str_end(common);
4181
4182 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4183 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4184 if (jump[1] != NULL)
4185 JUMPHERE(jump[1]);
4186 JUMPHERE(jump[0]);
4187 }
4188 else
4189 check_newlinechar(common, common->nltype, backtracks, TRUE);
4190 return cc;
4191
/* Any character at all: only STR_PTR has to be advanced. In UTF mode the
   trailing code units of a multi-unit character are skipped as well. */
4192 case OP_ALLANY:
4193 detect_partial_match(common, backtracks);
4194 #ifdef SUPPORT_UTF
4195 if (common->utf)
4196 {
4197 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4198 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4199 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4200 #if defined COMPILE_PCRE8
/* Lead units of 0xc0 and above index utf8_table4 to get the extra length. */
4201 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4202 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4203 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4204 #elif defined COMPILE_PCRE16
/* If (unit & 0xfc00) == 0xd800 the equality flag (1) is shifted left by
   one and added to STR_PTR, otherwise nothing is added. */
4205 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4206 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4207 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4208 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4209 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4210 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4211 #endif
4212 JUMPHERE(jump[0]);
4213 #endif /* COMPILE_PCRE[8|16] */
4214 return cc;
4215 }
4216 #endif
4217 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4218 return cc;
4219
/* Exactly one code unit, regardless of the utf setting. */
4220 case OP_ANYBYTE:
4221 detect_partial_match(common, backtracks);
4222 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4223 return cc;
4224
4225 #ifdef SUPPORT_UTF
4226 #ifdef SUPPORT_UCP
/* A property test is compiled by building a temporary five-element
   extended-class item list and handing it to compile_xclass_matchingpath().
   The two operand units at cc[0] and cc[1] are consumed. */
4227 case OP_NOTPROP:
4228 case OP_PROP:
4229 propdata[0] = 0;
4230 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4231 propdata[2] = cc[0];
4232 propdata[3] = cc[1];
4233 propdata[4] = XCL_END;
4234 compile_xclass_matchingpath(common, propdata, backtracks);
4235 return cc + 2;
4236 #endif
4237 #endif
4238
/* A CR followed by NL is consumed as a pair; a CR that is last in the
   subject or is followed by something else is accepted alone. Any other
   character is checked by check_newlinechar() with bsr_nltype. */
4239 case OP_ANYNL:
4240 detect_partial_match(common, backtracks);
4241 read_char(common);
4242 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4243 /* We don't need to handle soft partial matching case. */
4244 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4245 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4246 else
4247 jump[1] = check_str_end(common);
4248 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4249 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4250 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4251 jump[3] = JUMP(SLJIT_JUMP);
4252 JUMPHERE(jump[0]);
4253 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4254 JUMPHERE(jump[1]);
4255 JUMPHERE(jump[2]);
4256 JUMPHERE(jump[3]);
4257 return cc;
4258
/* Horizontal space: decided by the shared hspace routine. */
4259 case OP_NOT_HSPACE:
4260 case OP_HSPACE:
4261 detect_partial_match(common, backtracks);
4262 read_char(common);
4263 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4264 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4265 return cc;
4266
/* Vertical space: decided by the shared vspace routine. */
4267 case OP_NOT_VSPACE:
4268 case OP_VSPACE:
4269 detect_partial_match(common, backtracks);
4270 read_char(common);
4271 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4272 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4273 return cc;
4274
4275 #ifdef SUPPORT_UCP
/* Extended grapheme sequence. The gbprop value of the previous character
   is kept in STACK_TOP (whose real value is parked in LOCALS0). Each
   further character is accepted while its bit is set in the ucp_gbtable
   entry selected by the previous gbprop value. */
4276 case OP_EXTUNI:
4277 detect_partial_match(common, backtracks);
4278 read_char(common);
4279 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4280 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4281 /* Optimize register allocation: use a real register. */
4282 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4283 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4284
4285 label = LABEL();
4286 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* TMP3 remembers where the candidate character starts, so STR_PTR can be
   moved back if that character turns out not to belong to the sequence. */
4287 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4288 read_char(common);
4289 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4290 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4291 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4292
4293 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4294 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4295 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4296 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4297 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4298 JUMPTO(SLJIT_C_NOT_ZERO, label);
4299
4300 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4301 JUMPHERE(jump[0]);
4302 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4303
4304 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4305 {
4306 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4307 /* Since we successfully read a char above, partial matching must occur. */
4308 check_partial(common, TRUE);
4309 JUMPHERE(jump[0]);
4310 }
4311 return cc;
4312 #endif
4313
/* Succeeds at the subject end (jump[0]) or when only a newline remains
   before the subject end. */
4314 case OP_EODN:
4315 /* Requires rather complex checks. */
4316 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4317 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4318 {
/* Two-unit fixed newline: exactly two units must remain and they must be
   the newline pair. */
4319 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4320 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4321 if (common->mode == JIT_COMPILE)
4322 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4323 else
4324 {
/* Partial modes: if more than two units remain, or the current unit is
   not the first newline unit, backtrack; otherwise only one unit remains
   and it is the first newline unit, so check_partial() is invoked before
   backtracking. */
4325 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4326 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4327 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4328 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4329 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4330 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4331 check_partial(common, TRUE);
4332 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4333 JUMPHERE(jump[1]);
4334 }
4335 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4336 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4337 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4338 }
4339 else if (common->nltype == NLTYPE_FIXED)
4340 {
/* One-unit fixed newline: exactly one unit must remain and equal it. */
4341 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4342 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4343 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4344 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4345 }
4346 else
4347 {
/* Non-fixed newline types. A CR is handled first: it succeeds when it is
   the last unit (jump[2]) or when exactly CR NL remains (jump[3]). */
4348 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4349 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4350 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4351 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4352 jump[2] = JUMP(SLJIT_C_GREATER);
4353 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4354 /* Equal. */
4355 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4356 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4357 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4358
4359 JUMPHERE(jump[1]);
4360 if (common->nltype == NLTYPE_ANYCRLF)
4361 {
/* Not a CR: it must be the last unit and it must be NL. */
4362 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4363 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4364 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4365 }
4366 else
4367 {
/* Read one character with STR_PTR saved in LOCALS1; it must end exactly
   at STR_END and be accepted by the anynewline routine. STR_PTR is then
   restored, so nothing is consumed. */
4368 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4369 read_char(common);
4370 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4371 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4372 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4373 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4374 }
4375 JUMPHERE(jump[2]);
4376 JUMPHERE(jump[3]);
4377 }
4378 JUMPHERE(jump[0]);
4379 check_partial(common, FALSE);
4380 return cc;
4381
/* Backtrack unless STR_PTR has reached STR_END. */
4382 case OP_EOD:
4383 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4384 check_partial(common, FALSE);
4385 return cc;
4386
/* Backtrack if STR_PTR is past jit_arguments.begin or if the notbol
   argument is non-zero. */
4387 case OP_CIRC:
4388 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4389 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4390 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4391 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4392 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4393 return cc;
4394
/* At jit_arguments.begin only the notbol argument is tested. Elsewhere
   the position must be before STR_END and be preceded by a newline. */
4395 case OP_CIRCM:
4396 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4397 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4398 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4399 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4400 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4401 jump[0] = JUMP(SLJIT_JUMP);
4402 JUMPHERE(jump[1]);
4403
4404 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4405 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4406 {
/* TMP1 still holds jit_arguments.begin: both preceding units must lie
   inside the subject before they are loaded and compared. */
4407 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4408 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4409 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4410 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4411 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4412 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4413 }
4414 else
4415 {
4416 skip_char_back(common);
4417 read_char(common);
4418 check_newlinechar(common, common->nltype, backtracks, FALSE);
4419 }
4420 JUMPHERE(jump[0]);
4421 return cc;
4422
/* Backtrack when the noteol argument is non-zero. Otherwise behave like
   OP_EODN, or accept only the very end when common->endonly is set. */
4423 case OP_DOLL:
4424 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4425 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4426 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4427
4428 if (!common->endonly)
4429 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4430 else
4431 {
4432 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4433 check_partial(common, FALSE);
4434 }
4435 return cc;
4436
/* At STR_END the noteol argument is tested. Before STR_END the upcoming
   character(s) must form a newline; nothing is consumed. */
4437 case OP_DOLLM:
4438 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4439 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4440 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4441 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4442 check_partial(common, FALSE);
4443 jump[0] = JUMP(SLJIT_JUMP);
4444 JUMPHERE(jump[1]);
4445
4446 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4447 {
4448 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4449 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4450 if (common->mode == JIT_COMPILE)
4451 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4452 else
4453 {
4454 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4455 /* STR_PTR = STR_END - IN_UCHARS(1) */
4456 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4457 check_partial(common, TRUE);
4458 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4459 JUMPHERE(jump[1]);
4460 }
4461
4462 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4463 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4464 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4465 }
4466 else
4467 {
4468 peek_char(common);
4469 check_newlinechar(common, common->nltype, backtracks, FALSE);
4470 }
4471 JUMPHERE(jump[0]);
4472 return cc;
4473
/* A single literal character, caseful (OP_CHAR) or caseless (OP_CHARI).
   length is the number of code units the literal occupies in the pattern. */
4474 case OP_CHAR:
4475 case OP_CHARI:
4476 length = 1;
4477 #ifdef SUPPORT_UTF
4478 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4479 #endif
/* In JIT_COMPILE mode, when the character is caseful, has no other case,
   or char_get_othercase_bit() returns non-zero, STR_PTR is advanced first,
   the subject end is checked once, and byte_sequence_compare() emits the
   comparison. */
4480 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4481 {
4482 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4483 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4484
4485 context.length = IN_UCHARS(length);
4486 context.sourcereg = -1;
4487 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4488 context.ucharptr = 0;
4489 #endif
4490 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4491 }
/* General path: read the subject character and compare it with the
   literal c taken from the pattern. */
4492 detect_partial_match(common, backtracks);
4493 read_char(common);
4494 #ifdef SUPPORT_UTF
4495 if (common->utf)
4496 {
4497 GETCHAR(c, cc);
4498 }
4499 else
4500 #endif
4501 c = *cc;
4502 if (type == OP_CHAR || !char_has_othercase(common, cc))
4503 {
4504 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4505 return cc + length;
4506 }
4507 oc = char_othercase(common, c);
4508 bit = c ^ oc;
/* When both cases differ in a single bit, set that bit in the subject
   character and compare once against c | bit. */
4509 if (is_powerof2(bit))
4510 {
4511 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4512 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4513 return cc + length;
4514 }
/* Otherwise OR together the results of the two equality tests. */
4515 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4516 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4517 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4518 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4519 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4520 return cc + length;
4521
/* Any character except the literal (OP_NOT), or except either case of
   the literal (OP_NOTI). */
4522 case OP_NOT:
4523 case OP_NOTI:
4524 detect_partial_match(common, backtracks);
4525 length = 1;
4526 #ifdef SUPPORT_UTF
4527 if (common->utf)
4528 {
4529 #ifdef COMPILE_PCRE8
4530 c = *cc;
/* UTF-8 with a literal below 128: only the first subject unit has to be
   compared, so the full character is not decoded. */
4531 if (c < 128)
4532 {
4533 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4534 if (type == OP_NOT || !char_has_othercase(common, cc))
4535 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4536 else
4537 {
4538 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4539 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4540 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4541 }
4542 /* Skip the variable-length character. */
4543 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4544 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4545 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4546 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4547 JUMPHERE(jump[0]);
4548 return cc + 1;
4549 }
4550 else
4551 #endif /* COMPILE_PCRE8 */
4552 {
4553 GETCHARLEN(c, cc, length);
4554 read_char(common);
4555 }
4556 }
4557 else
4558 #endif /* SUPPORT_UTF */
4559 {
4560 read_char(common);
4561 c = *cc;
4562 }
4563
/* Here the subject character has been read and c holds the literal;
   equality means failure. */
4564 if (type == OP_NOT || !char_has_othercase(common, cc))
4565 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4566 else
4567 {
4568 oc = char_othercase(common, c);
4569 bit = c ^ oc;
4570 if (is_powerof2(bit))
4571 {
4572 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4573 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4574 }
4575 else
4576 {
4577 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4578 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4579 }
4580 }
4581 return cc + length;
4582
/* Character class stored as a 32-byte bitmap at cc. check_class_ranges()
   is tried first; if it reports success nothing else is emitted. */
4583 case OP_CLASS:
4584 case OP_NCLASS:
4585 detect_partial_match(common, backtracks);
4586 read_char(common);
4587 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4588 return cc + 32 / sizeof(pcre_uchar);
4589
4590 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4591 jump[0] = NULL;
4592 #ifdef COMPILE_PCRE8
4593 /* This check only affects 8 bit mode. In other modes, we
4594 always need to compare the value with 255. */
4595 if (common->utf)
4596 #endif /* COMPILE_PCRE8 */
4597 {
/* Characters above 255 are not in the bitmap: they fail for OP_CLASS and
   skip the bitmap test (i.e. match) for OP_NCLASS. */
4598 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4599 if (type == OP_CLASS)
4600 {
4601 add_jump(compiler, backtracks, jump[0]);
4602 jump[0] = NULL;
4603 }
4604 }
4605 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Bitmap lookup: byte index is c >> 3, bit mask is 1 << (c & 7). */
4606 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4607 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4608 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4609 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4610 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4611 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4612 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4613 if (jump[0] != NULL)
4614 JUMPHERE(jump[0]);
4615 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4616 return cc + 32 / sizeof(pcre_uchar);
4617
4618 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended class: the item list starts after the LINK_SIZE field, and
   GET(cc, 0) supplies the length used to step over the whole item. */
4619 case OP_XCLASS:
4620 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4621 return cc + GET(cc, 0) - 1;
4622 #endif
4623
/* Move STR_PTR back by GET(cc, 0) characters, backtracking if that would
   cross jit_arguments.begin. A zero count emits nothing. */
4624 case OP_REVERSE:
4625 length = GET(cc, 0);
4626 if (length == 0)
4627 return cc + LINK_SIZE;
4628 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4629 #ifdef SUPPORT_UTF
4630 if (common->utf)
4631 {
/* UTF mode: step back one character at a time, counting down in TMP2. */
4632 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4633 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4634 label = LABEL();
4635 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4636 skip_char_back(common);
4637 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4638 JUMPTO(SLJIT_C_NOT_ZERO, label);
4639 }
4640 else
4641 #endif
4642 {
4643 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4644 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4645 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4646 }
4647 check_start_used_ptr(common);
4648 return cc + LINK_SIZE;
4649 }
/* Reached only for an opcode without a case above. */
4650 SLJIT_ASSERT_STOP();
4651 return cc;
4652 }
4653
4654 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4655 {
4656 /* This function consumes at least one input character. */
4657 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4658 DEFINE_COMPILER;
4659 pcre_uchar *ccbegin = cc;
4660 compare_context context;
4661 int size;
4662
4663 context.length = 0;
4664 do
4665 {
4666 if (cc >= ccend)
4667 break;
4668
4669 if (*cc == OP_CHAR)
4670 {
4671 size = 1;
4672 #ifdef SUPPORT_UTF
4673 if (common->utf && HAS_EXTRALEN(cc[1]))
4674 size += GET_EXTRALEN(cc[1]);
4675 #endif
4676 }
4677 else if (*cc == OP_CHARI)
4678 {
4679 size = 1;
4680 #ifdef SUPPORT_UTF
4681 if (common->utf)
4682 {
4683 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4684 size = 0;
4685 else if (HAS_EXTRALEN(cc[1]))
4686 size += GET_EXTRALEN(cc[1]);
4687 }
4688 else
4689 #endif
4690 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4691 size = 0;
4692 }
4693 else
4694 size = 0;
4695
4696 cc += 1 + size;
4697 context.length += IN_UCHARS(size);
4698 }
4699 while (size > 0 && context.length <= 128);
4700
4701 cc = ccbegin;
4702 if (context.length > 0)
4703 {
4704 /* We have a fixed-length byte sequence. */
4705 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4706 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4707
4708 context.sourcereg = -1;
4709 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4710 context.ucharptr = 0;
4711 #endif
4712 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4713 return cc;
4714 }
4715
4716 /* A non-fixed length character will be checked if length == 0. */
4717 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4718 }
4719
4720 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4721 {
4722 DEFINE_COMPILER;
4723 int offset = GET2(cc, 1) << 1;
4724
4725 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4726 if (!common->jscript_compat)
4727 {
4728 if (backtracks == NULL)
4729 {
4730 /* OVECTOR(1) contains the "string begin - 1" constant. */
4731 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4732 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4733 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4734 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4735 return JUMP(SLJIT_C_NOT_ZERO);
4736 }
4737 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4738 }
4739 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4740 }
4741
4742 /* Forward declarations of the two generators that are defined later in
this file. compile_matchingpath() takes the compiler state, two pattern
pointers and the parent backtrack; compile_backtrackingpath() takes the
compiler state and a backtrack structure. */
4743 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4744 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4745
/* Allocates a zero-filled backtrack structure of `size` bytes with
   sljit_alloc_memory(), stores `ccstart` in its cc field and pushes it on
   top of parent's backtrack list (the old top becomes ->prev).

   The expansion uses the names `compiler`, `backtrack` and `parent` from
   the enclosing function. If the compiler reports an error after the
   allocation, the enclosing function returns `error`. */
4746 #define PUSH_BACKTRACK(size, ccstart, error) \
4747 do \
4748 { \
4749 backtrack = sljit_alloc_memory(compiler, (size)); \
4750 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4751 return error; \
4752 memset(backtrack, 0, size); \
4753 backtrack->prev = parent->top; \
4754 backtrack->cc = (ccstart); \
4755 parent->top = backtrack; \
4756 } \
4757 while (0)
4758
/* Same steps as PUSH_BACKTRACK, for use inside functions that return void:
   on a compiler error the enclosing function returns without a value.
   Uses `compiler`, `backtrack` and `parent` from the enclosing function. */
4759 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
4760 do \
4761 { \
4762 backtrack = sljit_alloc_memory(compiler, (size)); \
4763 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4764 return; \
4765 memset(backtrack, 0, size); \
4766 backtrack->prev = parent->top; \
4767 backtrack->cc = (ccstart); \
4768 parent->top = backtrack; \
4769 } \
4770 while (0)
4771
/* Casts the local `backtrack` pointer of the enclosing function to a
   pointer to the given backtrack structure type. */
4772 #define BACKTRACK_AS(type) ((type *)backtrack)
4773
/* Emits code that matches a back reference (OP_REF or OP_REFI at cc) once.

   common      compiler state
   cc          points to the reference opcode; the group number is read
               with GET2(cc, 1)
   backtracks  jump list for match failure
   withchecks  when TRUE, the unset-group test (skipped if jscript_compat
               is set) and the empty-capture test are emitted as well
   emptyfail   only meaningful with withchecks: when TRUE an empty capture
               jumps to backtracks, otherwise it skips the comparison

   Returns cc + 1 + IMM2_SIZE. */
4774 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4775 {
4776 DEFINE_COMPILER;
4777 int offset = GET2(cc, 1) << 1;
4778 struct sljit_jump *jump = NULL;
4779 struct sljit_jump *partial;
4780 struct sljit_jump *nopartial;
4781
/* TMP1 = start offset of the referenced group. */
4782 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4783 /* OVECTOR(1) contains the "string begin - 1" constant. */
4784 if (withchecks && !common->jscript_compat)
4785 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4786
4787 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* Caseless reference in UTF mode: the comparison is done by calling
   do_utf_caselesscmp(). */
4788 if (common->utf && *cc == OP_REFI)
4789 {
4790 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
4791 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
/* Equal start and end offsets mean an empty capture. */
4792 if (withchecks)
4793 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4794
4795 /* Needed to save important temporary registers. */
4796 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4797 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
4798 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4799 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4800 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return value as used here: 0 and 1 both lead to a backtrack (1 goes
   through check_partial() first in the partial modes); any other value
   becomes the new STR_PTR. */
4801 if (common->mode == JIT_COMPILE)
4802 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4803 else
4804 {
4805 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4806 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4807 check_partial(common, FALSE);
4808 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4809 JUMPHERE(nopartial);
4810 }
4811 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4812 }
4813 else
4814 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4815 {
/* TMP2 = end offset - start offset, i.e. the length of the capture; the
   zero flag of this subtraction is the empty-capture test. */
4816 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4817 if (withchecks)
4818 jump = JUMP(SLJIT_C_ZERO);
4819
/* Advance STR_PTR by the capture length before comparing; running past
   STR_END is a plain failure in JIT_COMPILE mode. */
4820 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4821 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4822 if (common->mode == JIT_COMPILE)
4823 add_jump(compiler, backtracks, partial);
4824
/* The shared compare routine is called; a non-zero TMP2 afterwards is
   treated as a mismatch. */
4825 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4826 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4827
4828 if (common->mode != JIT_COMPILE)
4829 {
/* Partial modes, subject too short: reduce the length to what is left,
   compare that part with STR_PTR placed at STR_END, then run
   check_partial() and backtrack. */
4830 nopartial = JUMP(SLJIT_JUMP);
4831 JUMPHERE(partial);
4832 /* TMP2 -= STR_END - STR_PTR */
4833 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4834 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
4835 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4836 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4837 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4838 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4839 JUMPHERE(partial);
4840 check_partial(common, FALSE);
4841 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4842 JUMPHERE(nopartial);
4843 }
4844 }
4845
/* Resolve the empty-capture jump: failure or fall through to here. */
4846 if (jump != NULL)
4847 {
4848 if (emptyfail)
4849 add_jump(compiler, backtracks, jump);
4850 else
4851 JUMPHERE(jump);
4852 }
4853 return cc + 1 + IMM2_SIZE;
4854 }
4855
/* Emits the matching path of a back reference that is followed by a
   repeat opcode (OP_CRSTAR ... OP_CRMINRANGE).

   common  compiler state
   cc      points to the reference opcode; the repeat opcode is read from
           cc[1 + IMM2_SIZE]
   parent  backtrack node that receives the new iterator_backtrack

   min and max are the repeat limits; max == 0 is used below for "no upper
   limit". Odd repeat opcodes are the minimizing variants.

   Returns the pattern position after the repeat opcode and its operands,
   or NULL when PUSH_BACKTRACK detects a compiler error. */
4856 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4857 {
4858 DEFINE_COMPILER;
4859 backtrack_common *backtrack;
4860 pcre_uchar type;
4861 struct sljit_label *label;
4862 struct sljit_jump *zerolength;
4863 struct sljit_jump *jump = NULL;
4864 pcre_uchar *ccbegin = cc;
4865 int min = 0, max = 0;
4866 BOOL minimize;
4867
4868 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4869
4870 type = cc[1 + IMM2_SIZE];
4871 minimize = (type & 0x1) != 0;
/* Decode the limits and step cc over the reference and repeat opcodes. */
4872 switch(type)
4873 {
4874 case OP_CRSTAR:
4875 case OP_CRMINSTAR:
4876 min = 0;
4877 max = 0;
4878 cc += 1 + IMM2_SIZE + 1;
4879 break;
4880 case OP_CRPLUS:
4881 case OP_CRMINPLUS:
4882 min = 1;
4883 max = 0;
4884 cc += 1 + IMM2_SIZE + 1;
4885 break;
4886 case OP_CRQUERY:
4887 case OP_CRMINQUERY:
4888 min = 0;
4889 max = 1;
4890 cc += 1 + IMM2_SIZE + 1;
4891 break;
4892 case OP_CRRANGE:
4893 case OP_CRMINRANGE:
4894 min = GET2(cc, 1 + IMM2_SIZE + 1);
4895 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4896 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4897 break;
4898 default:
4899 SLJIT_ASSERT_STOP();
4900 break;
4901 }
4902
/* Greedy variants: match as often as allowed, saving STR_PTR on the
   stack after each iteration that may be given back later. */
4903 if (!minimize)
4904 {
4905 if (min == 0)
4906 {
4907 allocate_stack(common, 2);
4908 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4909 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4910 /* Temporary release of STR_PTR. */
4911 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
4912 zerolength = compile_ref_checks(common, ccbegin, NULL);
4913 /* Restore if not zero length. */
4914 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
4915 }
4916 else
4917 {
4918 allocate_stack(common, 1);
4919 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4920 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4921 }
4922
/* An iteration counter is kept in POSSESSIVE0 only when a limit above one
   has to be enforced. */
4923 if (min > 1 || max > 1)
4924 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4925
4926 label = LABEL();
4927 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4928
4929 if (min > 1 || max > 1)
4930 {
4931 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4932 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4933 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below min: loop again without saving a position. */
4934 if (min > 1)
4935 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4936 if (max > 1)
4937 {
/* Below max: save STR_PTR and try one more iteration. */
4938 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4939 allocate_stack(common, 1);
4940 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4941 JUMPTO(SLJIT_JUMP, label);
4942 JUMPHERE(jump);
4943 }
4944 }
4945
4946 if (max == 0)
4947 {
4948 /* Includes min > 1 case as well. */
4949 allocate_stack(common, 1);
4950 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4951 JUMPTO(SLJIT_JUMP, label);
4952 }
4953
4954 JUMPHERE(zerolength);
4955 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4956
4957 decrease_call_count(common);
4958 return cc;
4959 }
4960
/* Minimizing variants: two stack slots are used; STACK(0) receives
   STR_PTR and STACK(1) is the iteration counter. */
4961 allocate_stack(common, 2);
4962 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4963 if (type != OP_CRMINSTAR)
4964 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4965
4966 if (min == 0)
4967 {
/* Zero iterations are acceptable: store the position and skip the first
   match attempt (jump is bound at the end). */
4968 zerolength = compile_ref_checks(common, ccbegin, NULL);
4969 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4970 jump = JUMP(SLJIT_JUMP);
4971 }
4972 else
4973 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4974
/* The matchingpath label is recorded in the iterator_backtrack; the
   min > 1 loop below jumps back to it. */
4975 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4976 if (max > 0)
4977 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4978
4979 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4980 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4981
4982 if (min > 1)
4983 {
4984 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4985 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4986 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4987 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
4988 }
4989 else if (max > 0)
4990 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4991
4992 if (jump != NULL)
4993 JUMPHERE(jump);
4994 JUMPHERE(zerolength);
4995
4996 decrease_call_count(common);
4997 return cc;
4998 }
4999
5000 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5001 {
5002 DEFINE_COMPILER;
5003 backtrack_common *backtrack;
5004 recurse_entry *entry = common->entries;
5005 recurse_entry *prev = NULL;
5006 int start = GET(cc, 1);
5007
5008 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5009 while (entry != NULL)
5010 {
5011 if (entry->start == start)
5012 break;
5013 prev = entry;
5014 entry = entry->next;
5015 }
5016
5017 if (entry == NULL)
5018 {
5019 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5020 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5021 return NULL;
5022 entry->next = NULL;
5023 entry->entry = NULL;
5024 entry->calls = NULL;
5025 entry->start = start;
5026
5027 if (prev != NULL)
5028 prev->next = entry;
5029 else
5030 common->entries = entry;
5031 }
5032
5033 if (common->has_set_som && common->mark_ptr != 0)
5034 {
5035 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5036 allocate_stack(common, 2);
5037 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5038 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5039 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5040 }
5041 else if (common->has_set_som || common->mark_ptr != 0)
5042 {
5043 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5044 allocate_stack(common, 1);
5045 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5046 }
5047
5048 if (entry->entry == NULL)
5049 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5050 else
5051 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5052 /* Leave if the match is failed. */
5053 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5054 return cc + 1 + LINK_SIZE;
5055 }
5056
/* Emits the matching path of an assertion group. After an optional
OP_BRAZERO / OP_BRAMINZERO prefix, the opcode at cc must be in the range
OP_ASSERT .. OP_ASSERTBACK_NOT. Every alternative of the group is compiled
in turn with compile_matchingpath and compile_backtrackingpath.

  common       compiler state. Its quitlabel, acceptlabel, quit and accept
               members are saved on entry, replaced while the alternatives
               are compiled, and restored before every return.
  cc           the assertion opcode or its OP_BRAZERO / OP_BRAMINZERO prefix
               (a prefix is asserted not to occur when conditional is set).
  backtrack    receives framesize and private_data_ptr; matchingpath is set
               here for OP_BRAZERO, while for OP_BRAMINZERO it is only used
               as a jump target (presumably set by the caller - confirm).
  conditional  when TRUE, failure jumps are collected in
               backtrack->condfailed instead of
               backtrack->common.topbacktracks.

Returns the position following the opcode that ends the last alternative
(cc + 1 + LINK_SIZE), or NULL when the sljit compiler reports an error. */
5057 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5058 {
5059 DEFINE_COMPILER;
5060 int framesize;
5061 int private_data_ptr;
5062 backtrack_common altbacktrack;
5063 pcre_uchar *ccbegin;
5064 pcre_uchar opcode;
5065 pcre_uchar bra = OP_BRA;
5066 jump_list *tmp = NULL;
5067 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5068 jump_list **found;
5069 /* Saving previous accept variables. */
5070 struct sljit_label *save_quitlabel = common->quitlabel;
5071 struct sljit_label *save_acceptlabel = common->acceptlabel;
5072 jump_list *save_quit = common->quit;
5073 jump_list *save_accept = common->accept;
5074 struct sljit_jump *jump;
5075 struct sljit_jump *brajump = NULL;
5076
/* Remember and step over an optional zero-repeat prefix. */
5077 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5078 {
5079 SLJIT_ASSERT(!conditional);
5080 bra = *cc;
5081 cc++;
5082 }
5083 private_data_ptr = PRIVATE_DATA(cc);
5084 SLJIT_ASSERT(private_data_ptr != 0);
5085 framesize = get_framesize(common, cc, FALSE);
5086 backtrack->framesize = framesize;
5087 backtrack->private_data_ptr = private_data_ptr;
5088 opcode = *cc;
5089 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* A matching alternative jumps to *found. For the positive assertions this
is the local list tmp (bound to the "Assert is successful" label below); for
the negative ones it is the failure list selected by target. */
5090 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin is the start of the current alternative, cc its end. */
5091 ccbegin = cc;
5092 cc += GET(cc, 1);
5093
5094 if (bra == OP_BRAMINZERO)
5095 {
5096 /* This is a braminzero backtrack path. */
5097 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5098 free_stack(common, 1);
/* A zero value popped from the stack skips the assertion; brajump is bound
near the end of the function. */
5099 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5100 }
5101
5102 if (framesize < 0)
5103 {
/* Store the current stack top in the private data slot, then push STR_PTR. */
5104 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5105 allocate_stack(common, 1);
5106 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5107 }
5108 else
5109 {
/* Push STR_PTR and the previous private data value, point the private data
slot into the newly allocated area and let init_frame fill in the rest. */
5110 allocate_stack(common, framesize + 2);
5111 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5112 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5113 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5114 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5115 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5116 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5117 }
5118
5119 memset(&altbacktrack, 0, sizeof(backtrack_common));
/* The quit list is shared by all alternatives and bound after the loop; the
accept list and label are reset for each alternative inside the loop. */
5120 common->quitlabel = NULL;
5121 common->quit = NULL;
/* One iteration per alternative. */
5122 while (1)
5123 {
5124 common->acceptlabel = NULL;
5125 common->accept = NULL;
5126 altbacktrack.top = NULL;
5127 altbacktrack.topbacktracks = NULL;
5128
/* Second and later alternatives reload STR_PTR from the stack. */
5129 if (*ccbegin == OP_ALT)
5130 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5131
5132 altbacktrack.cc = ccbegin;
5133 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5134 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5135 {
/* Restore the saved quit / accept state before giving up. */
5136 common->quitlabel = save_quitlabel;
5137 common->acceptlabel = save_acceptlabel;
5138 common->quit = save_quit;
5139 common->accept = save_accept;
5140 return NULL;
5141 }
/* Jumps collected in common->accept while compiling this alternative land
here, right after its matching path. */
5142 common->acceptlabel = LABEL();
5143 if (common->accept != NULL)
5144 set_jumps(common->accept, common->acceptlabel);
5145
5146 /* Reset stack. */
5147 if (framesize < 0)
5148 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5149 else {
5150 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5151 {
5152 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5153 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5154 }
5155 else
5156 {
5157 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5158 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5159 }
5160 }
5161
5162 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5163 {
5164 /* We know that STR_PTR was stored on the top of the stack. */
5165 if (conditional)
5166 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5167 else if (bra == OP_BRAZERO)
5168 {
5169 if (framesize < 0)
5170 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5171 else
5172 {
5173 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5174 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5175 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5176 }
5177 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5178 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5179 }
5180 else if (framesize >= 0)
5181 {
5182 /* For OP_BRA and OP_BRAMINZERO. */
5183 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5184 }
5185 }
/* The alternative matched: leave through the list selected by found. */
5186 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5187
5188 compile_backtrackingpath(common, altbacktrack.top);
5189 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5190 {
5191 common->quitlabel = save_quitlabel;
5192 common->acceptlabel = save_acceptlabel;
5193 common->quit = save_quit;
5194 common->accept = save_accept;
5195 return NULL;
5196 }
/* Failures of this alternative continue with the code emitted next: either
the following alternative or the "none of them matched" code. */
5197 set_jumps(altbacktrack.topbacktracks, LABEL());
5198
5199 if (*cc != OP_ALT)
5200 break;
5201
5202 ccbegin = cc;
5203 cc += GET(cc, 1);
5204 }
5205 /* None of them matched. */
5206 if (common->quit != NULL)
5207 set_jumps(common->quit, LABEL());
5208
5209 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5210 {
5211 /* Assert is failed. */
5212 if (conditional || bra == OP_BRAZERO)
5213 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5214
5215 if (framesize < 0)
5216 {
5217 /* The topmost item should be 0. */
5218 if (bra == OP_BRAZERO)
5219 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5220 else
5221 free_stack(common, 1);
5222 }
5223 else
5224 {
5225 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5226 /* The topmost item should be 0. */
5227 if (bra == OP_BRAZERO)
5228 {
5229 free_stack(common, framesize + 1);
5230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5231 }
5232 else
5233 free_stack(common, framesize + 2);
5234 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5235 }
/* With OP_BRAZERO this jump is bound to backtrack->matchingpath further
down; otherwise it is added to the failure list. */
5236 jump = JUMP(SLJIT_JUMP);
5237 if (bra != OP_BRAZERO)
5238 add_jump(compiler, target, jump);
5239
5240 /* Assert is successful. */
5241 set_jumps(tmp, LABEL());
5242 if (framesize < 0)
5243 {
5244 /* We know that STR_PTR was stored on the top of the stack. */
5245 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5246 /* Keep the STR_PTR on the top of the stack. */
5247 if (bra == OP_BRAZERO)
5248 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5249 else if (bra == OP_BRAMINZERO)
5250 {
5251 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5252 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5253 }
5254 }
5255 else
5256 {
5257 if (bra == OP_BRA)
5258 {
5259 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5260 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5261 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5262 }
5263 else
5264 {
5265 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5266 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5267 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* OP_BRAZERO stores STR_PTR in the top slot, OP_BRAMINZERO stores 0. */
5268 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5269 }
5270 }
5271
5272 if (bra == OP_BRAZERO)
5273 {
/* The failure jump emitted above and the success path meet here. */
5274 backtrack->matchingpath = LABEL();
5275 sljit_set_label(jump, backtrack->matchingpath);
5276 }
5277 else if (bra == OP_BRAMINZERO)
5278 {
5279 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5280 JUMPHERE(brajump);
5281 if (framesize >= 0)
5282 {
5283 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5284 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5285 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5286 }
5287 set_jumps(backtrack->common.topbacktracks, LABEL());
5288 }
5289 }
5290 else
5291 {
5292 /* AssertNot is successful. */
5293 if (framesize < 0)
5294 {
5295 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5296 if (bra != OP_BRA)
5297 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5298 else
5299 free_stack(common, 1);
5300 }
5301 else
5302 {
5303 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5304 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5305 /* The topmost item should be 0. */
5306 if (bra != OP_BRA)
5307 {
5308 free_stack(common, framesize + 1);
5309 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5310 }
5311 else
5312 free_stack(common, framesize + 2);
5313 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5314 }
5315
5316 if (bra == OP_BRAZERO)
5317 backtrack->matchingpath = LABEL();
5318 else if (bra == OP_BRAMINZERO)
5319 {
5320 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5321 JUMPHERE(brajump);
5322 }
5323
5324 if (bra != OP_BRA)
5325 {
/* With a zero-repeat prefix the jumps taken when an alternative matched are
bound here and the list is cleared. */
5326 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5327 set_jumps(backtrack->common.topbacktracks, LABEL());
5328 backtrack->common.topbacktracks = NULL;
5329 }
5330 }
5331
/* Restore the quit / accept state saved on entry. */
5332 common->quitlabel = save_quitlabel;
5333 common->acceptlabel = save_acceptlabel;
5334 common->quit = save_quit;
5335 common->accept = save_accept;
5336 return cc + 1 + LINK_SIZE;
5337 }
5338
5339 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5340 {
5341 int condition = FALSE;
5342 pcre_uchar *slotA = name_table;
5343 pcre_uchar *slotB;
5344 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5345 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5346 sljit_sw no_capture;
5347 int i;
5348
5349 locals += refno & 0xff;
5350 refno >>= 8;
5351 no_capture = locals[1];
5352
5353 for (i = 0; i < name_count; i++)
5354 {
5355 if (GET2(slotA, 0) == refno) break;
5356 slotA += name_entry_size;
5357 }
5358
5359 if (i < name_count)
5360 {
5361 /* Found a name for the number - there can be only one; duplicate names
5362 for different numbers are allowed, but not vice versa. First scan down
5363 for duplicates. */
5364
5365 slotB = slotA;
5366 while (slotB > name_table)
5367 {
5368 slotB -= name_entry_size;
5369 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5370 {
5371 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5372 if (condition) break;
5373 }
5374 else break;
5375 }
5376
5377 /* Scan up for duplicates */
5378 if (!condition)
5379 {
5380 slotB = slotA;
5381 for (i++; i < name_count; i++)
5382 {
5383 slotB += name_entry_size;
5384 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5385 {
5386 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5387 if (condition) break;
5388 }
5389 else break;
5390 }
5391 }
5392 }
5393 return condition;
5394 }
5395
5396 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5397 {
5398 int condition = FALSE;
5399 pcre_uchar *slotA = name_table;
5400 pcre_uchar *slotB;
5401 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5402 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5403 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5404 sljit_uw i;
5405
5406 for (i = 0; i < name_count; i++)
5407 {
5408 if (GET2(slotA, 0) == recno) break;
5409 slotA += name_entry_size;
5410 }
5411
5412 if (i < name_count)
5413 {
5414 /* Found a name for the number - there can be only one; duplicate
5415 names for different numbers are allowed, but not vice versa. First
5416 scan down for duplicates. */
5417
5418 slotB = slotA;
5419 while (slotB > name_table)
5420 {
5421 slotB -= name_entry_size;
5422 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5423 {
5424 condition = GET2(slotB, 0) == group_num;
5425 if (condition) break;
5426 }
5427 else break;
5428 }
5429
5430 /* Scan up for duplicates */
5431 if (!condition)
5432 {
5433 slotB = slotA;
5434 for (i++; i < name_count; i++)
5435 {
5436 slotB += name_entry_size;
5437 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5438 {
5439 condition = GET2(slotB, 0) == group_num;
5440 if (condition) break;
5441 }
5442 else break;
5443 }
5444 }
5445 }
5446 return condition;
5447 }
5448
5449 /*
5450 Handling bracketed expressions is probably the most complex part.
5451
5452 Stack layout naming characters:
5453 S - Push the current STR_PTR
5454 0 - Push a 0 (NULL)
5455 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5456 before the next alternative. Not pushed if there are no alternatives.
5457 M - Any values pushed by the current alternative. Can be empty, or anything.
5458 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5459 L - Push the previous private data value (stored at private_data_ptr) to the stack
5460 () - optional values stored on the stack
5461 ()* - optional, can be stored multiple times
5462
5463 The following list shows the regular expression templates, their PCRE byte codes
5464 and stack layout supported by pcre-sljit.
5465
5466 (?:) OP_BRA | OP_KET A M
5467 () OP_CBRA | OP_KET C M
5468 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5469 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5470 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5471 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5472 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5473 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5474 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5475 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5476 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5477 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5478 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5479 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5480 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5481 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5482 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5483 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5484 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5485 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5486 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5487 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5488
5489
5490 Stack layout naming characters:
5491 A - Push the alternative index (starting from 0) on the stack.
5492 Not pushed if there are no alternatives.
5493 M - Any values pushed by the current alternative. Can be empty, or anything.
5494
5495 The next list shows the possible content of a bracket:
5496 (|) OP_*BRA | OP_ALT ... M A
5497 (?()|) OP_*COND | OP_ALT M A
5498 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5499 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5500 Or nothing, if trace is unnecessary
5501 */
5502
5503 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5504 {
5505 DEFINE_COMPILER;
5506 backtrack_common *backtrack;
5507 pcre_uchar opcode;
5508 int private_data_ptr = 0;
5509 int offset = 0;
5510 int stacksize;
5511 pcre_uchar *ccbegin;
5512 pcre_uchar *matchingpath;
5513 pcre_uchar bra = OP_BRA;
5514 pcre_uchar ket;
5515 assert_backtrack *assert;
5516 BOOL has_alternatives;
5517 struct sljit_jump *jump;
5518 struct sljit_jump *skip;
5519 struct sljit_label *rmaxlabel = NULL;
5520 struct sljit_jump *braminzerojump = NULL;
5521
5522 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5523
5524 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5525 {
5526 bra = *cc;
5527 cc++;
5528 opcode = *cc;
5529 }
5530
5531 opcode = *cc;
5532 ccbegin = cc;
5533 matchingpath = ccbegin + 1 + LINK_SIZE;
5534
5535 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5536 {
5537 /* Drop this bracket_backtrack. */
5538 parent->top = backtrack->prev;
5539 return bracketend(cc);
5540 }
5541
5542 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5543 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5544 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5545 cc += GET(cc, 1);
5546
5547 has_alternatives = *cc == OP_ALT;
5548 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5549 {
5550 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5551 if (*matchingpath == OP_NRREF)
5552 {
5553 stacksize = GET2(matchingpath, 1);
5554 if (common->currententry == NULL || stacksize == RREF_ANY)
5555 has_alternatives = FALSE;
5556 else if (common->currententry->start == 0)
5557 has_alternatives = stacksize != 0;
5558 else
5559 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5560 }
5561 }
5562
5563 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5564 opcode = OP_SCOND;
5565 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5566 opcode = OP_ONCE;
5567
5568 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5569 {
5570 /* Capturing brackets has a pre-allocated space. */
5571 offset = GET2(ccbegin, 1 + LINK_SIZE);
5572 if (common->optimized_cbracket[offset] == 0)
5573 {
5574 private_data_ptr = OVECTOR_PRIV(offset);
5575 offset <<= 1;
5576 }
5577 else
5578 {
5579 offset <<= 1;
5580 private_data_ptr = OVECTOR(offset);
5581 }
5582 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5583 matchingpath += IMM2_SIZE;
5584 }
5585 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5586 {
5587 /* Other brackets simply allocate the next entry. */
5588 private_data_ptr = PRIVATE_DATA(ccbegin);
5589 SLJIT_ASSERT(private_data_ptr != 0);
5590 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5591 if (opcode == OP_ONCE)
5592 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5593 }
5594
5595 /* Instructions before the first alternative. */
5596 stacksize = 0;
5597 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5598 stacksize++;
5599 if (bra == OP_BRAZERO)
5600 stacksize++;
5601
5602 if (stacksize > 0)
5603 allocate_stack(common, stacksize);
5604
5605 stacksize = 0;
5606 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5607 {
5608 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5609 stacksize++;
5610 }
5611
5612 if (bra == OP_BRAZERO)
5613 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5614
5615 if (bra == OP_BRAMINZERO)
5616 {
5617 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5618 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5619 if (ket != OP_KETRMIN)
5620 {
5621 free_stack(common, 1);
5622 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5623 }
5624 else
5625 {
5626 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5627 {
5628 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5629 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5630 /* Nothing stored during the first run. */
5631 skip = JUMP(SLJIT_JUMP);
5632 JUMPHERE(jump);
5633 /* Checking zero-length iteration. */
5634 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5635 {
5636 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5637 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5638 }
5639 else
5640 {
5641 /* Except when the whole stack frame must be saved. */
5642 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5643 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
5644 }
5645 JUMPHERE(skip);
5646 }
5647 else
5648 {
5649 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5650 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5651 JUMPHERE(jump);
5652 }
5653 }
5654 }
5655
5656 if (ket == OP_KETRMIN)
5657 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5658
5659 if (ket == OP_KETRMAX)
5660 {
5661 rmaxlabel = LABEL();
5662 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5663 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5664 }
5665
5666 /* Handling capturing brackets and alternatives. */
5667 if (opcode == OP_ONCE)
5668 {
5669 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5670 {
5671 /* Neither capturing brackets nor recursions are found in the block. */
5672 if (ket == OP_KETRMIN)
5673 {
5674 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5675 allocate_stack(common, 2);
5676 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5677 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5678 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5679 }
5680 else if (ket == OP_KETRMAX || has_alternatives)
5681 {
5682 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5683 allocate_stack(common, 1);
5684 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5685 }
5686 else
5687 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5688 }
5689 else
5690 {
5691 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5692 {
5693 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5694 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5695 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5696 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5697 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5698 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5699 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5700 }
5701 else
5702 {
5703 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5704 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5705 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5706 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5707 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5708 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5709 }
5710 }
5711 }
5712 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5713 {
5714 /* Saving the previous values. */
5715 if (common->optimized_cbracket[offset >> 1] == 0)
5716 {
5717 allocate_stack(common, 3);
5718 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5719 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5720 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5721 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5722 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5723 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5725 }
5726 else
5727 {
5728 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5729 allocate_stack(common, 2);
5730 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5731 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
5732 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5733 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5734 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5735 }
5736 }
5737 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5738 {
5739 /* Saving the previous value. */
5740 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5741 allocate_stack(common, 1);
5742 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5743 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5744 }
5745 else if (has_alternatives)
5746 {
5747 /* Pushing the starting string pointer. */
5748 allocate_stack(common, 1);
5749 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5750 }
5751
5752 /* Generating code for the first alternative. */
5753 if (opcode == OP_COND || opcode == OP_SCOND)
5754 {
5755 if (*matchingpath == OP_CREF)
5756 {
5757 SLJIT_ASSERT(has_alternatives);
5758 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5759 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5760 matchingpath += 1 + IMM2_SIZE;
5761 }
5762 else if (*matchingpath == OP_NCREF)
5763 {
5764 SLJIT_ASSERT(has_alternatives);
5765 stacksize = GET2(matchingpath, 1);
5766 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5767
5768 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5769 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5770 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5771 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
5772 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5773 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
5774 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5775 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5776 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
5777
5778 JUMPHERE(jump);
5779 matchingpath += 1 + IMM2_SIZE;
5780 }
5781 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
5782 {
5783 /* Never has other case. */
5784 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5785
5786 stacksize = GET2(matchingpath, 1);
5787 if (common->currententry == NULL)
5788 stacksize = 0;
5789 else if (stacksize == RREF_ANY)
5790 stacksize = 1;
5791 else if (common->currententry->start == 0)
5792 stacksize = stacksize == 0;
5793 else
5794 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5795
5796 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
5797 {
5798 SLJIT_ASSERT(!has_alternatives);
5799 if (stacksize != 0)
5800 matchingpath += 1 + IMM2_SIZE;
5801 else
5802 {
5803 if (*cc == OP_ALT)
5804 {
5805 matchingpath = cc + 1 + LINK_SIZE;
5806 cc += GET(cc, 1);
5807 }
5808 else
5809 matchingpath = cc;
5810 }
5811 }
5812 else
5813 {
5814 SLJIT_ASSERT(has_alternatives);
5815
5816 stacksize = GET2(matchingpath, 1);
5817 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5818 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5819 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5820 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5821 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
5822 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5823 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
5824 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5825 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5826 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
5827 matchingpath += 1 + IMM2_SIZE;
5828 }
5829 }
5830 else
5831 {
5832 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
5833 /* Similar code as PUSH_BACKTRACK macro. */
5834 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5835 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5836 return NULL;
5837 memset(assert, 0, sizeof(assert_backtrack));
5838 assert->common.cc = matchingpath;
5839 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5840 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
5841 }
5842 }
5843
5844 compile_matchingpath(common, matchingpath, cc, backtrack);
5845 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5846 return NULL;
5847
5848 if (opcode == OP_ONCE)
5849 {
5850 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5851 {
5852 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5853 /* TMP2 which is set here used by OP_KETRMAX below. */
5854 if (ket == OP_KETRMAX)
5855 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5856 else if (ket == OP_KETRMIN)
5857 {
5858 /* Move the STR_PTR to the private_data_ptr. */
5859 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5860 }
5861 }
5862 else
5863 {
5864 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5865 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw));
5866 if (ket == OP_KETRMAX)
5867 {
5868 /* TMP2 which is set here used by OP_KETRMAX below. */
5869 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5870 }
5871 }
5872 }
5873
5874 stacksize = 0;
5875 if (ket != OP_KET || bra != OP_BRA)
5876 stacksize++;
5877 if (has_alternatives && opcode != OP_ONCE)
5878 stacksize++;
5879
5880 if (stacksize > 0)
5881 allocate_stack(common, stacksize);
5882
5883 stacksize = 0;
5884 if (ket != OP_KET)
5885 {
5886 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5887 stacksize++;
5888 }
5889 else if (bra != OP_BRA)
5890 {
5891 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5892 stacksize++;
5893 }
5894
5895 if (has_alternatives)
5896 {
5897 if (opcode != OP_ONCE)
5898 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5899 if (ket != OP_KETRMAX)
5900 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5901 }
5902
5903 /* Must be after the matchingpath label. */
5904 if (offset != 0)
5905 {
5906 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5907 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5908 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5909 }
5910
5911 if (ket == OP_KETRMAX)
5912 {
5913 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5914 {
5915 if (has_alternatives)
5916 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5917 /* Checking zero-length iteration. */
5918 if (opcode != OP_ONCE)
5919 {
5920 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
5921 /* Drop STR_PTR for greedy plus quantifier. */
5922 if (bra != OP_BRAZERO)
5923 free_stack(common, 1);
5924 }
5925 else
5926 /* TMP2 must contain the starting STR_PTR. */
5927 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5928 }
5929 else
5930 JUMPTO(SLJIT_JUMP, rmaxlabel);
5931 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5932 }
5933
5934 if (bra == OP_BRAZERO)
5935 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
5936
5937 if (bra == OP_BRAMINZERO)
5938 {
5939 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5940 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
5941 if (braminzerojump != NULL)
5942 {
5943 JUMPHERE(braminzerojump);
5944 /* We need to release the end pointer to perform the
5945 backtrack for the zero-length iteration. When
5946 framesize is < 0, OP_ONCE will do the release itself. */
5947 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5948 {
5949 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5950 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5951 }
5952 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5953 free_stack(common, 1);
5954 }
5955 /* Continue to the normal backtrack. */
5956 }
5957
5958 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5959 decrease_call_count(common);
5960
5961 /* Skip the other alternatives. */
5962 while (*cc == OP_ALT)
5963 cc += GET(cc, 1);
5964 cc += 1 + LINK_SIZE;
5965 return cc;
5966 }
5967
5968 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5969 {
5970 DEFINE_COMPILER;
5971 backtrack_common *backtrack;
5972 pcre_uchar opcode;
5973 int private_data_ptr;
5974 int cbraprivptr = 0;
5975 int framesize;
5976 int stacksize;
5977 int offset = 0;
5978 BOOL zero = FALSE;
5979 pcre_uchar *ccbegin = NULL;
5980 int stack;
5981 struct sljit_label *loop = NULL;
5982 struct jump_list *emptymatch = NULL;
5983
5984 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
5985 if (*cc == OP_BRAPOSZERO)
5986 {
5987 zero = TRUE;
5988 cc++;
5989 }
5990
5991 opcode = *cc;
5992 private_data_ptr = PRIVATE_DATA(cc);
5993 SLJIT_ASSERT(private_data_ptr != 0);
5994 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
5995 switch(opcode)
5996 {
5997 case OP_BRAPOS:
5998 case OP_SBRAPOS:
5999 ccbegin = cc + 1 + LINK_SIZE;
6000 break;
6001
6002 case OP_CBRAPOS:
6003 case OP_SCBRAPOS:
6004 offset = GET2(cc, 1 + LINK_SIZE);
6005 /* This case cannot be optimized in the same was as
6006 normal capturing brackets. */
6007 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6008 cbraprivptr = OVECTOR_PRIV(offset);
6009 offset <<= 1;
6010 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6011 break;
6012
6013 default:
6014 SLJIT_ASSERT_STOP();
6015 break;
6016 }
6017
6018 framesize = get_framesize(common, cc, FALSE);
6019 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6020 if (framesize < 0)
6021 {
6022 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
6023 if (!zero)
6024 stacksize++;
6025 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6026 allocate_stack(common, stacksize);
6027 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6028
6029 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6030 {
6031 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6032 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6033 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6034 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6035 }
6036 else
6037 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6038
6039 if (!zero)
6040 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6041 }
6042 else
6043 {
6044 stacksize = framesize + 1;
6045 if (!zero)
6046 stacksize++;
6047 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6048 stacksize++;
6049 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6050 allocate_stack(common, stacksize);
6051
6052 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6053 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6054 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6055 stack = 0;
6056 if (!zero)
6057 {
6058 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6059 stack++;
6060 }
6061 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6062 {
6063 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6064 stack++;
6065 }
6066 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6067 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6068 }
6069
6070 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6071 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6072
6073 loop = LABEL();
6074 while (*cc != OP_KETRPOS)
6075 {
6076 backtrack->top = NULL;
6077 backtrack->topbacktracks = NULL;
6078 cc += GET(cc, 1);
6079
6080 compile_matchingpath(common, ccbegin, cc, backtrack);
6081 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6082 return NULL;
6083
6084 if (framesize < 0)
6085 {
6086 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6087
6088 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6089 {
6090 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6091 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6092 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6093 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6094 }
6095 else
6096 {
6097 if (opcode == OP_SBRAPOS)
6098 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6099 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6100 }
6101
6102 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6103 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6104
6105 if (!zero)
6106 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6107 }
6108 else
6109 {
6110 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6111 {
6112 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6113 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6114 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6115 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6117 }
6118 else
6119 {
6120 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6121 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6122 if (opcode == OP_SBRAPOS)
6123 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6124 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6125 }
6126
6127 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6128 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6129
6130 if (!zero)
6131 {
6132 if (framesize < 0)
6133 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6134 else
6135 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6136 }
6137 }
6138 JUMPTO(SLJIT_JUMP, loop);
6139 flush_stubs(common);
6140
6141 compile_backtrackingpath(common, backtrack->top);
6142 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6143 return NULL;
6144 set_jumps(backtrack->topbacktracks, LABEL());
6145
6146 if (framesize < 0)
6147 {
6148 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6149 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6150 else
6151 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6152 }
6153 else
6154 {
6155 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6156 {
6157 /* Last alternative. */
6158 if (*cc == OP_KETRPOS)
6159 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6160 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6161 }
6162 else
6163 {
6164 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6165 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6166 }
6167 }
6168
6169 if (*cc == OP_KETRPOS)
6170 break;
6171 ccbegin = cc + 1 + LINK_SIZE;
6172 }
6173
6174 backtrack->topbacktracks = NULL;
6175 if (!zero)
6176 {
6177 if (framesize < 0)
6178 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6179 else /* TMP2 is set to [private_data_ptr] above. */
6180 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
6181 }
6182
6183 /* None of them matched. */
6184 set_jumps(emptymatch, LABEL());
6185 decrease_call_count(common);
6186 return cc + 1 + LINK_SIZE;
6187 }
6188
6189 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6190 {
6191 int class_len;
6192
6193 *opcode = *cc;
6194 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6195 {
6196 cc++;
6197 *type = OP_CHAR;
6198 }
6199 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6200 {
6201 cc++;
6202 *type = OP_CHARI;
6203 *opcode -= OP_STARI - OP_STAR;
6204 }
6205 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6206 {
6207 cc++;
6208 *type = OP_NOT;
6209 *opcode -= OP_NOTSTAR - OP_STAR;
6210 }
6211 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6212 {
6213 cc++;
6214 *type = OP_NOTI;
6215 *opcode -= OP_NOTSTARI - OP_STAR;
6216 }
6217 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6218 {
6219 cc++;
6220 *opcode -= OP_TYPESTAR - OP_STAR;
6221 *type = 0;
6222 }
6223 else
6224 {
6225 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6226 *type = *opcode;
6227 cc++;
6228 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6229 *opcode = cc[class_len - 1];
6230 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6231 {
6232 *opcode -= OP_CRSTAR - OP_STAR;
6233 if (end != NULL)
6234 *end = cc + class_len;
6235 }
6236 else
6237 {
6238 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6239 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6240 *arg2 = GET2(cc, class_len);
6241
6242 if (*arg2 == 0)
6243 {
6244 SLJIT_ASSERT(*arg1 != 0);
6245 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6246 }
6247 if (*arg1 == *arg2)
6248 *opcode = OP_EXACT;
6249
6250 if (end != NULL)
6251 *end = cc + class_len + 2 * IMM2_SIZE;
6252 }
6253 return cc;
6254 }
6255
6256 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6257 {
6258 *arg1 = GET2(cc, 0);
6259 cc += IMM2_SIZE;
6260 }
6261
6262 if (*type == 0)
6263 {
6264 *type = *cc;
6265 if (end != NULL)
6266 *end = next_opcode(common, cc);
6267 cc++;
6268 return cc;
6269 }
6270
6271 if (end != NULL)
6272 {
6273 *end = cc + 1;
6274 #ifdef SUPPORT_UTF
6275 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6276 #endif
6277 }
6278 return cc;
6279 }
6280
6281 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6282 {
6283 DEFINE_COMPILER;
6284 backtrack_common *backtrack;
6285 pcre_uchar opcode;
6286 pcre_uchar type;
6287 int arg1 = -1, arg2 = -1;
6288 pcre_uchar* end;
6289 jump_list *nomatch = NULL;
6290 struct sljit_jump *jump = NULL;
6291 struct sljit_label *label;
6292 int private_data_ptr = PRIVATE_DATA(cc);
6293 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6294 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6295 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
6296 int tmp_base, tmp_offset;
6297
6298 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6299
6300 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6301
6302 switch (type)
6303 {
6304 case OP_NOT_DIGIT:
6305 case OP_DIGIT:
6306 case OP_NOT_WHITESPACE:
6307 case OP_WHITESPACE:
6308 case OP_NOT_WORDCHAR:
6309 case OP_WORDCHAR:
6310 case OP_ANY:
6311 case OP_ALLANY:
6312 case OP_ANYBYTE:
6313 case OP_ANYNL:
6314 case OP_NOT_HSPACE:
6315 case OP_HSPACE:
6316 case OP_NOT_VSPACE:
6317 case OP_VSPACE:
6318 case OP_CHAR:
6319 case OP_CHARI:
6320 case OP_NOT:
6321 case OP_NOTI:
6322 case OP_CLASS:
6323 case OP_NCLASS:
6324 tmp_base = TMP3;
6325 tmp_offset = 0;
6326 break;
6327
6328 default:
6329 SLJIT_ASSERT_STOP();
6330 /* Fall through. */
6331
6332 case OP_EXTUNI:
6333 case OP_XCLASS:
6334 case OP_NOTPROP:
6335 case OP_PROP:
6336 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6337 tmp_offset = POSSESSIVE0;
6338 break;
6339 }
6340
6341 switch(opcode)
6342 {
6343 case OP_STAR:
6344 case OP_PLUS:
6345 case OP_UPTO:
6346 case OP_CRRANGE:
6347 if (type == OP_ANYNL || type == OP_EXTUNI)
6348 {
6349 SLJIT_ASSERT(private_data_ptr == 0);
6350 if (opcode == OP_STAR || opcode == OP_UPTO)
6351 {
6352 allocate_stack(common, 2);
6353 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6354 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6355 }
6356 else
6357 {
6358 allocate_stack(common, 1);
6359 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6360 }
6361
6362 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6363 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6364
6365 label = LABEL();
6366 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6367 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6368 {
6369 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6370 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6371 if (opcode == OP_CRRANGE && arg2 > 0)
6372 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6373 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6374 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6375 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6376 }
6377
6378 /* We cannot use TMP3 because of this allocate_stack. */
6379 allocate_stack(common, 1);
6380 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6381 JUMPTO(SLJIT_JUMP, label);
6382 if (jump != NULL)
6383 JUMPHERE(jump);
6384 }
6385 else
6386 {
6387 if (opcode == OP_PLUS)
6388 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6389 if (private_data_ptr == 0)
6390 allocate_stack(common, 2);
6391 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6392 if (opcode <= OP_PLUS)
6393 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6394 else
6395 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6396 label = LABEL();
6397 compile_char1_matchingpath(common, type, cc, &nomatch);
6398 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6399 if (opcode <= OP_PLUS)
6400 JUMPTO(SLJIT_JUMP, label);
6401 else if (opcode == OP_CRRANGE && arg1 == 0)
6402 {
6403 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6404 JUMPTO(SLJIT_JUMP, label);
6405 }
6406 else
6407 {
6408 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6409 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6410 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6411 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6412 }
6413 set_jumps(nomatch, LABEL());
6414 if (opcode == OP_CRRANGE)
6415 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6416 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6417 }
6418 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6419 break;
6420
6421 case OP_MINSTAR:
6422 case OP_MINPLUS:
6423 if (opcode == OP_MINPLUS)
6424 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6425 if (private_data_ptr == 0)
6426 allocate_stack(common, 1);
6427 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6428 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6429 break;
6430
6431 case OP_MINUPTO:
6432 case OP_CRMINRANGE:
6433 if (private_data_ptr == 0)
6434 allocate_stack(common, 2);
6435 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6436 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6437 if (opcode == OP_CRMINRANGE)
6438 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6439 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6440 break;
6441
6442 case OP_QUERY:
6443 case OP_MINQUERY:
6444 if (private_data_ptr == 0)
6445 allocate_stack(common, 1);
6446 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6447 if (opcode == OP_QUERY)
6448 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6449 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6450 break;
6451
6452 case OP_EXACT:
6453 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6454 label = LABEL();
6455 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6456 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6457 JUMPTO(SLJIT_C_NOT_ZERO, label);
6458 break;
6459
6460 case OP_POSSTAR:
6461 case OP_POSPLUS:
6462 case OP_POSUPTO:
6463 if (opcode == OP_POSPLUS)
6464 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6465 if (opcode == OP_POSUPTO)
6466 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6467 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6468 label = LABEL();
6469 compile_char1_matchingpath(common, type, cc, &nomatch);
6470 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6471 if (opcode != OP_POSUPTO)
6472 JUMPTO(SLJIT_JUMP, label);
6473 else
6474 {
6475 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6476 JUMPTO(SLJIT_C_NOT_ZERO, label);
6477 }
6478 set_jumps(nomatch, LABEL());
6479 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6480 break;
6481
6482 case OP_POSQUERY:
6483 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6484 compile_char1_matchingpath(common, type, cc, &nomatch);
6485 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6486 set_jumps(nomatch, LABEL());
6487 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6488 break;
6489
6490 default:
6491 SLJIT_ASSERT_STOP();
6492 break;
6493 }
6494
6495 decrease_call_count(common);
6496 return end;
6497 }
6498
6499 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6500 {
6501 DEFINE_COMPILER;
6502 backtrack_common *backtrack;
6503
6504 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6505
6506 if (*cc == OP_FAIL)
6507 {
6508 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6509 return cc + 1;
6510 }
6511
6512 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6513 {
6514 /* No need to check notempty conditions. */
6515 if (common->acceptlabel == NULL)
6516 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6517 else
6518 JUMPTO(SLJIT_JUMP, common->acceptlabel);
6519 return cc + 1;
6520 }
6521
6522 if (common->acceptlabel == NULL)
6523 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6524 else
6525 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
6526 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6527 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6528 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6529 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6530 if (common->acceptlabel == NULL)
6531 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6532 else
6533 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
6534 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6535 if (common->acceptlabel == NULL)
6536 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6537 else
6538 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
6539 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6540 return cc + 1;
6541 }
6542
6543 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
6544 {
6545 DEFINE_COMPILER;
6546 int offset = GET2(cc, 1);
6547 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
6548
6549 /* Data will be discarded anyway... */
6550 if (common->currententry != NULL)
6551 return cc + 1 + IMM2_SIZE;
6552
6553 if (!optimized_cbracket)
6554 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6555 offset <<= 1;
6556 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6557 if (!optimized_cbracket)
6558 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6559 return cc + 1 + IMM2_SIZE;
6560 }
6561
6562 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6563 {
6564 DEFINE_COMPILER;
6565 backtrack_common *backtrack;
6566
6567 while (cc < ccend)
6568 {
6569 switch(*cc)
6570 {
6571 case OP_SOD:
6572 case OP_SOM:
6573 case OP_NOT_WORD_BOUNDARY:
6574 case OP_WORD_BOUNDARY:
6575 case OP_NOT_DIGIT:
6576 case OP_DIGIT:
6577 case OP_NOT_WHITESPACE:
6578 case OP_WHITESPACE:
6579 case OP_NOT_WORDCHAR:
6580 case OP_WORDCHAR:
6581 case OP_ANY:
6582 case OP_ALLANY:
6583 case OP_ANYBYTE:
6584 case OP_NOTPROP:
6585 case OP_PROP:
6586 case OP_ANYNL:
6587 case OP_NOT_HSPACE:
6588 case OP_HSPACE:
6589 case OP_NOT_VSPACE:
6590 case OP_VSPACE:
6591 case OP_EXTUNI:
6592 case OP_EODN:
6593 case OP_EOD:
6594 case OP_CIRC:
6595 case OP_CIRCM:
6596 case OP_DOLL:
6597 case OP_DOLLM:
6598 case OP_NOT:
6599 case OP_NOTI:
6600 case OP_REVERSE:
6601 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6602 break;
6603
6604 case OP_SET_SOM:
6605 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6606 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6607 allocate_stack(common, 1);
6608 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6609 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6610 cc++;
6611 break;
6612
6613 case OP_CHAR:
6614 case OP_CHARI:
6615 if (common->mode == JIT_COMPILE)
6616 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6617 else
6618 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6619 break;
6620
6621 case OP_STAR:
6622 case OP_MINSTAR:
6623 case OP_PLUS:
6624 case OP_MINPLUS:
6625 case OP_QUERY:
6626 case OP_MINQUERY:
6627 case OP_UPTO:
6628 case OP_MINUPTO:
6629 case OP_EXACT:
6630 case OP_POSSTAR:
6631 case OP_POSPLUS:
6632 case OP_POSQUERY:
6633 case OP_POSUPTO:
6634 case OP_STARI:
6635 case OP_MINSTARI:
6636 case OP_PLUSI:
6637 case OP_MINPLUSI:
6638 case OP_QUERYI:
6639 case OP_MINQUERYI:
6640 case OP_UPTOI:
6641 case OP_MINUPTOI:
6642 case OP_EXACTI:
6643 case OP_POSSTARI:
6644 case OP_POSPLUSI:
6645 case OP_POSQUERYI:
6646 case OP_POSUPTOI:
6647 case OP_NOTSTAR:
6648 case OP_NOTMINSTAR:
6649 case OP_NOTPLUS:
6650 case OP_NOTMINPLUS:
6651 case OP_NOTQUERY:
6652 case OP_NOTMINQUERY:
6653 case OP_NOTUPTO:
6654 case OP_NOTMINUPTO:
6655 case OP_NOTEXACT:
6656 case OP_NOTPOSSTAR:
6657 case OP_NOTPOSPLUS:
6658 case OP_NOTPOSQUERY:
6659 case OP_NOTPOSUPTO:
6660 case OP_NOTSTARI:
6661 case OP_NOTMINSTARI:
6662 case OP_NOTPLUSI:
6663 case OP_NOTMINPLUSI:
6664 case OP_NOTQUERYI:
6665 case OP_NOTMINQUERYI:
6666 case OP_NOTUPTOI:
6667 case OP_NOTMINUPTOI:
6668 case OP_NOTEXACTI:
6669 case OP_NOTPOSSTARI:
6670 case OP_NOTPOSPLUSI:
6671 case OP_NOTPOSQUERYI:
6672 case OP_NOTPOSUPTOI:
6673 case OP_TYPESTAR:
6674 case OP_TYPEMINSTAR:
6675 case OP_TYPEPLUS:
6676 case OP_TYPEMINPLUS:
6677 case OP_TYPEQUERY:
6678 case OP_TYPEMINQUERY:
6679 case OP_TYPEUPTO:
6680 case OP_TYPEMINUPTO:
6681 case OP_TYPEEXACT:
6682 case OP_TYPEPOSSTAR:
6683 case OP_TYPEPOSPLUS:
6684 case OP_TYPEPOSQUERY:
6685 case OP_TYPEPOSUPTO:
6686 cc = compile_iterator_matchingpath(common, cc, parent);
6687 break;
6688
6689 case OP_CLASS:
6690 case OP_NCLASS:
6691 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
6692 cc = compile_iterator_matchingpath(common, cc, parent);
6693 else
6694 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6695 break;
6696
6697 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6698 case OP_XCLASS:
6699 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
6700 cc = compile_iterator_matchingpath(common, cc, parent);
6701 else
6702 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6703 break;
6704 #endif
6705
6706 case OP_REF:
6707 case OP_REFI:
6708 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
6709 cc = compile_ref_iterator_matchingpath(common, cc, parent);
6710 else
6711 cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
6712 break;
6713
6714 case OP_RECURSE:
6715 cc = compile_recurse_matchingpath(common, cc, parent);
6716 break;
6717
6718 case OP_ASSERT:
6719 case OP_ASSERT_NOT:
6720 case OP_ASSERTBACK:
6721 case OP_ASSERTBACK_NOT:
6722 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6723 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6724 break;
6725
6726 case OP_BRAMINZERO:
6727 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
6728 cc = bracketend(cc + 1);
6729 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
6730 {
6731 allocate_stack(common, 1);
6732 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6733 }
6734 else
6735 {
6736 allocate_stack(common, 2);
6737 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
6739 }
6740 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
6741 if (cc[1] > OP_ASSERTBACK_NOT)
6742 decrease_call_count(common);
6743 break;
6744
6745 case OP_ONCE:
6746 case OP_ONCE_NC:
6747 case OP_BRA:
6748 case OP_CBRA:
6749 case OP_COND:
6750 case OP_SBRA:
6751 case OP_SCBRA:
6752 case OP_SCOND:
6753 cc = compile_bracket_matchingpath(common, cc, parent);
6754 break;
6755
6756 case OP_BRAZERO:
6757 if (cc[1] > OP_ASSERTBACK_NOT)