/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1242 - (show annotations)
Sat Jan 26 17:51:43 2013 UTC (6 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 264926 byte(s)
Fix two buffer over read issues in 16 and 32 bit modes. Affects JIT only.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Allocate memory for the regex stack on the real machine stack.
69 Fast, but limited size. */
70 #define MACHINE_STACK_SIZE 32768
71
72 /* Growth rate for stack allocated by the OS. Should be a multiple
73 of the page size. */
74 #define STACK_GROWTH_RATE 8192
75
76 /* Enable to check that the allocation could destroy temporaries. */
77 #if defined SLJIT_DEBUG && SLJIT_DEBUG
78 #define DESTROY_REGISTERS 1
79 #endif
80
81 /*
82 Short summary of the backtracking mechanism employed by the JIT code generator:
83
84 The code generator follows the recursive nature of the PERL compatible regular
85 expressions. The basic blocks of regular expressions are condition checkers
86 which execute different commands depending on the result of the condition check.
87 The relationship between the operators can be horizontal (concatenation) and
88 vertical (sub-expression) (See struct backtrack_common for more details).
89
90 'ab' - 'a' and 'b' regexps are concatenated
91 'a+' - 'a' is the sub-expression of the '+' operator
92
93 The condition checkers are boolean (true/false) checkers. Machine code is generated
94 for the checker itself and for the actions depending on the result of the checker.
95 The 'true' case is called the matching path (expected path), and the other is called
96 the 'backtrack' path. Branch instructions are expensive on all CPUs, so we avoid taken
97 branches on the matching path.
98
99 Greedy star operator (*) :
100 Matching path: match happens.
101 Backtrack path: match failed.
102 Non-greedy star operator (*?) :
103 Matching path: no need to perform a match.
104 Backtrack path: match is required.
105
106 The following example shows how the code is generated for a capturing bracket
107 with two alternatives. Let A, B, C and D be arbitrary regular expressions, and
108 we have the following regular expression:
109
110 A(B|C)D
111
112 The generated code will be the following:
113
114 A matching path
115 '(' matching path (pushing arguments to the stack)
116 B matching path
117 ')' matching path (pushing arguments to the stack)
118 D matching path
119 return with successful match
120
121 D backtrack path
122 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
123 B backtrack path
124 C expected path
125 jump to D matching path
126 C backtrack path
127 A backtrack path
128
129 Notice that the order of the backtrack code paths is the opposite of the fast
130 code paths. In this way the topmost value on the stack always belongs
131 to the current backtrack code path. The backtrack path must check
132 whether there is a next alternative. If so, it needs to jump back to
133 the matching path eventually. Otherwise it needs to clear out its own stack
134 frame and continue the execution on the backtrack code paths.
135 */
136
137 /*
138 Saved stack frames:
139
140 Atomic blocks and asserts require reloading the values of private data
141 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
142 are not necessarily known at compile time, thus we need a dynamic restore
143 mechanism.
144
145 The stack frames are stored in a chain list, and have the following format:
146 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
147
148 Thus we can restore the private data to a particular point in the stack.
149 */
150
/* Argument block passed by pointer to the generated matcher (see the
jit_function typedef). NOTE(review): the meaning of each field is suggested
by its name only; confirm against the code that fills this structure in. */
151 typedef struct jit_arguments {
152 /* Pointers first. */
153 struct sljit_stack *stack;
154 const pcre_uchar *str;
155 const pcre_uchar *begin;
156 const pcre_uchar *end;
157 int *offsets;
158 pcre_uchar *uchar_ptr;
159 pcre_uchar *mark_ptr;
160 /* Everything else after. */
161 int offsetcount;
162 int calllimit;
163 pcre_uint8 notbol;
164 pcre_uint8 noteol;
165 pcre_uint8 notempty;
166 pcre_uint8 notempty_atstart;
167 } jit_arguments;
168
/* Holds the generated code of a pattern. executable_funcs and
executable_sizes are parallel arrays with one slot per compile mode
(JIT_NUMBER_OF_COMPILE_MODES entries each). NOTE(review): callback/userdata
presumably describe a user supplied stack callback -- confirm at the users. */
169 typedef struct executable_functions {
170 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
171 PUBL(jit_callback) callback;
172 void *userdata;
173 pcre_uint32 top_bracket;
174 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
175 } executable_functions;
176
/* Node of a singly linked list of sljit jumps. */
177 typedef struct jump_list {
/* The jump stored in this node. */
178 struct sljit_jump *jump;
/* Next node of the list. */
179 struct jump_list *next;
180 } jump_list;
181
/* Kind of a stub; stack_alloc is the only one defined. */
182 enum stub_types { stack_alloc };
183
/* Node of a singly linked list of stubs. Each node carries its type, an
integer payload, the jump stored as 'start' and the label stored as 'quit'.
NOTE(review): presumably 'start' enters the stub and 'quit' is where it
resumes -- confirm at the code that emits the stubs. */
184 typedef struct stub_list {
185 enum stub_types type;
186 int data;
187 struct sljit_jump *start;
188 struct sljit_label *quit;
189 struct stub_list *next;
190 } stub_list;
191
/* Function pointer type using the SLJIT_CALL calling convention: takes a
pointer to a jit_arguments block and returns an int. */
192 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
193
194 /* The following structure is the key data type for the recursive
195 code generator. It is allocated by compile_matchingpath, and contains
196 the arguments for compile_backtrackingpath. Must be the first member
197 of its descendants. */
198 typedef struct backtrack_common {
199 /* Concatenation stack. */
200 struct backtrack_common *prev;
201 jump_list *nextbacktracks;
202 /* Internal stack (for component operators). */
203 struct backtrack_common *top;
204 jump_list *topbacktracks;
205 /* Opcode pointer. */
206 pcre_uchar *cc;
207 } backtrack_common;
208
/* Backtrack record for assertions; starts with backtrack_common as
required for every descendant. */
209 typedef struct assert_backtrack {
210 backtrack_common common;
211 jump_list *condfailed;
212 /* Less than 0 (-1) if a frame is not needed. */
213 int framesize;
214 /* Points to our private memory word on the stack. */
215 int private_data_ptr;
216 /* For iterators. */
217 struct sljit_label *matchingpath;
218 } assert_backtrack;
219
/* Backtrack record for brackets; starts with backtrack_common as required
for every descendant. Which member of the union 'u' is meaningful depends
on the kind of bracket, as noted on each member. */
220 typedef struct bracket_backtrack {
221 backtrack_common common;
222 /* Where to continue if an alternative is successfully matched. */
223 struct sljit_label *alternative_matchingpath;
224 /* For rmin and rmax iterators. */
225 struct sljit_label *recursive_matchingpath;
226 /* For greedy ? operator. */
227 struct sljit_label *zero_matchingpath;
228 /* Contains the branches of a failed condition. */
229 union {
230 /* Both for OP_COND, OP_SCOND. */
231 jump_list *condfailed;
232 assert_backtrack *assert;
233 /* For OP_ONCE. -1 if not needed. */
234 int framesize;
235 } u;
236 /* Points to our private memory word on the stack. */
237 int private_data_ptr;
238 } bracket_backtrack;
239
/* Backtrack record whose name ties it to the possessive (POS) brackets;
starts with backtrack_common as required for every descendant. */
240 typedef struct bracketpos_backtrack {
241 backtrack_common common;
242 /* Points to our private memory word on the stack. */
243 int private_data_ptr;
244 /* Reverting stack is needed. */
245 int framesize;
246 /* Allocated stack size. */
247 int stacksize;
248 } bracketpos_backtrack;
249
/* Backtrack record named after OP_BRAMINZERO: the common part plus one
label. NOTE(review): presumably the label marks where the matching path
resumes -- confirm at the code that sets it. */
250 typedef struct braminzero_backtrack {
251 backtrack_common common;
252 struct sljit_label *matchingpath;
253 } braminzero_backtrack;
254
/* Backtrack record for iterators: the common part plus the label of the
next iteration. */
255 typedef struct iterator_backtrack {
256 backtrack_common common;
257 /* Next iteration. */
258 struct sljit_label *matchingpath;
259 } iterator_backtrack;
260
/* Node of a singly linked list describing one recursively callable
function of the generated code. */
261 typedef struct recurse_entry {
262 struct recurse_entry *next;
263 /* Contains the function entry. */
264 struct sljit_label *entry;
265 /* Collects the calls until the function is not created. */
266 jump_list *calls;
267 /* Points to the starting opcode. */
268 int start;
269 } recurse_entry;
270
/* Backtrack record named after OP_RECURSE; it adds nothing to the common
part. */
271 typedef struct recurse_backtrack {
272 backtrack_common common;
273 } recurse_backtrack;
274
275 #define MAX_RANGE_SIZE 6
276
/* State shared by the code generator functions of this file, which receive
it as 'common'. Several int members hold offsets into the local space of
the generated code rather than C pointers: ovector_start, for example, is
advanced by sizeof(sljit_sw) whenever a word is reserved, and the reserved
value is later used as a displacement from SLJIT_LOCALS_REG. For
recursive_head and mark_ptr the value 0 means "not reserved yet". */
277 typedef struct compiler_common {
278 struct sljit_compiler *compiler;
279 pcre_uchar *start;
280
281 /* Maps private data offset to each opcode. */
282 int *private_data_ptrs;
283 /* Tells whether the capturing bracket is optimized. */
284 pcre_uint8 *optimized_cbracket;
285 /* Starting offset of private data for capturing brackets. */
286 int cbraptr;
287 /* OVector starting point. Must be divisible by 2. */
288 int ovector_start;
289 /* Last known position of the requested byte. */
290 int req_char_ptr;
291 /* Head of the last recursion. */
292 int recursive_head;
293 /* First inspected character for partial matching. */
294 int start_used_ptr;
295 /* Starting pointer for partial soft matches. */
296 int hit_start;
297 /* End pointer of the first line. */
298 int first_line_end;
299 /* Points to the marked string. */
300 int mark_ptr;
301
302 /* Flipped and lower case tables. */
303 const pcre_uint8 *fcc;
304 sljit_sw lcc;
305 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
306 int mode;
307 /* Newline control. */
308 int nltype;
309 int newline;
310 int bsr_nltype;
311 /* Dollar endonly. */
312 int endonly;
/* Set to TRUE when an OP_SET_SOM opcode is found in the pattern. */
313 BOOL has_set_som;
314 /* Tables. */
315 sljit_sw ctypes;
316 int digits[2 + MAX_RANGE_SIZE];
317 /* Named capturing brackets. */
318 sljit_uw name_table;
319 sljit_sw name_count;
320 sljit_sw name_entry_size;
321
322 /* Labels and jump lists. */
323 struct sljit_label *partialmatchlabel;
324 struct sljit_label *quitlabel;
325 struct sljit_label *acceptlabel;
326 stub_list *stubs;
327 recurse_entry *entries;
328 recurse_entry *currententry;
329 jump_list *partialmatch;
330 jump_list *quit;
331 jump_list *accept;
332 jump_list *calllimit;
333 jump_list *stackalloc;
334 jump_list *revertframes;
335 jump_list *wordboundary;
336 jump_list *anynewline;
337 jump_list *hspace;
338 jump_list *vspace;
339 jump_list *casefulcmp;
340 jump_list *caselesscmp;
341 BOOL jscript_compat;
/* The remaining members exist only in builds with UTF / UCP support. */
342 #ifdef SUPPORT_UTF
343 BOOL utf;
344 #ifdef SUPPORT_UCP
345 BOOL use_ucp;
346 #endif
347 #ifndef COMPILE_PCRE32
348 jump_list *utfreadchar;
349 #endif
350 #ifdef COMPILE_PCRE8
351 jump_list *utfreadtype8;
352 #endif
353 #endif /* SUPPORT_UTF */
354 #ifdef SUPPORT_UCP
355 jump_list *getucd;
356 #endif
357 } compiler_common;
358
359 /* For byte_sequence_compare. */
360
/* Only 'length' and 'sourcereg' exist on every target. When the target
permits unaligned access (SLJIT_UNALIGNED) the structure also has 'ucharptr'
and two unions, 'c' and 'oc', which overlay an int, a 16 bit value and an
array of code units whose element type follows the compiled code unit
width. NOTE(review): 'c' / 'oc' presumably hold the characters and their
other-case variants -- confirm in byte_sequence_compare. */
361 typedef struct compare_context {
362 int length;
363 int sourcereg;
364 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
365 int ucharptr;
366 union {
367 sljit_si asint;
368 sljit_uh asushort;
369 #if defined COMPILE_PCRE8
370 sljit_ub asbyte;
371 sljit_ub asuchars[4];
372 #elif defined COMPILE_PCRE16
373 sljit_uh asuchars[2];
374 #elif defined COMPILE_PCRE32
375 sljit_ui asuchars[1];
376 #endif
377 } c;
378 union {
379 sljit_si asint;
380 sljit_uh asushort;
381 #if defined COMPILE_PCRE8
382 sljit_ub asbyte;
383 sljit_ub asuchars[4];
384 #elif defined COMPILE_PCRE16
385 sljit_uh asuchars[2];
386 #elif defined COMPILE_PCRE32
387 sljit_ui asuchars[1];
388 #endif
389 } oc;
390 #endif
391 } compare_context;
392
/* Marker values stored in the saved stack frames. init_frame writes
frame_setstrbegin or frame_setmark followed by the saved value; frame_end
(0) corresponds to the "[ 0 ]" terminator in the frame format described in
the "Saved stack frames" comment. */
393 enum {
394 frame_end = 0,
395 frame_setstrbegin = -1,
396 frame_setmark = -2
397 };
398
399 /* Undefine sljit macros. */
400 #undef CMP
401
402 /* Used for accessing the elements of the stack. */
403 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
404
/* Symbolic names for the sljit registers used by the generated code.
NOTE(review): the role of each register is implied by its alias only;
confirm at the places where the registers are loaded. */
405 #define TMP1 SLJIT_SCRATCH_REG1
406 #define TMP2 SLJIT_SCRATCH_REG3
407 #define TMP3 SLJIT_TEMPORARY_EREG2
408 #define STR_PTR SLJIT_SAVED_REG1
409 #define STR_END SLJIT_SAVED_REG2
410 #define STACK_TOP SLJIT_SCRATCH_REG2
411 #define STACK_LIMIT SLJIT_SAVED_REG3
412 #define ARGUMENTS SLJIT_SAVED_EREG1
413 #define CALL_COUNT SLJIT_SAVED_EREG2
414 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
415
416 /* Local space layout. */
417 /* These two locals can be used by the current opcode. */
418 #define LOCALS0 (0 * sizeof(sljit_sw))
419 #define LOCALS1 (1 * sizeof(sljit_sw))
420 /* Two local variables for possessive quantifiers (char1 cannot use them). */
421 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
422 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
423 /* Max limit of recursions. */
424 #define CALL_LIMIT (4 * sizeof(sljit_sw))
425 /* The output vector is stored on the stack, and contains pointers
426 to characters. The vector data is divided into two groups: the first
427 group contains the start / end character pointers, and the second is
428 the start pointers when the end of the capturing group has not yet been reached. */
429 #define OVECTOR_START (common->ovector_start)
430 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
431 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_sw))
432 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
433
/* Selects the sljit move opcodes used for one code unit according to the
compiled code unit width (COMPILE_PCRE8 / 16 / 32). Building with none of
them defined is a compile error. */
434 #if defined COMPILE_PCRE8
435 #define MOV_UCHAR SLJIT_MOV_UB
436 #define MOVU_UCHAR SLJIT_MOVU_UB
437 #elif defined COMPILE_PCRE16
438 #define MOV_UCHAR SLJIT_MOV_UH
439 #define MOVU_UCHAR SLJIT_MOVU_UH
440 #elif defined COMPILE_PCRE32
441 #define MOV_UCHAR SLJIT_MOV_UI
442 #define MOVU_UCHAR SLJIT_MOVU_UI
443 #else
444 #error Unsupported compiling mode
445 #endif
446
447 /* Shortcuts. */
/* DEFINE_COMPILER declares a local 'compiler' initialised from
common->compiler; every other macro below expands to an sljit call which
uses that local, so DEFINE_COMPILER must appear first in a function. */
448 #define DEFINE_COMPILER \
449 struct sljit_compiler *compiler = common->compiler
450 #define OP1(op, dst, dstw, src, srcw) \
451 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
452 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
453 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
454 #define LABEL() \
455 sljit_emit_label(compiler)
456 #define JUMP(type) \
457 sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
458 #define JUMPTO(type, label) \
459 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds a previously emitted jump to a label emitted at the current position. */
460 #define JUMPHERE(jump) \
461 sljit_set_label((jump), sljit_emit_label(compiler))
462 #define CMP(type, src1, src1w, src2, src2w) \
463 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
464 #define CMPTO(type, src1, src1w, src2, src2w, label) \
465 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
466 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
467 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
468 #define GET_LOCAL_BASE(dst, dstw, offset) \
469 sljit_get_local_base(compiler, (dst), (dstw), (offset))
470
471 static pcre_uchar* bracketend(pcre_uchar* cc)
472 {
473 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
474 do cc += GET(cc, 1); while (*cc == OP_ALT);
475 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
476 cc += 1 + LINK_SIZE;
477 return cc;
478 }
479
480 /* Functions which might need modification for every newly supported opcode:
481 next_opcode
482 get_private_data_length
483 set_private_data_ptrs
484 get_framesize
485 init_frame
486 get_private_data_length_for_copy
487 copy_private_data
488 compile_matchingpath
489 compile_backtrackingpath
490 */
491
492 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
493 {
494 SLJIT_UNUSED_ARG(common);
495 switch(*cc)
496 {
497 case OP_SOD:
498 case OP_SOM:
499 case OP_SET_SOM:
500 case OP_NOT_WORD_BOUNDARY:
501 case OP_WORD_BOUNDARY:
502 case OP_NOT_DIGIT:
503 case OP_DIGIT:
504 case OP_NOT_WHITESPACE:
505 case OP_WHITESPACE:
506 case OP_NOT_WORDCHAR:
507 case OP_WORDCHAR:
508 case OP_ANY:
509 case OP_ALLANY:
510 case OP_ANYNL:
511 case OP_NOT_HSPACE:
512 case OP_HSPACE:
513 case OP_NOT_VSPACE:
514 case OP_VSPACE:
515 case OP_EXTUNI:
516 case OP_EODN:
517 case OP_EOD:
518 case OP_CIRC:
519 case OP_CIRCM:
520 case OP_DOLL:
521 case OP_DOLLM:
522 case OP_TYPESTAR:
523 case OP_TYPEMINSTAR:
524 case OP_TYPEPLUS:
525 case OP_TYPEMINPLUS:
526 case OP_TYPEQUERY:
527 case OP_TYPEMINQUERY:
528 case OP_TYPEPOSSTAR:
529 case OP_TYPEPOSPLUS:
530 case OP_TYPEPOSQUERY:
531 case OP_CRSTAR:
532 case OP_CRMINSTAR:
533 case OP_CRPLUS:
534 case OP_CRMINPLUS:
535 case OP_CRQUERY:
536 case OP_CRMINQUERY:
537 case OP_DEF:
538 case OP_BRAZERO:
539 case OP_BRAMINZERO:
540 case OP_BRAPOSZERO:
541 case OP_COMMIT:
542 case OP_FAIL:
543 case OP_ACCEPT:
544 case OP_ASSERT_ACCEPT:
545 case OP_SKIPZERO:
546 return cc + 1;
547
548 case OP_ANYBYTE:
549 #ifdef SUPPORT_UTF
550 if (common->utf) return NULL;
551 #endif
552 return cc + 1;
553
554 case OP_CHAR:
555 case OP_CHARI:
556 case OP_NOT:
557 case OP_NOTI:
558 case OP_STAR:
559 case OP_MINSTAR:
560 case OP_PLUS:
561 case OP_MINPLUS:
562 case OP_QUERY:
563 case OP_MINQUERY:
564 case OP_POSSTAR:
565 case OP_POSPLUS:
566 case OP_POSQUERY:
567 case OP_STARI:
568 case OP_MINSTARI:
569 case OP_PLUSI:
570 case OP_MINPLUSI:
571 case OP_QUERYI:
572 case OP_MINQUERYI:
573 case OP_POSSTARI:
574 case OP_POSPLUSI:
575 case OP_POSQUERYI:
576 case OP_NOTSTAR:
577 case OP_NOTMINSTAR:
578 case OP_NOTPLUS:
579 case OP_NOTMINPLUS:
580 case OP_NOTQUERY:
581 case OP_NOTMINQUERY:
582 case OP_NOTPOSSTAR:
583 case OP_NOTPOSPLUS:
584 case OP_NOTPOSQUERY:
585 case OP_NOTSTARI:
586 case OP_NOTMINSTARI:
587 case OP_NOTPLUSI:
588 case OP_NOTMINPLUSI:
589 case OP_NOTQUERYI:
590 case OP_NOTMINQUERYI:
591 case OP_NOTPOSSTARI:
592 case OP_NOTPOSPLUSI:
593 case OP_NOTPOSQUERYI:
594 cc += 2;
595 #ifdef SUPPORT_UTF
596 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
597 #endif
598 return cc;
599
600 case OP_UPTO:
601 case OP_MINUPTO:
602 case OP_EXACT:
603 case OP_POSUPTO:
604 case OP_UPTOI:
605 case OP_MINUPTOI:
606 case OP_EXACTI:
607 case OP_POSUPTOI:
608 case OP_NOTUPTO:
609 case OP_NOTMINUPTO:
610 case OP_NOTEXACT:
611 case OP_NOTPOSUPTO:
612 case OP_NOTUPTOI:
613 case OP_NOTMINUPTOI:
614 case OP_NOTEXACTI:
615 case OP_NOTPOSUPTOI:
616 cc += 2 + IMM2_SIZE;
617 #ifdef SUPPORT_UTF
618 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
619 #endif
620 return cc;
621
622 case OP_NOTPROP:
623 case OP_PROP:
624 return cc + 1 + 2;
625
626 case OP_TYPEUPTO:
627 case OP_TYPEMINUPTO:
628 case OP_TYPEEXACT:
629 case OP_TYPEPOSUPTO:
630 case OP_REF:
631 case OP_REFI:
632 case OP_CREF:
633 case OP_NCREF:
634 case OP_RREF:
635 case OP_NRREF:
636 case OP_CLOSE:
637 cc += 1 + IMM2_SIZE;
638 return cc;
639
640 case OP_CRRANGE:
641 case OP_CRMINRANGE:
642 return cc + 1 + 2 * IMM2_SIZE;
643
644 case OP_CLASS:
645 case OP_NCLASS:
646 return cc + 1 + 32 / sizeof(pcre_uchar);
647
648 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
649 case OP_XCLASS:
650 return cc + GET(cc, 1);
651 #endif
652
653 case OP_RECURSE:
654 case OP_ASSERT:
655 case OP_ASSERT_NOT:
656 case OP_ASSERTBACK:
657 case OP_ASSERTBACK_NOT:
658 case OP_REVERSE:
659 case OP_ONCE:
660 case OP_ONCE_NC:
661 case OP_BRA:
662 case OP_BRAPOS:
663 case OP_COND:
664 case OP_SBRA:
665 case OP_SBRAPOS:
666 case OP_SCOND:
667 case OP_ALT:
668 case OP_KET:
669 case OP_KETRMAX:
670 case OP_KETRMIN:
671 case OP_KETRPOS:
672 return cc + 1 + LINK_SIZE;
673
674 case OP_CBRA:
675 case OP_CBRAPOS:
676 case OP_SCBRA:
677 case OP_SCBRAPOS:
678 return cc + 1 + LINK_SIZE + IMM2_SIZE;
679
680 case OP_MARK:
681 return cc + 1 + 2 + cc[1];
682
683 default:
684 return NULL;
685 }
686 }
687
/* Case label groups shared by get_private_data_length and
set_private_data_ptrs. The numeric suffix is the number of private data
words both functions reserve for the opcodes of the group. */

/* Character iterators: one word, the operand is a single character. */
688 #define CASE_ITERATOR_PRIVATE_DATA_1 \
689 case OP_MINSTAR: \
690 case OP_MINPLUS: \
691 case OP_QUERY: \
692 case OP_MINQUERY: \
693 case OP_MINSTARI: \
694 case OP_MINPLUSI: \
695 case OP_QUERYI: \
696 case OP_MINQUERYI: \
697 case OP_NOTMINSTAR: \
698 case OP_NOTMINPLUS: \
699 case OP_NOTQUERY: \
700 case OP_NOTMINQUERY: \
701 case OP_NOTMINSTARI: \
702 case OP_NOTMINPLUSI: \
703 case OP_NOTQUERYI: \
704 case OP_NOTMINQUERYI:
705
/* Character iterators: two words, the operand is a single character. */
706 #define CASE_ITERATOR_PRIVATE_DATA_2A \
707 case OP_STAR: \
708 case OP_PLUS: \
709 case OP_STARI: \
710 case OP_PLUSI: \
711 case OP_NOTSTAR: \
712 case OP_NOTPLUS: \
713 case OP_NOTSTARI: \
714 case OP_NOTPLUSI:
715
/* Character iterators: two words, the operand is an IMM2 value followed by
a character. */
716 #define CASE_ITERATOR_PRIVATE_DATA_2B \
717 case OP_UPTO: \
718 case OP_MINUPTO: \
719 case OP_UPTOI: \
720 case OP_MINUPTOI: \
721 case OP_NOTUPTO: \
722 case OP_NOTMINUPTO: \
723 case OP_NOTUPTOI: \
724 case OP_NOTMINUPTOI:
725
/* Type iterators: one word. */
726 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
727 case OP_TYPEMINSTAR: \
728 case OP_TYPEMINPLUS: \
729 case OP_TYPEQUERY: \
730 case OP_TYPEMINQUERY:
731
/* Type iterators: two words, unless the repeated type is OP_ANYNL or
OP_EXTUNI, in which case no space is reserved. */
732 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
733 case OP_TYPESTAR: \
734 case OP_TYPEPLUS:
735
/* Same as 2A, but an IMM2 value precedes the repeated type. */
736 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
737 case OP_TYPEUPTO: \
738 case OP_TYPEMINUPTO:
739
740 static int get_class_iterator_size(pcre_uchar *cc)
741 {
742 switch(*cc)
743 {
744 case OP_CRSTAR:
745 case OP_CRPLUS:
746 return 2;
747
748 case OP_CRMINSTAR:
749 case OP_CRMINPLUS:
750 case OP_CRQUERY:
751 case OP_CRMINQUERY:
752 return 1;
753
754 case OP_CRRANGE:
755 case OP_CRMINRANGE:
756 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
757 return 0;
758 return 2;
759
760 default:
761 return 0;
762 }
763 }
764
/* Scans the compiled pattern from cc up to (not including) ccend and
returns the number of bytes of private data its opcodes need, or -1 when
next_opcode() does not handle an opcode.

Side effects on 'common':
 - has_set_som is set when OP_SET_SOM is found;
 - optimized_cbracket[] is cleared for the groups referenced by OP_REF /
   OP_REFI, by the OP_CREF / OP_NCREF condition of a conditional bracket,
   and for every possessive capturing bracket;
 - the first OP_RECURSE / OP_MARK reserves one word at ovector_start and
   stores its offset in recursive_head / mark_ptr. */
765 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
766 {
767 int private_data_length = 0;
768 pcre_uchar *alternative;
769 pcre_uchar *name;
/* End of the innermost bracket whose closing opcode is not a plain OP_KET;
NULL when there is no such bracket (see the bottom of the loop). */
770 pcre_uchar *end = NULL;
771 int space, size, i;
772 pcre_uint32 bracketlen;
773
774 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
775 while (cc < ccend)
776 {
/* space: private data words requested by an iterator.
size: code units to skip afterwards; a negative value means the operand
ends with a character, so UTF extra units are skipped as well.
bracketlen: length of a bracket header to skip afterwards.
Cases which leave all three at zero advance cc themselves. */
777 space = 0;
778 size = 0;
779 bracketlen = 0;
780 switch(*cc)
781 {
782 case OP_SET_SOM:
783 common->has_set_som = TRUE;
784 cc += 1;
785 break;
786
787 case OP_REF:
788 case OP_REFI:
789 common->optimized_cbracket[GET2(cc, 1)] = 0;
790 cc += 1 + IMM2_SIZE;
791 break;
792
793 case OP_ASSERT:
794 case OP_ASSERT_NOT:
795 case OP_ASSERTBACK:
796 case OP_ASSERTBACK_NOT:
797 case OP_ONCE:
798 case OP_ONCE_NC:
799 case OP_BRAPOS:
800 case OP_SBRA:
801 case OP_SBRAPOS:
802 private_data_length += sizeof(sljit_sw);
803 bracketlen = 1 + LINK_SIZE;
804 break;
805
806 case OP_CBRAPOS:
807 case OP_SCBRAPOS:
808 private_data_length += sizeof(sljit_sw);
809 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
810 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
811 break;
812
813 case OP_COND:
814 case OP_SCOND:
/* bracketlen is used as a scratch variable until the end of this case:
first it holds the condition opcode, then the referenced group number. */
815 bracketlen = cc[1 + LINK_SIZE];
816 if (bracketlen == OP_CREF)
817 {
818 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
819 common->optimized_cbracket[bracketlen] = 0;
820 }
821 else if (bracketlen == OP_NCREF)
822 {
823 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
824 name = (pcre_uchar *)common->name_table;
825 alternative = name;
/* Find the name table entry of the referenced group number. */
826 for (i = 0; i < common->name_count; i++)
827 {
828 if (GET2(name, 0) == bracketlen) break;
829 name += common->name_entry_size;
830 }
831 SLJIT_ASSERT(i != common->name_count);
832
/* Clear the flag of every group which has the same name. */
833 for (i = 0; i < common->name_count; i++)
834 {
835 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
836 common->optimized_cbracket[GET2(alternative, 0)] = 0;
837 alternative += common->name_entry_size;
838 }
839 }
840
841 if (*cc == OP_COND)
842 {
843 /* Might be a hidden SCOND. */
844 alternative = cc + GET(cc, 1);
845 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
846 private_data_length += sizeof(sljit_sw);
847 }
848 else
849 private_data_length += sizeof(sljit_sw);
850 bracketlen = 1 + LINK_SIZE;
851 break;
852
853 case OP_BRA:
854 bracketlen = 1 + LINK_SIZE;
855 break;
856
857 case OP_CBRA:
858 case OP_SCBRA:
859 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
860 break;
861
862 CASE_ITERATOR_PRIVATE_DATA_1
863 space = 1;
864 size = -2;
865 break;
866
867 CASE_ITERATOR_PRIVATE_DATA_2A
868 space = 2;
869 size = -2;
870 break;
871
872 CASE_ITERATOR_PRIVATE_DATA_2B
873 space = 2;
874 size = -(2 + IMM2_SIZE);
875 break;
876
877 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
878 space = 1;
879 size = 1;
880 break;
881
882 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
883 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
884 space = 2;
885 size = 1;
886 break;
887
888 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
889 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
890 space = 2;
891 size = 1 + IMM2_SIZE;
892 break;
893
894 case OP_CLASS:
895 case OP_NCLASS:
/* The space depends on the opcode which follows the class. */
896 size += 1 + 32 / sizeof(pcre_uchar);
897 space = get_class_iterator_size(cc + size);
898 break;
899
900 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
901 case OP_XCLASS:
902 size = GET(cc, 1);
903 space = get_class_iterator_size(cc + size);
904 break;
905 #endif
906
907 case OP_RECURSE:
908 /* Set its value only once. */
909 if (common->recursive_head == 0)
910 {
911 common->recursive_head = common->ovector_start;
912 common->ovector_start += sizeof(sljit_sw);
913 }
914 cc += 1 + LINK_SIZE;
915 break;
916
917 case OP_MARK:
/* Reserved only once, like the recursive head above. */
918 if (common->mark_ptr == 0)
919 {
920 common->mark_ptr = common->ovector_start;
921 common->ovector_start += sizeof(sljit_sw);
922 }
923 cc += 1 + 2 + cc[1];
924 break;
925
926 default:
927 cc = next_opcode(common, cc);
928 if (cc == NULL)
929 return -1;
930 break;
931 }
932
/* Iterator space is counted only when cc is not before 'end', i.e. not
inside the bracket recorded at the bottom of the loop. */
933 if (space > 0 && cc >= end)
934 private_data_length += sizeof(sljit_sw) * space;
935
936 if (size != 0)
937 {
938 if (size < 0)
939 {
940 cc += -size;
941 #ifdef SUPPORT_UTF
942 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
943 #endif
944 }
945 else
946 cc += size;
947 }
948
949 if (bracketlen != 0)
950 {
/* Record the end of this bracket unless we are already inside a recorded
one; a bracket closed by a plain OP_KET resets the record to NULL. */
951 if (cc >= end)
952 {
953 end = bracketend(cc);
954 if (end[-1 - LINK_SIZE] == OP_KET)
955 end = NULL;
956 }
957 cc += bracketlen;
958 }
959 }
960 return private_data_length;
961 }
962
/* Companion of get_private_data_length: walks the pattern from
common->start up to (not including) ccend and stores in
common->private_data_ptrs[], indexed by the distance of the opcode from
common->start, the private data offset given to each opcode which needs
one. Offsets are handed out sequentially starting at private_data_ptr.
Opcodes without their own case here are skipped by next_opcode(). */
963 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
964 {
965 pcre_uchar *cc = common->start;
966 pcre_uchar *alternative;
/* End of the innermost bracket whose closing opcode is not a plain OP_KET;
NULL when there is no such bracket (see the bottom of the loop). */
967 pcre_uchar *end = NULL;
968 int space, size, bracketlen;
969
970 while (cc < ccend)
971 {
/* space: private data words requested by an iterator.
size: code units to skip afterwards; a negative value means the operand
ends with a character, so UTF extra units are skipped as well.
bracketlen: length of a bracket header to skip afterwards. */
972 space = 0;
973 size = 0;
974 bracketlen = 0;
975 switch(*cc)
976 {
977 case OP_ASSERT:
978 case OP_ASSERT_NOT:
979 case OP_ASSERTBACK:
980 case OP_ASSERTBACK_NOT:
981 case OP_ONCE:
982 case OP_ONCE_NC:
983 case OP_BRAPOS:
984 case OP_SBRA:
985 case OP_SBRAPOS:
986 case OP_SCOND:
987 common->private_data_ptrs[cc - common->start] = private_data_ptr;
988 private_data_ptr += sizeof(sljit_sw);
989 bracketlen = 1 + LINK_SIZE;
990 break;
991
992 case OP_CBRAPOS:
993 case OP_SCBRAPOS:
994 common->private_data_ptrs[cc - common->start] = private_data_ptr;
995 private_data_ptr += sizeof(sljit_sw);
996 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
997 break;
998
999 case OP_COND:
1000 /* Might be a hidden SCOND. */
1001 alternative = cc + GET(cc, 1);
1002 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1003 {
1004 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1005 private_data_ptr += sizeof(sljit_sw);
1006 }
1007 bracketlen = 1 + LINK_SIZE;
1008 break;
1009
1010 case OP_BRA:
1011 bracketlen = 1 + LINK_SIZE;
1012 break;
1013
1014 case OP_CBRA:
1015 case OP_SCBRA:
1016 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1017 break;
1018
1019 CASE_ITERATOR_PRIVATE_DATA_1
1020 space = 1;
1021 size = -2;
1022 break;
1023
1024 CASE_ITERATOR_PRIVATE_DATA_2A
1025 space = 2;
1026 size = -2;
1027 break;
1028
1029 CASE_ITERATOR_PRIVATE_DATA_2B
1030 space = 2;
1031 size = -(2 + IMM2_SIZE);
1032 break;
1033
1034 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1035 space = 1;
1036 size = 1;
1037 break;
1038
1039 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1040 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1041 space = 2;
1042 size = 1;
1043 break;
1044
1045 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1046 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1047 space = 2;
1048 size = 1 + IMM2_SIZE;
1049 break;
1050
1051 case OP_CLASS:
1052 case OP_NCLASS:
/* The space depends on the opcode which follows the class. */
1053 size += 1 + 32 / sizeof(pcre_uchar);
1054 space = get_class_iterator_size(cc + size);
1055 break;
1056
1057 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1058 case OP_XCLASS:
1059 size = GET(cc, 1);
1060 space = get_class_iterator_size(cc + size);
1061 break;
1062 #endif
1063
1064 default:
1065 cc = next_opcode(common, cc);
1066 SLJIT_ASSERT(cc != NULL);
1067 break;
1068 }
1069
/* For the iterator cases cc still points at the opcode itself here; it is
advanced by 'size' only below. The slot is assigned only when cc is not
inside the bracket recorded in 'end'. */
1070 if (space > 0 && cc >= end)
1071 {
1072 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1073 private_data_ptr += sizeof(sljit_sw) * space;
1074 }
1075
1076 if (size != 0)
1077 {
1078 if (size < 0)
1079 {
1080 cc += -size;
1081 #ifdef SUPPORT_UTF
1082 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1083 #endif
1084 }
1085 else
1086 cc += size;
1087 }
1088
1089 if (bracketlen > 0)
1090 {
/* Record the end of this bracket unless we are already inside a recorded
one; a bracket closed by a plain OP_KET resets the record to NULL. */
1091 if (cc >= end)
1092 {
1093 end = bracketend(cc);
1094 if (end[-1 - LINK_SIZE] == OP_KET)
1095 end = NULL;
1096 }
1097 cc += bracketlen;
1098 }
1099 }
1100 }
1101
1102 /* Returns with -1 if no need for frame. */
1103 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1104 {
1105 pcre_uchar *ccend = bracketend(cc);
1106 int length = 0;
1107 BOOL possessive = FALSE;
1108 BOOL setsom_found = recursive;
1109 BOOL setmark_found = recursive;
1110
1111 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1112 {
1113 length = 3;
1114 possessive = TRUE;
1115 }
1116
1117 cc = next_opcode(common, cc);
1118 SLJIT_ASSERT(cc != NULL);
1119 while (cc < ccend)
1120 switch(*cc)
1121 {
1122 case OP_SET_SOM:
1123 SLJIT_ASSERT(common->has_set_som);
1124 if (!setsom_found)
1125 {
1126 length += 2;
1127 setsom_found = TRUE;
1128 }
1129 cc += 1;
1130 break;
1131
1132 case OP_MARK:
1133 SLJIT_ASSERT(common->mark_ptr != 0);
1134 if (!setmark_found)
1135 {
1136 length += 2;
1137 setmark_found = TRUE;
1138 }
1139 cc += 1 + 2 + cc[1];
1140 break;
1141
1142 case OP_RECURSE:
1143 if (common->has_set_som && !setsom_found)
1144 {
1145 length += 2;
1146 setsom_found = TRUE;
1147 }
1148 if (common->mark_ptr != 0 && !setmark_found)
1149 {
1150 length += 2;
1151 setmark_found = TRUE;
1152 }
1153 cc += 1 + LINK_SIZE;
1154 break;
1155
1156 case OP_CBRA:
1157 case OP_CBRAPOS:
1158 case OP_SCBRA:
1159 case OP_SCBRAPOS:
1160 length += 3;
1161 cc += 1 + LINK_SIZE + IMM2_SIZE;
1162 break;
1163
1164 default:
1165 cc = next_opcode(common, cc);
1166 SLJIT_ASSERT(cc != NULL);
1167 break;
1168 }
1169
1170 /* Possessive quantifiers can use a special case. */
1171 if (SLJIT_UNLIKELY(possessive) && length == 3)
1172 return -1;
1173
1174 if (length > 0)
1175 return length + 1;
1176 return -1;
1177 }
1178
1179 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1180 {
1181 DEFINE_COMPILER;
1182 pcre_uchar *ccend = bracketend(cc);
1183 BOOL setsom_found = recursive;
1184 BOOL setmark_found = recursive;
1185 int offset;
1186
1187 /* >= 1 + shortest item size (2) */
1188 SLJIT_UNUSED_ARG(stacktop);
1189 SLJIT_ASSERT(stackpos >= stacktop + 2);
1190
1191 stackpos = STACK(stackpos);
1192 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1193 cc = next_opcode(common, cc);
1194 SLJIT_ASSERT(cc != NULL);
1195 while (cc < ccend)
1196 switch(*cc)
1197 {
1198 case OP_SET_SOM:
1199 SLJIT_ASSERT(common->has_set_som);
1200 if (!setsom_found)
1201 {
1202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1204 stackpos += (int)sizeof(sljit_sw);
1205 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1206 stackpos += (int)sizeof(sljit_sw);
1207 setsom_found = TRUE;
1208 }
1209 cc += 1;
1210 break;
1211
1212 case OP_MARK:
1213 SLJIT_ASSERT(common->mark_ptr != 0);
1214 if (!setmark_found)
1215 {
1216 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1217 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1218 stackpos += (int)sizeof(sljit_sw);
1219 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1220 stackpos += (int)sizeof(sljit_sw);
1221 setmark_found = TRUE;
1222 }
1223 cc += 1 + 2 + cc[1];
1224 break;
1225
1226 case OP_RECURSE:
1227 if (common->has_set_som && !setsom_found)
1228 {
1229 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1231 stackpos += (int)sizeof(sljit_sw);
1232 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1233 stackpos += (int)sizeof(sljit_sw);
1234 setsom_found = TRUE;
1235 }
1236 if (common->mark_ptr != 0 && !setmark_found)
1237 {
1238 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1239 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1240 stackpos += (int)sizeof(sljit_sw);
1241 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1242 stackpos += (int)sizeof(sljit_sw);
1243 setmark_found = TRUE;
1244 }
1245 cc += 1 + LINK_SIZE;
1246 break;
1247
1248 case OP_CBRA:
1249 case OP_CBRAPOS:
1250 case OP_SCBRA:
1251 case OP_SCBRAPOS:
1252 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1253 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1254 stackpos += (int)sizeof(sljit_sw);
1255 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1256 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1258 stackpos += (int)sizeof(sljit_sw);
1259 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1260 stackpos += (int)sizeof(sljit_sw);
1261
1262 cc += 1 + LINK_SIZE + IMM2_SIZE;
1263 break;
1264
1265 default:
1266 cc = next_opcode(common, cc);
1267 SLJIT_ASSERT(cc != NULL);
1268 break;
1269 }
1270
1271 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1272 SLJIT_ASSERT(stackpos == STACK(stacktop));
1273 }
1274
1275 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1276 {
1277 int private_data_length = 2;
1278 int size;
1279 pcre_uchar *alternative;
1280 /* Calculate the sum of the private machine words. */
1281 while (cc < ccend)
1282 {
1283 size = 0;
1284 switch(*cc)
1285 {
1286 case OP_ASSERT:
1287 case OP_ASSERT_NOT:
1288 case OP_ASSERTBACK:
1289 case OP_ASSERTBACK_NOT:
1290 case OP_ONCE:
1291 case OP_ONCE_NC:
1292 case OP_BRAPOS:
1293 case OP_SBRA:
1294 case OP_SBRAPOS:
1295 case OP_SCOND:
1296 private_data_length++;
1297 cc += 1 + LINK_SIZE;
1298 break;
1299
1300 case OP_CBRA:
1301 case OP_SCBRA:
1302 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1303 private_data_length++;
1304 cc += 1 + LINK_SIZE + IMM2_SIZE;
1305 break;
1306
1307 case OP_CBRAPOS:
1308 case OP_SCBRAPOS:
1309 private_data_length += 2;
1310 cc += 1 + LINK_SIZE + IMM2_SIZE;
1311 break;
1312
1313 case OP_COND:
1314 /* Might be a hidden SCOND. */
1315 alternative = cc + GET(cc, 1);
1316 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1317 private_data_length++;
1318 cc += 1 + LINK_SIZE;
1319 break;
1320
1321 CASE_ITERATOR_PRIVATE_DATA_1
1322 if (PRIVATE_DATA(cc))
1323 private_data_length++;
1324 cc += 2;
1325 #ifdef SUPPORT_UTF
1326 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1327 #endif
1328 break;
1329
1330 CASE_ITERATOR_PRIVATE_DATA_2A
1331 if (PRIVATE_DATA(cc))
1332 private_data_length += 2;
1333 cc += 2;
1334 #ifdef SUPPORT_UTF
1335 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1336 #endif
1337 break;
1338
1339 CASE_ITERATOR_PRIVATE_DATA_2B
1340 if (PRIVATE_DATA(cc))
1341 private_data_length += 2;
1342 cc += 2 + IMM2_SIZE;
1343 #ifdef SUPPORT_UTF
1344 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1345 #endif
1346 break;
1347
1348 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1349 if (PRIVATE_DATA(cc))
1350 private_data_length++;
1351 cc += 1;
1352 break;
1353
1354 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1355 if (PRIVATE_DATA(cc))
1356 private_data_length += 2;
1357 cc += 1;
1358 break;
1359
1360 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1361 if (PRIVATE_DATA(cc))
1362 private_data_length += 2;
1363 cc += 1 + IMM2_SIZE;
1364 break;
1365
1366 case OP_CLASS:
1367 case OP_NCLASS:
1368 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1369 case OP_XCLASS:
1370 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1371 #else
1372 size = 1 + 32 / (int)sizeof(pcre_uchar);
1373 #endif
1374 if (PRIVATE_DATA(cc))
1375 private_data_length += get_class_iterator_size(cc + size);
1376 cc += size;
1377 break;
1378
1379 default:
1380 cc = next_opcode(common, cc);
1381 SLJIT_ASSERT(cc != NULL);
1382 break;
1383 }
1384 }
1385 SLJIT_ASSERT(cc == ccend);
1386 return private_data_length;
1387 }
1388
1389 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1390 BOOL save, int stackptr, int stacktop)
1391 {
1392 DEFINE_COMPILER;
1393 int srcw[2];
1394 int count, size;
1395 BOOL tmp1next = TRUE;
1396 BOOL tmp1empty = TRUE;
1397 BOOL tmp2empty = TRUE;
1398 pcre_uchar *alternative;
1399 enum {
1400 start,
1401 loop,
1402 end
1403 } status;
1404
1405 status = save ? start : loop;
1406 stackptr = STACK(stackptr - 2);
1407 stacktop = STACK(stacktop - 1);
1408
1409 if (!save)
1410 {
1411 stackptr += sizeof(sljit_sw);
1412 if (stackptr < stacktop)
1413 {
1414 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1415 stackptr += sizeof(sljit_sw);
1416 tmp1empty = FALSE;
1417 }
1418 if (stackptr < stacktop)
1419 {
1420 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1421 stackptr += sizeof(sljit_sw);
1422 tmp2empty = FALSE;
1423 }
1424 /* The tmp1next must be TRUE in either way. */
1425 }
1426
1427 while (status != end)
1428 {
1429 count = 0;
1430 switch(status)
1431 {
1432 case start:
1433 SLJIT_ASSERT(save && common->recursive_head != 0);
1434 count = 1;
1435 srcw[0] = common->recursive_head;
1436 status = loop;
1437 break;
1438
1439 case loop:
1440 if (cc >= ccend)
1441 {
1442 status = end;
1443 break;
1444 }
1445
1446 switch(*cc)
1447 {
1448 case OP_ASSERT:
1449 case OP_ASSERT_NOT:
1450 case OP_ASSERTBACK:
1451 case OP_ASSERTBACK_NOT:
1452 case OP_ONCE:
1453 case OP_ONCE_NC:
1454 case OP_BRAPOS:
1455 case OP_SBRA:
1456 case OP_SBRAPOS:
1457 case OP_SCOND:
1458 count = 1;
1459 srcw[0] = PRIVATE_DATA(cc);
1460 SLJIT_ASSERT(srcw[0] != 0);
1461 cc += 1 + LINK_SIZE;
1462 break;
1463
1464 case OP_CBRA:
1465 case OP_SCBRA:
1466 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1467 {
1468 count = 1;
1469 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1470 }
1471 cc += 1 + LINK_SIZE + IMM2_SIZE;
1472 break;
1473
1474 case OP_CBRAPOS:
1475 case OP_SCBRAPOS:
1476 count = 2;
1477 srcw[0] = PRIVATE_DATA(cc);
1478 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1479 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1480 cc += 1 + LINK_SIZE + IMM2_SIZE;
1481 break;
1482
1483 case OP_COND:
1484 /* Might be a hidden SCOND. */
1485 alternative = cc + GET(cc, 1);
1486 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1487 {
1488 count = 1;
1489 srcw[0] = PRIVATE_DATA(cc);
1490 SLJIT_ASSERT(srcw[0] != 0);
1491 }
1492 cc += 1 + LINK_SIZE;
1493 break;
1494
1495 CASE_ITERATOR_PRIVATE_DATA_1
1496 if (PRIVATE_DATA(cc))
1497 {
1498 count = 1;
1499 srcw[0] = PRIVATE_DATA(cc);
1500 }
1501 cc += 2;
1502 #ifdef SUPPORT_UTF
1503 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1504 #endif
1505 break;
1506
1507 CASE_ITERATOR_PRIVATE_DATA_2A
1508 if (PRIVATE_DATA(cc))
1509 {
1510 count = 2;
1511 srcw[0] = PRIVATE_DATA(cc);
1512 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1513 }
1514 cc += 2;
1515 #ifdef SUPPORT_UTF
1516 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1517 #endif
1518 break;
1519
1520 CASE_ITERATOR_PRIVATE_DATA_2B
1521 if (PRIVATE_DATA(cc))
1522 {
1523 count = 2;
1524 srcw[0] = PRIVATE_DATA(cc);
1525 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1526 }
1527 cc += 2 + IMM2_SIZE;
1528 #ifdef SUPPORT_UTF
1529 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1530 #endif
1531 break;
1532
1533 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1534 if (PRIVATE_DATA(cc))
1535 {
1536 count = 1;
1537 srcw[0] = PRIVATE_DATA(cc);
1538 }
1539 cc += 1;
1540 break;
1541
1542 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1543 if (PRIVATE_DATA(cc))
1544 {
1545 count = 2;
1546 srcw[0] = PRIVATE_DATA(cc);
1547 srcw[1] = srcw[0] + sizeof(sljit_sw);
1548 }
1549 cc += 1;
1550 break;
1551
1552 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1553 if (PRIVATE_DATA(cc))
1554 {
1555 count = 2;
1556 srcw[0] = PRIVATE_DATA(cc);
1557 srcw[1] = srcw[0] + sizeof(sljit_sw);
1558 }
1559 cc += 1 + IMM2_SIZE;
1560 break;
1561
1562 case OP_CLASS:
1563 case OP_NCLASS:
1564 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1565 case OP_XCLASS:
1566 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1567 #else
1568 size = 1 + 32 / (int)sizeof(pcre_uchar);
1569 #endif
1570 if (PRIVATE_DATA(cc))
1571 switch(get_class_iterator_size(cc + size))
1572 {
1573 case 1:
1574 count = 1;
1575 srcw[0] = PRIVATE_DATA(cc);
1576 break;
1577
1578 case 2:
1579 count = 2;
1580 srcw[0] = PRIVATE_DATA(cc);
1581 srcw[1] = srcw[0] + sizeof(sljit_sw);
1582 break;
1583
1584 default:
1585 SLJIT_ASSERT_STOP();
1586 break;
1587 }
1588 cc += size;
1589 break;
1590
1591 default:
1592 cc = next_opcode(common, cc);
1593 SLJIT_ASSERT(cc != NULL);
1594 break;
1595 }
1596 break;
1597
1598 case end:
1599 SLJIT_ASSERT_STOP();
1600 break;
1601 }
1602
1603 while (count > 0)
1604 {
1605 count--;
1606 if (save)
1607 {
1608 if (tmp1next)
1609 {
1610 if (!tmp1empty)
1611 {
1612 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1613 stackptr += sizeof(sljit_sw);
1614 }
1615 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1616 tmp1empty = FALSE;
1617 tmp1next = FALSE;
1618 }
1619 else
1620 {
1621 if (!tmp2empty)
1622 {
1623 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1624 stackptr += sizeof(sljit_sw);
1625 }
1626 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1627 tmp2empty = FALSE;
1628 tmp1next = TRUE;
1629 }
1630 }
1631 else
1632 {
1633 if (tmp1next)
1634 {
1635 SLJIT_ASSERT(!tmp1empty);
1636 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1637 tmp1empty = stackptr >= stacktop;
1638 if (!tmp1empty)
1639 {
1640 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1641 stackptr += sizeof(sljit_sw);
1642 }
1643 tmp1next = FALSE;
1644 }
1645 else
1646 {
1647 SLJIT_ASSERT(!tmp2empty);
1648 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1649 tmp2empty = stackptr >= stacktop;
1650 if (!tmp2empty)
1651 {
1652 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1653 stackptr += sizeof(sljit_sw);
1654 }
1655 tmp1next = TRUE;
1656 }
1657 }
1658 }
1659 }
1660
1661 if (save)
1662 {
1663 if (tmp1next)
1664 {
1665 if (!tmp1empty)
1666 {
1667 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1668 stackptr += sizeof(sljit_sw);
1669 }
1670 if (!tmp2empty)
1671 {
1672 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1673 stackptr += sizeof(sljit_sw);
1674 }
1675 }
1676 else
1677 {
1678 if (!tmp2empty)
1679 {
1680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1681 stackptr += sizeof(sljit_sw);
1682 }
1683 if (!tmp1empty)
1684 {
1685 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1686 stackptr += sizeof(sljit_sw);
1687 }
1688 }
1689 }
1690 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1691 }
1692
1693 #undef CASE_ITERATOR_PRIVATE_DATA_1
1694 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1695 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1696 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1697 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1698 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1699
1700 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1701 {
1702 return (value & (value - 1)) == 0;
1703 }
1704
1705 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1706 {
1707 while (list)
1708 {
1709 /* sljit_set_label is clever enough to do nothing
1710 if either the jump or the label is NULL. */
1711 sljit_set_label(list->jump, label);
1712 list = list->next;
1713 }
1714 }
1715
1716 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1717 {
1718 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1719 if (list_item)
1720 {
1721 list_item->next = *list;
1722 list_item->jump = jump;
1723 *list = list_item;
1724 }
1725 }
1726
1727 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1728 {
1729 DEFINE_COMPILER;
1730 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1731
1732 if (list_item)
1733 {
1734 list_item->type = type;
1735 list_item->data = data;
1736 list_item->start = start;
1737 list_item->quit = LABEL();
1738 list_item->next = common->stubs;
1739 common->stubs = list_item;
1740 }
1741 }
1742
1743 static void flush_stubs(compiler_common *common)
1744 {
1745 DEFINE_COMPILER;
1746 stub_list* list_item = common->stubs;
1747
1748 while (list_item)
1749 {
1750 JUMPHERE(list_item->start);
1751 switch(list_item->type)
1752 {
1753 case stack_alloc:
1754 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1755 break;
1756 }
1757 JUMPTO(SLJIT_JUMP, list_item->quit);
1758 list_item = list_item->next;
1759 }
1760 common->stubs = NULL;
1761 }
1762
1763 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1764 {
1765 DEFINE_COMPILER;
1766
1767 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1768 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1769 }
1770
1771 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1772 {
1773 /* May destroy all locals and registers except TMP2. */
1774 DEFINE_COMPILER;
1775
1776 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1777 #ifdef DESTROY_REGISTERS
1778 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1779 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1780 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1781 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1783 #endif
1784 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1785 }
1786
1787 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1788 {
1789 DEFINE_COMPILER;
1790 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1791 }
1792
1793 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1794 {
1795 DEFINE_COMPILER;
1796 struct sljit_label *loop;
1797 int i;
1798 /* At this point we can freely use all temporary registers. */
1799 /* TMP1 returns with begin - 1. */
1800 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1801 if (length < 8)
1802 {
1803 for (i = 0; i < length; i++)
1804 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1805 }
1806 else
1807 {
1808 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START - sizeof(sljit_sw));
1809 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length);
1810 loop = LABEL();
1811 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1812 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1813 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1814 }
1815 }
1816
1817 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1818 {
1819 DEFINE_COMPILER;
1820 struct sljit_label *loop;
1821 struct sljit_jump *earlyexit;
1822
1823 /* At this point we can freely use all registers. */
1824 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1825 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1826
1827 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
1828 if (common->mark_ptr != 0)
1829 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1830 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1831 if (common->mark_ptr != 0)
1832 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
1833 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1834 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1835 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1836 /* Unlikely, but possible */
1837 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
1838 loop = LABEL();
1839 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
1840 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
1841 /* Copy the integer value to the output buffer */
1842 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1843 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1844 #endif
1845 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1846 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1847 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1848 JUMPHERE(earlyexit);
1849
1850 /* Calculate the return value, which is the maximum ovector value. */
1851 if (topbracket > 1)
1852 {
1853 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
1854 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
1855
1856 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1857 loop = LABEL();
1858 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
1859 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1860 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1861 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
1862 }
1863 else
1864 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1865 }
1866
1867 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1868 {
1869 DEFINE_COMPILER;
1870
1871 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1872 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1873
1874 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
1875 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1876 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1877 CMPTO(SLJIT_C_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
1878
1879 /* Store match begin and end. */
1880 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1881 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1882 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1883 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1884 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1885 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1886 #endif
1887 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1888
1889 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
1890 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1891 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
1892 #endif
1893 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
1894
1895 JUMPTO(SLJIT_JUMP, quit);
1896 }
1897
1898 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1899 {
1900 /* May destroy TMP1. */
1901 DEFINE_COMPILER;
1902 struct sljit_jump *jump;
1903
1904 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1905 {
1906 /* The value of -1 must be kept for start_used_ptr! */
1907 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1908 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1909 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1910 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1911 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1912 JUMPHERE(jump);
1913 }
1914 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1915 {
1916 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1917 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1918 JUMPHERE(jump);
1919 }
1920 }
1921
1922 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1923 {
1924 /* Detects if the character has an othercase. */
1925 unsigned int c;
1926
1927 #ifdef SUPPORT_UTF
1928 if (common->utf)
1929 {
1930 GETCHAR(c, cc);
1931 if (c > 127)
1932 {
1933 #ifdef SUPPORT_UCP
1934 return c != UCD_OTHERCASE(c);
1935 #else
1936 return FALSE;
1937 #endif
1938 }
1939 #ifndef COMPILE_PCRE8
1940 return common->fcc[c] != c;
1941 #endif
1942 }
1943 else
1944 #endif
1945 c = *cc;
1946 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1947 }
1948
1949 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1950 {
1951 /* Returns with the othercase. */
1952 #ifdef SUPPORT_UTF
1953 if (common->utf && c > 127)
1954 {
1955 #ifdef SUPPORT_UCP
1956 return UCD_OTHERCASE(c);
1957 #else
1958 return c;
1959 #endif
1960 }
1961 #endif
1962 return TABLE_GET(c, common->fcc, c);
1963 }
1964
1965 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1966 {
1967 /* Detects if the character and its othercase has only 1 bit difference. */
1968 unsigned int c, oc, bit;
1969 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1970 int n;
1971 #endif
1972
1973 #ifdef SUPPORT_UTF
1974 if (common->utf)
1975 {
1976 GETCHAR(c, cc);
1977 if (c <= 127)
1978 oc = common->fcc[c];
1979 else
1980 {
1981 #ifdef SUPPORT_UCP
1982 oc = UCD_OTHERCASE(c);
1983 #else
1984 oc = c;
1985 #endif
1986 }
1987 }
1988 else
1989 {
1990 c = *cc;
1991 oc = TABLE_GET(c, common->fcc, c);
1992 }
1993 #else
1994 c = *cc;
1995 oc = TABLE_GET(c, common->fcc, c);
1996 #endif
1997
1998 SLJIT_ASSERT(c != oc);
1999
2000 bit = c ^ oc;
2001 /* Optimized for English alphabet. */
2002 if (c <= 127 && bit == 0x20)
2003 return (0 << 8) | 0x20;
2004
2005 /* Since c != oc, they must have at least 1 bit difference. */
2006 if (!is_powerof2(bit))
2007 return 0;
2008
2009 #if defined COMPILE_PCRE8
2010
2011 #ifdef SUPPORT_UTF
2012 if (common->utf && c > 127)
2013 {
2014 n = GET_EXTRALEN(*cc);
2015 while ((bit & 0x3f) == 0)
2016 {
2017 n--;
2018 bit >>= 6;
2019 }
2020 return (n << 8) | bit;
2021 }
2022 #endif /* SUPPORT_UTF */
2023 return (0 << 8) | bit;
2024
2025 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2026
2027 #ifdef SUPPORT_UTF
2028 if (common->utf && c > 65535)
2029 {
2030 if (bit >= (1 << 10))
2031 bit >>= 10;
2032 else
2033 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2034 }
2035 #endif /* SUPPORT_UTF */
2036 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2037
2038 #endif /* COMPILE_PCRE[8|16|32] */
2039 }
2040
2041 static void check_partial(compiler_common *common, BOOL force)
2042 {
2043 /* Checks whether a partial matching is occured. Does not modify registers. */
2044 DEFINE_COMPILER;
2045 struct sljit_jump *jump = NULL;
2046
2047 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2048
2049 if (common->mode == JIT_COMPILE)
2050 return;
2051
2052 if (!force)
2053 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2054 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2055 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2056
2057 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2058 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2059 else
2060 {
2061 if (common->partialmatchlabel != NULL)
2062 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2063 else
2064 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2065 }
2066
2067 if (jump != NULL)
2068 JUMPHERE(jump);
2069 }
2070
2071 static struct sljit_jump *check_str_end(compiler_common *common)
2072 {
2073 /* Does not affect registers. Usually used in a tight spot. */
2074 DEFINE_COMPILER;
2075 struct sljit_jump *jump;
2076 struct sljit_jump *nohit;
2077 struct sljit_jump *return_value;
2078
2079 if (common->mode == JIT_COMPILE)
2080 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2081
2082 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2083 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2084 {
2085 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2086 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2087 JUMPHERE(nohit);
2088 return_value = JUMP(SLJIT_JUMP);
2089 }
2090 else
2091 {
2092 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2093 if (common->partialmatchlabel != NULL)
2094 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2095 else
2096 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2097 }
2098 JUMPHERE(jump);
2099 return return_value;
2100 }
2101
2102 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2103 {
2104 DEFINE_COMPILER;
2105 struct sljit_jump *jump;
2106
2107 if (common->mode == JIT_COMPILE)
2108 {
2109 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2110 return;
2111 }
2112
2113 /* Partial matching mode. */
2114 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2115 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2116 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2117 {
2118 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2119 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2120 }
2121 else
2122 {
2123 if (common->partialmatchlabel != NULL)
2124 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2125 else
2126 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2127 }
2128 JUMPHERE(jump);
2129 }
2130
2131 static void read_char(compiler_common *common)
2132 {
2133 /* Reads the character into TMP1, updates STR_PTR.
2134 Does not check STR_END. TMP2 Destroyed. */
2135 DEFINE_COMPILER;
2136 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2137 struct sljit_jump *jump;
2138 #endif
2139
2140 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2141 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2142 if (common->utf)
2143 {
2144 #if defined COMPILE_PCRE8
2145 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2146 #elif defined COMPILE_PCRE16
2147 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2148 #endif /* COMPILE_PCRE[8|16] */
2149 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2150 JUMPHERE(jump);
2151 }
2152 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2153 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2154 }
2155
2156 static void peek_char(compiler_common *common)
2157 {
2158 /* Reads the character into TMP1, keeps STR_PTR.
2159 Does not check STR_END. TMP2 Destroyed. */
2160 DEFINE_COMPILER;
2161 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2162 struct sljit_jump *jump;
2163 #endif
2164
2165 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2166 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2167 if (common->utf)
2168 {
2169 #if defined COMPILE_PCRE8
2170 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2171 #elif defined COMPILE_PCRE16
2172 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2173 #endif /* COMPILE_PCRE[8|16] */
2174 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2175 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2176 JUMPHERE(jump);
2177 }
2178 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2179 }
2180
2181 static void read_char8_type(compiler_common *common)
2182 {
2183 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.
The first code unit is loaded into TMP2, so TMP2 is overwritten. Characters
above 255 get type 0, because the ctypes table has only 256 entries. */
2184 DEFINE_COMPILER;
2185 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2186 struct sljit_jump *jump;
2187 #endif
2188
2189 #ifdef SUPPORT_UTF
2190 if (common->utf)
2191 {
2192 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2193 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2194 #if defined COMPILE_PCRE8
2195 /* This can be an extra read in some situations, but hopefully
2196 it is needed in most cases. */
2197 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* Bytes below 0xc0 keep the type just loaded; lead bytes are handed to the
utfreadtype8 helper, which returns the type in TMP1. */
2198 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2199 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2200 JUMPHERE(jump);
2201 #elif defined COMPILE_PCRE16
/* Default type is 0; the table lookup is skipped for units above 255. */
2202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2203 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2204 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2205 JUMPHERE(jump);
2206 /* Skip low surrogate if necessary. */
/* (TMP2 & 0xfc00) == 0xd800 means the unit just read was a high surrogate.
The equality flag is shifted left by one (2 for a match, 0 otherwise) and
added to STR_PTR. */
2207 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2208 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2209 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2210 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2211 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2212 #elif defined COMPILE_PCRE32
/* Default type is 0; the table lookup is skipped for units above 255. */
2213 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2214 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2215 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2216 JUMPHERE(jump);
2217 #endif /* COMPILE_PCRE[8|16|32] */
2218 return;
2219 }
2220 #endif /* SUPPORT_UTF */
/* Non-UTF path: one code unit is one character. */
2221 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2222 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2223 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2224 /* The ctypes array contains only 256 values. */
2225 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2226 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2227 #endif
2228 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2229 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2230 JUMPHERE(jump);
2231 #endif
2232 }
2233
2234 static void skip_char_back(compiler_common *common)
2235 {
2236 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin.
In UTF-8 and UTF-16 mode the emitted code steps over all code units of the
previous character; otherwise STR_PTR is moved back by one code unit. */
2237 DEFINE_COMPILER;
2238 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2239 #if defined COMPILE_PCRE8
2240 struct sljit_label *label;
2241
2242 if (common->utf)
2243 {
/* Step back one byte at a time while the byte just passed has the form
10xxxxxx, i.e. (byte & 0xc0) == 0x80. */
2244 label = LABEL();
2245 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2246 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2247 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2248 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2249 return;
2250 }
2251 #elif defined COMPILE_PCRE16
2252 if (common->utf)
2253 {
2254 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2255 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2256 /* Skip low surrogate if necessary. */
/* (unit & 0xfc00) == 0xdc00 marks a low surrogate. The equality flag is
shifted left by one (2 for a match, 0 otherwise) and subtracted from
STR_PTR. */
2257 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2258 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2259 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2260 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2261 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2262 return;
2263 }
2264 #endif /* COMPILE_PCRE[8|16] */
2265 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Fixed width case: one code unit is one character. */
2266 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2267 }
2268
2269 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2270 {
2271 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed.
A jump is appended to the backtracks list; it is taken when the character is
a newline if jumpiftrue is TRUE, and when it is not a newline otherwise. */
2272 DEFINE_COMPILER;
2273
2274 if (nltype == NLTYPE_ANY)
2275 {
/* The test is delegated to the shared anynewline helper; the conditional
jump below relies on the flags that helper leaves behind (presumably zero
flag clear for a newline - confirm against the helper's definition). */
2276 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2277 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2278 }
2279 else if (nltype == NLTYPE_ANYCRLF)
2280 {
/* TMP2 = (TMP1 == CR) | (TMP1 == NL); the final OR also sets the flags
tested by the jump. */
2281 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2282 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2283 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2284 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2285 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2286 }
2287 else
2288 {
/* Fixed newline made of a single code unit: a direct compare is enough. */
2289 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2290 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2291 }
2292 }
2293
2294 #ifdef SUPPORT_UTF
2295
2296 #if defined COMPILE_PCRE8
2297 static void do_utfreadchar(compiler_common *common)
2298 {
2299 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2300 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2.
The continuation bytes are read from STR_PTR + 1 onwards, and STR_PTR is
advanced by the same length - 1 that is returned in TMP2. The emitted code is
a fast-call helper: it is entered with sljit_emit_fast_enter and every path
ends with sljit_emit_fast_return. No validity check is made on the bytes. */
2301 DEFINE_COMPILER;
2302 struct sljit_jump *jump;
2303
2304 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2305 /* Searching for the first zero. */
/* Bit 0x20 clear in the lead byte: 110xxxxx, a two byte sequence. */
2306 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2307 jump = JUMP(SLJIT_C_NOT_ZERO);
2308 /* Two byte sequence. */
2309 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2310 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2311 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2312 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2313 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2314 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2315 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2316 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2317 JUMPHERE(jump);
2318
/* Bit 0x10 clear in the lead byte: 1110xxxx, a three byte sequence. */
2319 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2320 jump = JUMP(SLJIT_C_NOT_ZERO);
2321 /* Three byte sequence. */
2322 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2323 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2324 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2325 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2326 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2327 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2328 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2329 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2330 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2331 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2332 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2333 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2334 JUMPHERE(jump);
2335
2336 /* Four byte sequence. */
/* Everything else is decoded as four bytes; longer forms are not handled. */
2337 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2338 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2339 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2340 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2341 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2342 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2343 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2344 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2345 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2346 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2347 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2348 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2349 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2350 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2351 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2352 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2353 }
2354
2355 static void do_utfreadtype8(compiler_common *common)
2356 {
2357 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2358 of the character (>= 0xc0). Return value in TMP1.
STR_PTR is advanced past the remaining bytes of the character. Only two byte
sequences can encode a value below 256, so only those are decoded; every
other character gets type 0. */
2359 DEFINE_COMPILER;
2360 struct sljit_jump *jump;
2361 struct sljit_jump *compare;
2362
2363 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2364
/* Bit 0x20 clear in the lead byte: 110xxxxx, a two byte sequence. */
2365 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2366 jump = JUMP(SLJIT_C_NOT_ZERO);
2367 /* Two byte sequence. */
/* The second byte is read at offset 0: the visible caller, read_char8_type,
has already moved STR_PTR past the lead byte before calling this helper. */
2368 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2369 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2370 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2371 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2372 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2373 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* TMP2 now holds the decoded character; look its type up if it is in range. */
2374 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2375 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2376 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2377
2378 JUMPHERE(compare);
2379 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2380 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2381 JUMPHERE(jump);
2382
2383 /* We only have types for characters less than 256. */
/* utf8_table4 is indexed by (lead byte - 0xc0); the byte read from it is
added to STR_PTR, so it is presumably the number of additional bytes in the
sequence - confirm against the table definition. */
2384 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2385 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2386 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2387 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2388 }
2389
2390 #elif defined COMPILE_PCRE16
2391
2392 static void do_utfreadchar(compiler_common *common)
2393 {
2394 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2395 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2.
For a surrogate pair STR_PTR is advanced by one code unit, the same amount
that is returned in TMP2. The emitted code is a fast-call helper: it is
entered with sljit_emit_fast_enter and every path ends with
sljit_emit_fast_return. */
2396 DEFINE_COMPILER;
2397 struct sljit_jump *jump;
2398
2399 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Units below 0xdc00 are high surrogates here and need the second unit. */
2400 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
/* Values from 0xdc00 upwards are a single code unit, so TMP1 is already the
character. TMP2 still has to be set: the length - 1 result is 0 here, and a
caller that subtracts TMP2 from STR_PTR after the call would otherwise use
whatever value happened to be in the register. */
2401 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
2402 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2403
2404 JUMPHERE(jump);
2405 /* Combine two 16 bit characters. */
2406 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2407 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2408 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2409 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2410 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2411 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2412 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
/* Surrogate pairs encode code points starting at 0x10000. */
2413 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2414 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2415 }
2416
2417 #endif /* COMPILE_PCRE[8|16] */
2418
2419 #endif /* SUPPORT_UTF */
2420
2421 #ifdef SUPPORT_UCP
2422
2423 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2424 #define UCD_BLOCK_MASK 127
2425 #define UCD_BLOCK_SHIFT 7
2426
2427 static void do_getucd(compiler_common *common)
2428 {
2429 /* Search the UCD record for the character comes in TMP1.
2430 Returns chartype in TMP1 and UCD offset in TMP2.
The emitted fast-call helper performs a two stage table lookup: ucd_stage1 is
indexed by the block number (character >> 7), and ucd_stage2 by that entry
times 128 plus the offset of the character within its block. */
2431 DEFINE_COMPILER;
2432
/* The shifts used below hard-code the block size and the record size. */
2433 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2434
2435 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2436 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2437 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2438 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2439 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2440 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2441 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
/* Half-word load; the index in TMP1 is scaled by 2 (shift of 1). */
2442 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2443 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
/* Byte load of the chartype field; the record index in TMP2 is scaled by 8
(shift of 3), the record size asserted above. */
2444 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2445 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2446 }
2447 #endif
2448
2449 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2450 {
/* Emits the entry of the main loop and returns its label. Code that jumps to
the returned label advances STR_PTR by one character (all code units of it in
UTF-8 / UTF-16 mode) and then falls through to the code emitted after this
function returns. The first pass skips that advance through the 'start' jump.

When firstline is TRUE, code is emitted first that scans for the end of the
first line and stores it in the first_line_end local; STR_PTR is preserved
across the scan in TMP3.

newlinecheck is enabled when neither hascrorlf nor firstline is set and the
newline can consist of two code units. In that case the advance steps over
both units when the current unit and the following one match the two halves
of common->newline. */
2451 DEFINE_COMPILER;
2452 struct sljit_label *mainloop;
2453 struct sljit_label *newlinelabel = NULL;
2454 struct sljit_jump *start;
2455 struct sljit_jump *end = NULL;
2456 struct sljit_jump *nl = NULL;
2457 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2458 struct sljit_jump *singlechar;
2459 #endif
2460 jump_list *newline = NULL;
2461 BOOL newlinecheck = FALSE;
2462 BOOL readuchar = FALSE;
2463
2464 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2465 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2466 newlinecheck = TRUE;
2467
2468 if (firstline)
2469 {
2470 /* Search for the end of the first line. */
2471 SLJIT_ASSERT(common->first_line_end != 0);
2472 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2473
2474 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2475 {
/* Two unit fixed newline: compare the units at STR_PTR - 1 and STR_PTR with
the high and low byte of common->newline until both match or STR_END is
reached. The stored end is STR_PTR minus one unit. */
2476 mainloop = LABEL();
2477 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2478 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2479 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2480 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2481 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2482 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2483 JUMPHERE(end);
2484 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2485 }
2486 else
2487 {
/* General case: the position before each character is stored, so when a
newline is found the stored value points at the newline itself. If the
subject ends first, STR_PTR (the end) is stored instead. */
2488 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2489 mainloop = LABEL();
2490 /* Continual stores does not cause data dependency. */
2491 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2492 read_char(common);
2493 check_newlinechar(common, common->nltype, &newline, TRUE);
2494 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2495 JUMPHERE(end);
2496 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2497 set_jumps(newline, LABEL());
2498 }
2499
/* Restore the start position saved before the scan. */
2500 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2501 }
2502
/* The first iteration must not advance STR_PTR: jump over the advance code. */
2503 start = JUMP(SLJIT_JUMP);
2504
2505 if (newlinecheck)
2506 {
/* Reached from the main loop when the current unit equals the first newline
unit. STR_PTR moves past it; if the next unit equals the second newline unit,
the 0/1 equality flag (scaled to the unit size) is added to skip that too. */
2507 newlinelabel = LABEL();
2508 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2509 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2510 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2511 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2512 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2513 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2514 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2515 #endif
2516 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2517 nl = JUMP(SLJIT_JUMP);
2518 }
2519
2520 mainloop = LABEL();
2521
2522 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit is only loaded when something below needs its value. */
2523 #ifdef SUPPORT_UTF
2524 if (common->utf) readuchar = TRUE;
2525 #endif
2526 if (newlinecheck) readuchar = TRUE;
2527
2528 if (readuchar)
2529 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2530
2531 if (newlinecheck)
2532 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2533
2534 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2535 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2536 #if defined COMPILE_PCRE8
2537 if (common->utf)
2538 {
/* For lead bytes (>= 0xc0), add the utf8_table4 entry indexed by
(byte - 0xc0) to STR_PTR to step over the rest of the character. */
2539 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2540 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2541 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2542 JUMPHERE(singlechar);
2543 }
2544 #elif defined COMPILE_PCRE16
2545 if (common->utf)
2546 {
/* For a high surrogate ((unit & 0xfc00) == 0xd800), add 2 more bytes to
STR_PTR to step over the second unit of the pair. */
2547 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2548 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2549 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2550 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2551 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2552 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2553 JUMPHERE(singlechar);
2554 }
2555 #endif /* COMPILE_PCRE[8|16] */
2556 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* All paths (first entry, normal advance, newline skip) join here. */
2557 JUMPHERE(start);
2558
2559 if (newlinecheck)
2560 {
2561 JUMPHERE(end);
2562 JUMPHERE(nl);
2563 }
2564
2565 return mainloop;
2566 }
2567
2568 #define MAX_N_CHARS 3
2569
2570 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2571 {
/* Tries to emit a fast-forward loop based on the first two or three literal
code units of the pattern. The compiled pattern is scanned from its start to
collect up to MAX_N_CHARS units; then code is emitted that advances STR_PTR
until those units are found at STR_PTR or the (temporarily shortened) STR_END
is reached.

Returns FALSE, without emitting anything, when the pattern has alternatives,
when a caseless character has no usable single case bit, or when fewer than
two units could be collected. Returns TRUE after emitting the loop.

chars[] holds pairs: chars[2 * i] is the unit to compare against (with the
case bit set) and chars[2 * i + 1] is the case bit to OR into the subject
unit before comparing (0 for an exact compare). */
2572 DEFINE_COMPILER;
2573 struct sljit_label *start;
2574 struct sljit_jump *quit;
2575 pcre_uint32 chars[MAX_N_CHARS * 2];
2576 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2577 int location = 0;
2578 pcre_int32 len, c, bit, caseless;
2579 int must_stop;
2580
2581 /* We do not support alternatives now. */
2582 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2583 return FALSE;
2584
2585 while (TRUE)
2586 {
/* must_stop: 0 = a further character may follow, 1 = take this character
and stop, 2 = stop without taking anything from this opcode. */
2587 caseless = 0;
2588 must_stop = 1;
2589 switch(*cc)
2590 {
2591 case OP_CHAR:
2592 must_stop = 0;
2593 cc++;
2594 break;
2595
2596 case OP_CHARI:
2597 caseless = 1;
2598 must_stop = 0;
2599 cc++;
2600 break;
2601
2602 case OP_SOD:
2603 case OP_SOM:
2604 case OP_SET_SOM:
2605 case OP_NOT_WORD_BOUNDARY:
2606 case OP_WORD_BOUNDARY:
2607 case OP_EODN:
2608 case OP_EOD:
2609 case OP_CIRC:
2610 case OP_CIRCM:
2611 case OP_DOLL:
2612 case OP_DOLLM:
2613 /* Zero width assertions. */
2614 cc++;
2615 continue;
2616
/* Repeats that need at least one occurrence: the character is taken once
and collection stops after it (must_stop stays 1). */
2617 case OP_PLUS:
2618 case OP_MINPLUS:
2619 case OP_POSPLUS:
2620 cc++;
2621 break;
2622
2623 case OP_EXACT:
2624 cc += 1 + IMM2_SIZE;
2625 break;
2626
2627 case OP_PLUSI:
2628 case OP_MINPLUSI:
2629 case OP_POSPLUSI:
2630 caseless = 1;
2631 cc++;
2632 break;
2633
2634 case OP_EXACTI:
2635 caseless = 1;
2636 cc += 1 + IMM2_SIZE;
2637 break;
2638
2639 default:
2640 must_stop = 2;
2641 break;
2642 }
2643
2644 if (must_stop == 2)
2645 break;
2646
/* len is the number of code units of the character at cc. */
2647 len = 1;
2648 #ifdef SUPPORT_UTF
2649 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2650 #endif
2651
2652 if (caseless && char_has_othercase(common, cc))
2653 {
2654 caseless = char_get_othercase_bit(common, cc);
2655 if (caseless == 0)
2656 return FALSE;
/* Repack caseless for the copy loop below: the low 8 bits become the value
of len at which the case bit applies, and the bits above hold the bit mask
itself. */
2657 #ifdef COMPILE_PCRE8
2658 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2659 #else
2660 if ((caseless & 0x100) != 0)
2661 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2662 else
2663 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2664 #endif
2665 }
2666 else
2667 caseless = 0;
2668
/* Copy the units of this character; len counts down, so the case bit is
attached to the unit for which len equals the stored position. */
2669 while (len > 0 && location < MAX_N_CHARS * 2)
2670 {
2671 c = *cc;
2672 bit = 0;
2673 if (len == (caseless & 0xff))
2674 {
2675 bit = caseless >> 8;
2676 c |= bit;
2677 }
2678
2679 chars[location] = c;
2680 chars[location + 1] = bit;
2681
2682 len--;
2683 location += 2;
2684 cc++;
2685 }
2686
2687 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2688 break;
2689 }
2690
2691 /* At least two characters are required. */
2692 if (location < 2 * 2)
2693 return FALSE;
2694
/* STR_END is lowered by (number of units - 1) while the loop runs, so the
loads at STR_PTR + 1 and STR_PTR + 2 below are only issued for positions
where the whole prefix fits before the original end. With firstline the
limit is derived from first_line_end and the original STR_END is kept in
TMP3. */
2695 if (firstline)
2696 {
2697 SLJIT_ASSERT(common->first_line_end != 0);
2698 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2699 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2700 }
2701 else
2702 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2703
2704 start = LABEL();
2705 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2706
/* Load the first two units, then advance STR_PTR by one; every failed
compare jumps back to start with STR_PTR already moved to the next
candidate position. */
2707 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2708 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2709 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2710 if (chars[1] != 0)
2711 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2712 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* With three units, the third is loaded into TMP1 here (offset 1 from the
already advanced STR_PTR) before the second compare is done. */
2713 if (location > 2 * 2)
2714 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2715 if (chars[3] != 0)
2716 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2717 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2718 if (location > 2 * 2)
2719 {
2720 if (chars[5] != 0)
2721 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2722 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2723 }
/* Match found: undo the advance so STR_PTR points at the first unit. */
2724 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2725
2726 JUMPHERE(quit);
2727
/* Restore the original STR_END. */
2728 if (firstline)
2729 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2730 else
2731 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2732 return TRUE;
2733 }
2734
2735 #undef MAX_N_CHARS
2736
2737 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2738 {
/* Emits a loop that advances STR_PTR one code unit at a time until the unit
at STR_PTR equals first_char (or, when caseless is TRUE, its other case), or
until STR_END is reached. When firstline is TRUE the search is limited to
first_line_end; the original STR_END is kept in TMP3 and restored at the end.
TMP1 and TMP2 are overwritten. */
2739 DEFINE_COMPILER;
2740 struct sljit_label *start;
2741 struct sljit_jump *quit;
2742 struct sljit_jump *found;
2743 pcre_uchar oc, bit;
2744
2745 if (firstline)
2746 {
2747 SLJIT_ASSERT(common->first_line_end != 0);
2748 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2749 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2750 }
2751
2752 start = LABEL();
2753 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2754 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2755
/* oc is the other case of first_char, or first_char itself if there is none. */
2756 oc = first_char;
2757 if (caseless)
2758 {
2759 oc = TABLE_GET(first_char, common->fcc, first_char);
2760 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2761 if (first_char > 127 && common->utf)
2762 oc = UCD_OTHERCASE(first_char);
2763 #endif
2764 }
2765 if (first_char == oc)
2766 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2767 else
2768 {
2769 bit = first_char ^ oc;
2770 if (is_powerof2(bit))
2771 {
/* The two cases differ in a single bit: force that bit on in the subject
unit and compare once. */
2772 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2773 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2774 }
2775 else
2776 {
/* Otherwise TMP2 = (TMP1 == first_char) | (TMP1 == oc). */
2777 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2778 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2779 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2780 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2781 found = JUMP(SLJIT_C_NOT_ZERO);
2782 }
2783 }
2784
/* No match at this position: try the next code unit. */
2785 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2786 JUMPTO(SLJIT_JUMP, start);
2787 JUMPHERE(found);
2788 JUMPHERE(quit);
2789
2790 if (firstline)
2791 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2792 }
2793
2794 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2795 {
/* Emits code that moves STR_PTR forward to a position that follows a
newline. Nothing is moved when STR_PTR is at (or before) the 'str' field of
the jit_arguments, or when the end of the subject is reached first. When
firstline is TRUE the search is limited to first_line_end; the original
STR_END is saved in TMP3 and restored from TMP3 on every exit path. TMP1 and
TMP2 are overwritten. */
2796 DEFINE_COMPILER;
2797 struct sljit_label *loop;
2798 struct sljit_jump *lastchar;
2799 struct sljit_jump *firstchar;
2800 struct sljit_jump *quit;
2801 struct sljit_jump *foundcr = NULL;
2802 struct sljit_jump *notfoundnl;
2803 jump_list *newline = NULL;
2804
2805 if (firstline)
2806 {
2807 SLJIT_ASSERT(common->first_line_end != 0);
2808 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2809 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2810 }
2811
2812 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2813 {
/* Fixed newline made of two code units. */
2814 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2815 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2816 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2817 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2818 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2819
/* Step back by one unit if at least two units precede STR_PTR (measured from
'begin'), so that the loop below can read the units at -2 and -1 after its
initial increment. */
2820 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2821 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2822 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
2823 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2824 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
2825 #endif
2826 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2827
/* Advance until the two units before STR_PTR equal the two newline units. */
2828 loop = LABEL();
2829 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2830 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2831 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2832 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2833 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2834 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2835
2836 JUMPHERE(quit);
2837 JUMPHERE(firstchar);
2838 JUMPHERE(lastchar);
2839
/* The original STR_END was saved in TMP3 above, and TMP3 is not modified in
this branch, so it must be restored from TMP3 (not from a stack local that
this function never writes). */
2840 if (firstline)
2841 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2842 return;
2843 }
2844
/* General case: go back one character, then read characters until one of
them is a newline. */
2845 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2846 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2847 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2848 skip_char_back(common);
2849
2850 loop = LABEL();
2851 read_char(common);
2852 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2853 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2854 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* jumpiftrue is FALSE: the collected jumps are taken for non-newline
characters and go back to the loop. */
2855 check_newlinechar(common, common->nltype, &newline, FALSE);
2856 set_jumps(newline, loop);
2857
2858 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2859 {
/* After a CR, also step over an immediately following NL if there is one:
the 0/1 equality flag, scaled to the unit size, is added to STR_PTR. */
2860 quit = JUMP(SLJIT_JUMP);
2861 JUMPHERE(foundcr);
2862 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2863 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2864 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2865 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2866 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2867 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2868 #endif
2869 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2870 JUMPHERE(notfoundnl);
2871 JUMPHERE(quit);
2872 }
2873 JUMPHERE(lastchar);
2874 JUMPHERE(firstchar);
2875
2876 if (firstline)
2877 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2878 }
2879
2880 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2881 {
/* Emits a loop that advances STR_PTR character by character until the code
unit at STR_PTR has its bit set in the bitmap at address start_bits, or until
STR_END is reached. The bitmap is addressed as byte (unit >> 3), bit
(unit & 7). When firstline is TRUE the search is limited to first_line_end;
the original STR_END is kept in RETURN_ADDR during the loop (TMP3 is used for
another purpose in UTF mode). TMP1 and TMP2 are overwritten. */
2882 DEFINE_COMPILER;
2883 struct sljit_label *start;
2884 struct sljit_jump *quit;
2885 struct sljit_jump *found;
2886 #ifndef COMPILE_PCRE8
2887 struct sljit_jump *jump;
2888 #endif
2889
2890 if (firstline)
2891 {
2892 SLJIT_ASSERT(common->first_line_end != 0);
2893 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
2894 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2895 }
2896
2897 start = LABEL();
2898 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2899 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* Keep an unmodified copy of the unit: TMP1 is overwritten by the bitmap
test, but the UTF skipping code below needs the original value. */
2900 #ifdef SUPPORT_UTF
2901 if (common->utf)
2902 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2903 #endif
/* In 16 and 32 bit modes, units of 255 and above are all tested as 255, which
keeps the bitmap index within 0..255. */
2904 #ifndef COMPILE_PCRE8
2905 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2906 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2907 JUMPHERE(jump);
2908 #endif
/* TMP2 = 1 << (unit & 7), TMP1 = bitmap byte at (unit >> 3). */
2909 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2910 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2911 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2912 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2913 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2914 found = JUMP(SLJIT_C_NOT_ZERO);
2915
/* Not a possible start: step over the whole character and retry. */
2916 #ifdef SUPPORT_UTF
2917 if (common->utf)
2918 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2919 #endif
2920 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2921 #ifdef SUPPORT_UTF
2922 #if defined COMPILE_PCRE8
2923 if (common->utf)
2924 {
/* For lead bytes (>= 0xc0), add the utf8_table4 entry indexed by
(byte - 0xc0) to STR_PTR. */
2925 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2926 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2927 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2928 }
2929 #elif defined COMPILE_PCRE16
2930 if (common->utf)
2931 {
/* For a high surrogate ((unit & 0xfc00) == 0xd800), add 2 more bytes. */
2932 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2933 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2934 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2935 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2936 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2937 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2938 }
2939 #endif /* COMPILE_PCRE[8|16] */
2940 #endif /* SUPPORT_UTF */
2941 JUMPTO(SLJIT_JUMP, start);
2942 JUMPHERE(found);
2943 JUMPHERE(quit);
2944
2945 if (firstline)
2946 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
2947 }
2948
2949 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2950 {
/* Emits a forward search for req_char (or, when caseless is TRUE, its other
case) in the rest of the subject. The search starts at STR_PTR, or one code
unit later when has_firstchar is TRUE. The position where the character was
found is stored in the req_char_ptr local, and the search is skipped when
STR_PTR is still below that stored position, or when STR_PTR + REQ_BYTE_MAX
is below STR_END. STR_PTR itself is not changed; TMP1 and TMP2 are
overwritten.

Returns the jump that is taken when STR_END is reached without finding the
character; the caller is responsible for giving it a target. */
2951 DEFINE_COMPILER;
2952 struct sljit_label *loop;
2953 struct sljit_jump *toolong;
2954 struct sljit_jump *alreadyfound;
2955 struct sljit_jump *found;
2956 struct sljit_jump *foundoc = NULL;
2957 struct sljit_jump *notfound;
2958 pcre_uint32 oc, bit;
2959
2960 SLJIT_ASSERT(common->req_char_ptr != 0);
2961 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2962 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2963 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2964 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2965
/* TMP1 is the search cursor. */
2966 if (has_firstchar)
2967 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2968 else
2969 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2970
2971 loop = LABEL();
2972 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2973
2974 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char, or req_char itself if there is none. */
2975 oc = req_char;
2976 if (caseless)
2977 {
2978 oc = TABLE_GET(req_char, common->fcc, req_char);
2979 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2980 if (req_char > 127 && common->utf)
2981 oc = UCD_OTHERCASE(req_char);
2982 #endif
2983 }
2984 if (req_char == oc)
2985 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2986 else
2987 {
2988 bit = req_char ^ oc;
2989 if (is_powerof2(bit))
2990 {
/* The two cases differ in a single bit: force that bit on in the subject
unit and compare once. */
2991 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2992 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2993 }
2994 else
2995 {
/* Otherwise compare against each case separately. */
2996 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2997 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2998 }
2999 }
3000 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3001 JUMPTO(SLJIT_JUMP, loop);
3002
3003 JUMPHERE(found);
3004 if (foundoc)
3005 JUMPHERE(foundoc);
/* Remember where the character was found for later calls. */
3006 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3007 JUMPHERE(alreadyfound);
3008 JUMPHERE(toolong);
3009 return notfound;
3010 }
3011
3012 static void do_revertframes(compiler_common *common)
3013 {
/* Emits a fast-call helper that walks the records stored from STACK_TOP
upwards and undoes them. TMP1 is the read cursor and TMP3 holds the base
address of the locals. The first word of each record is either a command
(a value not greater than frame_end in a signed compare) or an offset
relative to the locals base:
- offset: the next two words are copied to the two words at base + offset,
  and the cursor moves on by three words;
- frame_end: the helper returns;
- frame_setstrbegin: the next word is stored in OVECTOR(0), cursor + 2 words;
- frame_setmark (handled only when mark_ptr is in use): the next word is
  stored in the mark_ptr local, cursor + 2 words;
- any other command: skipped, cursor + 2 words.
TMP1, TMP2 and TMP3 are overwritten; STACK_TOP itself is not changed. */
3014 DEFINE_COMPILER;
3015 struct sljit_jump *jump;
3016 struct sljit_label *mainloop;
3017
3018 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3019 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3020 GET_LOCAL_BASE(TMP3, 0, 0);
3021
3022 /* Drop frames until we reach STACK_TOP. */
3023 mainloop = LABEL();
3024 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3025 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Offset record: restore two consecutive words of the locals area. */
3026 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3027 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3028 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3029 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3030 JUMPTO(SLJIT_JUMP, mainloop);
3031
3032 JUMPHERE(jump);
3033 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3034 /* End of dropping frames. */
3035 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3036
3037 JUMPHERE(jump);
3038 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
3039 /* Set string begin. */
3040 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3041 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3042 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3043 JUMPTO(SLJIT_JUMP, mainloop);
3044
3045 JUMPHERE(jump);
3046 if (common->mark_ptr != 0)
3047 {
/* Set mark: only emitted when the pattern uses a mark slot. */
3048 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3049 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3050 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3052 JUMPTO(SLJIT_JUMP, mainloop);
3053
3054 JUMPHERE(jump);
3055 }
3056
3057 /* Unknown command. */
3058 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3059 JUMPTO(SLJIT_JUMP, mainloop);
3060 }
3061
3062 static void check_wordboundary(compiler_common *common)
3063 {
3064 DEFINE_COMPILER;
3065 struct sljit_jump *skipread;
3066 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3067 struct sljit_jump *jump;
3068 #endif
3069
3070 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3071
3072 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3073 /* Get type of the previous char, and put it to LOCALS1. */
3074 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3075 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3077 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3078 skip_char_back(common);
3079 check_start_used_ptr(common);
3080 read_char(common);
3081
3082 /* Testing char type. */
3083 #ifdef SUPPORT_UCP
3084 if (common->use_ucp)
3085 {
3086 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3087 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3088 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3089 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3090 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3091 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3092 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3093 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3094 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3095 JUMPHERE(jump);
3096 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3097 }
3098 else
3099 #endif
3100 {
3101 #ifndef COMPILE_PCRE8
3102 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3103 #elif defined SUPPORT_UTF
3104 /* Here LOCALS1 has already been zeroed. */
3105 jump = NULL;
3106 if (common->utf)
3107 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3108 #endif /* COMPILE_PCRE8 */
3109 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3110 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3111 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3113 #ifndef COMPILE_PCRE8
3114 JUMPHERE(jump);
3115 #elif defined SUPPORT_UTF
3116 if (jump != NULL)
3117 JUMPHERE(jump);
3118 #endif /* COMPILE_PCRE8 */
3119 }
3120 JUMPHERE(skipread);
3121
3122 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3123 skipread = check_str_end(common);
3124 peek_char(common);
3125
3126 /* Testing char type. This is a code duplication. */
3127 #ifdef SUPPORT_UCP
3128 if (common->use_ucp)
3129 {
3130 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3131 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3132 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3133 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3134 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3135 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3136 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3137 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3138 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3139 JUMPHERE(jump);
3140 }
3141 else
3142 #endif
3143 {
3144 #ifndef COMPILE_PCRE8
3145 /* TMP2 may be destroyed by peek_char. */
3146 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3147 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3148 #elif defined SUPPORT_UTF
3149 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3150 jump = NULL;
3151 if (common->utf)
3152 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3153 #endif
3154 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3155 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3156 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3157 #ifndef COMPILE_PCRE8
3158 JUMPHERE(jump);
3159 #elif defined SUPPORT_UTF
3160 if (jump != NULL)
3161 JUMPHERE(jump);
3162 #endif /* COMPILE_PCRE8 */
3163 }
3164 JUMPHERE(skipread);
3165
3166 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3167 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3168 }
3169
3170 /*
3171 range format:
3172
3173 ranges[0] = number of bit changes stored (max MAX_RANGE_SIZE, -1 means invalid range).
3174 ranges[1] = first bit (0 or 1), i.e. the bit of character 0
3175 ranges[2 .. ranges[0] + 1] = positions of the bit changes (where the current bit is not equal to the previous one)
3176 */
3177
3178 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3179 {
3180 DEFINE_COMPILER;
3181 struct sljit_jump *jump;
3182
3183 if (ranges[0] < 0)
3184 return FALSE;
3185
3186 switch(ranges[0])
3187 {
3188 case 1:
3189 if (readch)
3190 read_char(common);
3191 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3192 return TRUE;
3193
3194 case 2:
3195 if (readch)
3196 read_char(common);
3197 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3198 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3199 return TRUE;
3200
3201 case 4:
3202 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3203 {
3204 if (readch)
3205 read_char(common);
3206 if (ranges[1] != 0)
3207 {
3208 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3209 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3210 }
3211 else
3212 {
3213 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3214 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3215 JUMPHERE(jump);
3216 }
3217 return TRUE;
3218 }
3219 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3220 {
3221 if (readch)
3222 read_char(common);
3223 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3224 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3225 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3226 return TRUE;
3227 }
3228 return FALSE;
3229
3230 default:
3231 return FALSE;
3232 }
3233 }
3234
3235 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3236 {
3237 int i, bit, length;
3238 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3239
3240 bit = ctypes[0] & flag;
3241 ranges[0] = -1;
3242 ranges[1] = bit != 0 ? 1 : 0;
3243 length = 0;
3244
3245 for (i = 1; i < 256; i++)
3246 if ((ctypes[i] & flag) != bit)
3247 {
3248 if (length >= MAX_RANGE_SIZE)
3249 return;
3250 ranges[2 + length] = i;
3251 length++;
3252 bit ^= flag;
3253 }
3254
3255 if (bit != 0)
3256 {
3257 if (length >= MAX_RANGE_SIZE)
3258 return;
3259 ranges[2 + length] = 256;
3260 length++;
3261 }
3262 ranges[0] = length;
3263 }
3264
3265 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3266 {
3267 int ranges[2 + MAX_RANGE_SIZE];
3268 pcre_uint8 bit, cbit, all;
3269 int i, byte, length = 0;
3270
3271 bit = bits[0] & 0x1;
3272 ranges[1] = bit;
3273 /* Can be 0 or 255. */
3274 all = -bit;
3275
3276 for (i = 0; i < 256; )
3277 {
3278 byte = i >> 3;
3279 if ((i & 0x7) == 0 && bits[byte] == all)
3280 i += 8;
3281 else
3282 {
3283 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3284 if (cbit != bit)
3285 {
3286 if (length >= MAX_RANGE_SIZE)
3287 return FALSE;
3288 ranges[2 + length] = i;
3289 length++;
3290 bit = cbit;
3291 all = -cbit;
3292 }
3293 i++;
3294 }
3295 }
3296
3297 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3298 {
3299 if (length >= MAX_RANGE_SIZE)
3300 return FALSE;
3301 ranges[2 + length] = 256;
3302 length++;
3303 }
3304 ranges[0] = length;
3305
3306 return check_ranges(common, ranges, backtracks, FALSE);
3307 }
3308
/* Emits the shared "any newline" subroutine. The routine is entered with
sljit_emit_fast_enter and left with sljit_emit_fast_return, both through
RETURN_ADDR. The emitted code tests the character in TMP1 against 0x0a-0x0d
and 0x85, plus 0x2028-0x2029 when the wide character part is compiled in (in
the 8 bit library additionally only when common->utf is set). TMP1 is modified
by the test and TMP2 collects the result; the final OP_FLAGS also requests
SLJIT_SET_E, so callers presumably branch on that flag - confirm at the call
sites. */
3309 static void check_anynewline(compiler_common *common)
3310 {
3311 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3312 DEFINE_COMPILER;
3313
3314 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3315
/* Rebase TMP1 by 0x0a: one unsigned compare covers 0x0a-0x0d, then 0x85 is tested. */
3316 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3317 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3318 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3319 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3320 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3321 #ifdef COMPILE_PCRE8
3322 if (common->utf)
3323 {
3324 #endif
/* Record the 0x85 result, then set bit 0 so that 0x2028 and 0x2029 are
covered by a single equality test. */
3325 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3326 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3327 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3328 #ifdef COMPILE_PCRE8
3329 }
3330 #endif
3331 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of whichever equality test was emitted last. */
3332 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3333 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3334 }
3335
/* Emits the shared horizontal space subroutine (fast_enter / fast_return
through RETURN_ADDR). The emitted code tests the character in TMP1 against
0x09, 0x20 and 0xa0, and - when the wide character part is compiled in (in the
8 bit library only if common->utf is set) - also against 0x1680, 0x180e,
0x2000-0x200a, 0x202f, 0x205f and 0x3000. TMP2 collects the result; on the
wide path TMP1 is left rebased by 0x2000. The final OP_FLAGS requests
SLJIT_SET_E, so callers presumably branch on that flag - confirm at the call
sites. */
3336 static void check_hspace(compiler_common *common)
3337 {
3338 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
3339 DEFINE_COMPILER;
3340
3341 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3342
/* Each equality test is emitted first and its flag is folded into TMP2 by the
OP_FLAGS that follows it. */
3343 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3344 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3345 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3346 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3347 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3348 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3349 #ifdef COMPILE_PCRE8
3350 if (common->utf)
3351 {
3352 #endif
3353 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3354 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3355 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3356 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3357 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is rebased by 0x2000, so the constants below are offsets. */
3358 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3359 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3360 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3361 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3362 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3363 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3364 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3365 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3366 #ifdef COMPILE_PCRE8
3367 }
3368 #endif
3369 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of whichever equality test was emitted last (0xa0 or 0x3000). */
3370 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3371
3372 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3373 }
3374
/* Emits the shared vertical space subroutine (fast_enter / fast_return
through RETURN_ADDR). The emitted code tests the character in TMP1 against
0x0a-0x0d and 0x85, plus 0x2028-0x2029 when the wide character part is
compiled in (in the 8 bit library only if common->utf is set). TMP1 is
modified by the test and TMP2 collects the result; the final OP_FLAGS requests
SLJIT_SET_E, so callers presumably branch on that flag - confirm at the call
sites. */
3375 static void check_vspace(compiler_common *common)
3376 {
3377 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
3378 DEFINE_COMPILER;
3379
3380 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3381
/* Rebase TMP1 by 0x0a: one unsigned compare covers 0x0a-0x0d, then 0x85 is tested. */
3382 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3383 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3384 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3385 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3386 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3387 #ifdef COMPILE_PCRE8
3388 if (common->utf)
3389 {
3390 #endif
/* NOTE(review): SLJIT_SET_E looks redundant on the next OP_FLAGS - the flags
are produced again by the SLJIT_SUB | SLJIT_SET_E two lines below before
anything reads them, and the otherwise identical sequence in
check_anynewline() does not request it. Confirm before removing. */
3391 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting bit 0 lets one equality test cover both 0x2028 and 0x2029. */
3392 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3393 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3394 #ifdef COMPILE_PCRE8
3395 }
3396 #endif
3397 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of whichever equality test was emitted last. */
3398 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3399
3400 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3401 }
3402
/* Register aliases for the string compare subroutines that follow: CHAR1
shares STR_END and CHAR2 shares STACK_TOP, which is why those subroutines save
both registers on entry and restore them before returning. */
3403 #define CHAR1 STR_END
3404 #define CHAR2 STACK_TOP
3405
/* Emits the shared case sensitive compare subroutine (fast_enter /
fast_return through RETURN_ADDR).

Register usage as visible in the emitted code: TMP1 addresses the first
sequence, TMP2 is the remaining length (decremented by IN_UCHARS(1) per
compared unit) and STR_PTR is first moved back by TMP2 to address the second
sequence. The loop body runs before the length is tested, so TMP2 must not be
zero on entry. The loop ends at the first differing unit or when TMP2 reaches
zero; TMP2 is therefore zero on exit only if all units were equal (presumably
what the callers check - confirm).

CHAR1 / CHAR2 are used as scratch registers; their previous contents are kept
in TMP3 and LOCALS0 and restored before returning. */
3406 static void do_casefulcmp(compiler_common *common)
3407 {
3408 DEFINE_COMPILER;
3409 struct sljit_jump *jump;
3410 struct sljit_label *label;
3411
3412 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3413 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3414 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3415 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are biased one unit backwards because the loads in the loop
address the unit at offset IN_UCHARS(1); MOVU_UCHAR is presumably the updating
load that also advances the base register - nothing else in the loop moves the
pointers. */
3416 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3417 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3418
3419 label = LABEL();
3420 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3421 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3422 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3423 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3424 JUMPTO(SLJIT_C_NOT_ZERO, label);
3425
/* Common exit for "difference found" and "length exhausted": undo the one
unit bias of STR_PTR and restore the borrowed registers. */
3426 JUMPHERE(jump);
3427 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3428 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3429 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3430 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3431 }
3432
/* do_caselesscmp() keeps the address of the common->lcc table in the register
that is otherwise STACK_LIMIT; that subroutine saves and restores it. */
3433 #define LCC_TABLE STACK_LIMIT
3434
/* Emits the shared caseless compare subroutine (fast_enter / fast_return
through RETURN_ADDR).

Register usage as visible in the emitted code: TMP1 addresses the first
sequence, TMP2 is the remaining length (decremented by IN_UCHARS(1) per
compared unit) and STR_PTR is first moved back by TMP2 to address the second
sequence. Each unit of both sequences is replaced by its entry in the
common->lcc table before the comparison; in the 16 and 32 bit libraries units
above 255 are compared unchanged. The loop body runs before the length is
tested, so TMP2 must not be zero on entry; TMP2 is zero on exit only if all
units were equal (presumably what the callers check - confirm).

LCC_TABLE, CHAR1 and CHAR2 are borrowed registers; their previous contents are
kept in TMP3, LOCALS0 and LOCALS1 and restored before returning. */
3435 static void do_caselesscmp(compiler_common *common)
3436 {
3437 DEFINE_COMPILER;
3438 struct sljit_jump *jump;
3439 struct sljit_label *label;
3440
3441 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3442 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3443
3444 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3445 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3446 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3447 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* One unit bias: the loads in the loop address offset IN_UCHARS(1);
MOVU_UCHAR is presumably the updating load that advances the base register. */
3448 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3449 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3450
3451 label = LABEL();
3452 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3453 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is skipped for units above 255 when units can be wider
than one byte. */
3454 #ifndef COMPILE_PCRE8
3455 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3456 #endif
3457 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3458 #ifndef COMPILE_PCRE8
3459 JUMPHERE(jump);
3460 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3461 #endif
3462 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3463 #ifndef COMPILE_PCRE8
3464 JUMPHERE(jump);
3465 #endif
3466 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3467 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3468 JUMPTO(SLJIT_C_NOT_ZERO, label);
3469
/* Common exit: undo the one unit bias of STR_PTR and restore the borrowed
registers. */
3470 JUMPHERE(jump);
3471 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3472 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3473 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3474 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3475 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3476 }
3477
/* The register aliases are only meaningful inside the compare subroutines
above, so they are removed again here. */
3478 #undef LCC_TABLE
3479 #undef CHAR1
3480 #undef CHAR2
3481
3482 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3483
3484 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3485 {
3486 /* This function would be ineffective to do in JIT level. */
3487 pcre_uint32 c1, c2;
3488 const pcre_uchar *src2 = args->uchar_ptr;
3489 const pcre_uchar *end2 = args->end;
3490 const ucd_record *ur;
3491 const pcre_uint32 *pp;
3492
3493 while (src1 < end1)
3494 {
3495 if (src2 >= end2)
3496 return (pcre_uchar*)1;
3497 GETCHARINC(c1, src1);
3498 GETCHARINC(c2, src2);
3499 ur = GET_UCD(c2);
3500 if (c1 != c2 && c1 != c2 + ur->other_case)
3501 {
3502 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3503 for (;;)
3504 {
3505 if (c1 < *pp) return NULL;
3506 if (c1 == *pp++) break;
3507 }
3508 }
3509 }
3510 return src2;
3511 }
3512
3513 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3514
/* Emits the comparison of one literal character stored at cc against the
subject. When common->utf is set and the first code unit announces extra
units (HAS_EXTRALEN / GET_EXTRALEN), all units of the character are handled in
this call.

The subject data is addressed with negative offsets from STR_PTR, using
context->length as the distance still to be compared; context->length is
reduced by IN_UCHARS(1) for every processed unit. context->sourcereg names the
register that receives the next load (-1 before the first call of a sequence),
and on the unaligned read path context->c / context->oc / context->ucharptr
collect the expected value and the case mask of the units gathered so far.

With caseless set and a character that has another case, the bit that differs
between the two cases is ORed into both the loaded data and the expected
constant, so either case compares equal.

Jumps taken on a mismatch are added to backtracks. Returns cc advanced past
the processed code units. */
3515 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3516 compare_context* context, jump_list **backtracks)
3517 {
3518 DEFINE_COMPILER;
3519 unsigned int othercasebit = 0;
3520 pcre_uchar *othercasechar = NULL;
3521 #ifdef SUPPORT_UTF
3522 int utflength;
3523 #endif
3524
3525 if (caseless && char_has_othercase(common, cc))
3526 {
/* The returned value is packed: its upper part is the index of the code unit
that differs between the two cases, its low byte is the differing bit. */
3527 othercasebit = char_get_othercase_bit(common, cc);
3528 SLJIT_ASSERT(othercasebit);
3529 /* Extracting bit difference info. */
3530 #if defined COMPILE_PCRE8
3531 othercasechar = cc + (othercasebit >> 8);
3532 othercasebit &= 0xff;
3533 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3534 /* Note that this code only handles characters in the BMP. If there
3535 ever are characters outside the BMP whose othercase differs in only one
3536 bit from itself (there currently are none), this code will need to be
3537 revised for COMPILE_PCRE32. */
3538 othercasechar = cc + (othercasebit >> 9);
/* Bit 0x100 marks that the differing bit belongs to the upper byte of the unit. */
3539 if ((othercasebit & 0x100) != 0)
3540 othercasebit = (othercasebit & 0xff) << 8;
3541 else
3542 othercasebit &= 0xff;
3543 #endif /* COMPILE_PCRE[8|16|32] */
3544 }
3545
/* First call of a sequence: load the first chunk into TMP1. Subsequent loads
go to context->sourcereg, which is then switched between TMP1 and TMP2, so
the register being compared is the one loaded one step earlier. */
3546 if (context->sourcereg == -1)
3547 {
3548 #if defined COMPILE_PCRE8
3549 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3550 if (context->length >= 4)
3551 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3552 else if (context->length >= 2)
3553 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3554 else
3555 #endif
3556 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3557 #elif defined COMPILE_PCRE16
3558 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3559 if (context->length >= 4)
3560 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3561 else
3562 #endif
3563 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3564 #elif defined COMPILE_PCRE32
3565 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3566 #endif /* COMPILE_PCRE[8|16|32] */
3567 context->sourcereg = TMP2;
3568 }
3569
3570 #ifdef SUPPORT_UTF
3571 utflength = 1;
3572 if (common->utf && HAS_EXTRALEN(*cc))
3573 utflength += GET_EXTRALEN(*cc);
3574
3575 do
3576 {
3577 #endif
3578
3579 context->length -= IN_UCHARS(1);
3580 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3581
3582 /* Unaligned read is supported. */
/* Collect the expected unit (with the case bit forced on) and its case mask;
the compare is emitted only once a whole chunk has been gathered. */
3583 if (othercasebit != 0 && othercasechar == cc)
3584 {
3585 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3586 context->oc.asuchars[context->ucharptr] = othercasebit;
3587 }
3588 else
3589 {
3590 context->c.asuchars[context->ucharptr] = *cc;
3591 context->oc.asuchars[context->ucharptr] = 0;
3592 }
3593 context->ucharptr++;
3594
/* A chunk is complete when it is full, when the sequence ends, or (8 bit
only) when two units are gathered and exactly one more byte remains. */
3595 #if defined COMPILE_PCRE8
3596 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3597 #else
3598 if (context->ucharptr >= 2 || context->length == 0)
3599 #endif
3600 {
/* Load the next chunk (if any data remains) before comparing the current one. */
3601 if (context->length >= 4)
3602 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3603 else if (context->length >= 2)
3604 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3605 #if defined COMPILE_PCRE8
3606 else if (context->length >= 1)
3607 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3608 #endif /* COMPILE_PCRE8 */
3609 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3610
/* The case labels are the number of gathered units that make up a 4, 2 or
1 byte chunk. */
3611 switch(context->ucharptr)
3612 {
3613 case 4 / sizeof(pcre_uchar):
3614 if (context->oc.asint != 0)
3615 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3616 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3617 break;
3618
3619 case 2 / sizeof(pcre_uchar):
3620 if (context->oc.asushort != 0)
3621 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3622 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3623 break;
3624
3625 #ifdef COMPILE_PCRE8
3626 case 1:
3627 if (context->oc.asbyte != 0)
3628 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3629 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3630 break;
3631 #endif
3632
3633 default:
3634 SLJIT_ASSERT_STOP();
3635 break;
3636 }
3637 context->ucharptr = 0;
3638 }
3639
3640 #else
3641
3642 /* Unaligned read is unsupported or in 32 bit mode. */
/* One unit per step: load the following unit (if any), then compare the unit
that was loaded one step earlier. */
3643 if (context->length >= 1)
3644 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3645
3646 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3647
3648 if (othercasebit != 0 && othercasechar == cc)
3649 {
3650 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3651 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3652 }
3653 else
3654 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3655
3656 #endif
3657
3658 cc++;
3659 #ifdef SUPPORT_UTF
3660 utflength--;
3661 }
3662 while (utflength > 0);
3663 #endif
3664
3665 return cc;
3666 }
3667
3668 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3669
/* Helpers for compile_xclass_matchingpath(). The character (TMP1) and the
character type (typereg) are kept in their registers minus a known offset
(charoffset / typeoffset), so that a range test needs only one unsigned
compare. These macros emit the single ADD or SUB that moves the register to a
new offset and record the new offset; nothing is emitted when the offset is
already the requested one.

They rely on the local names typereg, typeoffset and charoffset of the
function using them, and evaluate their argument several times.

Each macro is wrapped in do { } while (0) so that it expands to exactly one
statement and remains correct as the body of an unbraced if / else. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
3689
3690 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3691 {
3692 DEFINE_COMPILER;
3693 jump_list *found = NULL;
3694 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3695 pcre_int32 c, charoffset;
3696 const pcre_uint32 *other_cases;
3697 struct sljit_jump *jump = NULL;
3698 pcre_uchar *ccbegin;
3699 int compares, invertcmp, numberofcmps;
3700 #ifdef SUPPORT_UCP
3701 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3702 BOOL charsaved = FALSE;
3703 int typereg = TMP1, scriptreg = TMP1;
3704 pcre_int32 typeoffset;
3705 #endif
3706
3707 /* Although SUPPORT_UTF must be defined, we are
3708 not necessary in utf mode even in 8 bit mode. */
3709 detect_partial_match(common, backtracks);
3710 read_char(common);
3711
3712 if ((*cc++ & XCL_MAP) != 0)
3713 {
3714 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3715 #ifndef COMPILE_PCRE8
3716 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3717 #elif defined SUPPORT_UTF
3718 if (common->utf)
3719 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3720 #endif
3721
3722 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3723 {
3724 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3725 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3726 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3727 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3728 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3729 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3730 }
3731
3732 #ifndef COMPILE_PCRE8
3733 JUMPHERE(jump);
3734 #elif defined SUPPORT_UTF
3735 if (common->utf)
3736 JUMPHERE(jump);
3737 #endif
3738 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3739 #ifdef SUPPORT_UCP
3740 charsaved = TRUE;
3741 #endif
3742 cc += 32 / sizeof(pcre_uchar);
3743 }
3744
3745 /* Scanning the necessary info. */
3746 ccbegin = cc;
3747 compares = 0;
3748 while (*cc != XCL_END)
3749 {
3750 compares++;
3751 if (*cc == XCL_SINGLE)
3752 {
3753 cc += 2;
3754 #ifdef SUPPORT_UTF
3755 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3756 #endif
3757 #ifdef SUPPORT_UCP
3758 needschar = TRUE;
3759 #endif
3760 }
3761 else if (*cc == XCL_RANGE)
3762 {
3763 cc += 2;
3764 #ifdef SUPPORT_UTF
3765 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3766 #endif
3767 cc++;
3768 #ifdef SUPPORT_UTF
3769 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3770 #endif
3771 #ifdef SUPPORT_UCP
3772 needschar = TRUE;
3773 #endif
3774 }
3775 #ifdef SUPPORT_UCP
3776 else
3777 {
3778 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3779 cc++;
3780 switch(*cc)
3781 {
3782 case PT_ANY:
3783 break;
3784
3785 case PT_LAMP:
3786 case PT_GC:
3787 case PT_PC:
3788 case PT_ALNUM:
3789 needstype = TRUE;
3790 break;
3791
3792 case PT_SC:
3793 needsscript = TRUE;
3794 break;
3795
3796 case PT_SPACE:
3797 case PT_PXSPACE:
3798 case PT_WORD:
3799 needstype = TRUE;
3800 needschar = TRUE;
3801 break;
3802
3803 case PT_CLIST:
3804 needschar = TRUE;
3805 break;
3806
3807 default:
3808 SLJIT_ASSERT_STOP();
3809 break;
3810 }
3811 cc += 2;
3812 }
3813 #endif
3814 }
3815
3816 #ifdef SUPPORT_UCP
3817 /* Simple register allocation. TMP1 is preferred if possible. */
3818 if (needstype || needsscript)
3819 {
3820 if (needschar && !charsaved)
3821 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3822 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3823 if (needschar)
3824 {
3825 if (needstype)
3826 {
3827 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3828 typereg = RETURN_ADDR;
3829 }
3830
3831 if (needsscript)
3832 scriptreg = TMP3;
3833 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3834 }
3835 else if (needstype && needsscript)
3836 scriptreg = TMP3;
3837 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3838
3839 if (needsscript)
3840 {
3841 if (scriptreg == TMP1)
3842 {
3843 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3844 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3845 }
3846 else
3847 {
3848 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3849 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3850 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3851 }
3852 }
3853 }
3854 #endif
3855
3856 /* Generating code. */
3857 cc = ccbegin;
3858 charoffset = 0;
3859 numberofcmps = 0;
3860 #ifdef SUPPORT_UCP
3861 typeoffset = 0;
3862 #endif
3863
3864 while (*cc != XCL_END)
3865 {
3866 compares--;
3867 invertcmp = (compares == 0 && list != backtracks);
3868 jump = NULL;
3869
3870 if (*cc == XCL_SINGLE)
3871 {
3872 cc ++;
3873 #ifdef SUPPORT_UTF
3874 if (common->utf)
3875 {
3876 GETCHARINC(c, cc);
3877 }
3878 else
3879 #endif
3880 c = *cc++;
3881
3882 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3883 {
3884 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3885 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
3886 numberofcmps++;
3887 }
3888 else if (numberofcmps > 0)
3889 {
3890 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3891 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3892 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3893 numberofcmps = 0;
3894 }
3895 else
3896 {
3897 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3898 numberofcmps = 0;
3899 }
3900 }
3901 else if (*cc == XCL_RANGE)
3902 {
3903 cc ++;
3904 #ifdef SUPPORT_UTF
3905 if (common->utf)
3906 {
3907 GETCHARINC(c, cc);
3908 }
3909 else
3910 #endif
3911 c = *cc++;
3912 SET_CHAR_OFFSET(c);
3913 #ifdef SUPPORT_UTF
3914 if (common->utf)
3915 {
3916 GETCHARINC(c, cc);
3917 }
3918 else
3919 #endif
3920 c = *cc++;
3921 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3922 {
3923 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3924 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
3925 numberofcmps++;
3926 }
3927 else if (numberofcmps > 0)
3928 {
3929 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3930 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3931 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3932 numberofcmps = 0;
3933 }
3934 else
3935 {
3936 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3937 numberofcmps = 0;
3938 }
3939 }
3940 #ifdef SUPPORT_UCP
3941 else
3942 {
3943 if (*cc == XCL_NOTPROP)
3944 invertcmp ^= 0x1;
3945 cc++;
3946 switch(*cc)
3947 {
3948 case PT_ANY:
3949 if (list != backtracks)
3950 {
3951 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3952 continue;
3953 }
3954 else if (cc[-1] == XCL_NOTPROP)
3955 continue;
3956 jump = JUMP(SLJIT_JUMP);
3957 break;
3958
3959 case PT_LAMP:
3960 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3961 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3962 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3963 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3964 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3965 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3966 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3967 break;
3968
3969 case PT_GC:
3970 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3971 SET_TYPE_OFFSET(c);
3972 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3973 break;
3974
3975 case PT_PC:
3976 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3977 break;
3978
3979 case PT_SC:
3980 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3981 break;
3982
3983 case PT_SPACE:
3984 case PT_PXSPACE:
3985 if (*cc == PT_SPACE)
3986 {
3987 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3988 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3989 }
3990 SET_CHAR_OFFSET(9);
3991 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3992 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3993 if (*cc == PT_SPACE)
3994 JUMPHERE(jump);
3995
3996 SET_TYPE_OFFSET(ucp_Zl);
3997 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3998 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3999 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4000 break;
4001
4002 case PT_WORD:
4003 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4004 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4005 /* ... fall through */
4006
4007 case PT_ALNUM:
4008 SET_TYPE_OFFSET(ucp_Ll);
4009 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4010 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4011 SET_TYPE_OFFSET(ucp_Nd);
4012 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4013 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4014 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4015 break;
4016
4017 case PT_CLIST:
4018 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4019
4020 /* At least three characters are required.
4021 Otherwise this case would be handled by the normal code path. */
4022 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4023 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4024
4025 /* Optimizing character pairs, if their difference is power of 2. */
4026 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4027 {
4028 if (charoffset == 0)
4029 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4030 else
4031 {
4032 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4033 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4034 }
4035 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4036 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4037 other_cases += 2;
4038 }
4039 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4040 {
4041 if (charoffset == 0)
4042 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4043 else
4044 {
4045 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4046 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4047 }
4048 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4049 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4050
4051 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4052 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4053
4054 other_cases += 3;
4055 }
4056 else
4057 {
4058 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4059 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4060 }
4061
4062 while (*other_cases != NOTACHAR)
4063 {
4064 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4065 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4066 }
4067 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4068 break;
4069 }
4070 cc += 2;
4071 }
4072 #endif
4073
4074 if (jump != NULL)
4075 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4076 }
4077
4078 if (found != NULL)
4079 set_jumps(found, LABEL());
4080 }
4081
4082 #undef SET_TYPE_OFFSET
4083 #undef SET_CHAR_OFFSET
4084
4085 #endif
4086
/* Emits the matching-path code for one opcode that tests a single character
or a position in the subject.

Arguments:
  common      the shared compiler state
  type        the opcode being compiled (one of the case labels below)
  cc          points at the operand data that follows the opcode
  backtracks  jump list that receives every "no match" jump emitted here

Returns: cc advanced past the operand data consumed by the opcode; opcodes
  that have no operand return cc unchanged. OP_CHAR / OP_CHARI may return
  the value produced by byte_sequence_compare() instead. An opcode that has
  no case label reaches SLJIT_ASSERT_STOP(). */
4087 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4088 {
4089 DEFINE_COMPILER;
4090 int length;
4091 unsigned int c, oc, bit;
4092 compare_context context;
4093 struct sljit_jump *jump[4];
4094 #ifdef SUPPORT_UTF
4095 struct sljit_label *label;
4096 #ifdef SUPPORT_UCP
4097 pcre_uchar propdata[5];
4098 #endif
4099 #endif
4100
4101 switch(type)
4102 {
/* Backtrack unless STR_PTR equals the 'begin' field of jit_arguments. */
4103 case OP_SOD:
4104 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4105 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4106 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4107 return cc;
4108
/* Backtrack unless STR_PTR equals the 'str' field of jit_arguments. */
4109 case OP_SOM:
4110 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4111 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4112 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4113 return cc;
4114
/* Fast-call the shared word boundary routine, then backtrack on the
zero / non-zero flag state it leaves, with the sense chosen by the opcode. */
4115 case OP_NOT_WORD_BOUNDARY:
4116 case OP_WORD_BOUNDARY:
4117 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4118 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4119 return cc;
4120
4121 case OP_NOT_DIGIT:
4122 case OP_DIGIT:
4123 /* Digits are usually 0-9, so it is worth optimizing them. */
/* The range table is filled in on first use; -2 in digits[0] is treated as
"not computed yet". */
4124 if (common->digits[0] == -2)
4125 get_ctype_ranges(common, ctype_digit, common->digits);
4126 detect_partial_match(common, backtracks);
4127 /* Flip the starting bit in the negative case. */
4128 if (type == OP_NOT_DIGIT)
4129 common->digits[1] ^= 1;
/* When check_ranges() returns FALSE, fall back to testing the ctype_digit
bit of the value produced by read_char8_type(). */
4130 if (!check_ranges(common, common->digits, backtracks, TRUE))
4131 {
4132 read_char8_type(common);
4133 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4134 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4135 }
/* Undo the flip above so that the shared table is left unchanged. */
4136 if (type == OP_NOT_DIGIT)
4137 common->digits[1] ^= 1;
4138 return cc;
4139
/* Test the ctype_space bit of the value produced by read_char8_type(). */
4140 case OP_NOT_WHITESPACE:
4141 case OP_WHITESPACE:
4142 detect_partial_match(common, backtracks);
4143 read_char8_type(common);
4144 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4145 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4146 return cc;
4147
/* Test the ctype_word bit of the value produced by read_char8_type(). */
4148 case OP_NOT_WORDCHAR:
4149 case OP_WORDCHAR:
4150 detect_partial_match(common, backtracks);
4151 read_char8_type(common);
4152 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4153 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4154 return cc;
4155
/* Reads one character and backtracks if it starts a newline. */
4156 case OP_ANY:
4157 detect_partial_match(common, backtracks);
4158 read_char(common);
4159 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4160 {
/* Fixed two-unit newline: backtrack only when the character just read
equals the high byte of common->newline and the next unit equals its low
byte. The second comparison is skipped when no input remains. */
4161 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4162 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4163 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4164 else
4165 jump[1] = check_str_end(common);
4166
4167 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4168 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
/* NOTE(review): the NULL test implies check_str_end() may return NULL;
confirm against its definition. */
4169 if (jump[1] != NULL)
4170 JUMPHERE(jump[1]);
4171 JUMPHERE(jump[0]);
4172 }
4173 else
4174 check_newlinechar(common, common->nltype, backtracks, TRUE);
4175 return cc;
4176
/* Skips one whole character without inspecting its value. */
4177 case OP_ALLANY:
4178 detect_partial_match(common, backtracks);
4179 #ifdef SUPPORT_UTF
4180 if (common->utf)
4181 {
4182 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4183 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4184 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4185 #if defined COMPILE_PCRE8
/* A first unit of 0xc0 or above indexes utf8_table4 for the number of
additional units to skip. */
4186 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4187 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4188 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4189 #elif defined COMPILE_PCRE16
/* TMP1 becomes 1 when (unit & 0xfc00) == 0xd800 and 0 otherwise; shifted
left by one it is the extra amount added to STR_PTR. */
4190 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4191 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4192 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4193 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4194 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4195 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4196 #endif
4197 JUMPHERE(jump[0]);
4198 #endif /* COMPILE_PCRE[8|16] */
4199 return cc;
4200 }
4201 #endif
4202 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4203 return cc;
4204
/* Always advances by exactly one code unit. */
4205 case OP_ANYBYTE:
4206 detect_partial_match(common, backtracks);
4207 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4208 return cc;
4209
4210 #ifdef SUPPORT_UTF
4211 #ifdef SUPPORT_UCP
/* Builds a five-unit descriptor on the stack (a zero, XCL_PROP or
XCL_NOTPROP, the two operand units, XCL_END) and hands it to
compile_xclass_matchingpath(). Two operand units are consumed. */
4212 case OP_NOTPROP:
4213 case OP_PROP:
4214 propdata[0] = 0;
4215 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4216 propdata[2] = cc[0];
4217 propdata[3] = cc[1];
4218 propdata[4] = XCL_END;
4219 compile_xclass_matchingpath(common, propdata, backtracks);
4220 return cc + 2;
4221 #endif
4222 #endif
4223
/* A CR that is immediately followed by NL is consumed as one unit pair; a CR
with anything else after it (or at the end of input) is accepted alone. Any
other character is passed to check_newlinechar() with bsr_nltype. */
4224 case OP_ANYNL:
4225 detect_partial_match(common, backtracks);
4226 read_char(common);
4227 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4228 /* We don't need to handle soft partial matching case. */
4229 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4230 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4231 else
4232 jump[1] = check_str_end(common);
4233 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4234 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4235 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4236 jump[3] = JUMP(SLJIT_JUMP);
4237 JUMPHERE(jump[0]);
4238 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4239 JUMPHERE(jump[1]);
4240 JUMPHERE(jump[2]);
4241 JUMPHERE(jump[3]);
4242 return cc;
4243
/* Reads a character, fast-calls the shared hspace routine and backtracks on
the flag state it leaves, with the sense chosen by the opcode. */
4244 case OP_NOT_HSPACE:
4245 case OP_HSPACE:
4246 detect_partial_match(common, backtracks);
4247 read_char(common);
4248 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4249 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4250 return cc;
4251
/* Same scheme as above, using the shared vspace routine. */
4252 case OP_NOT_VSPACE:
4253 case OP_VSPACE:
4254 detect_partial_match(common, backtracks);
4255 read_char(common);
4256 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4257 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4258 return cc;
4259
4260 #ifdef SUPPORT_UCP
/* Reads one character, then keeps reading further characters for as long as
the ucp_gbtable entry selected by the previous character's gbprop value has
the bit numbered by the next character's gbprop value set. */
4261 case OP_EXTUNI:
4262 detect_partial_match(common, backtracks);
4263 read_char(common);
4264 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4265 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4266 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and then holds the gbprop value of the
previous character until it is reloaded after the loop. */
4267 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4268 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4269
4270 label = LABEL();
4271 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* TMP3 remembers the position before the next character so that STR_PTR
can be moved back to it when the loop test fails. */
4272 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4273 read_char(common);
4274 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4275 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4276 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4277
/* TMP1 = 32-bit ucp_gbtable entry at byte offset (previous gbprop << 2);
the new gbprop becomes the "previous" value; the loop continues while
(TMP1 & (1 << new gbprop)) is non-zero. */
4278 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4279 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4280 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4281 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4282 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4283 JUMPTO(SLJIT_C_NOT_ZERO, label);
4284
4285 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4286 JUMPHERE(jump[0]);
4287 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4288
4289 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4290 {
4291 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4292 /* Since we successfully read a char above, partial matching must occur. */
4293 check_partial(common, TRUE);
4294 JUMPHERE(jump[0]);
4295 }
4296 return cc;
4297 #endif
4298
/* Succeeds when STR_PTR is at STR_END, or when exactly one newline sequence
lies between STR_PTR and STR_END. STR_PTR is not advanced on success. */
4299 case OP_EODN:
4300 /* Requires rather complex checks. */
/* jump[0]: STR_PTR is already at (or past) STR_END. */
4301 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4302 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4303 {
/* Fixed two-unit newline: TMP2 = STR_PTR plus two units, TMP1 = current
unit. */
4304 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4305 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4306 if (common->mode == JIT_COMPILE)
4307 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4308 else
4309 {
/* Partial modes: when exactly two units remain, continue at jump[1].
Otherwise backtrack if more than two units remain or the current unit is
not the first newline unit; in the remaining case (one unit left, equal
to the first newline unit) call check_partial(TRUE) and backtrack. */
4310 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4311 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4312 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4313 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4314 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4315 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4316 check_partial(common, TRUE);
4317 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4318 JUMPHERE(jump[1]);
4319 }
4320 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4321 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4322 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4323 }
4324 else if (common->nltype == NLTYPE_FIXED)
4325 {
/* Fixed one-unit newline: exactly one unit must remain and it must equal
common->newline. */
4326 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4327 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4328 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4329 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4330 }
4331 else
4332 {
/* Remaining newline types. Current unit is CR: accept when the CR is the
last unit (jump[2]), backtrack when more than two units remain, and
otherwise require the second unit to be NL (jump[3]). */
4333 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4334 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4335 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4336 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4337 jump[2] = JUMP(SLJIT_C_GREATER);
4338 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4339 /* Equal. */
4340 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4341 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4342 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4343
/* Current unit is not CR. */
4344 JUMPHERE(jump[1]);
4345 if (common->nltype == NLTYPE_ANYCRLF)
4346 {
/* It must be the last unit and equal NL. */
4347 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4348 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4349 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4350 }
4351 else
4352 {
/* Read one character with STR_PTR saved in LOCALS1; the read must end
exactly at STR_END and the shared anynewline routine must not leave the
zero flag set. STR_PTR is then restored. */
4353 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4354 read_char(common);
4355 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4356 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4357 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4358 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4359 }
4360 JUMPHERE(jump[2]);
4361 JUMPHERE(jump[3]);
4362 }
4363 JUMPHERE(jump[0]);
4364 check_partial(common, FALSE);
4365 return cc;
4366
/* Backtrack while input remains; otherwise call check_partial(FALSE). */
4367 case OP_EOD:
4368 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4369 check_partial(common, FALSE);
4370 return cc;
4371
/* Backtrack when STR_PTR is beyond jit_arguments.begin or when the notbol
byte of jit_arguments is non-zero. */
4372 case OP_CIRC:
4373 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4374 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4375 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4376 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4377 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4378 return cc;
4379
/* At jit_arguments.begin the notbol byte must be zero. At any later
position STR_PTR must be before STR_END and the preceding input must be a
newline. */
4380 case OP_CIRCM:
4381 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4382 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4383 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4384 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4385 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4386 jump[0] = JUMP(SLJIT_JUMP);
4387 JUMPHERE(jump[1]);
4388
4389 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4390 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4391 {
/* Two-unit newline: both preceding units must lie at or after 'begin'
(still in TMP1) and equal the two newline bytes. */
4392 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4393 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4394 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4395 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4396 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4397 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4398 }
4399 else
4400 {
/* Step back one character, read it again and test it as a newline. */
4401 skip_char_back(common);
4402 read_char(common);
4403 check_newlinechar(common, common->nltype, backtracks, FALSE);
4404 }
4405 JUMPHERE(jump[0]);
4406 return cc;
4407
/* Backtrack when the noteol byte of jit_arguments is non-zero. Otherwise
apply the OP_EODN rules, or the plain end-of-input test when
common->endonly is set. */
4408 case OP_DOLL:
4409 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4410 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4411 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4412
4413 if (!common->endonly)
4414 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4415 else
4416 {
4417 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4418 check_partial(common, FALSE);
4419 }
4420 return cc;
4421
/* At the end of input the noteol byte must be zero; before the end of input
a newline must start at STR_PTR. */
4422 case OP_DOLLM:
4423 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4424 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4425 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4426 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4427 check_partial(common, FALSE);
4428 jump[0] = JUMP(SLJIT_JUMP);
4429 JUMPHERE(jump[1]);
4430
4431 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4432 {
/* Two-unit newline: TMP2 = STR_PTR plus two units, TMP1 = current unit. */
4433 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4434 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4435 if (common->mode == JIT_COMPILE)
4436 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4437 else
4438 {
/* Partial modes: with only one unit left, backtrack if it differs from
the first newline unit, otherwise call check_partial(TRUE) and
backtrack. */
4439 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4440 /* STR_PTR = STR_END - IN_UCHARS(1) */
4441 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4442 check_partial(common, TRUE);
4443 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4444 JUMPHERE(jump[1]);
4445 }
4446
4447 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4448 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4449 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4450 }
4451 else
4452 {
4453 peek_char(common);
4454 check_newlinechar(common, common->nltype, backtracks, FALSE);
4455 }
4456 JUMPHERE(jump[0]);
4457 return cc;
4458
/* Matches one literal character; cc points at its first code unit. */
4459 case OP_CHAR:
4460 case OP_CHARI:
4461 length = 1;
4462 #ifdef SUPPORT_UTF
4463 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4464 #endif
/* In JIT_COMPILE mode, when the character is caseful, has no other case, or
char_get_othercase_bit() returns non-zero, advance STR_PTR by the whole
length, bounds-check once and let byte_sequence_compare() emit the
comparison. */
4465 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4466 {
4467 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4468 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4469
4470 context.length = IN_UCHARS(length);
4471 context.sourcereg = -1;
4472 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4473 context.ucharptr = 0;
4474 #endif
4475 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4476 }
/* Otherwise read the subject character into TMP1 and compare it with the
pattern character c. */
4477 detect_partial_match(common, backtracks);
4478 read_char(common);
4479 #ifdef SUPPORT_UTF
4480 if (common->utf)
4481 {
4482 GETCHAR(c, cc);
4483 }
4484 else
4485 #endif
4486 c = *cc;
4487 if (type == OP_CHAR || !char_has_othercase(common, cc))
4488 {
4489 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4490 return cc + length;
4491 }
4492 oc = char_othercase(common, c);
4493 bit = c ^ oc;
/* When the two cases differ in a single bit, force that bit on and do one
comparison. */
4494 if (is_powerof2(bit))
4495 {
4496 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4497 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4498 return cc + length;
4499 }
/* Otherwise TMP2 = (TMP1 == c) | (TMP1 == oc); backtrack when it is zero. */
4500 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4501 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4502 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4503 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4504 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4505 return cc + length;
4506
/* Matches any one character except the literal character at cc. */
4507 case OP_NOT:
4508 case OP_NOTI:
4509 detect_partial_match(common, backtracks);
4510 length = 1;
4511 #ifdef SUPPORT_UTF
4512 if (common->utf)
4513 {
4514 #ifdef COMPILE_PCRE8
4515 c = *cc;
/* For a pattern character below 128 only the first subject unit is loaded
and compared; the rest of the subject character is skipped afterwards. */
4516 if (c < 128)
4517 {
4518 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4519 if (type == OP_NOT || !char_has_othercase(common, cc))
4520 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4521 else
4522 {
4523 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4524 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4525 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4526 }
4527 /* Skip the variable-length character. */
4528 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4529 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4530 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4531 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4532 JUMPHERE(jump[0]);
4533 return cc + 1;
4534 }
4535 else
4536 #endif /* COMPILE_PCRE8 */
4537 {
4538 GETCHARLEN(c, cc, length);
4539 read_char(common);
4540 }
4541 }
4542 else
4543 #endif /* SUPPORT_UTF */
4544 {
4545 read_char(common);
4546 c = *cc;
4547 }
4548
/* General path: TMP1 holds the subject character; backtrack when it equals
c or, for the caseless opcode, the other case of c. */
4549 if (type == OP_NOT || !char_has_othercase(common, cc))
4550 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4551 else
4552 {
4553 oc = char_othercase(common, c);
4554 bit = c ^ oc;
4555 if (is_powerof2(bit))
4556 {
4557 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4558 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4559 }
4560 else
4561 {
4562 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4563 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4564 }
4565 }
4566 return cc + length;
4567
/* Character class stored as a 32-byte bitmap at cc. */
4568 case OP_CLASS:
4569 case OP_NCLASS:
4570 detect_partial_match(common, backtracks);
4571 read_char(common);
/* Nothing more to emit when check_class_ranges() reports that it handled
the class. */
4572 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4573 return cc + 32 / sizeof(pcre_uchar);
4574
4575 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4576 jump[0] = NULL;
4577 #ifdef COMPILE_PCRE8
4578 /* This check only affects 8 bit mode. In other modes, we
4579 always need to compare the value with 255. */
4580 if (common->utf)
4581 #endif /* COMPILE_PCRE8 */
4582 {
/* Characters above 255 have no bit in the bitmap: OP_CLASS backtracks
for them, OP_NCLASS jumps over the bitmap test. */
4583 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4584 if (type == OP_CLASS)
4585 {
4586 add_jump(compiler, backtracks, jump[0]);
4587 jump[0] = NULL;
4588 }
4589 }
4590 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Test bit (c & 7) of bitmap byte (c >> 3); backtrack when it is clear. */
4591 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4592 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4593 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4594 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4595 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4596 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4597 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4598 if (jump[0] != NULL)
4599 JUMPHERE(jump[0]);
4600 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4601 return cc + 32 / sizeof(pcre_uchar);
4602
4603 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Delegates to compile_xclass_matchingpath() with the data that follows the
LINK_SIZE-unit field at cc; the value stored in that field, less one, is the
amount by which cc is advanced. */
4604 case OP_XCLASS:
4605 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4606 return cc + GET(cc, 0) - 1;
4607 #endif
4608
/* Moves STR_PTR back by the count stored at cc and backtracks if that would
pass jit_arguments.begin. A count of zero emits nothing. */
4609 case OP_REVERSE:
4610 length = GET(cc, 0);
4611 if (length == 0)
4612 return cc + LINK_SIZE;
4613 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4614 #ifdef SUPPORT_UTF
4615 if (common->utf)
4616 {
/* UTF mode: step back one character at a time, counting down in TMP2;
TMP3 holds 'begin'. */
4617 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4618 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4619 label = LABEL();
4620 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4621 skip_char_back(common);
4622 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4623 JUMPTO(SLJIT_C_NOT_ZERO, label);
4624 }
4625 else
4626 #endif
4627 {
/* Fixed-width mode: one subtraction followed by a single bounds check. */
4628 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4629 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4630 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4631 }
4632 check_start_used_ptr(common);
4633 return cc + LINK_SIZE;
4634 }
/* Only reached when 'type' has no case label above. */
4635 SLJIT_ASSERT_STOP();
4636 return cc;
4637 }
4638
4639 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4640 {
4641 /* This function consumes at least one input character. */
4642 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4643 DEFINE_COMPILER;
4644 pcre_uchar *ccbegin = cc;
4645 compare_context context;
4646 int size;
4647
4648 context.length = 0;
4649 do
4650 {
4651 if (cc >= ccend)
4652 break;
4653
4654 if (*cc == OP_CHAR)
4655 {
4656 size = 1;
4657 #ifdef SUPPORT_UTF
4658 if (common->utf && HAS_EXTRALEN(cc[1]))
4659 size += GET_EXTRALEN(cc[1]);
4660 #endif
4661 }
4662 else if (*cc == OP_CHARI)
4663 {
4664 size = 1;
4665 #ifdef SUPPORT_UTF
4666 if (common->utf)
4667 {
4668 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4669 size = 0;
4670 else if (HAS_EXTRALEN(cc[1]))
4671 size += GET_EXTRALEN(cc[1]);
4672 }
4673 else
4674 #endif
4675 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4676 size = 0;
4677 }
4678 else
4679 size = 0;
4680
4681 cc += 1 + size;
4682 context.length += IN_UCHARS(size);
4683 }
4684 while (size > 0 && context.length <= 128);
4685
4686 cc = ccbegin;
4687 if (context.length > 0)
4688 {
4689 /* We have a fixed-length byte sequence. */
4690 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4691 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4692
4693 context.sourcereg = -1;
4694 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4695 context.ucharptr = 0;
4696 #endif
4697 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4698 return cc;
4699 }
4700
4701 /* A non-fixed length character will be checked if length == 0. */
4702 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4703 }
4704
4705 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4706 {
4707 DEFINE_COMPILER;
4708 int offset = GET2(cc, 1) << 1;
4709
4710 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4711 if (!common->jscript_compat)
4712 {
4713 if (backtracks == NULL)
4714 {
4715 /* OVECTOR(1) contains the "string begin - 1" constant. */
4716 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4717 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4718 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4719 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4720 return JUMP(SLJIT_C_NOT_ZERO);
4721 }
4722 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4723 }
4724 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4725 }
4726
4727 /* Forward declarations. */
4728 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4729 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4730
/* Allocates 'size' bytes with sljit_alloc_memory() for a new backtrack
record, zero-fills it, stores 'ccstart' in its cc field and links it in as
the new parent->top (the old top becomes its prev). If the compiler reports
an error after the allocation, the enclosing function returns 'error'.
The variables 'compiler', 'parent' and 'backtrack' must be in scope where
the macro is expanded. */
4731 #define PUSH_BACKTRACK(size, ccstart, error) \
4732 do \
4733 { \
4734 backtrack = sljit_alloc_memory(compiler, (size)); \
4735 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4736 return error; \
4737 memset(backtrack, 0, size); \
4738 backtrack->prev = parent->top; \
4739 backtrack->cc = (ccstart); \
4740 parent->top = backtrack; \
4741 } \
4742 while (0)
4743
/* Same steps as PUSH_BACKTRACK: allocate 'size' bytes with
sljit_alloc_memory(), zero-fill the record, store 'ccstart' in its cc field
and link it in as the new parent->top. On a compiler error the enclosing
function returns without a value, so this variant is for functions
returning void. The variables 'compiler', 'parent' and 'backtrack' must be
in scope where the macro is expanded. */
4744 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
4745 do \
4746 { \
4747 backtrack = sljit_alloc_memory(compiler, (size)); \
4748 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4749 return; \
4750 memset(backtrack, 0, size); \
4751 backtrack->prev = parent->top; \
4752 backtrack->cc = (ccstart); \
4753 parent->top = backtrack; \
4754 } \
4755 while (0)
4756
/* Casts the in-scope 'backtrack' pointer to a pointer to 'type'. */
4757 #define BACKTRACK_AS(type) ((type *)backtrack)
4758
/* Emits code that compares the subject at STR_PTR with the text delimited by
the two ovector slots of the group referenced at cc, and advances STR_PTR
past the compared text.

Arguments:
  common      the shared compiler state
  cc          points at the reference opcode (OP_REF or the caseless form)
  backtracks  jump list that receives the "no match" jumps
  withchecks  when TRUE, also emit the comparison against OVECTOR(1)
              (unless jscript_compat is set) and the test for the two
              slots being equal
  emptyfail   what to do with the "slots are equal" jump: TRUE adds it to
              backtracks, FALSE binds it to the end of the emitted code

Returns: cc + 1 + IMM2_SIZE */
4759 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4760 {
4761 DEFINE_COMPILER;
4762 int offset = GET2(cc, 1) << 1;
4763 struct sljit_jump *jump = NULL;
4764 struct sljit_jump *partial;
4765 struct sljit_jump *nopartial;
4766
4767 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4768 /* OVECTOR(1) contains the "string begin - 1" constant. */
4769 if (withchecks && !common->jscript_compat)
4770 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4771
4772 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4773 if (common->utf && *cc == OP_REFI)
4774 {
/* Caseless comparison in UTF mode is done by calling do_utf_caselesscmp().
The assertion pins the register assignment that the three-argument call
relies on. */
4775 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
4776 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4777 if (withchecks)
4778 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4779
4780 /* Needed to save important temporary registers. */
/* STACK_TOP is parked in LOCALS0, and STR_PTR is passed through the
uchar_ptr field of jit_arguments. */
4781 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4782 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
4783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4784 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4785 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are not subject pointers: in JIT_COMPILE mode both
backtrack; in the partial modes 0 backtracks and 1 goes through
check_partial() before backtracking. Any other value becomes the new
STR_PTR. */
4786 if (common->mode == JIT_COMPILE)
4787 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4788 else
4789 {
4790 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4791 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4792 check_partial(common, FALSE);
4793 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4794 JUMPHERE(nopartial);
4795 }
4796 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4797 }
4798 else
4799 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4800 {
/* TMP2 = second slot - first slot; a zero result means the two slots are
equal. */
4801 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4802 if (withchecks)
4803 jump = JUMP(SLJIT_C_ZERO);
4804
/* Advance STR_PTR by that amount; 'partial' is taken when this runs past
STR_END. */
4805 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4806 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4807 if (common->mode == JIT_COMPILE)
4808 add_jump(compiler, backtracks, partial);
4809
/* Fast-call the shared compare routine and backtrack if TMP2 is non-zero
afterwards. NOTE(review): presumably the routine leaves zero in TMP2 on a
full match; confirm against its definition. */
4810 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4811 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4812
4813 if (common->mode != JIT_COMPILE)
4814 {
/* Partial modes: the referenced text is longer than the remaining subject.
Compare only the part that is available, then call check_partial() and
backtrack. */
4815 nopartial = JUMP(SLJIT_JUMP);
4816 JUMPHERE(partial);
4817 /* TMP2 -= STR_PTR - STR_END (reduce the length by the overshoot) */
4818 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4819 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
4820 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4821 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4822 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4823 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4824 JUMPHERE(partial);
4825 check_partial(common, FALSE);
4826 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4827 JUMPHERE(nopartial);
4828 }
4829 }
4830
/* 'jump' is only set when withchecks is TRUE: it is the "slots are equal"
jump, which either fails or lands here depending on emptyfail. */
4831 if (jump != NULL)
4832 {
4833 if (emptyfail)
4834 add_jump(compiler, backtracks, jump);
4835 else
4836 JUMPHERE(jump);
4837 }
4838 return cc + 1 + IMM2_SIZE;
4839 }
4840
/* Emits the matching path for a back reference that is followed by a repeat
opcode (one of the OP_CR* values handled in the switch below). A new
iterator_backtrack record is pushed onto parent.

Arguments:
  common   the shared compiler state
  cc       points at the reference opcode; the repeat opcode is read from
           cc[1 + IMM2_SIZE]
  parent   the backtrack record that receives the new entry

Returns: the address after the repeat opcode and its operands, or NULL when
  PUSH_BACKTRACK detects a compiler error. */
4841 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4842 {
4843 DEFINE_COMPILER;
4844 backtrack_common *backtrack;
4845 pcre_uchar type;
4846 struct sljit_label *label;
4847 struct sljit_jump *zerolength;
4848 struct sljit_jump *jump = NULL;
4849 pcre_uchar *ccbegin = cc;
4850 int min = 0, max = 0;
4851 BOOL minimize;
4852
4853 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4854
/* Decode the repeat opcode into min / max. A max of 0 is used for the
opcodes without an upper limit (see the "max == 0" tests below). The low
bit of the opcode selects the minimizing form. */
4855 type = cc[1 + IMM2_SIZE];
4856 minimize = (type & 0x1) != 0;
4857 switch(type)
4858 {
4859 case OP_CRSTAR:
4860 case OP_CRMINSTAR:
4861 min = 0;
4862 max = 0;
4863 cc += 1 + IMM2_SIZE + 1;
4864 break;
4865 case OP_CRPLUS:
4866 case OP_CRMINPLUS:
4867 min = 1;
4868 max = 0;
4869 cc += 1 + IMM2_SIZE + 1;
4870 break;
4871 case OP_CRQUERY:
4872 case OP_CRMINQUERY:
4873 min = 0;
4874 max = 1;
4875 cc += 1 + IMM2_SIZE + 1;
4876 break;
4877 case OP_CRRANGE:
4878 case OP_CRMINRANGE:
4879 min = GET2(cc, 1 + IMM2_SIZE + 1);
4880 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4881 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4882 break;
4883 default:
4884 SLJIT_ASSERT_STOP();
4885 break;
4886 }
4887
/* Maximizing forms: match in a loop, storing STR_PTR on the stack after the
iterations that may be returned to. */
4888 if (!minimize)
4889 {
4890 if (min == 0)
4891 {
/* Two stack slots: the current STR_PTR and a zero. The checks are
emitted with STACK_TOP moved by one slot; the slot is taken back
afterwards on the path where the zerolength jump is not taken. */
4892 allocate_stack(common, 2);
4893 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4894 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4895 /* Temporary release of STR_PTR. */
4896 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
4897 zerolength = compile_ref_checks(common, ccbegin, NULL);
4898 /* Restore if not zero length. */
4899 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
4900 }
4901 else
4902 {
/* One stack slot holding zero; the OVECTOR(1) check goes to the
backtrack list because at least one iteration is required. */
4903 allocate_stack(common, 1);
4904 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4905 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4906 }
4907
/* An explicit iteration counter, kept in POSSESSIVE0, is only needed when
min or max exceeds 1. */
4908 if (min > 1 || max > 1)
4909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4910
4911 label = LABEL();
4912 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4913
4914 if (min > 1 || max > 1)
4915 {
4916 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4917 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4918 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below min: iterate again without storing a position. */
4919 if (min > 1)
4920 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4921 if (max > 1)
4922 {
/* Below max: store STR_PTR and iterate again; at max: leave the loop. */
4923 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4924 allocate_stack(common, 1);
4925 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4926 JUMPTO(SLJIT_JUMP, label);
4927 JUMPHERE(jump);
4928 }
4929 }
4930
4931 if (max == 0)
4932 {
4933 /* Includes min > 1 case as well. */
4934 allocate_stack(common, 1);
4935 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4936 JUMPTO(SLJIT_JUMP, label);
4937 }
4938
4939 JUMPHERE(zerolength);
4940 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4941
4942 decrease_call_count(common);
4943 return cc;
4944 }
4945
/* Minimizing forms: STACK(0) holds a subject position and STACK(1) the
iteration count. STACK(1) is left unwritten for OP_CRMINSTAR, which has
neither a minimum nor a maximum to count towards. */
4946 allocate_stack(common, 2);
4947 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4948 if (type != OP_CRMINSTAR)
4949 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4950
4951 if (min == 0)
4952 {
/* No iteration is required: store STR_PTR and jump over the first
match. */
4953 zerolength = compile_ref_checks(common, ccbegin, NULL);
4954 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4955 jump = JUMP(SLJIT_JUMP);
4956 }
4957 else
4958 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4959
/* The label recorded here is where one more iteration starts; the loop
below also jumps to it while the count is under min. */
4960 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4961 if (max > 0)
4962 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4963
4964 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4965 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4966
4967 if (min > 1)
4968 {
/* Count the iteration and repeat until min is reached. */
4969 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4970 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4971 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4972 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
4973 }
4974 else if (max > 0)
4975 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4976
4977 if (jump != NULL)
4978 JUMPHERE(jump);
4979 JUMPHERE(zerolength);
4980
4981 decrease_call_count(common);
4982 return cc;
4983 }
4984
4985 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4986 {
4987 DEFINE_COMPILER;
4988 backtrack_common *backtrack;
4989 recurse_entry *entry = common->entries;
4990 recurse_entry *prev = NULL;
4991 int start = GET(cc, 1);
4992
4993 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
4994 while (entry != NULL)
4995 {
4996 if (entry->start == start)
4997 break;
4998 prev = entry;
4999 entry = entry->next;
5000 }
5001
5002 if (entry == NULL)
5003 {
5004 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5005 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5006 return NULL;
5007 entry->next = NULL;
5008 entry->entry = NULL;
5009 entry->calls = NULL;
5010 entry->start = start;
5011
5012 if (prev != NULL)
5013 prev->next = entry;
5014 else
5015 common->entries = entry;
5016 }
5017
5018 if (common->has_set_som && common->mark_ptr != 0)
5019 {
5020 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5021 allocate_stack(common, 2);
5022 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5023 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5024 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5025 }
5026 else if (common->has_set_som || common->mark_ptr != 0)
5027 {
5028 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5029 allocate_stack(common, 1);
5030 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5031 }
5032
5033 if (entry->entry == NULL)
5034 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5035 else
5036 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5037 /* Leave if the match is failed. */
5038 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5039 return cc + 1 + LINK_SIZE;
5040 }
5041
/* Emits the matching-path code for an assertion group (OP_ASSERT,
OP_ASSERT_NOT, OP_ASSERTBACK or OP_ASSERTBACK_NOT), optionally preceded by
OP_BRAZERO or OP_BRAMINZERO.

Arguments:
  common       compiler state; its quitlabel, acceptlabel, quit and accept
               members are saved on entry, replaced while the alternatives
               are compiled, and restored before every return
  cc           points to the assertion opcode, or to the OP_BRAZERO /
               OP_BRAMINZERO in front of it
  backtrack    assert_backtrack record; framesize and private_data_ptr are
               filled in here, and matchingpath is set here for OP_BRAZERO
               (for OP_BRAMINZERO it is only read, so it is presumably set
               by the caller beforehand - confirm)
  conditional  when TRUE, failure jumps are collected in
               backtrack->condfailed instead of
               backtrack->common.topbacktracks; must not be combined with
               OP_BRAZERO / OP_BRAMINZERO (asserted below)

Returns: cc + 1 + LINK_SIZE, where cc has been advanced over every
alternative of the assertion, or NULL if the compiler reported an error. */
5042 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5043 {
5044 DEFINE_COMPILER;
5045 int framesize;
5046 int private_data_ptr;
5047 backtrack_common altbacktrack;
5048 pcre_uchar *ccbegin;
5049 pcre_uchar opcode;
5050 pcre_uchar bra = OP_BRA;
5051 jump_list *tmp = NULL;
5052 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5053 jump_list **found;
5054 /* Saving previous accept variables. */
5055 struct sljit_label *save_quitlabel = common->quitlabel;
5056 struct sljit_label *save_acceptlabel = common->acceptlabel;
5057 jump_list *save_quit = common->quit;
5058 jump_list *save_accept = common->accept;
5059 struct sljit_jump *jump;
5060 struct sljit_jump *brajump = NULL;
5061
/* Remember an optional OP_BRAZERO / OP_BRAMINZERO prefix and step over it. */
5062 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5063 {
5064 SLJIT_ASSERT(!conditional);
5065 bra = *cc;
5066 cc++;
5067 }
5068 private_data_ptr = PRIVATE_DATA(cc);
5069 SLJIT_ASSERT(private_data_ptr != 0);
5070 framesize = get_framesize(common, cc, FALSE);
5071 backtrack->framesize = framesize;
5072 backtrack->private_data_ptr = private_data_ptr;
5073 opcode = *cc;
5074 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Jumps taken when an alternative matches: positive assertions collect them
in the local list tmp (bound at "Assert is successful" below), negative
assertions add them directly to the failure target. */
5075 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin marks the start of the current alternative, cc its end. */
5076 ccbegin = cc;
5077 cc += GET(cc, 1);
5078
5079 if (bra == OP_BRAMINZERO)
5080 {
5081 /* This is a braminzero backtrack path. */
5082 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5083 free_stack(common, 1);
5084 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5085 }
5086
/* With a negative framesize only STR_PTR is pushed and the previous
STACK_TOP is kept in the private data slot. Otherwise framesize + 2 slots
are allocated: STR_PTR at STACK(0), the previous private data value at
STACK(1), and the frame written by init_frame() from slot 2 onwards. */
5087 if (framesize < 0)
5088 {
5089 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5090 allocate_stack(common, 1);
5091 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5092 }
5093 else
5094 {
5095 allocate_stack(common, framesize + 2);
5096 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5097 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5098 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5099 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5100 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5101 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5102 }
5103
5104 memset(&altbacktrack, 0, sizeof(backtrack_common));
5105 common->quitlabel = NULL;
5106 common->quit = NULL;
/* One iteration per alternative; the loop is left when the alternative just
compiled is not followed by OP_ALT. */
5107 while (1)
5108 {
5109 common->acceptlabel = NULL;
5110 common->accept = NULL;
5111 altbacktrack.top = NULL;
5112 altbacktrack.topbacktracks = NULL;
5113
/* Second and later alternatives start from the STR_PTR saved at STACK(0). */
5114 if (*ccbegin == OP_ALT)
5115 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5116
5117 altbacktrack.cc = ccbegin;
5118 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5119 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5120 {
/* Restore the saved quit/accept state before reporting the error. */
5121 common->quitlabel = save_quitlabel;
5122 common->acceptlabel = save_acceptlabel;
5123 common->quit = save_quit;
5124 common->accept = save_accept;
5125 return NULL;
5126 }
/* Accept jumps recorded while compiling this alternative land here. */
5127 common->acceptlabel = LABEL();
5128 if (common->accept != NULL)
5129 set_jumps(common->accept, common->acceptlabel);
5130
5131 /* Reset stack. */
5132 if (framesize < 0)
5133 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5134 else {
5135 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5136 {
5137 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5138 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5139 }
5140 else
5141 {
5142 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5143 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5144 }
5145 }
5146
5147 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5148 {
5149 /* We know that STR_PTR was stored on the top of the stack. */
5150 if (conditional)
5151 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5152 else if (bra == OP_BRAZERO)
5153 {
5154 if (framesize < 0)
5155 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5156 else
5157 {
5158 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5159 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5160 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5161 }
5162 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5163 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5164 }
5165 else if (framesize >= 0)
5166 {
5167 /* For OP_BRA and OP_BRAMINZERO. */
5168 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5169 }
5170 }
/* This alternative matched: jump to the list selected by "found". */
5171 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5172
/* Emit the backtracking code of this alternative; its failure jumps
continue right after it, i.e. at the next alternative or at the
"None of them matched" code. */
5173 compile_backtrackingpath(common, altbacktrack.top);
5174 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5175 {
5176 common->quitlabel = save_quitlabel;
5177 common->acceptlabel = save_acceptlabel;
5178 common->quit = save_quit;
5179 common->accept = save_accept;
5180 return NULL;
5181 }
5182 set_jumps(altbacktrack.topbacktracks, LABEL());
5183
5184 if (*cc != OP_ALT)
5185 break;
5186
5187 ccbegin = cc;
5188 cc += GET(cc, 1);
5189 }
5190 /* None of them matched. */
5191 if (common->quit != NULL)
5192 set_jumps(common->quit, LABEL());
5193
5194 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5195 {
5196 /* Assert is failed. */
5197 if (conditional || bra == OP_BRAZERO)
5198 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5199
5200 if (framesize < 0)
5201 {
5202 /* The topmost item should be 0. */
5203 if (bra == OP_BRAZERO)
5204 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5205 else
5206 free_stack(common, 1);
5207 }
5208 else
5209 {
5210 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5211 /* The topmost item should be 0. */
5212 if (bra == OP_BRAZERO)
5213 {
5214 free_stack(common, framesize + 1);
5215 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5216 }
5217 else
5218 free_stack(common, framesize + 2);
5219 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5220 }
/* The failure jump goes to the failure target, except for OP_BRAZERO where
it is bound to the matchingpath label created further below. */
5221 jump = JUMP(SLJIT_JUMP);
5222 if (bra != OP_BRAZERO)
5223 add_jump(compiler, target, jump);
5224
5225 /* Assert is successful. */
5226 set_jumps(tmp, LABEL());
5227 if (framesize < 0)
5228 {
5229 /* We know that STR_PTR was stored on the top of the stack. */
5230 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5231 /* Keep the STR_PTR on the top of the stack. */
5232 if (bra == OP_BRAZERO)
5233 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5234 else if (bra == OP_BRAMINZERO)
5235 {
5236 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5237 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5238 }
5239 }
5240 else
5241 {
5242 if (bra == OP_BRA)
5243 {
5244 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5245 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5246 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5247 }
5248 else
5249 {
5250 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5251 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5252 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* The top slot receives STR_PTR for OP_BRAZERO and the immediate 0 for
OP_BRAMINZERO. */
5253 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5254 }
5255 }
5256
5257 if (bra == OP_BRAZERO)
5258 {
5259 backtrack->matchingpath = LABEL();
5260 sljit_set_label(jump, backtrack->matchingpath);
5261 }
5262 else if (bra == OP_BRAMINZERO)
5263 {
5264 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5265 JUMPHERE(brajump);
5266 if (framesize >= 0)
5267 {
5268 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5269 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5270 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5271 }
5272 set_jumps(backtrack->common.topbacktracks, LABEL());
5273 }
5274 }
5275 else
5276 {
5277 /* AssertNot is successful. */
5278 if (framesize < 0)
5279 {
5280 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5281 if (bra != OP_BRA)
5282 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5283 else
5284 free_stack(common, 1);
5285 }
5286 else
5287 {
5288 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5289 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5290 /* The topmost item should be 0. */
5291 if (bra != OP_BRA)
5292 {
5293 free_stack(common, framesize + 1);
5294 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5295 }
5296 else
5297 free_stack(common, framesize + 2);
5298 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5299 }
5300
5301 if (bra == OP_BRAZERO)
5302 backtrack->matchingpath = LABEL();
5303 else if (bra == OP_BRAMINZERO)
5304 {
5305 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5306 JUMPHERE(brajump);
5307 }
5308
/* With a BRAZERO / BRAMINZERO prefix the failure jumps are bound right here
and the list is cleared, so nothing is left in topbacktracks for later. */
5309 if (bra != OP_BRA)
5310 {
5311 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5312 set_jumps(backtrack->common.topbacktracks, LABEL());
5313 backtrack->common.topbacktracks = NULL;
5314 }
5315 }
5316
/* Normal exit: put the saved quit/accept state back. */
5317 common->quitlabel = save_quitlabel;
5318 common->acceptlabel = save_acceptlabel;
5319 common->quit = save_quit;
5320 common->accept = save_accept;
5321 return cc + 1 + LINK_SIZE;
5322 }
5323
5324 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5325 {
5326 int condition = FALSE;
5327 pcre_uchar *slotA = name_table;
5328 pcre_uchar *slotB;
5329 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5330 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5331 sljit_sw no_capture;
5332 int i;
5333
5334 locals += refno & 0xff;
5335 refno >>= 8;
5336 no_capture = locals[1];
5337
5338 for (i = 0; i < name_count; i++)
5339 {
5340 if (GET2(slotA, 0) == refno) break;
5341 slotA += name_entry_size;
5342 }
5343
5344 if (i < name_count)
5345 {
5346 /* Found a name for the number - there can be only one; duplicate names
5347 for different numbers are allowed, but not vice versa. First scan down
5348 for duplicates. */
5349
5350 slotB = slotA;
5351 while (slotB > name_table)
5352 {
5353 slotB -= name_entry_size;
5354 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5355 {
5356 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5357 if (condition) break;
5358 }
5359 else break;
5360 }
5361
5362 /* Scan up for duplicates */
5363 if (!condition)
5364 {
5365 slotB = slotA;
5366 for (i++; i < name_count; i++)
5367 {
5368 slotB += name_entry_size;
5369 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5370 {
5371 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5372 if (condition) break;
5373 }
5374 else break;
5375 }
5376 }
5377 }
5378 return condition;
5379 }
5380
5381 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5382 {
5383 int condition = FALSE;
5384 pcre_uchar *slotA = name_table;
5385 pcre_uchar *slotB;
5386 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5387 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5388 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5389 sljit_uw i;
5390
5391 for (i = 0; i < name_count; i++)
5392 {
5393 if (GET2(slotA, 0) == recno) break;
5394 slotA += name_entry_size;
5395 }
5396
5397 if (i < name_count)
5398 {
5399 /* Found a name for the number - there can be only one; duplicate
5400 names for different numbers are allowed, but not vice versa. First
5401 scan down for duplicates. */
5402
5403 slotB = slotA;
5404 while (slotB > name_table)
5405 {
5406 slotB -= name_entry_size;
5407 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5408 {
5409 condition = GET2(slotB, 0) == group_num;
5410 if (condition) break;
5411 }
5412 else break;
5413 }
5414
5415 /* Scan up for duplicates */
5416 if (!condition)
5417 {
5418 slotB = slotA;
5419 for (i++; i < name_count; i++)
5420 {
5421 slotB += name_entry_size;
5422 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5423 {
5424 condition = GET2(slotB, 0) == group_num;
5425 if (condition) break;
5426 }
5427 else break;
5428 }
5429 }
5430 }
5431 return condition;
5432 }
5433
5434 /*
5435 Handling bracketed expressions is probably the most complex part.
5436
5437 Stack layout naming characters:
5438 S - Push the current STR_PTR
5439 0 - Push a 0 (NULL)
5440 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5441 before the next alternative. Not pushed if there are no alternatives.
5442 M - Any values pushed by the current alternative. Can be empty, or anything.
5443 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5444 L - Push the previous local (pointed to by localptr) to the stack
5445 () - optional values stored on the stack
5446 ()* - optional, can be stored multiple times
5447
5448 The following list shows the regular expression templates, their PCRE byte codes
5449 and stack layout supported by pcre-sljit.
5450
5451 (?:) OP_BRA | OP_KET A M
5452 () OP_CBRA | OP_KET C M
5453 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5454 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5455 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5456 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5457 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5458 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5459 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5460 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5461 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5462 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5463 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5464 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5465 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5466 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5467 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5468 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5469 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5470 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5471 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5472 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5473
5474
5475 Stack layout naming characters:
5476 A - Push the alternative index (starting from 0) on the stack.
5477 Not pushed if there are no alternatives.
5478 M - Any values pushed by the current alternative. Can be empty, or anything.
5479
5480 The next list shows the possible content of a bracket:
5481 (|) OP_*BRA | OP_ALT ... M A
5482 (?()|) OP_*COND | OP_ALT M A
5483 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5484 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5485 Or nothing, if trace is unnecessary
5486 */
5487
5488 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5489 {
5490 DEFINE_COMPILER;
5491 backtrack_common *backtrack;
5492 pcre_uchar opcode;
5493 int private_data_ptr = 0;
5494 int offset = 0;
5495 int stacksize;
5496 pcre_uchar *ccbegin;
5497 pcre_uchar *matchingpath;
5498 pcre_uchar bra = OP_BRA;
5499 pcre_uchar ket;
5500 assert_backtrack *assert;
5501 BOOL has_alternatives;
5502 struct sljit_jump *jump;
5503 struct sljit_jump *skip;
5504 struct sljit_label *rmaxlabel = NULL;
5505 struct sljit_jump *braminzerojump = NULL;
5506
5507 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5508
5509 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5510 {
5511 bra = *cc;
5512 cc++;
5513 opcode = *cc;
5514 }
5515
5516 opcode = *cc;
5517 ccbegin = cc;
5518 matchingpath = ccbegin + 1 + LINK_SIZE;
5519
5520 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5521 {
5522 /* Drop this bracket_backtrack. */
5523 parent->top = backtrack->prev;
5524 return bracketend(cc);
5525 }
5526
5527 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5528 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5529 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5530 cc += GET(cc, 1);
5531
5532 has_alternatives = *cc == OP_ALT;
5533 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5534 {
5535 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5536 if (*matchingpath == OP_NRREF)
5537 {
5538 stacksize = GET2(matchingpath, 1);
5539 if (common->currententry == NULL || stacksize == RREF_ANY)
5540 has_alternatives = FALSE;
5541 else if (common->currententry->start == 0)
5542 has_alternatives = stacksize != 0;
5543 else
5544 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5545 }
5546 }
5547
5548 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5549 opcode = OP_SCOND;
5550 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5551 opcode = OP_ONCE;
5552
5553 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5554 {
5555 /* Capturing brackets has a pre-allocated space. */
5556 offset = GET2(ccbegin, 1 + LINK_SIZE);
5557 if (common->optimized_cbracket[offset] == 0)
5558 {
5559 private_data_ptr = OVECTOR_PRIV(offset);
5560 offset <<= 1;
5561 }
5562 else
5563 {
5564 offset <<= 1;
5565 private_data_ptr = OVECTOR(offset);
5566 }
5567 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5568 matchingpath += IMM2_SIZE;
5569 }
5570 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5571 {
5572 /* Other brackets simply allocate the next entry. */
5573 private_data_ptr = PRIVATE_DATA(ccbegin);
5574 SLJIT_ASSERT(private_data_ptr != 0);
5575 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5576 if (opcode == OP_ONCE)
5577 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5578 }
5579
5580 /* Instructions before the first alternative. */
5581 stacksize = 0;
5582 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5583 stacksize++;
5584 if (bra == OP_BRAZERO)
5585 stacksize++;
5586
5587 if (stacksize > 0)
5588 allocate_stack(common, stacksize);
5589
5590 stacksize = 0;
5591 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5592 {
5593 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5594 stacksize++;
5595 }
5596
5597 if (bra == OP_BRAZERO)
5598 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5599
5600 if (bra == OP_BRAMINZERO)
5601 {
5602 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5603 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5604 if (ket != OP_KETRMIN)
5605 {
5606 free_stack(common, 1);
5607 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5608 }
5609 else
5610 {
5611 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5612 {
5613 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5614 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5615 /* Nothing stored during the first run. */
5616 skip = JUMP(SLJIT_JUMP);
5617 JUMPHERE(jump);
5618 /* Checking zero-length iteration. */
5619 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5620 {
5621 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5622 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5623 }
5624 else
5625 {
5626 /* Except when the whole stack frame must be saved. */
5627 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5628 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
5629 }
5630 JUMPHERE(skip);
5631 }
5632 else
5633 {
5634 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5635 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5636 JUMPHERE(jump);
5637 }
5638 }
5639 }
5640
5641 if (ket == OP_KETRMIN)
5642 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5643
5644 if (ket == OP_KETRMAX)
5645 {
5646 rmaxlabel = LABEL();
5647 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5648 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5649 }
5650
5651 /* Handling capturing brackets and alternatives. */
5652 if (opcode == OP_ONCE)
5653 {
5654 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5655 {
5656 /* Neither capturing brackets nor recursions are not found in the block. */
5657 if (ket == OP_KETRMIN)
5658 {
5659 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5660 allocate_stack(common, 2);
5661 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5662 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5663 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5664 }
5665 else if (ket == OP_KETRMAX || has_alternatives)
5666 {
5667 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5668 allocate_stack(common, 1);
5669 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5670 }
5671 else
5672 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5673 }
5674 else
5675 {
5676 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5677 {
5678 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5679 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5680 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5681 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5682 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5683 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5684 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5685 }
5686 else
5687 {
5688 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5689 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5690 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5691 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5692 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5693 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5694 }
5695 }
5696 }
5697 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5698 {
5699 /* Saving the previous values. */
5700 if (common->optimized_cbracket[offset >> 1] == 0)
5701 {
5702 allocate_stack(common, 3);
5703 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5704 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5705 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5706 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5707 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5709 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5710 }
5711 else
5712 {
5713 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5714 allocate_stack(common, 2);
5715 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5716 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
5717 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5718 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5719 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5720 }
5721 }
5722 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5723 {
5724 /* Saving the previous value. */
5725 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5726 allocate_stack(common, 1);
5727 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5728 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5729 }
5730 else if (has_alternatives)
5731 {
5732 /* Pushing the starting string pointer. */
5733 allocate_stack(common, 1);
5734 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5735 }
5736
5737 /* Generating code for the first alternative. */
5738 if (opcode == OP_COND || opcode == OP_SCOND)
5739 {
5740 if (*matchingpath == OP_CREF)
5741 {
5742 SLJIT_ASSERT(has_alternatives);
5743 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5744 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5745 matchingpath += 1 + IMM2_SIZE;
5746 }
5747 else if (*matchingpath == OP_NCREF)
5748 {
5749 SLJIT_ASSERT(has_alternatives);
5750 stacksize = GET2(matchingpath, 1);
5751 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5752
5753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5754 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5755 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5756 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
5757 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5758 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
5759 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5760 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5761 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
5762
5763 JUMPHERE(jump);
5764 matchingpath += 1 + IMM2_SIZE;
5765 }
5766 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
5767 {
5768 /* Never has other case. */
5769 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5770
5771 stacksize = GET2(matchingpath, 1);
5772 if (common->currententry == NULL)
5773 stacksize = 0;
5774 else if (stacksize == RREF_ANY)
5775 stacksize = 1;
5776 else if (common->currententry->start == 0)
5777 stacksize = stacksize == 0;
5778 else
5779 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5780
5781 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
5782 {
5783 SLJIT_ASSERT(!has_alternatives);
5784 if (stacksize != 0)
5785 matchingpath += 1 + IMM2_SIZE;
5786 else
5787 {
5788 if (*cc == OP_ALT)
5789 {
5790 matchingpath = cc + 1 + LINK_SIZE;
5791 cc += GET(cc, 1);
5792 }
5793 else
5794 matchingpath = cc;
5795 }
5796 }
5797 else
5798 {
5799 SLJIT_ASSERT(has_alternatives);
5800
5801 stacksize = GET2(matchingpath, 1);
5802 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5803 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5804 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5805 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5806 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
5807 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5808 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
5809 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5810 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5811 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
5812 matchingpath += 1 + IMM2_SIZE;
5813 }
5814 }
5815 else
5816 {
5817 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
5818 /* Similar code as PUSH_BACKTRACK macro. */
5819 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5820 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5821 return NULL;
5822 memset(assert, 0, sizeof(assert_backtrack));
5823 assert->common.cc = matchingpath;
5824 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5825 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
5826 }
5827 }
5828
5829 compile_matchingpath(common, matchingpath, cc, backtrack);
5830 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5831 return NULL;
5832
5833 if (opcode == OP_ONCE)
5834 {
5835 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5836 {
5837 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5838 /* TMP2 which is set here used by OP_KETRMAX below. */
5839 if (ket == OP_KETRMAX)
5840 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5841 else if (ket == OP_KETRMIN)
5842 {
5843 /* Move the STR_PTR to the private_data_ptr. */
5844 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5845 }
5846 }
5847 else
5848 {
5849 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5850 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw));
5851 if (ket == OP_KETRMAX)
5852 {
5853 /* TMP2 which is set here used by OP_KETRMAX below. */
5854 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5855 }
5856 }
5857 }
5858
5859 stacksize = 0;
5860 if (ket != OP_KET || bra != OP_BRA)
5861 stacksize++;
5862 if (has_alternatives && opcode != OP_ONCE)
5863 stacksize++;
5864
5865 if (stacksize > 0)
5866 allocate_stack(common, stacksize);
5867
5868 stacksize = 0;
5869 if (ket != OP_KET)
5870 {
5871 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5872 stacksize++;
5873 }
5874 else if (bra != OP_BRA)
5875 {
5876 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5877 stacksize++;
5878 }
5879
5880 if (has_alternatives)
5881 {
5882 if (opcode != OP_ONCE)
5883 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5884 if (ket != OP_KETRMAX)
5885 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5886 }
5887
5888 /* Must be after the matchingpath label. */
5889 if (offset != 0)
5890 {
5891 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5892 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5893 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5894 }
5895
5896 if (ket == OP_KETRMAX)
5897 {
5898 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5899 {
5900 if (has_alternatives)
5901 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5902 /* Checking zero-length iteration. */
5903 if (opcode != OP_ONCE)
5904 {
5905 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
5906 /* Drop STR_PTR for greedy plus quantifier. */
5907 if (bra != OP_BRAZERO)
5908 free_stack(common, 1);
5909 }
5910 else
5911 /* TMP2 must contain the starting STR_PTR. */
5912 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5913 }
5914 else
5915 JUMPTO(SLJIT_JUMP, rmaxlabel);
5916 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5917 }
5918
5919 if (bra == OP_BRAZERO)
5920 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
5921
5922 if (bra == OP_BRAMINZERO)
5923 {
5924 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5925 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
5926 if (braminzerojump != NULL)
5927 {
5928 JUMPHERE(braminzerojump);
5929 /* We need to release the end pointer to perform the
5930 backtrack for the zero-length iteration. When
5931 framesize is < 0, OP_ONCE will do the release itself. */
5932 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5933 {
5934 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5935 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5936 }
5937 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5938 free_stack(common, 1);
5939 }
5940 /* Continue to the normal backtrack. */
5941 }
5942
5943 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5944 decrease_call_count(common);
5945
5946 /* Skip the other alternatives. */
5947 while (*cc == OP_ALT)
5948 cc += GET(cc, 1);
5949 cc += 1 + LINK_SIZE;
5950 return cc;
5951 }
5952
5953 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5954 {
5955 DEFINE_COMPILER;
5956 backtrack_common *backtrack;
5957 pcre_uchar opcode;
5958 int private_data_ptr;
5959 int cbraprivptr = 0;
5960 int framesize;
5961 int stacksize;
5962 int offset = 0;
5963 BOOL zero = FALSE;
5964 pcre_uchar *ccbegin = NULL;
5965 int stack;
5966 struct sljit_label *loop = NULL;
5967 struct jump_list *emptymatch = NULL;
5968
5969 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
5970 if (*cc == OP_BRAPOSZERO)
5971 {
5972 zero = TRUE;
5973 cc++;
5974 }
5975
5976 opcode = *cc;
5977 private_data_ptr = PRIVATE_DATA(cc);
5978 SLJIT_ASSERT(private_data_ptr != 0);
5979 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
5980 switch(opcode)
5981 {
5982 case OP_BRAPOS:
5983 case OP_SBRAPOS:
5984 ccbegin = cc + 1 + LINK_SIZE;
5985 break;
5986
5987 case OP_CBRAPOS:
5988 case OP_SCBRAPOS:
5989 offset = GET2(cc, 1 + LINK_SIZE);
5990 /* This case cannot be optimized in the same was as
5991 normal capturing brackets. */
5992 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
5993 cbraprivptr = OVECTOR_PRIV(offset);
5994 offset <<= 1;
5995 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5996 break;
5997
5998 default:
5999 SLJIT_ASSERT_STOP();
6000 break;
6001 }
6002
6003 framesize = get_framesize(common, cc, FALSE);
6004 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6005 if (framesize < 0)
6006 {
6007 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
6008 if (!zero)
6009 stacksize++;
6010 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6011 allocate_stack(common, stacksize);
6012 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6013
6014 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6015 {
6016 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6017 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6018 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6019 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6020 }
6021 else
6022 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6023
6024 if (!zero)
6025 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6026 }
6027 else
6028 {
6029 stacksize = framesize + 1;
6030 if (!zero)
6031 stacksize++;
6032 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6033 stacksize++;
6034 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6035 allocate_stack(common, stacksize);
6036
6037 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6038 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6039 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6040 stack = 0;
6041 if (!zero)
6042 {
6043 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6044 stack++;
6045 }
6046 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6047 {
6048 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6049 stack++;
6050 }
6051 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6052 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6053 }
6054
6055 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6056 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6057
6058 loop = LABEL();
6059 while (*cc != OP_KETRPOS)
6060 {
6061 backtrack->top = NULL;
6062 backtrack->topbacktracks = NULL;
6063 cc += GET(cc, 1);
6064
6065 compile_matchingpath(common, ccbegin, cc, backtrack);
6066 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6067 return NULL;
6068
6069 if (framesize < 0)
6070 {
6071 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6072
6073 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6074 {
6075 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6077 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6079 }
6080 else
6081 {
6082 if (opcode == OP_SBRAPOS)
6083 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6084 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6085 }
6086
6087 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6088 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6089
6090 if (!zero)
6091 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6092 }
6093 else
6094 {
6095 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6096 {
6097 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6098 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6099 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6100 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6101 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6102 }
6103 else
6104 {
6105 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6106 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6107 if (opcode == OP_SBRAPOS)
6108 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6109 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6110 }
6111
6112 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6113 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6114
6115 if (!zero)
6116 {
6117 if (framesize < 0)
6118 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6119 else
6120 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6121 }
6122 }
6123 JUMPTO(SLJIT_JUMP, loop);
6124 flush_stubs(common);
6125
6126 compile_backtrackingpath(common, backtrack->top);
6127 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6128 return NULL;
6129 set_jumps(backtrack->topbacktracks, LABEL());
6130
6131 if (framesize < 0)
6132 {
6133 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6134 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6135 else
6136 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6137 }
6138 else
6139 {
6140 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6141 {
6142 /* Last alternative. */
6143 if (*cc == OP_KETRPOS)
6144 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6145 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6146 }
6147 else
6148 {
6149 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6150 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6151 }
6152 }
6153
6154 if (*cc == OP_KETRPOS)
6155 break;
6156 ccbegin = cc + 1 + LINK_SIZE;
6157 }
6158
6159 backtrack->topbacktracks = NULL;
6160 if (!zero)
6161 {
6162 if (framesize < 0)
6163 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6164 else /* TMP2 is set to [private_data_ptr] above. */
6165 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
6166 }
6167
6168 /* None of them matched. */
6169 set_jumps(emptymatch, LABEL());
6170 decrease_call_count(common);
6171 return cc + 1 + LINK_SIZE;
6172 }
6173
6174 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6175 {
6176 int class_len;
6177
6178 *opcode = *cc;
6179 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6180 {
6181 cc++;
6182 *type = OP_CHAR;
6183 }
6184 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6185 {
6186 cc++;
6187 *type = OP_CHARI;
6188 *opcode -= OP_STARI - OP_STAR;
6189 }
6190 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6191 {
6192 cc++;
6193 *type = OP_NOT;
6194 *opcode -= OP_NOTSTAR - OP_STAR;
6195 }
6196 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6197 {
6198 cc++;
6199 *type = OP_NOTI;
6200 *opcode -= OP_NOTSTARI - OP_STAR;
6201 }
6202 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6203 {
6204 cc++;
6205 *opcode -= OP_TYPESTAR - OP_STAR;
6206 *type = 0;
6207 }
6208 else
6209 {
6210 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6211 *type = *opcode;
6212 cc++;
6213 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6214 *opcode = cc[class_len - 1];
6215 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6216 {
6217 *opcode -= OP_CRSTAR - OP_STAR;
6218 if (end != NULL)
6219 *end = cc + class_len;
6220 }
6221 else
6222 {
6223 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6224 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6225 *arg2 = GET2(cc, class_len);
6226
6227 if (*arg2 == 0)
6228 {
6229 SLJIT_ASSERT(*arg1 != 0);
6230 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6231 }
6232 if (*arg1 == *arg2)
6233 *opcode = OP_EXACT;
6234
6235 if (end != NULL)
6236 *end = cc + class_len + 2 * IMM2_SIZE;
6237 }
6238 return cc;
6239 }
6240
6241 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6242 {
6243 *arg1 = GET2(cc, 0);
6244 cc += IMM2_SIZE;
6245 }
6246
6247 if (*type == 0)
6248 {
6249 *type = *cc;
6250 if (end != NULL)
6251 *end = next_opcode(common, cc);
6252 cc++;
6253 return cc;
6254 }
6255
6256 if (end != NULL)
6257 {
6258 *end = cc + 1;
6259 #ifdef SUPPORT_UTF
6260 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6261 #endif
6262 }
6263 return cc;
6264 }
6265
/* Emits the matching path for an iterated single item (a character, a
character type or a class followed by a repeat). The iterator is decoded by
get_iterator_parameters() and code is generated according to the normalised
repeat opcode.

Arguments:
  common   the compiler state
  cc       points to the iterator opcode (or to the class opcode)
  parent   not referenced directly here; presumably used by the
           PUSH_BACKTRACK macro to link in the new backtrack record

Returns:   the "end" pointer reported by get_iterator_parameters()
           (presumably NULL if PUSH_BACKTRACK fails - confirm against the
           macro definition)
*/
6266 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6267 {
6268 DEFINE_COMPILER;
6269 backtrack_common *backtrack;
6270 pcre_uchar opcode;
6271 pcre_uchar type;
6272 int arg1 = -1, arg2 = -1;
6273 pcre_uchar* end;
6274 jump_list *nomatch = NULL;
6275 struct sljit_jump *jump = NULL;
6276 struct sljit_label *label;
6277 int private_data_ptr = PRIVATE_DATA(cc);
/* Location of the iterator state: without a private data slot the state is
kept in STACK(0) / STACK(1) relative to STACK_TOP, otherwise in two
consecutive sljit_sw sized locals starting at private_data_ptr. */
6278 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6279 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6280 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
6281 int tmp_base, tmp_offset;
6282
6283 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6284
6285 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6286
/* Select the scratch location (tmp_base / tmp_offset) used by the OP_EXACT
and possessive cases below: the TMP3 register for the types listed first, the
POSSESSIVE0 local for the others.
NOTE(review): presumably TMP3 may be clobbered while matching the types of
the second group - confirm against compile_char1_matchingpath. */
6287 switch (type)
6288 {
6289 case OP_NOT_DIGIT:
6290 case OP_DIGIT:
6291 case OP_NOT_WHITESPACE:
6292 case OP_WHITESPACE:
6293 case OP_NOT_WORDCHAR:
6294 case OP_WORDCHAR:
6295 case OP_ANY:
6296 case OP_ALLANY:
6297 case OP_ANYBYTE:
6298 case OP_ANYNL:
6299 case OP_NOT_HSPACE:
6300 case OP_HSPACE:
6301 case OP_NOT_VSPACE:
6302 case OP_VSPACE:
6303 case OP_CHAR:
6304 case OP_CHARI:
6305 case OP_NOT:
6306 case OP_NOTI:
6307 case OP_CLASS:
6308 case OP_NCLASS:
6309 tmp_base = TMP3;
6310 tmp_offset = 0;
6311 break;
6312
6313 default:
6314 SLJIT_ASSERT_STOP();
6315 /* Fall through. */
6316
6317 case OP_EXTUNI:
6318 case OP_XCLASS:
6319 case OP_NOTPROP:
6320 case OP_PROP:
6321 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6322 tmp_offset = POSSESSIVE0;
6323 break;
6324 }
6325
6326 switch(opcode)
6327 {
/* Greedy repeats. For OP_ANYNL and OP_EXTUNI a STR_PTR is pushed on the stack
after every successful match of the item. All other types only record the
latest position in offset0, and in offset1 either the first position
(OP_STAR / OP_PLUS) or a counter that starts at 1. */
6328 case OP_STAR:
6329 case OP_PLUS:
6330 case OP_UPTO:
6331 case OP_CRRANGE:
6332 if (type == OP_ANYNL || type == OP_EXTUNI)
6333 {
6334 SLJIT_ASSERT(private_data_ptr == 0);
6335 if (opcode == OP_STAR || opcode == OP_UPTO)
6336 {
6337 allocate_stack(common, 2);
6338 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6339 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6340 }
6341 else
6342 {
6343 allocate_stack(common, 1);
6344 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6345 }
6346
/* POSSESSIVE0 is used as the iteration counter of counted repeats. */
6347 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6348 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6349
6350 label = LABEL();
6351 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6352 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6353 {
6354 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6355 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
/* NOTE(review): the jump back to "label" below is taken before TMP1 is
written back to POSSESSIVE0, so the counter would not advance on that path.
It looks unreachable because get_iterator_parameters() keeps OP_CRRANGE only
for class types, which never enter this OP_ANYNL / OP_EXTUNI branch -
confirm. */
6356 if (opcode == OP_CRRANGE && arg2 > 0)
6357 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
/* Leave the loop once the counter has reached arg1. */
6358 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6359 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6360 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6361 }
6362
6363 /* We cannot use TMP3 because of this allocate_stack. */
6364 allocate_stack(common, 1);
6365 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6366 JUMPTO(SLJIT_JUMP, label);
6367 if (jump != NULL)
6368 JUMPHERE(jump);
6369 }
6370 else
6371 {
/* OP_PLUS: the first, mandatory match fails straight into the backtrack
list of this item. */
6372 if (opcode == OP_PLUS)
6373 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6374 if (private_data_ptr == 0)
6375 allocate_stack(common, 2);
6376 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6377 if (opcode <= OP_PLUS)
6378 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6379 else
6380 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
/* Matching loop: a failure of the item leaves through "nomatch". */
6381 label = LABEL();
6382 compile_char1_matchingpath(common, type, cc, &nomatch);
6383 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6384 if (opcode <= OP_PLUS)
6385 JUMPTO(SLJIT_JUMP, label);
6386 else if (opcode == OP_CRRANGE && arg1 == 0)
6387 {
6388 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6389 JUMPTO(SLJIT_JUMP, label);
6390 }
6391 else
6392 {
/* Increment the counter and continue while it is below arg1 + 1. */
6393 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6394 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6395 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6396 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6397 }
6398 set_jumps(nomatch, LABEL());
/* The counter started at 1, so a value below arg2 + 1 means that fewer than
arg2 items were matched: backtrack. */
6399 if (opcode == OP_CRRANGE)
6400 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
/* Continue from the last position where the item matched. */
6401 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6402 }
6403 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6404 break;
6405
/* Only the current STR_PTR is saved here (after one mandatory match for
OP_MINPLUS); no matching loop is emitted on this path. */
6406 case OP_MINSTAR:
6407 case OP_MINPLUS:
6408 if (opcode == OP_MINPLUS)
6409 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6410 if (private_data_ptr == 0)
6411 allocate_stack(common, 1);
6412 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6413 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6414 break;
6415
/* STR_PTR and a counter initialised to 1 are saved. For OP_CRMINRANGE an
unconditional jump to the backtrack list of this item is emitted. */
6416 case OP_MINUPTO:
6417 case OP_CRMINRANGE:
6418 if (private_data_ptr == 0)
6419 allocate_stack(common, 2);
6420 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6421 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6422 if (opcode == OP_CRMINRANGE)
6423 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6424 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6425 break;
6426
/* STR_PTR is saved first; only OP_QUERY attempts to match the item here. */
6427 case OP_QUERY:
6428 case OP_MINQUERY:
6429 if (private_data_ptr == 0)
6430 allocate_stack(common, 1);
6431 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6432 if (opcode == OP_QUERY)
6433 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6434 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6435 break;
6436
/* Counts down from arg1 in the scratch location, matching the item once per
iteration; any failure goes to the backtrack list of this item. */
6437 case OP_EXACT:
6438 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6439 label = LABEL();
6440 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6441 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6442 JUMPTO(SLJIT_C_NOT_ZERO, label);
6443 break;
6444
/* The scratch location holds the last position where the item matched; it is
copied back to STR_PTR once the item fails. For OP_POSUPTO the POSSESSIVE1
local counts down from arg1. No matchingpath label is recorded in these
cases. */
6445 case OP_POSSTAR:
6446 case OP_POSPLUS:
6447 case OP_POSUPTO:
6448 if (opcode == OP_POSPLUS)
6449 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6450 if (opcode == OP_POSUPTO)
6451 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6452 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6453 label = LABEL();
6454 compile_char1_matchingpath(common, type, cc, &nomatch);
6455 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6456 if (opcode != OP_POSUPTO)
6457 JUMPTO(SLJIT_JUMP, label);
6458 else
6459 {
6460 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6461 JUMPTO(SLJIT_C_NOT_ZERO, label);
6462 }
6463 set_jumps(nomatch, LABEL());
6464 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6465 break;
6466
/* A single optional match: the scratch location is updated only when the
item matched, then copied back to STR_PTR. */
6467 case OP_POSQUERY:
6468 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6469 compile_char1_matchingpath(common, type, cc, &nomatch);
6470 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6471 set_jumps(nomatch, LABEL());
6472 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6473 break;
6474
6475 default:
6476 SLJIT_ASSERT_STOP();
6477 break;
6478 }
6479
6480 decrease_call_count(common);
6481 return end;
6482 }
6483
6484 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6485 {
6486 DEFINE_COMPILER;
6487 backtrack_common *backtrack;
6488
6489 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6490
6491 if (*cc == OP_FAIL)
6492 {
6493 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6494 return cc + 1;
6495 }
6496
6497 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6498 {
6499 /* No need to check notempty conditions. */
6500 if (common->acceptlabel == NULL)
6501 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6502 else
6503 JUMPTO(SLJIT_JUMP, common->acceptlabel);
6504 return cc + 1;
6505 }
6506
6507 if (common->acceptlabel == NULL)
6508 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6509 else
6510 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
6511 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6512 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6513 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6514 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6515 if (common->acceptlabel == NULL)
6516 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6517 else
6518 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
6519 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6520 if (common->acceptlabel == NULL)
6521 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6522 else
6523 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
6524 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6525 return cc + 1;
6526 }
6527
6528 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
6529 {
6530 DEFINE_COMPILER;
6531 int offset = GET2(cc, 1);
6532 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
6533
6534 /* Data will be discarded anyway... */
6535 if (common->currententry != NULL)
6536 return cc + 1 + IMM2_SIZE;
6537
6538 if (!optimized_cbracket)
6539 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6540 offset <<= 1;
6541 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6542 if (!optimized_cbracket)
6543 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6544 return cc + 1 + IMM2_SIZE;
6545 }
6546
6547 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6548 {
6549 DEFINE_COMPILER;
6550 backtrack_common *backtrack;
6551
6552 while (cc < ccend)
6553 {
6554 switch(*cc)
6555 {
6556 case OP_SOD:
6557 case OP_SOM:
6558 case OP_NOT_WORD_BOUNDARY:
6559 case OP_WORD_BOUNDARY:
6560 case OP_NOT_DIGIT:
6561 case OP_DIGIT:
6562 case OP_NOT_WHITESPACE:
6563 case OP_WHITESPACE:
6564 case OP_NOT_WORDCHAR:
6565 case OP_WORDCHAR:
6566 case OP_ANY:
6567 case OP_ALLANY:
6568 case OP_ANYBYTE:
6569 case OP_NOTPROP:
6570 case OP_PROP:
6571 case OP_ANYNL:
6572 case OP_NOT_HSPACE:
6573 case OP_HSPACE:
6574 case OP_NOT_VSPACE:
6575 case OP_VSPACE:
6576 case OP_EXTUNI:
6577 case OP_EODN:
6578 case OP_EOD:
6579 case OP_CIRC:
6580 case OP_CIRCM:
6581 case OP_DOLL:
6582 case OP_DOLLM:
6583 case OP_NOT:
6584 case OP_NOTI:
6585 case OP_REVERSE:
6586 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6587 break;
6588
6589 case OP_SET_SOM:
6590 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6591 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6592 allocate_stack(common, 1);
6593 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6594 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6595 cc++;
6596 break;
6597
6598 case OP_CHAR:
6599 case OP_CHARI:
6600 if (common->mode == JIT_COMPILE)
6601 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6602 else
6603 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6604 break;
6605
6606 case OP_STAR:
6607 case OP_MINSTAR:
6608 case OP_PLUS:
6609 case OP_MINPLUS:
6610 case OP_QUERY:
6611 case OP_MINQUERY:
6612 case OP_UPTO:
6613 case OP_MINUPTO:
6614 case OP_EXACT:
6615 case OP_POSSTAR:
6616 case OP_POSPLUS:
6617 case OP_POSQUERY:
6618 case OP_POSUPTO:
6619 case OP_STARI:
6620 case OP_MINSTARI:
6621 case OP_PLUSI:
6622 case OP_MINPLUSI:
6623 case OP_QUERYI:
6624 case OP_MINQUERYI:
6625 case OP_UPTOI:
6626 case OP_MINUPTOI:
6627 case OP_EXACTI:
6628 case OP_POSSTARI:
6629 case OP_POSPLUSI:
6630 case OP_POSQUERYI:
6631 case OP_POSUPTOI:
6632 case OP_NOTSTAR:
6633 case OP_NOTMINSTAR:
6634 case OP_NOTPLUS:
6635 case OP_NOTMINPLUS:
6636 case OP_NOTQUERY:
6637 case OP_NOTMINQUERY:
6638 case OP_NOTUPTO:
6639 case OP_NOTMINUPTO:
6640 case OP_NOTEXACT:
6641 case OP_NOTPOSSTAR:
6642 case OP_NOTPOSPLUS:
6643 case OP_NOTPOSQUERY:
6644 case OP_NOTPOSUPTO:
6645 case OP_NOTSTARI:
6646 case OP_NOTMINSTARI:
6647 case OP_NOTPLUSI:
6648 case OP_NOTMINPLUSI:
6649 case OP_NOTQUERYI:
6650 case OP_NOTMINQUERYI:
6651 case OP_NOTUPTOI:
6652 case OP_NOTMINUPTOI:
6653 case OP_NOTEXACTI:
6654 case OP_NOTPOSSTARI:
6655 case OP_NOTPOSPLUSI:
6656 case OP_NOTPOSQUERYI:
6657 case OP_NOTPOSUPTOI:
6658 case OP_TYPESTAR:
6659 case OP_TYPEMINSTAR:
6660 case OP_TYPEPLUS:
6661 case OP_TYPEMINPLUS:
6662 case OP_TYPEQUERY:
6663 case OP_TYPEMINQUERY:
6664 case OP_TYPEUPTO:
6665 case OP_TYPEMINUPTO:
6666 case OP_TYPEEXACT:
6667 case OP_TYPEPOSSTAR:
6668 case OP_TYPEPOSPLUS:
6669 case OP_TYPEPOSQUERY:
6670 case OP_TYPEPOSUPTO:
6671 cc = compile_iterator_matchingpath(common, cc, parent);
6672 break;
6673
6674 case OP_CLASS:
6675 case OP_NCLASS:
6676 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
6677 cc = compile_iterator_matchingpath(common, cc, parent);
6678 else
6679 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6680 break;
6681
6682 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6683 case OP_XCLASS:
6684 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
6685 cc = compile_iterator_matchingpath(common, cc, parent);
6686 else
6687 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6688 break;
6689 #endif
6690
6691 case OP_REF:
6692 case OP_REFI:
6693 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
6694 cc = compile_ref_iterator_matchingpath(common, cc, parent);
6695 else
6696 cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
6697 break;
6698
6699 case OP_RECURSE:
6700 cc = compile_recurse_matchingpath(common, cc, parent);
6701 break;
6702
6703 case OP_ASSERT:
6704 case OP_ASSERT_NOT:
6705 case OP_ASSERTBACK:
6706 case OP_ASSERTBACK_NOT:
6707 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6708 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6709 break;
6710
6711 case OP_BRAMINZERO:
6712 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
6713 cc = bracketend(cc + 1);
6714 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
6715 {
6716 allocate_stack(common, 1);
6717 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6718 }
6719 else
6720 {
6721 allocate_stack(common, 2);
6722 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6723 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
6724 }
6725 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
6726 if (cc[1] > OP_ASSERTBACK_NOT)
6727 decrease_call_count(common);
6728 break;
6729
6730 case OP_ONCE:
6731 case OP_ONCE_NC:
6732 case OP_BRA:
6733 case OP_CBRA:
6734 case OP_COND:
6735 case OP_SBRA:
6736 case OP_SCBRA:
6737 case OP_SCOND:
6738 cc = compile_bracket_matchingpath(common, cc, parent);
6739 break;
6740
6741 case OP_BRAZERO:
6742 if (cc[1] > OP_ASSERTBACK_NOT)
6743 cc = compile_bracket_matchingpath(common, cc, parent);
6744 else
6745 {
6746 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6747 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6748 }
6749 break;
6750
6751 case OP_BRAPOS:
6752 case OP_CBRAPOS:
6753 case OP_SBRAPOS:
6754 case OP_SCBRAPOS:
6755 case OP_BRAPOSZERO:
6756 cc = compile_bracketpos_matchingpath(common, cc, parent);
6757 break;
6758