/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1002 - (show annotations)
Tue Aug 14 09:31:00 2012 UTC (7 years, 3 months ago) by zherczeg
File MIME type: text/plain
File size: 252197 byte(s)
Some renamings
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
/* All-in-one: Since we use the JIT compiler only from here,
we just include it. This way we don't need to touch the build
system files. */

/* Configuration macros defined ahead of the sljitLir.c inclusion below:
allocation is routed through PUBL(malloc) / PUBL(free), and both the
verbose and the debug modes of sljit are set to 0. */
#define SLJIT_MALLOC(size) (PUBL(malloc))(size)
#define SLJIT_FREE(ptr) (PUBL(free))(ptr)
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0
#define SLJIT_DEBUG 0

#include "sljit/sljitLir.c"

/* Stop the build when sljit reports that the target is not supported. */
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
#error Unsupported architecture
#endif
67
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
80
81 /*
82 Short summary about the backtracking mechanism employed by the jit code generator:
83
84 The code generator follows the recursive nature of the PERL compatible regular
85 expressions. The basic blocks of regular expressions are condition checkers
86 which execute different commands depending on the result of the condition check.
87 The relationship between the operators can be horizontal (concatenation) and
88 vertical (sub-expression) (See struct backtrack_common for more details).
89
90 'ab' - 'a' and 'b' regexps are concatenated
91 'a+' - 'a' is the sub-expression of the '+' operator
92
93 The condition checkers are boolean (true/false) checkers. Machine code is generated
94 for the checker itself and for the actions depending on the result of the checker.
95 The 'true' case is called the matching path (expected path), and the other is called
96 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
97 branches on the matching path.
98
99 Greedy star operator (*) :
100 Matching path: match happens.
101 Backtrack path: match failed.
102 Non-greedy star operator (*?) :
103 Matching path: no need to perform a match.
104 Backtrack path: match is required.
105
106 The following example shows how the code is generated for a capturing bracket
107 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
108 we have the following regular expression:
109
110 A(B|C)D
111
112 The generated code will be the following:
113
114 A matching path
115 '(' matching path (pushing arguments to the stack)
116 B matching path
117 ')' matching path (pushing arguments to the stack)
118 D matching path
119 return with successful match
120
121 D backtrack path
122 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
123 B backtrack path
124 C matching path
125 jump to D matching path
126 C backtrack path
127 A backtrack path
128
129 Notice that the order of the backtrack code paths is the opposite of the fast
130 code paths. In this way the topmost value on the stack always belongs
131 to the current backtrack code path. The backtrack path must check
132 whether there is a next alternative. If so, it needs to jump back to
133 the matching path eventually. Otherwise it needs to clear out its own stack
134 frame and continue the execution on the backtrack code paths.
135 */
136
137 /*
138 Saved stack frames:
139
140 Atomic blocks and asserts require reloading the values of private data
141 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
142 are not necessarily known at compile time, thus we need a dynamic restore
143 mechanism.
144
145 The stack frames are stored in a chain list, and have the following format:
146 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
147
148 Thus we can restore the private data to a particular point in the stack.
149 */
150
/* Argument block for functions of type jit_function (declared below),
which receive a pointer to this structure. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  const pcre_uchar *str;
  const pcre_uchar *begin;
  const pcre_uchar *end;
  int *offsets;
  pcre_uchar *uchar_ptr;
  pcre_uchar *mark_ptr;
  /* Everything else after. */
  int offsetcount;
  int calllimit;
  /* NOTE(review): the four flags below presumably mirror the PCRE_NOTBOL,
  PCRE_NOTEOL, PCRE_NOTEMPTY and PCRE_NOTEMPTY_ATSTART exec options -
  confirm where this structure is filled in. */
  pcre_uint8 notbol;
  pcre_uint8 noteol;
  pcre_uint8 notempty;
  pcre_uint8 notempty_atstart;
} jit_arguments;
168
/* Holds one code pointer and one size per compile mode (both arrays have
JIT_NUMBER_OF_COMPILE_MODES entries), together with a callback and the
user data pointer stored next to it. */
typedef struct executable_functions {
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  PUBL(jit_callback) callback;
  void *userdata;
  /* NOTE(review): presumably the size of the matching executable_funcs
  entry - confirm where it is assigned. */
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
} executable_functions;
175
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
180
/* Kind tag stored in stub_list.type; stack_alloc is the only kind defined. */
enum stub_types { stack_alloc };
182
/* Node of a singly linked list of stubs. Each entry carries its kind, an
integer payload, the jump that starts the stub and the label stored in
'quit'. */
typedef struct stub_list {
  enum stub_types type;
  int data;
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
190
/* Pointer type of a function that takes a jit_arguments block and returns
an int, using the SLJIT_CALL calling convention. */
typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
192
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants. */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  pcre_uchar *cc;
} backtrack_common;
207
/* Backtrack record for assertions; starts with backtrack_common. */
typedef struct assert_backtrack {
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 (-1) if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
218
/* Backtrack record for brackets; starts with backtrack_common. */
typedef struct bracket_backtrack {
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. -1 if not needed. */
    int framesize;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
238
/* Backtrack record whose name ties it to the *POS bracket opcodes;
starts with backtrack_common. */
typedef struct bracketpos_backtrack {
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
248
/* Backtrack record carrying one matching path label; starts with
backtrack_common. NOTE(review): by its name used for OP_BRAMINZERO -
confirm at the use site. */
typedef struct braminzero_backtrack {
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
253
/* Backtrack record for iterators; starts with backtrack_common. */
typedef struct iterator_backtrack {
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} iterator_backtrack;
259
/* Node of a singly linked list describing one recursion target. */
typedef struct recurse_entry {
  struct recurse_entry *next;
  /* Contains the function entry. */
  struct sljit_label *entry;
  /* Collects the calls until the function is created. */
  jump_list *calls;
  /* Points to the starting opcode. */
  int start;
} recurse_entry;
269
/* Backtrack record that adds no fields of its own to backtrack_common. */
typedef struct recurse_backtrack {
  backtrack_common common;
} recurse_backtrack;
273
/* Sizes the digits[] array of compiler_common (2 + MAX_RANGE_SIZE entries). */
#define MAX_RANGE_SIZE 6

/* State shared by the code generator functions of this file. The
DEFINE_COMPILER shortcut reads the 'compiler' member, and PRIVATE_DATA()
indexes 'private_data_ptrs' by the opcode's offset from 'start'. */
typedef struct compiler_common {
  struct sljit_compiler *compiler;
  pcre_uchar *start;

  /* Maps private data offset to each opcode. */
  int *private_data_ptrs;
  int cbraptr;
  /* OVector starting point. Must be divisible by 2. */
  int ovector_start;
  /* Last known position of the requested byte. */
  int req_char_ptr;
  /* Head of the last recursion. */
  int recursive_head;
  /* First inspected character for partial matching. */
  int start_used_ptr;
  /* Starting pointer for partial soft matches. */
  int hit_start;
  /* End pointer of the first line. */
  int first_line_end;
  /* Points to the marked string. */
  int mark_ptr;

  /* Flipped and lower case tables. */
  const pcre_uint8 *fcc;
  sljit_w lcc;
  /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
  int mode;
  /* Newline control. */
  int nltype;
  int newline;
  int bsr_nltype;
  /* Dollar endonly. */
  int endonly;
  /* Set to TRUE by get_private_data_length when OP_SET_SOM is seen. */
  BOOL has_set_som;
  /* Tables. */
  sljit_w ctypes;
  int digits[2 + MAX_RANGE_SIZE];
  /* Named capturing brackets. */
  sljit_uw name_table;
  sljit_w name_count;
  sljit_w name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quitlabel;
  struct sljit_label *acceptlabel;
  stub_list *stubs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  BOOL jscript_compat;
#ifdef SUPPORT_UTF
  BOOL utf;
#ifdef SUPPORT_UCP
  BOOL use_ucp;
#endif
  jump_list *utfreadchar;
#ifdef COMPILE_PCRE8
  jump_list *utfreadtype8;
#endif
#endif /* SUPPORT_UTF */
#ifdef SUPPORT_UCP
  jump_list *getucd;
#endif
} compiler_common;
352
/* For byte_sequence_compare. */

/* The members after 'sourcereg' exist only when SLJIT_UNALIGNED is defined
and non-zero. The two unions 'c' and 'oc' have the same layout: the
character units can be viewed as an int, an unsigned short, or (8 bit
library only) a single byte. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } c;
  union {
    sljit_i asint;
    sljit_uh asushort;
#ifdef COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#else
#ifdef COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
#endif
  } oc;
#endif
} compare_context;
386
/* Marker values written into a saved stack frame by init_frame():
frame_setstrbegin and frame_setmark each precede one saved value, and
frame_end terminates the frame. */
enum {
  frame_end = 0,
  frame_setstrbegin = -1,
  frame_setmark = -2
};
392
/* Undefine sljit macros. (CMP is redefined among the shortcuts below.) */
#undef CMP

/* Used for accessing the elements of the stack. Yields a negative byte
offset: STACK(0) is -sizeof(sljit_w), STACK(1) is -2 * sizeof(sljit_w),
and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_w))
398
/* Symbolic names for the sljit registers used in this file. Note that TMP2
is SLJIT_TEMPORARY_REG3, while SLJIT_TEMPORARY_REG2 is taken by STACK_TOP. */
#define TMP1          SLJIT_TEMPORARY_REG1
#define TMP2          SLJIT_TEMPORARY_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_TEMPORARY_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
409
/* Local space layout. All values are byte offsets expressed in units of
sizeof(sljit_w). */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_w))
#define LOCALS1          (1 * sizeof(sljit_w))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_w))
#define POSSESSIVE1      (3 * sizeof(sljit_w))
/* Max limit of recursions. */
#define CALL_LIMIT       (4 * sizeof(sljit_w))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. */
/* The four macros below expand to expressions that use a variable named
'common' (a compiler_common pointer), which must be in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
427
/* Move opcodes for one character unit: the unsigned byte forms for the
8 bit library, the unsigned half-word forms for the 16 bit library. Any
other configuration is a compile-time error. */
#ifdef COMPILE_PCRE8
#define MOV_UCHAR  SLJIT_MOV_UB
#define MOVU_UCHAR SLJIT_MOVU_UB
#else
#ifdef COMPILE_PCRE16
#define MOV_UCHAR  SLJIT_MOV_UH
#define MOVU_UCHAR SLJIT_MOVU_UH
#else
#error Unsupported compiling mode
#endif
#endif
439
/* Shortcuts. DEFINE_COMPILER declares a local 'compiler' taken from
common->compiler; every other macro below passes that local to the
corresponding sljit function, so DEFINE_COMPILER (or an equivalent
declaration) must precede their use. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an earlier jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define COND_VALUE(op, dst, dstw, type) \
  sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
463
464 static pcre_uchar* bracketend(pcre_uchar* cc)
465 {
466 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
467 do cc += GET(cc, 1); while (*cc == OP_ALT);
468 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
469 cc += 1 + LINK_SIZE;
470 return cc;
471 }
472
473 /* Functions which might need modification for every newly supported opcode:
474 next_opcode
475 get_private_data_length
476 set_private_data_ptrs
477 get_framesize
478 init_frame
479 get_private_data_length_for_copy
480 copy_private_data
481 compile_matchingpath
482 compile_backtrackingpath
483 */
484
/* Returns a pointer to the item that follows the opcode at cc, or NULL when
the opcode is not handled by this switch (this includes OP_ANYBYTE when
common->utf is set). For bracket-type opcodes only the opcode header is
skipped, not the bracket contents. 'common' is consulted only in builds
with SUPPORT_UTF. */
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
{
SLJIT_UNUSED_ARG(common);
switch(*cc)
  {
  /* Opcodes that occupy a single code unit. */
  case OP_SOD:
  case OP_SOM:
  case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY:
  case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT:
  case OP_DIGIT:
  case OP_NOT_WHITESPACE:
  case OP_WHITESPACE:
  case OP_NOT_WORDCHAR:
  case OP_WORDCHAR:
  case OP_ANY:
  case OP_ALLANY:
  case OP_ANYNL:
  case OP_NOT_HSPACE:
  case OP_HSPACE:
  case OP_NOT_VSPACE:
  case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN:
  case OP_EOD:
  case OP_CIRC:
  case OP_CIRCM:
  case OP_DOLL:
  case OP_DOLLM:
  case OP_TYPESTAR:
  case OP_TYPEMINSTAR:
  case OP_TYPEPLUS:
  case OP_TYPEMINPLUS:
  case OP_TYPEQUERY:
  case OP_TYPEMINQUERY:
  case OP_TYPEPOSSTAR:
  case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY:
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  case OP_CRPLUS:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  case OP_DEF:
  case OP_BRAZERO:
  case OP_BRAMINZERO:
  case OP_BRAPOSZERO:
  case OP_COMMIT:
  case OP_FAIL:
  case OP_ACCEPT:
  case OP_ASSERT_ACCEPT:
  case OP_SKIPZERO:
  return cc + 1;

  /* Rejected (NULL) in UTF mode, a single code unit otherwise. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UTF
  if (common->utf) return NULL;
#endif
  return cc + 1;

  /* Opcode followed by one character; in UTF mode the character may
  extend over additional code units. */
  case OP_CHAR:
  case OP_CHARI:
  case OP_NOT:
  case OP_NOTI:
  case OP_STAR:
  case OP_MINSTAR:
  case OP_PLUS:
  case OP_MINPLUS:
  case OP_QUERY:
  case OP_MINQUERY:
  case OP_POSSTAR:
  case OP_POSPLUS:
  case OP_POSQUERY:
  case OP_STARI:
  case OP_MINSTARI:
  case OP_PLUSI:
  case OP_MINPLUSI:
  case OP_QUERYI:
  case OP_MINQUERYI:
  case OP_POSSTARI:
  case OP_POSPLUSI:
  case OP_POSQUERYI:
  case OP_NOTSTAR:
  case OP_NOTMINSTAR:
  case OP_NOTPLUS:
  case OP_NOTMINPLUS:
  case OP_NOTQUERY:
  case OP_NOTMINQUERY:
  case OP_NOTPOSSTAR:
  case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY:
  case OP_NOTSTARI:
  case OP_NOTMINSTARI:
  case OP_NOTPLUSI:
  case OP_NOTMINPLUSI:
  case OP_NOTQUERYI:
  case OP_NOTMINQUERYI:
  case OP_NOTPOSSTARI:
  case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI:
  cc += 2;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Opcode, an IMM2_SIZE operand, then one character (possibly extended
  in UTF mode). */
  case OP_UPTO:
  case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSUPTO:
  case OP_UPTOI:
  case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSUPTOI:
  case OP_NOTUPTO:
  case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSUPTO:
  case OP_NOTUPTOI:
  case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSUPTOI:
  cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Opcode followed by two code units. */
  case OP_NOTPROP:
  case OP_PROP:
  return cc + 1 + 2;

  /* Opcode followed by one IMM2_SIZE operand. */
  case OP_TYPEUPTO:
  case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSUPTO:
  case OP_REF:
  case OP_REFI:
  case OP_CREF:
  case OP_NCREF:
  case OP_RREF:
  case OP_NRREF:
  case OP_CLOSE:
  cc += 1 + IMM2_SIZE;
  return cc;

  /* Opcode followed by two IMM2_SIZE operands. */
  case OP_CRRANGE:
  case OP_CRMINRANGE:
  return cc + 1 + 2 * IMM2_SIZE;

  /* Opcode followed by 32 bytes of class data. */
  case OP_CLASS:
  case OP_NCLASS:
  return cc + 1 + 32 / sizeof(pcre_uchar);

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  /* The total length is stored right after the opcode. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Opcode followed by one LINK_SIZE field. */
  case OP_RECURSE:
  case OP_ASSERT:
  case OP_ASSERT_NOT:
  case OP_ASSERTBACK:
  case OP_ASSERTBACK_NOT:
  case OP_REVERSE:
  case OP_ONCE:
  case OP_ONCE_NC:
  case OP_BRA:
  case OP_BRAPOS:
  case OP_COND:
  case OP_SBRA:
  case OP_SBRAPOS:
  case OP_SCOND:
  case OP_ALT:
  case OP_KET:
  case OP_KETRMAX:
  case OP_KETRMIN:
  case OP_KETRPOS:
  return cc + 1 + LINK_SIZE;

  /* Opcode, LINK_SIZE field, then an IMM2_SIZE operand. */
  case OP_CBRA:
  case OP_CBRAPOS:
  case OP_SCBRA:
  case OP_SCBRAPOS:
  return cc + 1 + LINK_SIZE + IMM2_SIZE;

  /* cc[1] holds a length; two further code units are skipped besides the
  opcode and that many units. */
  case OP_MARK:
  return cc + 1 + 2 + cc[1];

  default:
  return NULL;
  }
}
680
/* Groups of 'case' labels shared by get_private_data_length() and
set_private_data_ptrs(). The numeric suffix matches the number of private
words those functions request for the group (1 or 2); the B variants are
the opcodes that carry an additional IMM2_SIZE operand, and for the TYPE
2A / 2B groups the two words are requested only when the repeated type is
neither OP_ANYNL nor OP_EXTUNI. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
732
733 static int get_class_iterator_size(pcre_uchar *cc)
734 {
735 switch(*cc)
736 {
737 case OP_CRSTAR:
738 case OP_CRPLUS:
739 return 2;
740
741 case OP_CRMINSTAR:
742 case OP_CRMINPLUS:
743 case OP_CRQUERY:
744 case OP_CRMINQUERY:
745 return 1;
746
747 case OP_CRRANGE:
748 case OP_CRMINRANGE:
749 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
750 return 0;
751 return 2;
752
753 default:
754 return 0;
755 }
756 }
757
/* Scans the compiled pattern from cc up to ccend and returns the number of
bytes of private data it needs (always a multiple of sizeof(sljit_w)), or
-1 when next_opcode() rejects an opcode.

Side effects on common: has_set_som is set when OP_SET_SOM is seen, and
the first OP_RECURSE / OP_MARK each reserve one word by recording the
current ovector_start in recursive_head / mark_ptr and then advancing
ovector_start by sizeof(sljit_w). */
static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
{
int private_data_length = 0;
pcre_uchar *alternative;
/* End of the outermost bracket entered so far whose closing opcode is not
a plain OP_KET; NULL when there is no such bracket. */
pcre_uchar *end = NULL;
/* space: private words requested by an iterator.
size: distance to advance cc; a negative value means -size units followed
by a character that may be extended in UTF mode.
bracketlen: header length of a bracket opcode that is being entered. */
int space, size, bracketlen;

/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  switch(*cc)
    {
    case OP_SET_SOM:
    common->has_set_som = TRUE;
    cc += 1;
    break;

    /* Brackets that always own one private word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    private_data_length += sizeof(sljit_w);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    private_data_length += sizeof(sljit_w);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      private_data_length += sizeof(sljit_w);
    bracketlen = 1 + LINK_SIZE;
    break;

    /* Brackets without a private word of their own. */
    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    /* Classes: the repeat opcode (if any) follows the class data. */
    case OP_CLASS:
    case OP_NCLASS:
    size += 1 + 32 / sizeof(pcre_uchar);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    case OP_RECURSE:
    /* Set its value only once. */
    if (common->recursive_head == 0)
      {
      common->recursive_head = common->ovector_start;
      common->ovector_start += sizeof(sljit_w);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_MARK:
    /* Reserved only once, like recursive_head above. */
    if (common->mark_ptr == 0)
      {
      common->mark_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_w);
      }
    cc += 1 + 2 + cc[1];
    break;

    default:
    cc = next_opcode(common, cc);
    if (cc == NULL)
      return -1;
    break;
    }

  /* Iterators located before 'end' get no private words. */
  if (space > 0 && cc >= end)
    private_data_length += sizeof(sljit_w) * space;

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* bracketend() returns the address after the closing opcode, so
      end[-1 - LINK_SIZE] is that closing opcode. A plain OP_KET clears
      'end' again. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
return private_data_length;
}
915
/* Walks the compiled pattern from common->start up to ccend and assigns a
private data offset to every opcode that needs one. Offsets are handed out
sequentially starting at private_data_ptr and are stored in
common->private_data_ptrs, indexed by the opcode's distance from
common->start. The bracket and iterator rules are the same as those
applied by get_private_data_length(). */
static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
{
pcre_uchar *cc = common->start;
pcre_uchar *alternative;
/* End of the outermost bracket entered so far whose closing opcode is not
a plain OP_KET; NULL when there is no such bracket. */
pcre_uchar *end = NULL;
/* space: private words requested by an iterator.
size: distance to advance cc; a negative value means -size units followed
by a character that may be extended in UTF mode.
bracketlen: header length of a bracket opcode that is being entered. */
int space, size, bracketlen;

while (cc < ccend)
  {
  space = 0;
  size = 0;
  bracketlen = 0;
  switch(*cc)
    {
    /* Brackets that always own one private word. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_w);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_w);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_w);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    /* Brackets without a private word of their own. */
    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    /* Classes: the repeat opcode (if any) follows the class data. */
    case OP_CLASS:
    case OP_NCLASS:
    size += 1 + 32 / sizeof(pcre_uchar);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Iterators located before 'end' get no private words. The offset is
  recorded at the position of the iterator opcode itself, because cc has
  not been advanced yet for these cases. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_w) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* bracketend() returns the address after the closing opcode, so
      end[-1 - LINK_SIZE] is that closing opcode. A plain OP_KET clears
      'end' again. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
}
1054
1055 /* Returns with -1 if no need for frame. */
1056 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1057 {
1058 pcre_uchar *ccend = bracketend(cc);
1059 int length = 0;
1060 BOOL possessive = FALSE;
1061 BOOL setsom_found = recursive;
1062 BOOL setmark_found = recursive;
1063
1064 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1065 {
1066 length = 3;
1067 possessive = TRUE;
1068 }
1069
1070 cc = next_opcode(common, cc);
1071 SLJIT_ASSERT(cc != NULL);
1072 while (cc < ccend)
1073 switch(*cc)
1074 {
1075 case OP_SET_SOM:
1076 SLJIT_ASSERT(common->has_set_som);
1077 if (!setsom_found)
1078 {
1079 length += 2;
1080 setsom_found = TRUE;
1081 }
1082 cc += 1;
1083 break;
1084
1085 case OP_MARK:
1086 SLJIT_ASSERT(common->mark_ptr != 0);
1087 if (!setmark_found)
1088 {
1089 length += 2;
1090 setmark_found = TRUE;
1091 }
1092 cc += 1 + 2 + cc[1];
1093 break;
1094
1095 case OP_RECURSE:
1096 if (common->has_set_som && !setsom_found)
1097 {
1098 length += 2;
1099 setsom_found = TRUE;
1100 }
1101 if (common->mark_ptr != 0 && !setmark_found)
1102 {
1103 length += 2;
1104 setmark_found = TRUE;
1105 }
1106 cc += 1 + LINK_SIZE;
1107 break;
1108
1109 case OP_CBRA:
1110 case OP_CBRAPOS:
1111 case OP_SCBRA:
1112 case OP_SCBRAPOS:
1113 length += 3;
1114 cc += 1 + LINK_SIZE + IMM2_SIZE;
1115 break;
1116
1117 default:
1118 cc = next_opcode(common, cc);
1119 SLJIT_ASSERT(cc != NULL);
1120 break;
1121 }
1122
1123 /* Possessive quantifiers can use a special case. */
1124 if (SLJIT_UNLIKELY(possessive) && length == 3)
1125 return -1;
1126
1127 if (length > 0)
1128 return length + 1;
1129 return -1;
1130 }
1131
1132 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1133 {
1134 DEFINE_COMPILER;
1135 pcre_uchar *ccend = bracketend(cc);
1136 BOOL setsom_found = recursive;
1137 BOOL setmark_found = recursive;
1138 int offset;
1139
1140 /* >= 1 + shortest item size (2) */
1141 SLJIT_UNUSED_ARG(stacktop);
1142 SLJIT_ASSERT(stackpos >= stacktop + 2);
1143
1144 stackpos = STACK(stackpos);
1145 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1146 cc = next_opcode(common, cc);
1147 SLJIT_ASSERT(cc != NULL);
1148 while (cc < ccend)
1149 switch(*cc)
1150 {
1151 case OP_SET_SOM:
1152 SLJIT_ASSERT(common->has_set_som);
1153 if (!setsom_found)
1154 {
1155 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1156 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1157 stackpos += (int)sizeof(sljit_w);
1158 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1159 stackpos += (int)sizeof(sljit_w);
1160 setsom_found = TRUE;
1161 }
1162 cc += 1;
1163 break;
1164
1165 case OP_MARK:
1166 SLJIT_ASSERT(common->mark_ptr != 0);
1167 if (!setmark_found)
1168 {
1169 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1170 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1171 stackpos += (int)sizeof(sljit_w);
1172 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1173 stackpos += (int)sizeof(sljit_w);
1174 setmark_found = TRUE;
1175 }
1176 cc += 1 + 2 + cc[1];
1177 break;
1178
1179 case OP_RECURSE:
1180 if (common->has_set_som && !setsom_found)
1181 {
1182 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1183 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1184 stackpos += (int)sizeof(sljit_w);
1185 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1186 stackpos += (int)sizeof(sljit_w);
1187 setsom_found = TRUE;
1188 }
1189 if (common->mark_ptr != 0 && !setmark_found)
1190 {
1191 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1192 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1193 stackpos += (int)sizeof(sljit_w);
1194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1195 stackpos += (int)sizeof(sljit_w);
1196 setmark_found = TRUE;
1197 }
1198 cc += 1 + LINK_SIZE;
1199 break;
1200
1201 case OP_CBRA:
1202 case OP_CBRAPOS:
1203 case OP_SCBRA:
1204 case OP_SCBRAPOS:
1205 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1206 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1207 stackpos += (int)sizeof(sljit_w);
1208 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1209 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1210 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1211 stackpos += (int)sizeof(sljit_w);
1212 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1213 stackpos += (int)sizeof(sljit_w);
1214
1215 cc += 1 + LINK_SIZE + IMM2_SIZE;
1216 break;
1217
1218 default:
1219 cc = next_opcode(common, cc);
1220 SLJIT_ASSERT(cc != NULL);
1221 break;
1222 }
1223
1224 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1225 SLJIT_ASSERT(stackpos == STACK(stacktop));
1226 }
1227
1228 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1229 {
1230 int private_data_length = 2;
1231 int size;
1232 pcre_uchar *alternative;
1233 /* Calculate the sum of the private machine words. */
1234 while (cc < ccend)
1235 {
1236 size = 0;
1237 switch(*cc)
1238 {
1239 case OP_ASSERT:
1240 case OP_ASSERT_NOT:
1241 case OP_ASSERTBACK:
1242 case OP_ASSERTBACK_NOT:
1243 case OP_ONCE:
1244 case OP_ONCE_NC:
1245 case OP_BRAPOS:
1246 case OP_SBRA:
1247 case OP_SBRAPOS:
1248 case OP_SCOND:
1249 private_data_length++;
1250 cc += 1 + LINK_SIZE;
1251 break;
1252
1253 case OP_CBRA:
1254 case OP_SCBRA:
1255 private_data_length++;
1256 cc += 1 + LINK_SIZE + IMM2_SIZE;
1257 break;
1258
1259 case OP_CBRAPOS:
1260 case OP_SCBRAPOS:
1261 private_data_length += 2;
1262 cc += 1 + LINK_SIZE + IMM2_SIZE;
1263 break;
1264
1265 case OP_COND:
1266 /* Might be a hidden SCOND. */
1267 alternative = cc + GET(cc, 1);
1268 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1269 private_data_length++;
1270 cc += 1 + LINK_SIZE;
1271 break;
1272
1273 CASE_ITERATOR_PRIVATE_DATA_1
1274 if (PRIVATE_DATA(cc))
1275 private_data_length++;
1276 cc += 2;
1277 #ifdef SUPPORT_UTF
1278 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1279 #endif
1280 break;
1281
1282 CASE_ITERATOR_PRIVATE_DATA_2A
1283 if (PRIVATE_DATA(cc))
1284 private_data_length += 2;
1285 cc += 2;
1286 #ifdef SUPPORT_UTF
1287 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1288 #endif
1289 break;
1290
1291 CASE_ITERATOR_PRIVATE_DATA_2B
1292 if (PRIVATE_DATA(cc))
1293 private_data_length += 2;
1294 cc += 2 + IMM2_SIZE;
1295 #ifdef SUPPORT_UTF
1296 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1297 #endif
1298 break;
1299
1300 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1301 if (PRIVATE_DATA(cc))
1302 private_data_length++;
1303 cc += 1;
1304 break;
1305
1306 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1307 if (PRIVATE_DATA(cc))
1308 private_data_length += 2;
1309 cc += 1;
1310 break;
1311
1312 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1313 if (PRIVATE_DATA(cc))
1314 private_data_length += 2;
1315 cc += 1 + IMM2_SIZE;
1316 break;
1317
1318 case OP_CLASS:
1319 case OP_NCLASS:
1320 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1321 case OP_XCLASS:
1322 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1323 #else
1324 size = 1 + 32 / (int)sizeof(pcre_uchar);
1325 #endif
1326 if (PRIVATE_DATA(cc))
1327 private_data_length += get_class_iterator_size(cc + size);
1328 cc += size;
1329 break;
1330
1331 default:
1332 cc = next_opcode(common, cc);
1333 SLJIT_ASSERT(cc != NULL);
1334 break;
1335 }
1336 }
1337 SLJIT_ASSERT(cc == ccend);
1338 return private_data_length;
1339 }
1340
1341 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1342 BOOL save, int stackptr, int stacktop)
1343 {
1344 DEFINE_COMPILER;
1345 int srcw[2];
1346 int count, size;
1347 BOOL tmp1next = TRUE;
1348 BOOL tmp1empty = TRUE;
1349 BOOL tmp2empty = TRUE;
1350 pcre_uchar *alternative;
1351 enum {
1352 start,
1353 loop,
1354 end
1355 } status;
1356
1357 status = save ? start : loop;
1358 stackptr = STACK(stackptr - 2);
1359 stacktop = STACK(stacktop - 1);
1360
1361 if (!save)
1362 {
1363 stackptr += sizeof(sljit_w);
1364 if (stackptr < stacktop)
1365 {
1366 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1367 stackptr += sizeof(sljit_w);
1368 tmp1empty = FALSE;
1369 }
1370 if (stackptr < stacktop)
1371 {
1372 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1373 stackptr += sizeof(sljit_w);
1374 tmp2empty = FALSE;
1375 }
1376 /* The tmp1next must be TRUE in either way. */
1377 }
1378
1379 while (status != end)
1380 {
1381 count = 0;
1382 switch(status)
1383 {
1384 case start:
1385 SLJIT_ASSERT(save && common->recursive_head != 0);
1386 count = 1;
1387 srcw[0] = common->recursive_head;
1388 status = loop;
1389 break;
1390
1391 case loop:
1392 if (cc >= ccend)
1393 {
1394 status = end;
1395 break;
1396 }
1397
1398 switch(*cc)
1399 {
1400 case OP_ASSERT:
1401 case OP_ASSERT_NOT:
1402 case OP_ASSERTBACK:
1403 case OP_ASSERTBACK_NOT:
1404 case OP_ONCE:
1405 case OP_ONCE_NC:
1406 case OP_BRAPOS:
1407 case OP_SBRA:
1408 case OP_SBRAPOS:
1409 case OP_SCOND:
1410 count = 1;
1411 srcw[0] = PRIVATE_DATA(cc);
1412 SLJIT_ASSERT(srcw[0] != 0);
1413 cc += 1 + LINK_SIZE;
1414 break;
1415
1416 case OP_CBRA:
1417 case OP_SCBRA:
1418 count = 1;
1419 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1420 cc += 1 + LINK_SIZE + IMM2_SIZE;
1421 break;
1422
1423 case OP_CBRAPOS:
1424 case OP_SCBRAPOS:
1425 count = 2;
1426 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1427 srcw[1] = PRIVATE_DATA(cc);
1428 SLJIT_ASSERT(srcw[0] != 0);
1429 cc += 1 + LINK_SIZE + IMM2_SIZE;
1430 break;
1431
1432 case OP_COND:
1433 /* Might be a hidden SCOND. */
1434 alternative = cc + GET(cc, 1);
1435 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1436 {
1437 count = 1;
1438 srcw[0] = PRIVATE_DATA(cc);
1439 SLJIT_ASSERT(srcw[0] != 0);
1440 }
1441 cc += 1 + LINK_SIZE;
1442 break;
1443
1444 CASE_ITERATOR_PRIVATE_DATA_1
1445 if (PRIVATE_DATA(cc))
1446 {
1447 count = 1;
1448 srcw[0] = PRIVATE_DATA(cc);
1449 }
1450 cc += 2;
1451 #ifdef SUPPORT_UTF
1452 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1453 #endif
1454 break;
1455
1456 CASE_ITERATOR_PRIVATE_DATA_2A
1457 if (PRIVATE_DATA(cc))
1458 {
1459 count = 2;
1460 srcw[0] = PRIVATE_DATA(cc);
1461 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1462 }
1463 cc += 2;
1464 #ifdef SUPPORT_UTF
1465 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1466 #endif
1467 break;
1468
1469 CASE_ITERATOR_PRIVATE_DATA_2B
1470 if (PRIVATE_DATA(cc))
1471 {
1472 count = 2;
1473 srcw[0] = PRIVATE_DATA(cc);
1474 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1475 }
1476 cc += 2 + IMM2_SIZE;
1477 #ifdef SUPPORT_UTF
1478 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1479 #endif
1480 break;
1481
1482 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1483 if (PRIVATE_DATA(cc))
1484 {
1485 count = 1;
1486 srcw[0] = PRIVATE_DATA(cc);
1487 }
1488 cc += 1;
1489 break;
1490
1491 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1492 if (PRIVATE_DATA(cc))
1493 {
1494 count = 2;
1495 srcw[0] = PRIVATE_DATA(cc);
1496 srcw[1] = srcw[0] + sizeof(sljit_w);
1497 }
1498 cc += 1;
1499 break;
1500
1501 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1502 if (PRIVATE_DATA(cc))
1503 {
1504 count = 2;
1505 srcw[0] = PRIVATE_DATA(cc);
1506 srcw[1] = srcw[0] + sizeof(sljit_w);
1507 }
1508 cc += 1 + IMM2_SIZE;
1509 break;
1510
1511 case OP_CLASS:
1512 case OP_NCLASS:
1513 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1514 case OP_XCLASS:
1515 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1516 #else
1517 size = 1 + 32 / (int)sizeof(pcre_uchar);
1518 #endif
1519 if (PRIVATE_DATA(cc))
1520 switch(get_class_iterator_size(cc + size))
1521 {
1522 case 1:
1523 count = 1;
1524 srcw[0] = PRIVATE_DATA(cc);
1525 break;
1526
1527 case 2:
1528 count = 2;
1529 srcw[0] = PRIVATE_DATA(cc);
1530 srcw[1] = srcw[0] + sizeof(sljit_w);
1531 break;
1532
1533 default:
1534 SLJIT_ASSERT_STOP();
1535 break;
1536 }
1537 cc += size;
1538 break;
1539
1540 default:
1541 cc = next_opcode(common, cc);
1542 SLJIT_ASSERT(cc != NULL);
1543 break;
1544 }
1545 break;
1546
1547 case end:
1548 SLJIT_ASSERT_STOP();
1549 break;
1550 }
1551
1552 while (count > 0)
1553 {
1554 count--;
1555 if (save)
1556 {
1557 if (tmp1next)
1558 {
1559 if (!tmp1empty)
1560 {
1561 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1562 stackptr += sizeof(sljit_w);
1563 }
1564 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1565 tmp1empty = FALSE;
1566 tmp1next = FALSE;
1567 }
1568 else
1569 {
1570 if (!tmp2empty)
1571 {
1572 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1573 stackptr += sizeof(sljit_w);
1574 }
1575 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1576 tmp2empty = FALSE;
1577 tmp1next = TRUE;
1578 }
1579 }
1580 else
1581 {
1582 if (tmp1next)
1583 {
1584 SLJIT_ASSERT(!tmp1empty);
1585 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1586 tmp1empty = stackptr >= stacktop;
1587 if (!tmp1empty)
1588 {
1589 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1590 stackptr += sizeof(sljit_w);
1591 }
1592 tmp1next = FALSE;
1593 }
1594 else
1595 {
1596 SLJIT_ASSERT(!tmp2empty);
1597 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1598 tmp2empty = stackptr >= stacktop;
1599 if (!tmp2empty)
1600 {
1601 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1602 stackptr += sizeof(sljit_w);
1603 }
1604 tmp1next = TRUE;
1605 }
1606 }
1607 }
1608 }
1609
1610 if (save)
1611 {
1612 if (tmp1next)
1613 {
1614 if (!tmp1empty)
1615 {
1616 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1617 stackptr += sizeof(sljit_w);
1618 }
1619 if (!tmp2empty)
1620 {
1621 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1622 stackptr += sizeof(sljit_w);
1623 }
1624 }
1625 else
1626 {
1627 if (!tmp2empty)
1628 {
1629 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1630 stackptr += sizeof(sljit_w);
1631 }
1632 if (!tmp1empty)
1633 {
1634 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1635 stackptr += sizeof(sljit_w);
1636 }
1637 }
1638 }
1639 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1640 }
1641
1642 #undef CASE_ITERATOR_PRIVATE_DATA_1
1643 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1644 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1645 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1646 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1647 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1648
1649 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1650 {
1651 return (value & (value - 1)) == 0;
1652 }
1653
1654 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1655 {
1656 while (list)
1657 {
1658 /* sljit_set_label is clever enough to do nothing
1659 if either the jump or the label is NULL. */
1660 sljit_set_label(list->jump, label);
1661 list = list->next;
1662 }
1663 }
1664
1665 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1666 {
1667 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1668 if (list_item)
1669 {
1670 list_item->next = *list;
1671 list_item->jump = jump;
1672 *list = list_item;
1673 }
1674 }
1675
1676 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1677 {
1678 DEFINE_COMPILER;
1679 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1680
1681 if (list_item)
1682 {
1683 list_item->type = type;
1684 list_item->data = data;
1685 list_item->start = start;
1686 list_item->quit = LABEL();
1687 list_item->next = common->stubs;
1688 common->stubs = list_item;
1689 }
1690 }
1691
1692 static void flush_stubs(compiler_common *common)
1693 {
1694 DEFINE_COMPILER;
1695 stub_list* list_item = common->stubs;
1696
1697 while (list_item)
1698 {
1699 JUMPHERE(list_item->start);
1700 switch(list_item->type)
1701 {
1702 case stack_alloc:
1703 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1704 break;
1705 }
1706 JUMPTO(SLJIT_JUMP, list_item->quit);
1707 list_item = list_item->next;
1708 }
1709 common->stubs = NULL;
1710 }
1711
/* Emits code that decrements CALL_COUNT by one and, when the result is zero,
   takes a jump that is collected on the common->calllimit list. */
1712 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1713 {
1714 DEFINE_COMPILER;
1715
/* SLJIT_SET_E is requested so that the SLJIT_C_ZERO jump below can test the result. */
1716 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1717 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1718 }
1719
/* Emits code that advances STACK_TOP by size machine words. If the new
   STACK_TOP is greater than STACK_LIMIT, a stack_alloc stub (see add_stub /
   flush_stubs) is entered. */
1720 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1721 {
1722 /* May destroy all locals and registers except TMP2. */
1723 DEFINE_COMPILER;
1724
1725 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
/* With DESTROY_REGISTERS defined, TMP1, TMP3, RETURN_ADDR, LOCALS0 and
   LOCALS1 are overwritten with the constant 12345 (presumably a debugging
   aid to expose code that relies on them surviving -- confirm). */
1726 #ifdef DESTROY_REGISTERS
1727 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1728 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1729 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1730 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1731 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1732 #endif
/* The limit check is out of line: the compare-and-jump is the stub entry. */
1733 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1734 }
1735
/* Emits code that moves STACK_TOP back by size machine words (the inverse of
   the adjustment emitted by allocate_stack). */
1736 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1737 {
1738 DEFINE_COMPILER;
1739 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1740 }
1741
1742 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1743 {
1744 DEFINE_COMPILER;
1745 struct sljit_label *loop;
1746 int i;
1747 /* At this point we can freely use all temporary registers. */
1748 /* TMP1 returns with begin - 1. */
1749 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1750 if (length < 8)
1751 {
1752 for (i = 0; i < length; i++)
1753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1754 }
1755 else
1756 {
1757 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1758 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1759 loop = LABEL();
1760 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1761 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1762 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1763 }
1764 }
1765
/* Emits the code that publishes a successful match: OVECTOR(1) is set to
   STR_PTR, the mark (if the pattern uses one) is stored into the
   jit_arguments structure, offsetcount ovector words are converted to
   offsets relative to begin and written to the caller's offsets array, and
   the return value is computed into SLJIT_RETURN_REG.
   topbracket is used only to bound the return value calculation. */
1766 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1767 {
1768 DEFINE_COMPILER;
1769 struct sljit_label *loop;
1770 struct sljit_jump *earlyexit;
1771
1772 /* At this point we can freely use all registers. */
/* The previous OVECTOR(1) value is kept in SLJIT_SAVED_REG3; the return value
   loop at the end compares ovector words against it. */
1773 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1774 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1775
/* REG1 = arguments, REG2 = offsetcount, REG3 = offsets - sizeof(int);
   afterwards REG1 is replaced by begin. */
1776 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1777 if (common->mark_ptr != 0)
1778 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1779 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1780 if (common->mark_ptr != 0)
1781 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1782 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1783 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1784 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1785 /* Unlikely, but possible */
1786 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
/* Copy loop: one ovector word per iteration, converted to (word - begin). */
1787 loop = LABEL();
1788 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1789 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1790 /* Copy the integer value to the output buffer */
/* In the 16 bit library the difference is shifted right by one before it is stored. */
1791 #ifdef COMPILE_PCRE16
1792 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1793 #endif
1794 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1795 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1796 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1797 JUMPHERE(earlyexit);
1798
1799 /* Calculate the return value, which is the maximum ovector value. */
1800 if (topbracket > 1)
1801 {
/* Walk the ovector downwards from topbracket, two words at a time, while the
   examined word equals the value saved in SLJIT_SAVED_REG3; REG2 counts down
   from topbracket + 1 and becomes the return value. */
1802 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1803 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1804
1805 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1806 loop = LABEL();
1807 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1808 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1809 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1810 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1811 }
1812 else
1813 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1814 }
1815
/* Emits the exit path for a partial match: SLJIT_RETURN_REG is set to
   PCRE_ERROR_PARTIAL and, when the caller supplied at least two offsets,
   offsets[0] and offsets[1] are filled before jumping to quit.
   offsets[1] = STR_END - begin; offsets[0] = (start_used_ptr local in hard
   partial mode, hit_start local otherwise) - begin. */
1816 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1817 {
1818 DEFINE_COMPILER;
1819
/* SLJIT_SAVED_REG2 is overwritten below while STR_END is read in the same instruction. */
1820 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1821 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1822
1823 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1824 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1825 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
/* Fewer than two output slots: return without storing anything. */
1826 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, quit);
1827
1828 /* Store match begin and end. */
1829 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1830 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1831 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1832 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
/* In the 16 bit library both differences are shifted right by one before being stored. */
1833 #ifdef COMPILE_PCRE16
1834 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1835 #endif
1836 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1837
1838 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1839 #ifdef COMPILE_PCRE16
1840 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1841 #endif
1842 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1843
1844 JUMPTO(SLJIT_JUMP, quit);
1845 }
1846
1847 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1848 {
1849 /* May destroy TMP1. */
1850 DEFINE_COMPILER;
1851 struct sljit_jump *jump;
1852
1853 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1854 {
1855 /* The value of -1 must be kept for start_used_ptr! */
1856 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1857 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1858 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1859 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1860 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1861 JUMPHERE(jump);
1862 }
1863 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1864 {
1865 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1866 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1867 JUMPHERE(jump);
1868 }
1869 }
1870
1871 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1872 {
1873 /* Detects if the character has an othercase. */
1874 unsigned int c;
1875
1876 #ifdef SUPPORT_UTF
1877 if (common->utf)
1878 {
1879 GETCHAR(c, cc);
1880 if (c > 127)
1881 {
1882 #ifdef SUPPORT_UCP
1883 return c != UCD_OTHERCASE(c);
1884 #else
1885 return FALSE;
1886 #endif
1887 }
1888 #ifndef COMPILE_PCRE8
1889 return common->fcc[c] != c;
1890 #endif
1891 }
1892 else
1893 #endif
1894 c = *cc;
1895 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1896 }
1897
1898 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1899 {
1900 /* Returns with the othercase. */
1901 #ifdef SUPPORT_UTF
1902 if (common->utf && c > 127)
1903 {
1904 #ifdef SUPPORT_UCP
1905 return UCD_OTHERCASE(c);
1906 #else
1907 return c;
1908 #endif
1909 }
1910 #endif
1911 return TABLE_GET(c, common->fcc, c);
1912 }
1913
1914 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1915 {
1916 /* Detects if the character and its othercase has only 1 bit difference. */
1917 unsigned int c, oc, bit;
1918 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1919 int n;
1920 #endif
1921
1922 #ifdef SUPPORT_UTF
1923 if (common->utf)
1924 {
1925 GETCHAR(c, cc);
1926 if (c <= 127)
1927 oc = common->fcc[c];
1928 else
1929 {
1930 #ifdef SUPPORT_UCP
1931 oc = UCD_OTHERCASE(c);
1932 #else
1933 oc = c;
1934 #endif
1935 }
1936 }
1937 else
1938 {
1939 c = *cc;
1940 oc = TABLE_GET(c, common->fcc, c);
1941 }
1942 #else
1943 c = *cc;
1944 oc = TABLE_GET(c, common->fcc, c);
1945 #endif
1946
1947 SLJIT_ASSERT(c != oc);
1948
1949 bit = c ^ oc;
1950 /* Optimized for English alphabet. */
1951 if (c <= 127 && bit == 0x20)
1952 return (0 << 8) | 0x20;
1953
1954 /* Since c != oc, they must have at least 1 bit difference. */
1955 if (!ispowerof2(bit))
1956 return 0;
1957
1958 #ifdef COMPILE_PCRE8
1959
1960 #ifdef SUPPORT_UTF
1961 if (common->utf && c > 127)
1962 {
1963 n = GET_EXTRALEN(*cc);
1964 while ((bit & 0x3f) == 0)
1965 {
1966 n--;
1967 bit >>= 6;
1968 }
1969 return (n << 8) | bit;
1970 }
1971 #endif /* SUPPORT_UTF */
1972 return (0 << 8) | bit;
1973
1974 #else /* COMPILE_PCRE8 */
1975
1976 #ifdef COMPILE_PCRE16
1977 #ifdef SUPPORT_UTF
1978 if (common->utf && c > 65535)
1979 {
1980 if (bit >= (1 << 10))
1981 bit >>= 10;
1982 else
1983 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
1984 }
1985 #endif /* SUPPORT_UTF */
1986 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
1987 #endif /* COMPILE_PCRE16 */
1988
1989 #endif /* COMPILE_PCRE8 */
1990 }
1991
1992 static void check_partial(compiler_common *common, BOOL force)
1993 {
1994 /* Checks whether a partial matching is occured. Does not modify registers. */
1995 DEFINE_COMPILER;
1996 struct sljit_jump *jump = NULL;
1997
1998 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
1999
2000 if (common->mode == JIT_COMPILE)
2001 return;
2002
2003 if (!force)
2004 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2005 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2006 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2007
2008 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2009 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2010 else
2011 {
2012 if (common->partialmatchlabel != NULL)
2013 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2014 else
2015 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2016 }
2017
2018 if (jump != NULL)
2019 JUMPHERE(jump);
2020 }
2021
2022 static struct sljit_jump *check_str_end(compiler_common *common)
2023 {
2024 /* Does not affect registers. Usually used in a tight spot. */
2025 DEFINE_COMPILER;
2026 struct sljit_jump *jump;
2027 struct sljit_jump *nohit;
2028 struct sljit_jump *return_value;
2029
2030 if (common->mode == JIT_COMPILE)
2031 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2032
2033 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2034 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2035 {
2036 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2037 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2038 JUMPHERE(nohit);
2039 return_value = JUMP(SLJIT_JUMP);
2040 }
2041 else
2042 {
2043 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2044 if (common->partialmatchlabel != NULL)
2045 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2046 else
2047 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2048 }
2049 JUMPHERE(jump);
2050 return return_value;
2051 }
2052
2053 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2054 {
2055 DEFINE_COMPILER;
2056 struct sljit_jump *jump;
2057
2058 if (common->mode == JIT_COMPILE)
2059 {
2060 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2061 return;
2062 }
2063
2064 /* Partial matching mode. */
2065 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2066 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2067 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2068 {
2069 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2070 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2071 }
2072 else
2073 {
2074 if (common->partialmatchlabel != NULL)
2075 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2076 else
2077 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2078 }
2079 JUMPHERE(jump);
2080 }
2081
/* Emits code that loads the character at STR_PTR into TMP1 and moves STR_PTR
   past it. In UTF mode a first code unit that is not below 0xc0 (8 bit
   library) or 0xd800 (16 bit library) is passed to the shared utfreadchar
   routine through a fast call. */
2082 static void read_char(compiler_common *common)
2083 {
2084 /* Reads the character into TMP1, updates STR_PTR.
2085 Does not check STR_END. TMP2 Destroyed. */
2086 DEFINE_COMPILER;
2087 #ifdef SUPPORT_UTF
2088 struct sljit_jump *jump;
2089 #endif
2090
2091 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2092 #ifdef SUPPORT_UTF
2093 if (common->utf)
2094 {
/* Single unit characters jump over the call to the decoder. */
2095 #ifdef COMPILE_PCRE8
2096 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2097 #else
2098 #ifdef COMPILE_PCRE16
2099 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2100 #endif
2101 #endif /* COMPILE_PCRE8 */
2102 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2103 JUMPHERE(jump);
2104 }
2105 #endif
/* Step over the (last) code unit; emitted on both paths. */
2106 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2107 }
2108
/* Emits code that loads the character at STR_PTR into TMP1 without consuming
   it. The structure mirrors read_char: in UTF mode multi-unit characters go
   through the utfreadchar routine, after which STR_PTR is moved back by the
   value left in TMP2. */
2109 static void peek_char(compiler_common *common)
2110 {
2111 /* Reads the character into TMP1, keeps STR_PTR.
2112 Does not check STR_END. TMP2 Destroyed. */
2113 DEFINE_COMPILER;
2114 #ifdef SUPPORT_UTF
2115 struct sljit_jump *jump;
2116 #endif
2117
2118 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2119 #ifdef SUPPORT_UTF
2120 if (common->utf)
2121 {
2122 #ifdef COMPILE_PCRE8
2123 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2124 #else
2125 #ifdef COMPILE_PCRE16
2126 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2127 #endif
2128 #endif /* COMPILE_PCRE8 */
2129 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
/* Undo the STR_PTR advance made by the decoder (TMP2 presumably holds that
   advance on return -- confirm against do_utfreadchar). */
2130 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2131 JUMPHERE(jump);
2132 }
2133 #endif
2134 }
2135
/* Emits code that consumes one character and leaves its entry from the
   common->ctypes table in TMP1; TMP2 is used as scratch. In the 16 bit
   library code units above 255 yield 0 instead of a table lookup. In UTF-8
   mode lead bytes >= 0xc0 are passed to the utfreadtype8 routine; in UTF-16
   mode one extra code unit is skipped after a unit whose top six bits are
   0xd800. */
2136 static void read_char8_type(compiler_common *common)
2137 {
2138 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2139 DEFINE_COMPILER;
2140 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2141 struct sljit_jump *jump;
2142 #endif
2143
2144 #ifdef SUPPORT_UTF
2145 if (common->utf)
2146 {
2147 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2148 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2149 #ifdef COMPILE_PCRE8
2150 /* This can be an extra read in some situations, but hopefully
2151 it is needed in most cases. */
2152 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2153 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2154 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2155 JUMPHERE(jump);
2156 #else
2157 #ifdef COMPILE_PCRE16
/* Default type is 0; the table is read only for units <= 255. */
2158 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2159 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2160 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2161 JUMPHERE(jump);
2162 /* Skip low surrogate if necessary. */
/* TMP2 receives the result of the equality test, is shifted left by one and
   added to STR_PTR. */
2163 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2164 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2165 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2166 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2167 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2168 #endif
2169 #endif /* COMPILE_PCRE8 */
2170 return;
2171 }
2172 #endif
/* Non-UTF path. */
2173 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2174 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2175 #ifdef COMPILE_PCRE16
2176 /* The ctypes array contains only 256 values. */
2177 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2178 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2179 #endif
2180 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2181 #ifdef COMPILE_PCRE16
2182 JUMPHERE(jump);
2183 #endif
2184 }
2185
/* Emits code that moves STR_PTR back by one character. Outside UTF mode this
   is a single subtraction of one code unit; the UTF variants additionally
   step over continuation bytes (8 bit) or the first half of a surrogate
   pair (16 bit). */
2186 static void skip_char_back(compiler_common *common)
2187 {
2188 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2189 DEFINE_COMPILER;
2190 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2191 struct sljit_label *label;
2192
2193 if (common->utf)
2194 {
/* Loop: step back one byte and repeat while the byte just passed has the
   form 10xxxxxx ((byte & 0xc0) == 0x80). */
2195 label = LABEL();
2196 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2197 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2198 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2199 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2200 return;
2201 }
2202 #endif
2203 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2204 if (common->utf)
2205 {
2206 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2207 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2208 /* Skip low surrogate if necessary. */
/* If the unit just passed is in the 0xdc00 block, the result of the equality
   test (shifted left by one) is subtracted from STR_PTR as well. */
2209 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2210 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2211 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2212 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2213 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2214 return;
2215 }
2216 #endif
2217 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2218 }
2219
2220 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2221 {
2222 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2223 DEFINE_COMPILER;
2224
2225 if (nltype == NLTYPE_ANY)
2226 {
2227 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2228 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2229 }
2230 else if (nltype == NLTYPE_ANYCRLF)
2231 {
2232 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2233 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2234 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2235 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2236 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2237 }
2238 else
2239 {
2240 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2241 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2242 }
2243 }
2244
2245 #ifdef SUPPORT_UTF
2246
2247 #ifdef COMPILE_PCRE8
2248 static void do_utfreadchar(compiler_common *common)
2249 {
2250 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2251 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2252 DEFINE_COMPILER;
2253 struct sljit_jump *jump;
2254
2255 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2256 /* Searching for the first zero. */
2257 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2258 jump = JUMP(SLJIT_C_NOT_ZERO);
2259 /* Two byte sequence. */
2260 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2261 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2262 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2263 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2264 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2265 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2266 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2267 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2268 JUMPHERE(jump);
2269
2270 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2271 jump = JUMP(SLJIT_C_NOT_ZERO);
2272 /* Three byte sequence. */
2273 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2274 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2275 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2276 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2277 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2278 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2279 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2280 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2281 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2282 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2283 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2284 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2285 JUMPHERE(jump);
2286
2287 /* Four byte sequence. */
2288 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2289 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2290 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2291 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2292 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2293 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2294 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2295 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2296 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2297 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2298 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2299 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2300 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2301 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2302 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2303 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2304 }
2305
2306 static void do_utfreadtype8(compiler_common *common)
2307 {
2308 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2309 of the character (>= 0xc0). Return value in TMP1. */
2310 DEFINE_COMPILER;
2311 struct sljit_jump *jump;
2312 struct sljit_jump *compare;
2313
2314 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2315
2316 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2317 jump = JUMP(SLJIT_C_NOT_ZERO);
2318 /* Two byte sequence. */
2319 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2320 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2321 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2322 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2323 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2324 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2325 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2326 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2327 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2328
2329 JUMPHERE(compare);
2330 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2331 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2332 JUMPHERE(jump);
2333
2334 /* We only have types for characters less than 256. */
2335 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
2336 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2337 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2338 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2339 }
2340
2341 #else /* COMPILE_PCRE8 */
2342
2343 #ifdef COMPILE_PCRE16
2344 static void do_utfreadchar(compiler_common *common)
2345 {
2346 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2347 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2348 DEFINE_COMPILER;
2349 struct sljit_jump *jump;
2350
2351 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2352 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2353 /* Do nothing, only return. */
2354 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2355
2356 JUMPHERE(jump);
2357 /* Combine two 16 bit characters. */
2358 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2359 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2360 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2361 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2362 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2363 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2364 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2365 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2366 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2367 }
2368 #endif /* COMPILE_PCRE16 */
2369
2370 #endif /* COMPILE_PCRE8 */
2371
2372 #endif /* SUPPORT_UTF */
2373
2374 #ifdef SUPPORT_UCP
2375
2376 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2377 #define UCD_BLOCK_MASK 127
2378 #define UCD_BLOCK_SHIFT 7
2379
2380 static void do_getucd(compiler_common *common)
2381 {
2382 /* Search the UCD record for the character comes in TMP1.
2383 Returns chartype in TMP1 and UCD offset in TMP2. */
2384 DEFINE_COMPILER;
2385
2386 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2387
2388 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2389 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2390 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
2391 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2392 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2393 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2394 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
2395 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2396 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2397 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2398 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2399 }
2400 #endif
2401
2402 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2403 {
2404 DEFINE_COMPILER;
2405 struct sljit_label *mainloop;
2406 struct sljit_label *newlinelabel = NULL;
2407 struct sljit_jump *start;
2408 struct sljit_jump *end = NULL;
2409 struct sljit_jump *nl = NULL;
2410 #ifdef SUPPORT_UTF
2411 struct sljit_jump *singlechar;
2412 #endif
2413 jump_list *newline = NULL;
2414 BOOL newlinecheck = FALSE;
2415 BOOL readuchar = FALSE;
2416
2417 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2418 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2419 newlinecheck = TRUE;
2420
2421 if (firstline)
2422 {
2423 /* Search for the end of the first line. */
2424 SLJIT_ASSERT(common->first_line_end != 0);
2425 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2426
2427 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2428 {
2429 mainloop = LABEL();
2430 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2431 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2432 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2433 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2434 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2435 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2436 JUMPHERE(end);
2437 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2438 }
2439 else
2440 {
2441 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2442 mainloop = LABEL();
2443 /* Continual stores does not cause data dependency. */
2444 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2445 read_char(common);
2446 check_newlinechar(common, common->nltype, &newline, TRUE);
2447 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2448 JUMPHERE(end);
2449 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2450 set_jumps(newline, LABEL());
2451 }
2452
2453 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2454 }
2455
2456 start = JUMP(SLJIT_JUMP);
2457
2458 if (newlinecheck)
2459 {
2460 newlinelabel = LABEL();
2461 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2462 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2463 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2464 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2465 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2466 #ifdef COMPILE_PCRE16
2467 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2468 #endif
2469 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2470 nl = JUMP(SLJIT_JUMP);
2471 }
2472
2473 mainloop = LABEL();
2474
2475 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2476 #ifdef SUPPORT_UTF
2477 if (common->utf) readuchar = TRUE;
2478 #endif
2479 if (newlinecheck) readuchar = TRUE;
2480
2481 if (readuchar)
2482 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2483
2484 if (newlinecheck)
2485 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2486
2487 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2488 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2489 if (common->utf)
2490 {
2491 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2492 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2493 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2494 JUMPHERE(singlechar);
2495 }
2496 #endif
2497 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2498 if (common->utf)
2499 {
2500 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2501 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2502 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2503 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2504 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2505 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2506 JUMPHERE(singlechar);
2507 }
2508 #endif
2509 JUMPHERE(start);
2510
2511 if (newlinecheck)
2512 {
2513 JUMPHERE(end);
2514 JUMPHERE(nl);
2515 }
2516
2517 return mainloop;
2518 }
2519
2520 static SLJIT_INLINE BOOL fast_forward_first_two_chars(compiler_common *common, BOOL firstline)
2521 {
2522 DEFINE_COMPILER;
2523 struct sljit_label *start;
2524 struct sljit_jump *quit;
2525 struct sljit_jump *found;
2526 pcre_int32 chars[4];
2527 pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
2528 int location = 0;
2529 pcre_int32 len, c, bit, caseless;
2530 BOOL must_end;
2531
2532 #ifdef COMPILE_PCRE8
2533 union {
2534 sljit_uh ascombined;
2535 sljit_ub asuchars[2];
2536 } pair;
2537 #else
2538 union {
2539 sljit_ui ascombined;
2540 sljit_uh asuchars[2];
2541 } pair;
2542 #endif
2543
2544 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2545 return FALSE;
2546
2547 while (TRUE)
2548 {
2549 caseless = 0;
2550 must_end = TRUE;
2551 switch(*cc)
2552 {
2553 case OP_CHAR:
2554 must_end = FALSE;
2555 cc++;
2556 break;
2557
2558 case OP_CHARI:
2559 caseless = 1;
2560 must_end = FALSE;
2561 cc++;
2562 break;
2563
2564 case OP_SOD:
2565 case OP_SOM:
2566 case OP_SET_SOM:
2567 case OP_NOT_WORD_BOUNDARY:
2568 case OP_WORD_BOUNDARY:
2569 case OP_EODN:
2570 case OP_EOD:
2571 case OP_CIRC:
2572 case OP_CIRCM:
2573 case OP_DOLL:
2574 case OP_DOLLM:
2575 /* Zero width assertions. */
2576 cc++;
2577 continue;
2578
2579 case OP_PLUS:
2580 case OP_MINPLUS:
2581 case OP_POSPLUS:
2582 cc++;
2583 break;
2584
2585 case OP_EXACT:
2586 cc += 1 + IMM2_SIZE;
2587 break;
2588
2589 case OP_PLUSI:
2590 case OP_MINPLUSI:
2591 case OP_POSPLUSI:
2592 caseless = 1;
2593 cc++;
2594 break;
2595
2596 case OP_EXACTI:
2597 caseless = 1;
2598 cc += 1 + IMM2_SIZE;
2599 break;
2600
2601 default:
2602 return FALSE;
2603 }
2604
2605 len = 1;
2606 #ifdef SUPPORT_UTF
2607 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2608 #endif
2609
2610 if (caseless && char_has_othercase(common, cc))
2611 {
2612 caseless = char_get_othercase_bit(common, cc);
2613 if (caseless == 0)
2614 return FALSE;
2615 #ifdef COMPILE_PCRE8
2616 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2617 #else
2618 if ((caseless & 0x100) != 0)
2619 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2620 else
2621 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2622 #endif
2623 }
2624 else
2625 caseless = 0;
2626
2627 while (len > 0 && location < 2 * 2)
2628 {
2629 c = *cc;
2630 bit = 0;
2631 if (len == (caseless & 0xff))
2632 {
2633 bit = caseless >> 8;
2634 c |= bit;
2635 }
2636
2637 chars[location] = c;
2638 chars[location + 1] = bit;
2639
2640 len--;
2641 location += 2;
2642 cc++;
2643 }
2644
2645 if (location == 2 * 2)
2646 break;
2647 else if (must_end)
2648 return FALSE;
2649 }
2650
2651 if (firstline)
2652 {
2653 SLJIT_ASSERT(common->first_line_end != 0);
2654 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2655 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, 1);
2656 }
2657 else
2658 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2659
2660 start = LABEL();
2661 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2662 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2663 #ifdef COMPILE_PCRE8
2664 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2665 #else /* COMPILE_PCRE8 */
2666 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2667 #endif
2668
2669 #else /* SLJIT_UNALIGNED */
2670
2671 #if defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN
2672 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2673 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2674 #else /* SLJIT_BIG_ENDIAN */
2675 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2676 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2677 #endif /* SLJIT_BIG_ENDIAN */
2678
2679 #ifdef COMPILE_PCRE8
2680 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 8);
2681 #else /* COMPILE_PCRE8 */
2682 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
2683 #endif
2684 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2685
2686 #endif
2687
2688 if (chars[1] != 0 || chars[3] != 0)
2689 {
2690 pair.asuchars[0] = chars[1];
2691 pair.asuchars[1] = chars[3];
2692 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, pair.ascombined);
2693 }
2694
2695 pair.asuchars[0] = chars[0];
2696 pair.asuchars[1] = chars[2];
2697 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, pair.ascombined);
2698
2699 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2700 JUMPTO(SLJIT_JUMP, start);
2701 JUMPHERE(found);
2702 JUMPHERE(quit);
2703
2704 if (firstline)
2705 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2706 else
2707 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2708 return TRUE;
2709 }
2710
2711 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2712 {
2713 DEFINE_COMPILER;
2714 struct sljit_label *start;
2715 struct sljit_jump *quit;
2716 struct sljit_jump *found;
2717 pcre_uchar oc, bit;
2718
2719 if (firstline)
2720 {
2721 SLJIT_ASSERT(common->first_line_end != 0);
2722 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2723 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2724 }
2725
2726 start = LABEL();
2727 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2728 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2729
2730 oc = first_char;
2731 if (caseless)
2732 {
2733 oc = TABLE_GET(first_char, common->fcc, first_char);
2734 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2735 if (first_char > 127 && common->utf)
2736 oc = UCD_OTHERCASE(first_char);
2737 #endif
2738 }
2739 if (first_char == oc)
2740 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2741 else
2742 {
2743 bit = first_char ^ oc;
2744 if (ispowerof2(bit))
2745 {
2746 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2747 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2748 }
2749 else
2750 {
2751 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2752 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2753 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2754 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2755 found = JUMP(SLJIT_C_NOT_ZERO);
2756 }
2757 }
2758
2759 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2760 JUMPTO(SLJIT_JUMP, start);
2761 JUMPHERE(found);
2762 JUMPHERE(quit);
2763
2764 if (firstline)
2765 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2766 }
2767
2768 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2769 {
2770 DEFINE_COMPILER;
2771 struct sljit_label *loop;
2772 struct sljit_jump *lastchar;
2773 struct sljit_jump *firstchar;
2774 struct sljit_jump *quit;
2775 struct sljit_jump *foundcr = NULL;
2776 struct sljit_jump *notfoundnl;
2777 jump_list *newline = NULL;
2778
2779 if (firstline)
2780 {
2781 SLJIT_ASSERT(common->first_line_end != 0);
2782 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2783 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2784 }
2785
2786 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2787 {
2788 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2789 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2790 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2791 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2792 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2793
2794 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2795 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2796 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2797 #ifdef COMPILE_PCRE16
2798 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2799 #endif
2800 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2801
2802 loop = LABEL();
2803 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2804 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2805 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2806 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2807 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2808 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2809
2810 JUMPHERE(quit);
2811 JUMPHERE(firstchar);
2812 JUMPHERE(lastchar);
2813
2814 if (firstline)
2815 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2816 return;
2817 }
2818
2819 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2820 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2821 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2822 skip_char_back(common);
2823
2824 loop = LABEL();
2825 read_char(common);
2826 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2827 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2828 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2829 check_newlinechar(common, common->nltype, &newline, FALSE);
2830 set_jumps(newline, loop);
2831
2832 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2833 {
2834 quit = JUMP(SLJIT_JUMP);
2835 JUMPHERE(foundcr);
2836 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2837 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2838 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2839 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2840 #ifdef COMPILE_PCRE16
2841 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2842 #endif
2843 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2844 JUMPHERE(notfoundnl);
2845 JUMPHERE(quit);
2846 }
2847 JUMPHERE(lastchar);
2848 JUMPHERE(firstchar);
2849
2850 if (firstline)
2851 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2852 }
2853
2854 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2855 {
2856 DEFINE_COMPILER;
2857 struct sljit_label *start;
2858 struct sljit_jump *quit;
2859 struct sljit_jump *found;
2860 #ifndef COMPILE_PCRE8
2861 struct sljit_jump *jump;
2862 #endif
2863
2864 if (firstline)
2865 {
2866 SLJIT_ASSERT(common->first_line_end != 0);
2867 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
2868 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2869 }
2870
2871 start = LABEL();
2872 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2873 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2874 #ifdef SUPPORT_UTF
2875 if (common->utf)
2876 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2877 #endif
2878 #ifndef COMPILE_PCRE8
2879 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2880 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2881 JUMPHERE(jump);
2882 #endif
2883 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2884 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2885 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2886 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2887 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2888 found = JUMP(SLJIT_C_NOT_ZERO);
2889
2890 #ifdef SUPPORT_UTF
2891 if (common->utf)
2892 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2893 #endif
2894 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2895 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2896 if (common->utf)
2897 {
2898 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2899 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2900 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2901 }
2902 #endif
2903 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2904 if (common->utf)
2905 {
2906 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2907 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2908 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2909 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2910 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2911 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2912 }
2913 #endif
2914 JUMPTO(SLJIT_JUMP, start);
2915 JUMPHERE(found);
2916 JUMPHERE(quit);
2917
2918 if (firstline)
2919 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
2920 }
2921
2922 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2923 {
2924 DEFINE_COMPILER;
2925 struct sljit_label *loop;
2926 struct sljit_jump *toolong;
2927 struct sljit_jump *alreadyfound;
2928 struct sljit_jump *found;
2929 struct sljit_jump *foundoc = NULL;
2930 struct sljit_jump *notfound;
2931 pcre_uchar oc, bit;
2932
2933 SLJIT_ASSERT(common->req_char_ptr != 0);
2934 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2935 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2936 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2937 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2938
2939 if (has_firstchar)
2940 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2941 else
2942 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2943
2944 loop = LABEL();
2945 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2946
2947 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2948 oc = req_char;
2949 if (caseless)
2950 {
2951 oc = TABLE_GET(req_char, common->fcc, req_char);
2952 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2953 if (req_char > 127 && common->utf)
2954 oc = UCD_OTHERCASE(req_char);
2955 #endif
2956 }
2957 if (req_char == oc)
2958 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2959 else
2960 {
2961 bit = req_char ^ oc;
2962 if (ispowerof2(bit))
2963 {
2964 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
2965 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
2966 }
2967 else
2968 {
2969 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
2970 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
2971 }
2972 }
2973 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
2974 JUMPTO(SLJIT_JUMP, loop);
2975
2976 JUMPHERE(found);
2977 if (foundoc)
2978 JUMPHERE(foundoc);
2979 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
2980 JUMPHERE(alreadyfound);
2981 JUMPHERE(toolong);
2982 return notfound;
2983 }
2984
2985 static void do_revertframes(compiler_common *common)
2986 {
2987 DEFINE_COMPILER;
2988 struct sljit_jump *jump;
2989 struct sljit_label *mainloop;
2990
2991 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2992 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
2993 GET_LOCAL_BASE(TMP3, 0, 0);
2994
2995 /* Drop frames until we reach STACK_TOP. */
2996 mainloop = LABEL();
2997 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
2998 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
2999 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3000 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3001 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
3002 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
3003 JUMPTO(SLJIT_JUMP, mainloop);
3004
3005 JUMPHERE(jump);
3006 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3007 /* End of dropping frames. */
3008 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3009
3010 JUMPHERE(jump);
3011 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
3012 /* Set string begin. */
3013 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3014 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3015 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3016 JUMPTO(SLJIT_JUMP, mainloop);
3017
3018 JUMPHERE(jump);
3019 if (common->mark_ptr != 0)
3020 {
3021 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3022 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3023 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3024 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3025 JUMPTO(SLJIT_JUMP, mainloop);
3026
3027 JUMPHERE(jump);
3028 }
3029
3030 /* Unknown command. */
3031 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3032 JUMPTO(SLJIT_JUMP, mainloop);
3033 }
3034
3035 static void check_wordboundary(compiler_common *common)
3036 {
3037 DEFINE_COMPILER;
3038 struct sljit_jump *skipread;
3039 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3040 struct sljit_jump *jump;
3041 #endif
3042
3043 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3044
3045 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3046 /* Get type of the previous char, and put it to LOCALS1. */
3047 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3048 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3049 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3050 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3051 skip_char_back(common);
3052 check_start_used_ptr(common);
3053 read_char(common);
3054
3055 /* Testing char type. */
3056 #ifdef SUPPORT_UCP
3057 if (common->use_ucp)
3058 {
3059 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3060 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3061 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3062 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3063 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3064 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3065 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3066 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3067 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3068 JUMPHERE(jump);
3069 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3070 }
3071 else
3072 #endif
3073 {
3074 #ifndef COMPILE_PCRE8
3075 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3076 #elif defined SUPPORT_UTF
3077 /* Here LOCALS1 has already been zeroed. */
3078 jump = NULL;
3079 if (common->utf)
3080 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3081 #endif /* COMPILE_PCRE8 */
3082 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3083 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3084 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3085 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3086 #ifndef COMPILE_PCRE8
3087 JUMPHERE(jump);
3088 #elif defined SUPPORT_UTF
3089 if (jump != NULL)
3090 JUMPHERE(jump);
3091 #endif /* COMPILE_PCRE8 */
3092 }
3093 JUMPHERE(skipread);
3094
3095 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3096 skipread = check_str_end(common);
3097 peek_char(common);
3098
3099 /* Testing char type. This is a code duplication. */
3100 #ifdef SUPPORT_UCP
3101 if (common->use_ucp)
3102 {
3103 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3104 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3105 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3106 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3107 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3108 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3109 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3110 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3111 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3112 JUMPHERE(jump);
3113 }
3114 else
3115 #endif
3116 {
3117 #ifndef COMPILE_PCRE8
3118 /* TMP2 may be destroyed by peek_char. */
3119 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3120 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3121 #elif defined SUPPORT_UTF
3122 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3123 jump = NULL;
3124 if (common->utf)
3125 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3126 #endif
3127 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3128 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3129 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3130 #ifndef COMPILE_PCRE8
3131 JUMPHERE(jump);
3132 #elif defined SUPPORT_UTF
3133 if (jump != NULL)
3134 JUMPHERE(jump);
3135 #endif /* COMPILE_PCRE8 */
3136 }
3137 JUMPHERE(skipread);
3138
3139 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3140 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3141 }
3142
/*
range format:

ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
ranges[1] = first bit (0 or 1)
ranges[2] .. ranges[length + 1] = positions of the bit changes (where the current bit differs from the previous one)
*/
3150
3151 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3152 {
3153 DEFINE_COMPILER;
3154 struct sljit_jump *jump;
3155
3156 if (ranges[0] < 0)
3157 return FALSE;
3158
3159 switch(ranges[0])
3160 {
3161 case 1:
3162 if (readch)
3163 read_char(common);
3164 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3165 return TRUE;
3166
3167 case 2:
3168 if (readch)
3169 read_char(common);
3170 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3171 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3172 return TRUE;
3173
3174 case 4:
3175 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3176 {
3177 if (readch)
3178 read_char(common);
3179 if (ranges[1] != 0)
3180 {
3181 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3182 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3183 }
3184 else
3185 {
3186 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3187 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3188 JUMPHERE(jump);
3189 }
3190 return TRUE;
3191 }
3192 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && ispowerof2(ranges[4] - ranges[2]))
3193 {
3194 if (readch)
3195 read_char(common);
3196 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3197 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3198 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3199 return TRUE;
3200 }
3201 return FALSE;
3202
3203 default:
3204 return FALSE;
3205 }
3206 }
3207
3208 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3209 {
3210 int i, bit, length;
3211 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3212
3213 bit = ctypes[0] & flag;
3214 ranges[0] = -1;
3215 ranges[1] = bit != 0 ? 1 : 0;
3216 length = 0;
3217
3218 for (i = 1; i < 256; i++)
3219 if ((ctypes[i] & flag) != bit)
3220 {
3221 if (length >= MAX_RANGE_SIZE)
3222 return;
3223 ranges[2 + length] = i;
3224 length++;
3225 bit ^= flag;
3226 }
3227
3228 if (bit != 0)
3229 {
3230 if (length >= MAX_RANGE_SIZE)
3231 return;
3232 ranges[2 + length] = 256;
3233 length++;
3234 }
3235 ranges[0] = length;
3236 }
3237
3238 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3239 {
3240 int ranges[2 + MAX_RANGE_SIZE];
3241 pcre_uint8 bit, cbit, all;
3242 int i, byte, length = 0;
3243
3244 bit = bits[0] & 0x1;
3245 ranges[1] = bit;
3246 /* Can be 0 or 255. */
3247 all = -bit;
3248
3249 for (i = 0; i < 256; )
3250 {
3251 byte = i >> 3;
3252 if ((i & 0x7) == 0 && bits[byte] == all)
3253 i += 8;
3254 else
3255 {
3256 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3257 if (cbit != bit)
3258 {
3259 if (length >= MAX_RANGE_SIZE)
3260 return FALSE;
3261 ranges[2 + length] = i;
3262 length++;
3263 bit = cbit;
3264 all = -cbit;
3265 }
3266 i++;
3267 }
3268 }
3269
3270 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3271 {
3272 if (length >= MAX_RANGE_SIZE)
3273 return FALSE;
3274 ranges[2 + length] = 256;
3275 length++;
3276 }
3277 ranges[0] = length;
3278
3279 return check_ranges(common, ranges, backtracks, FALSE);
3280 }
3281
/* Emits a fast-call routine (entered with sljit_emit_fast_enter, left with
sljit_emit_fast_return) which tests the character in TMP1 against 0x0a-0x0d
and 0x85, and additionally against 0x2028/0x2029 when UTF is active or in
the 16 bit library. The result is accumulated in TMP2; the last COND_VALUE
also requests the flags (SLJIT_SET_E). */
3282 static void check_anynewline(compiler_common *common)
3283 {
3284 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* Note: TMP1 is modified as well (0x0a is subtracted, and bit 0 may be set). */
3285 DEFINE_COMPILER;
3286
3287 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3288
/* After the subtraction a single unsigned compare covers 0x0a-0x0d. */
3289 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3290 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3291 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3292 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3293 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3294 #ifdef COMPILE_PCRE8
3295 if (common->utf)
3296 {
3297 #endif
/* Setting bit 0 maps 0x2028 onto 0x2029, so one compare tests both. */
3298 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3299 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3300 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3301 #ifdef COMPILE_PCRE8
3302 }
3303 #endif
3304 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the result of the last emitted compare into TMP2. */
3305 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3306 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3307 }
3308
/* Emits a fast-call routine which tests the character in TMP1 against the
horizontal space characters 0x09, 0x20 and 0xa0, and additionally against
0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000 when UTF is active
or in the 16 bit library. The result is accumulated in TMP2; the last
COND_VALUE also requests the flags (SLJIT_SET_E), which the callers in
this file test with SLJIT_C_ZERO / SLJIT_C_NOT_ZERO. */
3309 static void check_hspace(compiler_common *common)
3310 {
3311 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
3312 DEFINE_COMPILER;
3313
3314 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3315
3316 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3317 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3318 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3319 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3320 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3321 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3322 #ifdef COMPILE_PCRE8
3323 if (common->utf)
3324 {
3325 #endif
3326 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3327 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3328 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3329 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3330 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* From here TMP1 is modified: it holds the character minus 0x2000, so the
0x2000-0x200a range needs only one unsigned compare. */
3331 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3332 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3333 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3334 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3335 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3336 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3337 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3338 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3339 #ifdef COMPILE_PCRE8
3340 }
3341 #endif
3342 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the result of the last emitted compare into TMP2. */
3343 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3344
3345 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3346 }
3347
/* Emits a fast-call routine which tests the character in TMP1 against the
vertical space characters 0x0a-0x0d and 0x85, and additionally against
0x2028/0x2029 when UTF is active or in the 16 bit library. The result is
accumulated in TMP2; the last COND_VALUE also requests the flags
(SLJIT_SET_E), which the callers in this file test with SLJIT_C_ZERO /
SLJIT_C_NOT_ZERO. */
3348 static void check_vspace(compiler_common *common)
3349 {
3350 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
/* Note: TMP1 is modified as well (0x0a is subtracted, and bit 0 may be set). */
3351 DEFINE_COMPILER;
3352
3353 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3354
/* After the subtraction a single unsigned compare covers 0x0a-0x0d. */
3355 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3356 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3357 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3358 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3359 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3360 #ifdef COMPILE_PCRE8
3361 if (common->utf)
3362 {
3363 #endif
/* Setting bit 0 maps 0x2028 onto 0x2029, so one compare tests both. */
3364 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3365 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3366 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3367 #ifdef COMPILE_PCRE8
3368 }
3369 #endif
3370 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merges the result of the last emitted compare into TMP2. */
3371 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3372
3373 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3374 }
3375
/* The compare routines below borrow the STR_END and STACK_TOP registers for
the two characters being compared; both are saved on entry and restored
before returning. */
3376 #define CHAR1 STR_END
3377 #define CHAR2 STACK_TOP
3378
/* Emits a fast-call routine which compares two code unit sequences for
exact equality. As the emitted code shows, on entry TMP1 addresses the first
sequence, TMP2 holds its length in bytes (it is decreased by IN_UCHARS(1) per
iteration) and STR_PTR is first moved back by TMP2. The loop is left either
at the first differing code unit or when TMP2 reaches zero. */
3379 static void do_casefulcmp(compiler_common *common)
3380 {
3381 DEFINE_COMPILER;
3382 struct sljit_jump *jump;
3383 struct sljit_label *label;
3384
3385 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3386 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the registers used as CHAR1 / CHAR2. */
3387 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3388 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one code unit because the loads in the
loop use an offset of IN_UCHARS(1) (MOVU_UCHAR is presumably a load which
also updates the base register - confirm against its definition). */
3389 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3390 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3391
3392 label = LABEL();
3393 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3394 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3395 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3396 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3397 JUMPTO(SLJIT_C_NOT_ZERO, label);
3398
/* Common exit for both the mismatch and the fully compared case. */
3399 JUMPHERE(jump);
3400 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Restore the borrowed registers. */
3401 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3402 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3403 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3404 }
3405
/* The STACK_LIMIT register is borrowed to hold the address of the lower
case table (common->lcc); it is saved in TMP3 and restored on exit. */
3406 #define LCC_TABLE STACK_LIMIT
3407
/* Emits a fast-call routine which compares two code unit sequences after
mapping each code unit through the common->lcc table. The register usage
mirrors the emitted code: TMP1 addresses the first sequence, TMP2 holds the
length in bytes and STR_PTR is first moved back by TMP2. In the non 8 bit
libraries code units above 255 are compared without a table lookup. */
3408 static void do_caselesscmp(compiler_common *common)
3409 {
3410 DEFINE_COMPILER;
3411 struct sljit_jump *jump;
3412 struct sljit_label *label;
3413
3414 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3415 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3416
/* Save the borrowed registers: LCC_TABLE, CHAR1 and CHAR2. */
3417 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3418 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3419 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3420 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one code unit because the loads in the
loop use an offset of IN_UCHARS(1). */
3421 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3422 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3423
3424 label = LABEL();
3425 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3426 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is skipped for values above 255. */
3427 #ifndef COMPILE_PCRE8
3428 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3429 #endif
3430 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3431 #ifndef COMPILE_PCRE8
3432 JUMPHERE(jump);
3433 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3434 #endif
3435 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3436 #ifndef COMPILE_PCRE8
3437 JUMPHERE(jump);
3438 #endif
3439 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3440 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3441 JUMPTO(SLJIT_C_NOT_ZERO, label);
3442
/* Common exit for both the mismatch and the fully compared case. */
3443 JUMPHERE(jump);
3444 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
/* Restore the borrowed registers. */
3445 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3446 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3447 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3448 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3449 }
3450
/* The register aliases are only valid inside the compare routines. */
3451 #undef LCC_TABLE
3452 #undef CHAR1
3453 #undef CHAR2
3454
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two character sequences, called from the generated
code (implementing it at JIT level would be ineffective).

The characters in [src1, end1) are compared one by one with the characters
starting at args->uchar_ptr. Two characters are considered equal when they
are the same, or when the first one is the other case of the second one.

Returns:
  NULL             when a character pair differs
  (pcre_uchar*)1   when args->end is reached before end1
  otherwise        the position after the last compared character of the
                   sequence which starts at args->uchar_ptr */

static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
const pcre_uchar *other = args->uchar_ptr;
const pcre_uchar *other_end = args->end;
int left, right;

for (;;)
  {
  if (src1 >= end1)
    return other;
  if (other >= other_end)
    return (pcre_uchar*)1;

  GETCHARINC(left, src1);
  GETCHARINC(right, other);
  if (left == right)
    continue;
  if (left != UCD_OTHERCASE(right))
    return NULL;
  }
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
3476
/* Emits the compare code for one character of a fixed character sequence.

cc points to the character in the pattern; in UTF mode it may consist of
several code units, all of which are processed. The subject is read at
negative offsets from STR_PTR (-context->length), and context->length is
decreased by IN_UCHARS(1) for every processed code unit. A jump is added to
backtracks for every emitted compare which fails.

When caseless is set and the character has an other case, the single
differing bit is ORed into both the loaded subject value and the expected
constant, so both cases compare equal.

Returns cc advanced past the processed character. */
3477 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3478 compare_context* context, jump_list **backtracks)
3479 {
3480 DEFINE_COMPILER;
3481 unsigned int othercasebit = 0;
3482 pcre_uchar *othercasechar = NULL;
3483 #ifdef SUPPORT_UTF
3484 int utflength;
3485 #endif
3486
3487 if (caseless && char_has_othercase(common, cc))
3488 {
3489 othercasebit = char_get_othercase_bit(common, cc);
3490 SLJIT_ASSERT(othercasebit);
3491 /* Extracting bit difference info. */
/* The upper part of the value selects the code unit (relative to cc) which
contains the differing bit, the low 8 bits are the bit mask itself. */
3492 #ifdef COMPILE_PCRE8
3493 othercasechar = cc + (othercasebit >> 8);
3494 othercasebit &= 0xff;
3495 #else
3496 #ifdef COMPILE_PCRE16
3497 othercasechar = cc + (othercasebit >> 9);
3498 if ((othercasebit & 0x100) != 0)
3499 othercasebit = (othercasebit & 0xff) << 8;
3500 else
3501 othercasebit &= 0xff;
3502 #endif
3503 #endif
3504 }
3505
/* sourcereg == -1 marks the first call for this context: the first subject
data is loaded into TMP1 and the following load will target TMP2. */
3506 if (context->sourcereg == -1)
3507 {
3508 #ifdef COMPILE_PCRE8
3509 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3510 if (context->length >= 4)
3511 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3512 else if (context->length >= 2)
3513 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3514 else
3515 #endif
3516 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3517 #else
3518 #ifdef COMPILE_PCRE16
3519 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3520 if (context->length >= 4)
3521 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3522 else
3523 #endif
3524 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3525 #endif
3526 #endif /* COMPILE_PCRE8 */
3527 context->sourcereg = TMP2;
3528 }
3529
3530 #ifdef SUPPORT_UTF
3531 utflength = 1;
3532 if (common->utf && HAS_EXTRALEN(*cc))
3533 utflength += GET_EXTRALEN(*cc);
3534
3535 do
3536 {
3537 #endif
3538
3539 context->length -= IN_UCHARS(1);
3540 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3541
3542 /* Unaligned read is supported. */
/* Code units are collected in context->c (expected value) and context->oc
(case bit mask) until a 4 / 2 / 1 byte wide compare can be emitted. */
3543 if (othercasebit != 0 && othercasechar == cc)
3544 {
3545 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3546 context->oc.asuchars[context->ucharptr] = othercasebit;
3547 }
3548 else
3549 {
3550 context->c.asuchars[context->ucharptr] = *cc;
3551 context->oc.asuchars[context->ucharptr] = 0;
3552 }
3553 context->ucharptr++;
3554
3555 #ifdef COMPILE_PCRE8
3556 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3557 #else
3558 if (context->ucharptr >= 2 || context->length == 0)
3559 #endif
3560 {
/* The load for the next compare is emitted into sourcereg first, then
sourcereg is switched so the compare below uses the other register, which
holds the previously loaded data. */
3561 if (context->length >= 4)
3562 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3563 #ifdef COMPILE_PCRE8
3564 else if (context->length >= 2)
3565 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3566 else if (context->length >= 1)
3567 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3568 #else
3569 else if (context->length >= 2)
3570 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3571 #endif
3572 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3573
/* The case labels are expressed in code units: 4 or 2 bytes divided by the
size of pcre_uchar. */
3574 switch(context->ucharptr)
3575 {
3576 case 4 / sizeof(pcre_uchar):
3577 if (context->oc.asint != 0)
3578 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3579 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3580 break;
3581
3582 case 2 / sizeof(pcre_uchar):
3583 if (context->oc.asushort != 0)
3584 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3585 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3586 break;
3587
3588 #ifdef COMPILE_PCRE8
3589 case 1:
3590 if (context->oc.asbyte != 0)
3591 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3592 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3593 break;
3594 #endif
3595
3596 default:
3597 SLJIT_ASSERT_STOP();
3598 break;
3599 }
3600 context->ucharptr = 0;
3601 }
3602
3603 #else
3604
3605 /* Unaligned read is unsupported. */
/* One code unit is compared at a time; the next code unit is loaded into
sourcereg before the registers are switched and the current one is compared. */
3606 #ifdef COMPILE_PCRE8
3607 if (context->length > 0)
3608 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3609 #else
3610 if (context->length > 0)
3611 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3612 #endif
3613 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3614
3615 if (othercasebit != 0 && othercasechar == cc)
3616 {
3617 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3618 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3619 }
3620 else
3621 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3622
3623 #endif
3624
3625 cc++;
3626 #ifdef SUPPORT_UTF
3627 utflength--;
3628 }
3629 while (utflength > 0);
3630 #endif
3631
3632 return cc;
3633 }
3634
3635 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3636
3637 #define SET_TYPE_OFFSET(value) \
3638 if ((value) != typeoffset) \
3639 { \
3640 if ((value) > typeoffset) \
3641 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3642 else \
3643 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3644 } \
3645 typeoffset = (value);
3646
3647 #define SET_CHAR_OFFSET(value) \
3648 if ((value) != charoffset) \
3649 { \
3650 if ((value) > charoffset) \
3651 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
3652 else \
3653 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
3654 } \
3655 charoffset = (value);
3656
3657 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3658 {
3659 DEFINE_COMPILER;
3660 jump_list *found = NULL;
3661 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3662 unsigned int c;
3663 int compares;
3664 struct sljit_jump *jump = NULL;
3665 pcre_uchar *ccbegin;
3666 #ifdef SUPPORT_UCP
3667 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3668 BOOL charsaved = FALSE;
3669 int typereg = TMP1, scriptreg = TMP1;
3670 unsigned int typeoffset;
3671 #endif
3672 int invertcmp, numberofcmps;
3673 unsigned int charoffset;
3674
3675 /* Although SUPPORT_UTF must be defined, we are
3676 not necessary in utf mode even in 8 bit mode. */
3677 detect_partial_match(common, backtracks);
3678 read_char(common);
3679
3680 if ((*cc++ & XCL_MAP) != 0)
3681 {
3682 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3683 #ifndef COMPILE_PCRE8
3684 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3685 #elif defined SUPPORT_UTF
3686 if (common->utf)
3687 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3688 #endif
3689
3690 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3691 {
3692 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3693 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3694 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3695 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3696 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3697 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3698 }
3699
3700 #ifndef COMPILE_PCRE8
3701 JUMPHERE(jump);
3702 #elif defined SUPPORT_UTF
3703 if (common->utf)
3704 JUMPHERE(jump);
3705 #endif
3706 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3707 #ifdef SUPPORT_UCP
3708 charsaved = TRUE;
3709 #endif
3710 cc += 32 / sizeof(pcre_uchar);
3711 }
3712
3713 /* Scanning the necessary info. */
3714 ccbegin = cc;
3715 compares = 0;
3716 while (*cc != XCL_END)
3717 {
3718 compares++;
3719 if (*cc == XCL_SINGLE)
3720 {
3721 cc += 2;
3722 #ifdef SUPPORT_UTF
3723 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3724 #endif
3725 #ifdef SUPPORT_UCP
3726 needschar = TRUE;
3727 #endif
3728 }
3729 else if (*cc == XCL_RANGE)
3730 {
3731 cc += 2;
3732 #ifdef SUPPORT_UTF
3733 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3734 #endif
3735 cc++;
3736 #ifdef SUPPORT_UTF
3737 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3738 #endif
3739 #ifdef SUPPORT_UCP
3740 needschar = TRUE;
3741 #endif
3742 }
3743 #ifdef SUPPORT_UCP
3744 else
3745 {
3746 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3747 cc++;
3748 switch(*cc)
3749 {
3750 case PT_ANY:
3751 break;
3752
3753 case PT_LAMP:
3754 case PT_GC:
3755 case PT_PC:
3756 case PT_ALNUM:
3757 needstype = TRUE;
3758 break;
3759
3760 case PT_SC:
3761 needsscript = TRUE;
3762 break;
3763
3764 case PT_SPACE:
3765 case PT_PXSPACE:
3766 case PT_WORD:
3767 needstype = TRUE;
3768 needschar = TRUE;
3769 break;
3770
3771 default:
3772 SLJIT_ASSERT_STOP();
3773 break;
3774 }
3775 cc += 2;
3776 }
3777 #endif
3778 }
3779
3780 #ifdef SUPPORT_UCP
3781 /* Simple register allocation. TMP1 is preferred if possible. */
3782 if (needstype || needsscript)
3783 {
3784 if (needschar && !charsaved)
3785 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3786 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3787 if (needschar)
3788 {
3789 if (needstype)
3790 {
3791 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3792 typereg = RETURN_ADDR;
3793 }
3794
3795 if (needsscript)
3796 scriptreg = TMP3;
3797 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3798 }
3799 else if (needstype && needsscript)
3800 scriptreg = TMP3;
3801 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3802
3803 if (needsscript)
3804 {
3805 if (scriptreg == TMP1)
3806 {
3807 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3808 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3809 }
3810 else
3811 {
3812 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3813 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3814 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3815 }
3816 }
3817 }
3818 #endif
3819
3820 /* Generating code. */
3821 cc = ccbegin;
3822 charoffset = 0;
3823 numberofcmps = 0;
3824 #ifdef SUPPORT_UCP
3825 typeoffset = 0;
3826 #endif
3827
3828 while (*cc != XCL_END)
3829 {
3830 compares--;
3831 invertcmp = (compares == 0 && list != backtracks);
3832 jump = NULL;
3833
3834 if (*cc == XCL_SINGLE)
3835 {
3836 cc ++;
3837 #ifdef SUPPORT_UTF
3838 if (common->utf)
3839 {
3840 GETCHARINC(c, cc);
3841 }
3842 else
3843 #endif
3844 c = *cc++;
3845
3846 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3847 {
3848 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3849 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3850 numberofcmps++;
3851 }
3852 else if (numberofcmps > 0)
3853 {
3854 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3855 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3856 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3857 numberofcmps = 0;
3858 }
3859 else
3860 {
3861 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3862 numberofcmps = 0;
3863 }
3864 }
3865 else if (*cc == XCL_RANGE)
3866 {
3867 cc ++;
3868 #ifdef SUPPORT_UTF
3869 if (common->utf)
3870 {
3871 GETCHARINC(c, cc);
3872 }
3873 else
3874 #endif
3875 c = *cc++;
3876 SET_CHAR_OFFSET(c);
3877 #ifdef SUPPORT_UTF
3878 if (common->utf)
3879 {
3880 GETCHARINC(c, cc);
3881 }
3882 else
3883 #endif
3884 c = *cc++;
3885 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3886 {
3887 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3888 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3889 numberofcmps++;
3890 }
3891 else if (numberofcmps > 0)
3892 {
3893 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3894 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3895 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3896 numberofcmps = 0;
3897 }
3898 else
3899 {
3900 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3901 numberofcmps = 0;
3902 }
3903 }
3904 #ifdef SUPPORT_UCP
3905 else
3906 {
3907 if (*cc == XCL_NOTPROP)
3908 invertcmp ^= 0x1;
3909 cc++;
3910 switch(*cc)
3911 {
3912 case PT_ANY:
3913 if (list != backtracks)
3914 {
3915 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3916 continue;
3917 }
3918 else if (cc[-1] == XCL_NOTPROP)
3919 continue;
3920 jump = JUMP(SLJIT_JUMP);
3921 break;
3922
3923 case PT_LAMP:
3924 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3925 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3926 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3927 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3928 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3929 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3930 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3931 break;
3932
3933 case PT_GC:
3934 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3935 SET_TYPE_OFFSET(c);
3936 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3937 break;
3938
3939 case PT_PC:
3940 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3941 break;
3942
3943 case PT_SC:
3944 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3945 break;
3946
3947 case PT_SPACE:
3948 case PT_PXSPACE:
3949 if (*cc == PT_SPACE)
3950 {
3951 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3952 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3953 }
3954 SET_CHAR_OFFSET(9);
3955 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
3956 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3957 if (*cc == PT_SPACE)
3958 JUMPHERE(jump);
3959
3960 SET_TYPE_OFFSET(ucp_Zl);
3961 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
3962 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3963 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3964 break;
3965
3966 case PT_WORD:
3967 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
3968 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3969 /* ... fall through */
3970
3971 case PT_ALNUM:
3972 SET_TYPE_OFFSET(ucp_Ll);
3973 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3974 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3975 SET_TYPE_OFFSET(ucp_Nd);
3976 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3977 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3978 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3979 break;
3980 }
3981 cc += 2;
3982 }
3983 #endif
3984
3985 if (jump != NULL)
3986 add_jump(compiler, compares > 0 ? list : backtracks, jump);
3987 }
3988
3989 if (found != NULL)
3990 set_jumps(found, LABEL());
3991 }
3992
3993 #undef SET_TYPE_OFFSET
3994 #undef SET_CHAR_OFFSET
3995
3996 #endif
3997
3998 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
3999 {
4000 DEFINE_COMPILER;
4001 int length;
4002 unsigned int c, oc, bit;
4003 compare_context context;
4004 struct sljit_jump *jump[4];
4005 #ifdef SUPPORT_UTF
4006 struct sljit_label *label;
4007 #ifdef SUPPORT_UCP
4008 pcre_uchar propdata[5];
4009 #endif
4010 #endif
4011
4012 switch(type)
4013 {
4014 case OP_SOD:
4015 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4016 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4017 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4018 return cc;
4019
4020 case OP_SOM:
4021 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4022 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4023 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4024 return cc;
4025
4026 case OP_NOT_WORD_BOUNDARY:
4027 case OP_WORD_BOUNDARY:
4028 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4029 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4030 return cc;
4031
4032 case OP_NOT_DIGIT:
4033 case OP_DIGIT:
4034 /* Digits are usually 0-9, so it is worth to optimize them. */
4035 if (common->digits[0] == -2)
4036 get_ctype_ranges(common, ctype_digit, common->digits);
4037 detect_partial_match(common, backtracks);
4038 /* Flip the starting bit in the negative case. */
4039 if (type == OP_NOT_DIGIT)
4040 common->digits[1] ^= 1;
4041 if (!check_ranges(common, common->digits, backtracks, TRUE))
4042 {
4043 read_char8_type(common);
4044 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4045 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4046 }
4047 if (type == OP_NOT_DIGIT)
4048 common->digits[1] ^= 1;
4049 return cc;
4050
4051 case OP_NOT_WHITESPACE:
4052 case OP_WHITESPACE:
4053 detect_partial_match(common, backtracks);
4054 read_char8_type(common);
4055 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4056 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4057 return cc;
4058
4059 case OP_NOT_WORDCHAR:
4060 case OP_WORDCHAR:
4061 detect_partial_match(common, backtracks);
4062 read_char8_type(common);
4063 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4064 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4065 return cc;
4066
4067 case OP_ANY:
4068 detect_partial_match(common, backtracks);
4069 read_char(common);
4070 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4071 {
4072 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4073 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4074 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4075 else
4076 jump[1] = check_str_end(common);
4077
4078 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4079 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4080 if (jump[1] != NULL)
4081 JUMPHERE(jump[1]);
4082 JUMPHERE(jump[0]);
4083 }
4084 else
4085 check_newlinechar(common, common->nltype, backtracks, TRUE);
4086 return cc;
4087
4088 case OP_ALLANY:
4089 detect_partial_match(common, backtracks);
4090 #ifdef SUPPORT_UTF
4091 if (common->utf)
4092 {
4093 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4094 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4095 #ifdef COMPILE_PCRE8
4096 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4097 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4098 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4099 #else /* COMPILE_PCRE8 */
4100 #ifdef COMPILE_PCRE16
4101 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4102 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4103 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4104 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
4105 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4106 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4107 #endif /* COMPILE_PCRE16 */
4108 #endif /* COMPILE_PCRE8 */
4109 JUMPHERE(jump[0]);
4110 return cc;
4111 }
4112 #endif
4113 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4114 return cc;
4115
4116 case OP_ANYBYTE:
4117 detect_partial_match(common, backtracks);
4118 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4119 return cc;
4120
4121 #ifdef SUPPORT_UTF
4122 #ifdef SUPPORT_UCP
4123 case OP_NOTPROP:
4124 case OP_PROP:
4125 propdata[0] = 0;
4126 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4127 propdata[2] = cc[0];
4128 propdata[3] = cc[1];
4129 propdata[4] = XCL_END;
4130 compile_xclass_matchingpath(common, propdata, backtracks);
4131 return cc + 2;
4132 #endif
4133 #endif
4134
4135 case OP_ANYNL:
4136 detect_partial_match(common, backtracks);
4137 read_char(common);
4138 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4139 /* We don't need to handle soft partial matching case. */
4140 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4141 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4142 else
4143 jump[1] = check_str_end(common);
4144 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4145 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4146 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4147 jump[3] = JUMP(SLJIT_JUMP);
4148 JUMPHERE(jump[0]);
4149 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4150 JUMPHERE(jump[1]);
4151 JUMPHERE(jump[2]);
4152 JUMPHERE(jump[3]);
4153 return cc;
4154
4155 case OP_NOT_HSPACE:
4156 case OP_HSPACE:
4157 detect_partial_match(common, backtracks);
4158 read_char(common);
4159 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4160 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4161 return cc;
4162
4163 case OP_NOT_VSPACE:
4164 case OP_VSPACE:
4165 detect_partial_match(common, backtracks);
4166 read_char(common);
4167 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4168 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4169 return cc;
4170
4171 #ifdef SUPPORT_UCP
4172 case OP_EXTUNI:
4173 detect_partial_match(common, backtracks);
4174 read_char(common);
4175 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4176 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4177 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
4178
4179 label = LABEL();
4180 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4181 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4182 read_char(common);
4183 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4184 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4185 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
4186
4187 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4188 JUMPHERE(jump[0]);
4189 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4190 {
4191 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4192 /* Since we successfully read a char above, partial matching must occur. */
4193 check_partial(common, TRUE);
4194 JUMPHERE(jump[0]);
4195 }
4196 return cc;
4197 #endif
4198
4199 case OP_EODN:
4200 /* Requires rather complex checks. */
4201 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4202 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4203 {
4204 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4205 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4206 if (common->mode == JIT_COMPILE)
4207 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4208 else
4209 {
4210 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4211 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4212 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
4213 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4214 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
4215 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4216 check_partial(common, TRUE);
4217 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4218 JUMPHERE(jump[1]);
4219 }
4220 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4221 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4222 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4223 }
4224 else if (common->nltype == NLTYPE_FIXED)
4225 {
4226 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4227 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4228 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4229 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4230 }
4231 else
4232 {
4233 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4234 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4235 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4236 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4237 jump[2] = JUMP(SLJIT_C_GREATER);
4238 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4239 /* Equal. */
4240 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4241 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4242 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4243
4244 JUMPHERE(jump[1]);
4245 if (common->nltype == NLTYPE_ANYCRLF)
4246 {
4247 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4248 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4249 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4250 }
4251 else
4252 {
4253 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4254 read_char(common);
4255 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4256 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4257 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4258 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4259 }
4260 JUMPHERE(jump[2]);
4261 JUMPHERE(jump[3]);
4262 }
4263 JUMPHERE(jump[0]);
4264 check_partial(common, FALSE);
4265 return cc;
4266
4267 case OP_EOD:
4268 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4269 check_partial(common, FALSE);
4270 return cc;
4271
4272 case OP_CIRC:
4273 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4274 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4275 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4276 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4277 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4278 return cc;
4279
4280 case OP_CIRCM:
4281 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4282 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4283 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4284 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4285 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4286 jump[0] = JUMP(SLJIT_JUMP);
4287 JUMPHERE(jump[1]);
4288
4289 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4290 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4291 {
4292 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4293 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4294 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4295 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4296 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4297 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4298 }
4299 else
4300 {
4301 skip_char_back(common);
4302 read_char(common);
4303 check_newlinechar(common, common->nltype, backtracks, FALSE);
4304 }
4305 JUMPHERE(jump[0]);
4306 return cc;
4307
4308 case OP_DOLL:
4309 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4310 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4311 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4312
4313 if (!common->endonly)
4314 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4315 else
4316 {
4317 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4318 check_partial(common, FALSE);
4319 }
4320 return cc;
4321
4322 case OP_DOLLM:
4323 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4324 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4325 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4326 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4327 check_partial(common, FALSE);
4328 jump[0] = JUMP(SLJIT_JUMP);
4329 JUMPHERE(jump[1]);
4330
4331 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4332 {
4333 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4334 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4335 if (common->mode == JIT_COMPILE)
4336 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4337 else
4338 {
4339 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4340 /* STR_PTR = STR_END - IN_UCHARS(1) */
4341 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4342 check_partial(common, TRUE);
4343 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4344 JUMPHERE(jump[1]);
4345 }
4346
4347 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4348 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4349 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4350 }
4351 else
4352 {
4353 peek_char(common);
4354 check_newlinechar(common, common->nltype, backtracks, FALSE);
4355 }
4356 JUMPHERE(jump[0]);
4357 return cc;
4358
4359 case OP_CHAR:
4360 case OP_CHARI:
4361 length = 1;
4362 #ifdef SUPPORT_UTF
4363 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4364 #endif
4365 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4366 {
4367 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4368 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4369
4370 context.length = IN_UCHARS(length);
4371 context.sourcereg = -1;
4372 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4373 context.ucharptr = 0;
4374 #endif
4375 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4376 }
4377 detect_partial_match(common, backtracks);
4378 read_char(common);
4379 #ifdef SUPPORT_UTF
4380 if (common->utf)
4381 {
4382 GETCHAR(c, cc);
4383 }
4384 else
4385 #endif
4386 c = *cc;
4387 if (type == OP_CHAR || !char_has_othercase(common, cc))
4388 {
4389 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4390 return cc + length;
4391 }
4392 oc = char_othercase(common, c);
4393 bit = c ^ oc;
4394 if (ispowerof2(bit))
4395 {
4396 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4397 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4398 return cc + length;
4399 }
4400 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4401 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4402 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4403 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4404 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4405 return cc + length;
4406
4407 case OP_NOT:
4408 case OP_NOTI:
4409 detect_partial_match(common, backtracks);
4410 length = 1;
4411 #ifdef SUPPORT_UTF
4412 if (common->utf)
4413 {
4414 #ifdef COMPILE_PCRE8
4415 c = *cc;
4416 if (c < 128)
4417 {
4418 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4419 if (type == OP_NOT || !char_has_othercase(common, cc))
4420 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4421 else
4422 {
4423 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4424 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4425 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4426 }
4427 /* Skip the variable-length character. */
4428 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4429 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4430 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4431 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4432 JUMPHERE(jump[0]);
4433 return cc + 1;
4434 }
4435 else
4436 #endif /* COMPILE_PCRE8 */
4437 {
4438 GETCHARLEN(c, cc, length);
4439 read_char(common);
4440 }
4441 }
4442 else
4443 #endif /* SUPPORT_UTF */
4444 {
4445 read_char(common);
4446 c = *cc;
4447 }
4448
4449 if (type == OP_NOT || !char_has_othercase(common, cc))
4450 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4451 else
4452 {
4453 oc = char_othercase(common, c);
4454 bit = c ^ oc;
4455 if (ispowerof2(bit))
4456 {
4457 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4458 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4459 }
4460 else
4461 {
4462 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4463 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4464 }
4465 }
4466 return cc + length;
4467
4468 case OP_CLASS:
4469 case OP_NCLASS:
4470 detect_partial_match(common, backtracks);
4471 read_char(common);
4472 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4473 return cc + 32 / sizeof(pcre_uchar);
4474
4475 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4476 jump[0] = NULL;
4477 #ifdef COMPILE_PCRE8
4478 /* This check only affects 8 bit mode. In other modes, we
4479 always need to compare the value with 255. */
4480 if (common->utf)
4481 #endif /* COMPILE_PCRE8 */
4482 {
4483 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4484 if (type == OP_CLASS)
4485 {
4486 add_jump(compiler, backtracks, jump[0]);
4487 jump[0] = NULL;
4488 }
4489 }
4490 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4491 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4492 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4493 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
4494 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4495 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4496 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4497 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4498 if (jump[0] != NULL)
4499 JUMPHERE(jump[0]);
4500 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4501 return cc + 32 / sizeof(pcre_uchar);
4502
4503 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
4504 case OP_XCLASS:
4505 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4506 return cc + GET(cc, 0) - 1;
4507 #endif
4508
4509 case OP_REVERSE:
4510 length = GET(cc, 0);
4511 if (length == 0)
4512 return cc + LINK_SIZE;
4513 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4514 #ifdef SUPPORT_UTF
4515 if (common->utf)
4516 {
4517 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4518 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4519 label = LABEL();
4520 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4521 skip_char_back(common);
4522 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4523 JUMPTO(SLJIT_C_NOT_ZERO, label);
4524 }
4525 else
4526 #endif
4527 {
4528 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4529 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4530 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4531 }
4532 check_start_used_ptr(common);
4533 return cc + LINK_SIZE;
4534 }
4535 SLJIT_ASSERT_STOP();
4536 return cc;
4537 }
4538
4539 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4540 {
4541 /* This function consumes at least one input character. */
4542 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4543 DEFINE_COMPILER;
4544 pcre_uchar *ccbegin = cc;
4545 compare_context context;
4546 int size;
4547
4548 context.length = 0;
4549 do
4550 {
4551 if (cc >= ccend)
4552 break;
4553
4554 if (*cc == OP_CHAR)
4555 {
4556 size = 1;
4557 #ifdef SUPPORT_UTF
4558 if (common->utf && HAS_EXTRALEN(cc[1]))
4559 size += GET_EXTRALEN(cc[1]);
4560 #endif
4561 }
4562 else if (*cc == OP_CHARI)
4563 {
4564 size = 1;
4565 #ifdef SUPPORT_UTF
4566 if (common->utf)
4567 {
4568 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4569 size = 0;
4570 else if (HAS_EXTRALEN(cc[1]))
4571 size += GET_EXTRALEN(cc[1]);
4572 }
4573 else
4574 #endif
4575 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4576 size = 0;
4577 }
4578 else
4579 size = 0;
4580
4581 cc += 1 + size;
4582 context.length += IN_UCHARS(size);
4583 }
4584 while (size > 0 && context.length <= 128);
4585
4586 cc = ccbegin;
4587 if (context.length > 0)
4588 {
4589 /* We have a fixed-length byte sequence. */
4590 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4591 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4592
4593 context.sourcereg = -1;
4594 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4595 context.ucharptr = 0;
4596 #endif
4597 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4598 return cc;
4599 }
4600
4601 /* A non-fixed length character will be checked if length == 0. */
4602 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4603 }
4604
4605 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4606 {
4607 DEFINE_COMPILER;
4608 int offset = GET2(cc, 1) << 1;
4609
4610 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4611 if (!common->jscript_compat)
4612 {
4613 if (backtracks == NULL)
4614 {
4615 /* OVECTOR(1) contains the "string begin - 1" constant. */
4616 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4617 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4618 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4619 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4620 return JUMP(SLJIT_C_NOT_ZERO);
4621 }
4622 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4623 }
4624 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4625 }
4626
4627 /* Forward definitions. */
4628 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4629 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4630
4631 #define PUSH_BACKTRACK(size, ccstart, error) \
4632 do \
4633 { \
4634 backtrack = sljit_alloc_memory(compiler, (size)); \
4635 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4636 return error; \
4637 memset(backtrack, 0, size); \
4638 backtrack->prev = parent->top; \
4639 backtrack->cc = (ccstart); \
4640 parent->top = backtrack; \
4641 } \
4642 while (0)
4643
4644 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
4645 do \
4646 { \
4647 backtrack = sljit_alloc_memory(compiler, (size)); \
4648 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4649 return; \
4650 memset(backtrack, 0, size); \
4651 backtrack->prev = parent->top; \
4652 backtrack->cc = (ccstart); \
4653 parent->top = backtrack; \
4654 } \
4655 while (0)
4656
4657 #define BACKTRACK_AS(type) ((type *)backtrack)
4658
4659 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4660 {
4661 DEFINE_COMPILER;
4662 int offset = GET2(cc, 1) << 1;
4663 struct sljit_jump *jump = NULL;
4664 struct sljit_jump *partial;
4665 struct sljit_jump *nopartial;
4666
4667 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4668 /* OVECTOR(1) contains the "string begin - 1" constant. */
4669 if (withchecks && !common->jscript_compat)
4670 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4671
4672 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4673 if (common->utf && *cc == OP_REFI)
4674 {
4675 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
4676 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4677 if (withchecks)
4678 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4679
4680 /* Needed to save important temporary registers. */
4681 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4682 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
4683 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4684 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4685 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4686 if (common->mode == JIT_COMPILE)
4687 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4688 else
4689 {
4690 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4691 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4692 check_partial(common, FALSE);
4693 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4694 JUMPHERE(nopartial);
4695 }
4696 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4697 }
4698 else
4699 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4700 {
4701 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4702 if (withchecks)
4703 jump = JUMP(SLJIT_C_ZERO);
4704
4705 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4706 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4707 if (common->mode == JIT_COMPILE)
4708 add_jump(compiler, backtracks, partial);
4709
4710 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4711 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4712
4713 if (common->mode != JIT_COMPILE)
4714 {
4715 nopartial = JUMP(SLJIT_JUMP);
4716 JUMPHERE(partial);
4717 /* TMP2 -= STR_END - STR_PTR */
4718 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4719 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
4720 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4721 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4722 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4723 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4724 JUMPHERE(partial);
4725 check_partial(common, FALSE);
4726 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4727 JUMPHERE(nopartial);
4728 }
4729 }
4730
4731 if (jump != NULL)
4732 {
4733 if (emptyfail)
4734 add_jump(compiler, backtracks, jump);
4735 else
4736 JUMPHERE(jump);
4737 }
4738 return cc + 1 + IMM2_SIZE;
4739 }
4740
4741 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4742 {
4743 DEFINE_COMPILER;
4744 backtrack_common *backtrack;
4745 pcre_uchar type;
4746 struct sljit_label *label;
4747 struct sljit_jump *zerolength;
4748 struct sljit_jump *jump = NULL;
4749 pcre_uchar *ccbegin = cc;
4750 int min = 0, max = 0;
4751 BOOL minimize;
4752
4753 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4754
4755 type = cc[1 + IMM2_SIZE];
4756 minimize = (type & 0x1) != 0;
4757 switch(type)
4758 {
4759 case OP_CRSTAR:
4760 case OP_CRMINSTAR:
4761 min = 0;
4762 max = 0;
4763 cc += 1 + IMM2_SIZE + 1;
4764 break;
4765 case OP_CRPLUS:
4766 case OP_CRMINPLUS:
4767 min = 1;
4768 max = 0;
4769 cc += 1 + IMM2_SIZE + 1;
4770 break;
4771 case OP_CRQUERY:
4772 case OP_CRMINQUERY:
4773 min = 0;
4774 max = 1;
4775 cc += 1 + IMM2_SIZE + 1;
4776 break;
4777 case OP_CRRANGE:
4778 case OP_CRMINRANGE:
4779 min = GET2(cc, 1 + IMM2_SIZE + 1);
4780 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4781 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4782 break;
4783 default:
4784 SLJIT_ASSERT_STOP();
4785 break;
4786 }
4787
4788 if (!minimize)
4789 {
4790 if (min == 0)
4791 {
4792 allocate_stack(common, 2);
4793 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4794 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4795 /* Temporary release of STR_PTR. */
4796 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4797 zerolength = compile_ref_checks(common, ccbegin, NULL);
4798 /* Restore if not zero length. */
4799 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4800 }
4801 else
4802 {
4803 allocate_stack(common, 1);
4804 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4805 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4806 }
4807
4808 if (min > 1 || max > 1)
4809 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4810
4811 label = LABEL();
4812 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4813
4814 if (min > 1 || max > 1)
4815 {
4816 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4817 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4818 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4819 if (min > 1)
4820 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4821 if (max > 1)
4822 {
4823 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4824 allocate_stack(common, 1);
4825 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4826 JUMPTO(SLJIT_JUMP, label);
4827 JUMPHERE(jump);
4828 }
4829 }
4830
4831 if (max == 0)
4832 {
4833 /* Includes min > 1 case as well. */
4834 allocate_stack(common, 1);
4835 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4836 JUMPTO(SLJIT_JUMP, label);
4837 }
4838
4839 JUMPHERE(zerolength);
4840 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4841
4842 decrease_call_count(common);
4843 return cc;
4844 }
4845
4846 allocate_stack(common, 2);
4847 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4848 if (type != OP_CRMINSTAR)
4849 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4850
4851 if (min == 0)
4852 {
4853 zerolength = compile_ref_checks(common, ccbegin, NULL);
4854 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4855 jump = JUMP(SLJIT_JUMP);
4856 }
4857 else
4858 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4859
4860 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4861 if (max > 0)
4862 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4863
4864 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4865 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4866
4867 if (min > 1)
4868 {
4869 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4870 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4871 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4872 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
4873 }
4874 else if (max > 0)
4875 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4876
4877 if (jump != NULL)
4878 JUMPHERE(jump);
4879 JUMPHERE(zerolength);
4880
4881 decrease_call_count(common);
4882 return cc;
4883 }
4884
4885 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4886 {
4887 DEFINE_COMPILER;
4888 backtrack_common *backtrack;
4889 recurse_entry *entry = common->entries;
4890 recurse_entry *prev = NULL;
4891 int start = GET(cc, 1);
4892
4893 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
4894 while (entry != NULL)
4895 {
4896 if (entry->start == start)
4897 break;
4898 prev = entry;
4899 entry = entry->next;
4900 }
4901
4902 if (entry == NULL)
4903 {
4904 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4905 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4906 return NULL;
4907 entry->next = NULL;
4908 entry->entry = NULL;
4909 entry->calls = NULL;
4910 entry->start = start;
4911
4912 if (prev != NULL)
4913 prev->next = entry;
4914 else
4915 common->entries = entry;
4916 }
4917
4918 if (common->has_set_som && common->mark_ptr != 0)
4919 {
4920 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4921 allocate_stack(common, 2);
4922 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
4923 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4924 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4925 }
4926 else if (common->has_set_som || common->mark_ptr != 0)
4927 {
4928 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
4929 allocate_stack(common, 1);
4930 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4931 }
4932
4933 if (entry->entry == NULL)
4934 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4935 else
4936 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4937 /* Leave if the match is failed. */
4938 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4939 return cc + 1 + LINK_SIZE;
4940 }
4941
4942 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
4943 {
4944 DEFINE_COMPILER;
4945 int framesize;
4946 int private_data_ptr;
4947 backtrack_common altbacktrack;
4948 pcre_uchar *ccbegin;
4949 pcre_uchar opcode;
4950 pcre_uchar bra = OP_BRA;
4951 jump_list *tmp = NULL;
4952 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
4953 jump_list **found;
4954 /* Saving previous accept variables. */
4955 struct sljit_label *save_quitlabel = common->quitlabel;
4956 struct sljit_label *save_acceptlabel = common->acceptlabel;
4957 jump_list *save_quit = common->quit;
4958 jump_list *save_accept = common->accept;
4959 struct sljit_jump *jump;
4960 struct sljit_jump *brajump = NULL;
4961
4962 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
4963 {
4964 SLJIT_ASSERT(!conditional);
4965 bra = *cc;
4966 cc++;
4967 }
4968 private_data_ptr = PRIVATE_DATA(cc);
4969 SLJIT_ASSERT(private_data_ptr != 0);
4970 framesize = get_framesize(common, cc, FALSE);
4971 backtrack->framesize = framesize;
4972 backtrack->private_data_ptr = private_data_ptr;
4973 opcode = *cc;
4974 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
4975 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
4976 ccbegin = cc;
4977 cc += GET(cc, 1);
4978
4979 if (bra == OP_BRAMINZERO)
4980 {
4981 /* This is a braminzero backtrack path. */
4982 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
4983 free_stack(common, 1);
4984 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
4985 }
4986
4987 if (framesize < 0)
4988 {
4989 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
4990 allocate_stack(common, 1);
4991 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4992 }
4993 else
4994 {
4995 allocate_stack(common, framesize + 2);
4996 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
4997 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
4998 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
4999 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5000 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5001 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5002 }
5003
5004 memset(&altbacktrack, 0, sizeof(backtrack_common));
5005 common->quitlabel = NULL;
5006 common->quit = NULL;
5007 while (1)
5008 {
5009 common->acceptlabel = NULL;
5010 common->accept = NULL;
5011 altbacktrack.top = NULL;
5012 altbacktrack.topbacktracks = NULL;
5013
5014 if (*ccbegin == OP_ALT)
5015 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5016
5017 altbacktrack.cc = ccbegin;
5018 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5019 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5020 {
5021 common->quitlabel = save_quitlabel;
5022 common->acceptlabel = save_acceptlabel;
5023 common->quit = save_quit;
5024 common->accept = save_accept;
5025 return NULL;
5026 }
5027 common->acceptlabel = LABEL();
5028 if (common->accept != NULL)
5029 set_jumps(common->accept, common->acceptlabel);
5030
5031 /* Reset stack. */
5032 if (framesize < 0)
5033 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5034 else {
5035 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5036 {
5037 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5038 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5039 }
5040 else
5041 {
5042 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5043 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5044 }
5045 }
5046
5047 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5048 {
5049 /* We know that STR_PTR was stored on the top of the stack. */
5050 if (conditional)
5051 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5052 else if (bra == OP_BRAZERO)
5053 {
5054 if (framesize < 0)
5055 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5056 else
5057 {
5058 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5059 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
5060 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5061 }
5062 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5063 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5064 }
5065 else if (framesize >= 0)
5066 {
5067 /* For OP_BRA and OP_BRAMINZERO. */
5068 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5069 }
5070 }
5071 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5072
5073 compile_backtrackingpath(common, altbacktrack.top);
5074 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5075 {
5076 common->quitlabel = save_quitlabel;
5077 common->acceptlabel = save_acceptlabel;
5078 common->quit = save_quit;
5079 common->accept = save_accept;
5080 return NULL;
5081 }
5082 set_jumps(altbacktrack.topbacktracks, LABEL());
5083
5084 if (*cc != OP_ALT)
5085 break;
5086
5087 ccbegin = cc;
5088 cc += GET(cc, 1);
5089 }
5090 /* None of them matched. */
5091 if (common->quit != NULL)
5092 set_jumps(common->quit, LABEL());
5093
5094 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5095 {
5096 /* Assert is failed. */
5097 if (conditional || bra == OP_BRAZERO)
5098 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5099
5100 if (framesize < 0)
5101 {
5102 /* The topmost item should be 0. */
5103 if (bra == OP_BRAZERO)
5104 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5105 else
5106 free_stack(common, 1);
5107 }
5108 else
5109 {
5110 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5111 /* The topmost item should be 0. */
5112 if (bra == OP_BRAZERO)
5113 {
5114 free_stack(common, framesize + 1);
5115 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5116 }
5117 else
5118 free_stack(common, framesize + 2);
5119 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5120 }
5121 jump = JUMP(SLJIT_JUMP);
5122 if (bra != OP_BRAZERO)
5123 add_jump(compiler, target, jump);
5124
5125 /* Assert is successful. */
5126 set_jumps(tmp, LABEL());
5127 if (framesize < 0)
5128 {
5129 /* We know that STR_PTR was stored on the top of the stack. */
5130 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5131 /* Keep the STR_PTR on the top of the stack. */
5132 if (bra == OP_BRAZERO)
5133 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5134 else if (bra == OP_BRAMINZERO)
5135 {
5136 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5137 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5138 }
5139 }
5140 else
5141 {
5142 if (bra == OP_BRA)
5143 {
5144 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5145 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5146 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5147 }
5148 else
5149 {
5150 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5151 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
5152 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5153 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5154 }
5155 }
5156
5157 if (bra == OP_BRAZERO)
5158 {
5159 backtrack->matchingpath = LABEL();
5160 sljit_set_label(jump, backtrack->matchingpath);
5161 }
5162 else if (bra == OP_BRAMINZERO)
5163 {
5164 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5165 JUMPHERE(brajump);
5166 if (framesize >= 0)
5167 {
5168 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5169 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5170 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5171 }
5172 set_jumps(backtrack->common.topbacktracks, LABEL());
5173 }
5174 }
5175 else
5176 {
5177 /* AssertNot is successful. */
5178 if (framesize < 0)
5179 {
5180 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5181 if (bra != OP_BRA)
5182 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5183 else
5184 free_stack(common, 1);
5185 }
5186 else
5187 {
5188 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5189 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5190 /* The topmost item should be 0. */
5191 if (bra != OP_BRA)
5192 {
5193 free_stack(common, framesize + 1);
5194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5195 }
5196 else
5197 free_stack(common, framesize + 2);
5198 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5199 }
5200
5201 if (bra == OP_BRAZERO)
5202 backtrack->matchingpath = LABEL();
5203 else if (bra == OP_BRAMINZERO)
5204 {
5205 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5206 JUMPHERE(brajump);
5207 }
5208
5209 if (bra != OP_BRA)
5210 {
5211 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5212 set_jumps(backtrack->common.topbacktracks, LABEL());
5213 backtrack->common.topbacktracks = NULL;
5214 }
5215 }
5216
5217 common->quitlabel = save_quitlabel;
5218 common->acceptlabel = save_acceptlabel;
5219 common->quit = save_quit;
5220 common->accept = save_accept;
5221 return cc + 1 + LINK_SIZE;
5222 }
5223
5224 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
5225 {
5226 int condition = FALSE;
5227 pcre_uchar *slotA = name_table;
5228 pcre_uchar *slotB;
5229 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5230 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5231 sljit_w no_capture;
5232 int i;
5233
5234 locals += refno & 0xff;
5235 refno >>= 8;
5236 no_capture = locals[1];
5237
5238 for (i = 0; i < name_count; i++)
5239 {
5240 if (GET2(slotA, 0) == refno) break;
5241 slotA += name_entry_size;
5242 }
5243
5244 if (i < name_count)
5245 {
5246 /* Found a name for the number - there can be only one; duplicate names
5247 for different numbers are allowed, but not vice versa. First scan down
5248 for duplicates. */
5249
5250 slotB = slotA;
5251 while (slotB > name_table)
5252 {
5253 slotB -= name_entry_size;
5254 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5255 {
5256 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5257 if (condition) break;
5258 }
5259 else break;
5260 }
5261
5262 /* Scan up for duplicates */
5263 if (!condition)
5264 {
5265 slotB = slotA;
5266 for (i++; i < name_count; i++)
5267 {
5268 slotB += name_entry_size;
5269 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5270 {
5271 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5272 if (condition) break;
5273 }
5274 else break;
5275 }
5276 }
5277 }
5278 return condition;
5279 }
5280
5281 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
5282 {
5283 int condition = FALSE;
5284 pcre_uchar *slotA = name_table;
5285 pcre_uchar *slotB;
5286 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5287 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5288 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
5289 int i;
5290
5291 for (i = 0; i < name_count; i++)
5292 {
5293 if (GET2(slotA, 0) == recno) break;
5294 slotA += name_entry_size;
5295 }
5296
5297 if (i < name_count)
5298 {
5299 /* Found a name for the number - there can be only one; duplicate
5300 names for different numbers are allowed, but not vice versa. First
5301 scan down for duplicates. */
5302
5303 slotB = slotA;
5304 while (slotB > name_table)
5305 {
5306 slotB -= name_entry_size;
5307 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5308 {
5309 condition = GET2(slotB, 0) == group_num;
5310 if (condition) break;
5311 }
5312 else break;
5313 }
5314
5315 /* Scan up for duplicates */
5316 if (!condition)
5317 {
5318 slotB = slotA;
5319 for (i++; i < name_count; i++)
5320 {
5321 slotB += name_entry_size;
5322 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5323 {
5324 condition = GET2(slotB, 0) == group_num;
5325 if (condition) break;
5326 }
5327 else break;
5328 }
5329 }
5330 }
5331 return condition;
5332 }
5333
5334 /*
5335 Handling bracketed expressions is probably the most complex part.
5336
5337 Stack layout naming characters:
5338 S - Push the current STR_PTR
5339 0 - Push a 0 (NULL)
5340 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5341 before the next alternative. Not pushed if there are no alternatives.
5342 M - Any values pushed by the current alternative. Can be empty, or anything.
5343 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5344 L - Push the previous local (pointed to by private_data_ptr) to the stack
5345 () - optional values stored on the stack
5346 ()* - optional, can be stored multiple times
5347
5348 The following list shows the regular expression templates, their PCRE byte codes
5349 and stack layout supported by pcre-sljit.
5350
5351 (?:) OP_BRA | OP_KET A M
5352 () OP_CBRA | OP_KET C M
5353 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5354 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5355 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5356 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5357 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5358 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5359 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5360 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5361 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5362 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5363 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5364 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5365 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5366 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5367 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5368 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5369 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5370 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5371 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5372 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5373
5374
5375 Stack layout naming characters:
5376 A - Push the alternative index (starting from 0) on the stack.
5377 Not pushed if there are no alternatives.
5378 M - Any values pushed by the current alternative. Can be empty, or anything.
5379
5380 The next list shows the possible content of a bracket:
5381 (|) OP_*BRA | OP_ALT ... M A
5382 (?()|) OP_*COND | OP_ALT M A
5383 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5384 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5385 Or nothing, if trace is unnecessary
5386 */
5387
/*
Emits the matching path of a bracketed group: an optional OP_BRAZERO or
OP_BRAMINZERO prefix, a bracket opcode (the code below has dedicated cases for
OP_CBRA/OP_SCBRA, OP_ONCE/OP_ONCE_NC, OP_SBRA and OP_COND/OP_SCOND; any other
bracket only gets the "has alternatives" handling) and a closing OP_KET,
OP_KETRMAX or OP_KETRMIN.

Only the first alternative is compiled here; the remaining OP_ALT branches are
merely skipped when the return value is computed.

Arguments:
  common   the compiler state
  cc       points to the first opcode of the group (the prefix, if present)
  parent   the enclosing backtrack record; for an OP_BRAMINZERO prefix it is
           accessed as a braminzero_backtrack

Returns:   pointer to the code following the closing KET, or NULL if the
           sljit compiler reports an error. A conditional group whose
           condition opcode is OP_DEF emits nothing and returns bracketend(cc).
*/
5388 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5389 {
5390 DEFINE_COMPILER;
5391 backtrack_common *backtrack;
5392 pcre_uchar opcode;
5393 int private_data_ptr = 0;
/* Stays 0 unless the group is capturing; tested later with offset != 0. */
5394 int offset = 0;
/* Scratch variable: stack slot counts, and the operand of reference conditions. */
5395 int stacksize;
5396 pcre_uchar *ccbegin;
5397 pcre_uchar *matchingpath;
5398 pcre_uchar bra = OP_BRA;
5399 pcre_uchar ket;
5400 assert_backtrack *assert;
5401 BOOL has_alternatives;
5402 struct sljit_jump *jump;
5403 struct sljit_jump *skip;
5404 struct sljit_label *rmaxlabel = NULL;
5405 struct sljit_jump *braminzerojump = NULL;
5406
/* NOTE(review): PUSH_BACKTRACK is defined elsewhere; presumably it allocates
the bracket_backtrack record, links it to parent and assigns 'backtrack'. */
5407 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5408
5409 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5410 {
5411 bra = *cc;
5412 cc++;
5413 opcode = *cc;
5414 }
5415
/* Repeats the assignment above when a prefix was consumed; required otherwise. */
5416 opcode = *cc;
5417 ccbegin = cc;
5418 matchingpath = ccbegin + 1 + LINK_SIZE;
5419
/* A conditional group whose condition opcode is OP_DEF generates no code. */
5420 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5421 {
5422 /* Drop this bracket_backtrack. */
5423 parent->top = backtrack->prev;
5424 return bracketend(cc);
5425 }
5426
5427 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5428 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5429 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* From here cc points to the end of the first alternative (OP_ALT or the KET). */
5430 cc += GET(cc, 1);
5431
5432 has_alternatives = *cc == OP_ALT;
/* For conditional groups has_alternatives is decided by the condition type
instead. In the OP_NRREF case stacksize temporarily holds its operand. */
5433 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5434 {
5435 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5436 if (*matchingpath == OP_NRREF)
5437 {
5438 stacksize = GET2(matchingpath, 1);
5439 if (common->currententry == NULL || stacksize == RREF_ANY)
5440 has_alternatives = FALSE;
5441 else if (common->currententry->start == 0)
5442 has_alternatives = stacksize != 0;
5443 else
5444 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5445 }
5446 }
5447
/* Fold opcodes which are handled identically below: a repeated OP_COND is
treated as OP_SCOND, and OP_ONCE_NC as OP_ONCE. */
5448 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5449 opcode = OP_SCOND;
5450 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5451 opcode = OP_ONCE;
5452
5453 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5454 {
5455 /* Capturing brackets have a pre-allocated space. */
5456 offset = GET2(ccbegin, 1 + LINK_SIZE);
5457 private_data_ptr = OVECTOR_PRIV(offset);
5458 offset <<= 1;
5459 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5460 matchingpath += IMM2_SIZE;
5461 }
5462 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5463 {
5464 /* Other brackets simply allocate the next entry. */
5465 private_data_ptr = PRIVATE_DATA(ccbegin);
5466 SLJIT_ASSERT(private_data_ptr != 0);
5467 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5468 if (opcode == OP_ONCE)
5469 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5470 }
5471
5472 /* Instructions before the first alternative. */
/* The slots are counted first so that one allocate_stack call is enough;
the same conditions are then evaluated again to fill the slots. */
5473 stacksize = 0;
5474 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5475 stacksize++;
5476 if (bra == OP_BRAZERO)
5477 stacksize++;
5478
5479 if (stacksize > 0)
5480 allocate_stack(common, stacksize);
5481
5482 stacksize = 0;
5483 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5484 {
5485 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5486 stacksize++;
5487 }
5488
5489 if (bra == OP_BRAZERO)
5490 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5491
5492 if (bra == OP_BRAMINZERO)
5493 {
5494 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5495 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5496 if (ket != OP_KETRMIN)
5497 {
5498 free_stack(common, 1);
5499 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5500 }
5501 else
5502 {
/* NOTE(review): opcode >= OP_SBRA relies on the numeric order of the bracket
opcodes, which is defined elsewhere - confirm which opcodes it covers. */
5503 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5504 {
5505 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5506 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5507 /* Nothing stored during the first run. */
5508 skip = JUMP(SLJIT_JUMP);
5509 JUMPHERE(jump);
5510 /* Checking zero-length iteration. */
5511 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5512 {
5513 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5514 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5515 }
5516 else
5517 {
5518 /* Except when the whole stack frame must be saved. */
5519 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5520 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
5521 }
5522 JUMPHERE(skip);
5523 }
5524 else
5525 {
5526 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5527 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5528 JUMPHERE(jump);
5529 }
5530 }
5531 }
5532
5533 if (ket == OP_KETRMIN)
5534 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5535
5536 if (ket == OP_KETRMAX)
5537 {
/* rmaxlabel is the target of the loop-back jump emitted after the body. */
5538 rmaxlabel = LABEL();
5539 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5540 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5541 }
5542
5543 /* Handling capturing brackets and alternatives. */
5544 if (opcode == OP_ONCE)
5545 {
5546 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5547 {
5548 /* Neither capturing brackets nor recursions are found in the block. */
5549 if (ket == OP_KETRMIN)
5550 {
5551 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5552 allocate_stack(common, 2);
5553 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5554 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5555 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5556 }
5557 else if (ket == OP_KETRMAX || has_alternatives)
5558 {
5559 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5560 allocate_stack(common, 1);
5561 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5562 }
5563 else
5564 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5565 }
5566 else
5567 {
/* A frame is required. The variant with two extra slots also stores STR_PTR
at STACK(0); the other variant stores only the previous private_data_ptr value. */
5568 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5569 {
5570 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5571 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5572 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5573 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5574 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5575 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5576 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5577 }
5578 else
5579 {
5580 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5581 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5582 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5583 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5584 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5585 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5586 }
5587 }
5588 }
5589 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5590 {
5591 /* Saving the previous values. */
/* Three slots: OVECTOR(offset), OVECTOR(offset + 1) and the old value stored
at private_data_ptr, which is then overwritten with the current STR_PTR. */
5592 allocate_stack(common, 3);
5593 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5594 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5595 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5596 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5597 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5598 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5599 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5600 }
5601 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5602 {
5603 /* Saving the previous value. */
5604 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5605 allocate_stack(common, 1);
5606 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5607 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5608 }
5609 else if (has_alternatives)
5610 {
5611 /* Pushing the starting string pointer. */
5612 allocate_stack(common, 1);
5613 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5614 }
5615
5616 /* Generating code for the first alternative. */
/* For conditional groups the condition itself is compiled first. Failing
conditions are collected in u.condfailed; matchingpath is advanced past the
condition opcode. */
5617 if (opcode == OP_COND || opcode == OP_SCOND)
5618 {
5619 if (*matchingpath == OP_CREF)
5620 {
5621 SLJIT_ASSERT(has_alternatives);
/* The condition fails when the start slot of the group equals OVECTOR(1). */
5622 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5623 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5624 matchingpath += 1 + IMM2_SIZE;
5625 }
5626 else if (*matchingpath == OP_NCREF)
5627 {
5628 SLJIT_ASSERT(has_alternatives);
5629 stacksize = GET2(matchingpath, 1);
/* Inline check of the referenced group; do_searchovector is called only if
that group's start slot equals OVECTOR(1). */
5630 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5631
/* STACK_TOP is saved in POSSESSIVE1 before the call and reloaded after it.
LOCALS0 and LOCALS1 carry the name table parameters read by the callee. */
5632 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5633 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5634 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
/* First argument: group number above the low 8 bits, ovector word offset in the low 8 bits. */
5635 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
5636 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5637 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5638 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5639 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5640 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5641
5642 JUMPHERE(jump);
5643 matchingpath += 1 + IMM2_SIZE;
5644 }
5645 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
5646 {
5647 /* Never has other case. */
5648 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5649
/* stacksize becomes the compile-time truth value of the recursion test. */
5650 stacksize = GET2(matchingpath, 1);
5651 if (common->currententry == NULL)
5652 stacksize = 0;
5653 else if (stacksize == RREF_ANY)
5654 stacksize = 1;
5655 else if (common->currententry->start == 0)
5656 stacksize = stacksize == 0;
5657 else
5658 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5659
5660 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
5661 {
/* The outcome is known at compile time: only the selected branch is compiled. */
5662 SLJIT_ASSERT(!has_alternatives);
5663 if (stacksize != 0)
5664 matchingpath += 1 + IMM2_SIZE;
5665 else
5666 {
5667 if (*cc == OP_ALT)
5668 {
5669 matchingpath = cc + 1 + LINK_SIZE;
5670 cc += GET(cc, 1);
5671 }
5672 else
5673 matchingpath = cc;
5674 }
5675 }
5676 else
5677 {
5678 SLJIT_ASSERT(has_alternatives);
5679
/* Same call protocol as for do_searchovector above; POSSESSIVE0 additionally
carries the group number read at common->currententry->start. */
5680 stacksize = GET2(matchingpath, 1);
5681 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5682 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5683 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5684 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5685 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
5686 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5687 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5688 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5689 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5690 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5691 matchingpath += 1 + IMM2_SIZE;
5692 }
5693 }
5694 else
5695 {
/* The condition is an assertion: it is compiled by compile_assert_matchingpath
with its last argument set to TRUE, using a separately allocated record. */
5696 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
5697 /* Similar code as PUSH_BACKTRACK macro. */
5698 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5699 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5700 return NULL;
5701 memset(assert, 0, sizeof(assert_backtrack));
5702 assert->common.cc = matchingpath;
5703 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5704 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
5705 }
5706 }
5707
/* Compile the body of the first alternative: from matchingpath up to cc. */
5708 compile_matchingpath(common, matchingpath, cc, backtrack);
5709 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5710 return NULL;
5711
5712 if (opcode == OP_ONCE)
5713 {
/* STACK_TOP is reset from the value kept at private_data_ptr. */
5714 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5715 {
5716 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5717 /* TMP2 which is set here used by OP_KETRMAX below. */
5718 if (ket == OP_KETRMAX)
5719 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5720 else if (ket == OP_KETRMIN)
5721 {
5722 /* Move the STR_PTR to the private_data_ptr. */
5723 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5724 }
5725 }
5726 else
5727 {
/* stacksize mirrors the number of extra slots chosen when the frame was created. */
5728 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5729 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
5730 if (ket == OP_KETRMAX)
5731 {
5732 /* TMP2 which is set here used by OP_KETRMAX below. */
5733 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5734 }
5735 }
5736 }
5737
/* Slots pushed after the alternative: again counted first, then filled. */
5738 stacksize = 0;
5739 if (ket != OP_KET || bra != OP_BRA)
5740 stacksize++;
5741 if (has_alternatives && opcode != OP_ONCE)
5742 stacksize++;
5743
5744 if (stacksize > 0)
5745 allocate_stack(common, stacksize);
5746
5747 stacksize = 0;
5748 if (ket != OP_KET)
5749 {
5750 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5751 stacksize++;
5752 }
5753 else if (bra != OP_BRA)
5754 {
5755 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5756 stacksize++;
5757 }
5758
5759 if (has_alternatives)
5760 {
/* The value 0 is stored for this (first) alternative. */
5761 if (opcode != OP_ONCE)
5762 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5763 if (ket != OP_KETRMAX)
5764 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5765 }
5766
5767 /* Must be after the matchingpath label. */
/* Capturing group: the saved start (kept at private_data_ptr) and the current
STR_PTR are written to the ovector pair of the group. */
5768 if (offset != 0)
5769 {
5770 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5771 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5772 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5773 }
5774
5775 if (ket == OP_KETRMAX)
5776 {
5777 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5778 {
5779 if (has_alternatives)
5780 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5781 /* Checking zero-length iteration. */
/* The loop is repeated only if STR_PTR differs from the saved start value. */
5782 if (opcode != OP_ONCE)
5783 {
5784 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
5785 /* Drop STR_PTR for greedy plus quantifier. */
5786 if (bra != OP_BRAZERO)
5787 free_stack(common, 1);
5788 }
5789 else
5790 /* TMP2 must contain the starting STR_PTR. */
5791 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5792 }
5793 else
5794 JUMPTO(SLJIT_JUMP, rmaxlabel);
5795 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5796 }
5797
5798 if (bra == OP_BRAZERO)
5799 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
5800
5801 if (bra == OP_BRAMINZERO)
5802 {
5803 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
/* NOTE(review): parent is reinterpreted as a braminzero_backtrack here;
presumably the caller always passes such a record for this prefix - confirm. */
5804 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
5805 if (braminzerojump != NULL)
5806 {
5807 JUMPHERE(braminzerojump);
5808 /* We need to release the end pointer to perform the
5809 backtrack for the zero-length iteration. When
5810 framesize is < 0, OP_ONCE will do the release itself. */
5811 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5812 {
5813 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5814 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5815 }
5816 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5817 free_stack(common, 1);
5818 }
5819 /* Continue to the normal backtrack. */
5820 }
5821
5822 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5823 decrease_call_count(common);
5824
5825 /* Skip the other alternatives. */
5826 while (*cc == OP_ALT)
5827 cc += GET(cc, 1);
/* cc is at the closing KET now; step over it and its link field. */
5828 cc += 1 + LINK_SIZE;
5829 return cc;
5830 }
5831
5832 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5833 {
5834 DEFINE_COMPILER;
5835 backtrack_common *backtrack;
5836 pcre_uchar opcode;
5837 int private_data_ptr;
5838 int cbraprivptr = 0;
5839 int framesize;
5840 int stacksize;
5841 int offset = 0;
5842 BOOL zero = FALSE;
5843 pcre_uchar *ccbegin = NULL;
5844 int stack;
5845 struct sljit_label *loop = NULL;
5846 struct jump_list *emptymatch = NULL;
5847
5848 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
5849 if (*cc == OP_BRAPOSZERO)
5850 {
5851 zero = TRUE;
5852 cc++;
5853 }
5854
5855 opcode = *cc;
5856 private_data_ptr = PRIVATE_DATA(cc);
5857 SLJIT_ASSERT(private_data_ptr != 0);
5858 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
5859 switch(opcode)
5860 {
5861 case OP_BRAPOS:
5862 case OP_SBRAPOS:
5863 ccbegin = cc + 1 + LINK_SIZE;
5864 break;
5865
5866 case OP_CBRAPOS:
5867 case OP_SCBRAPOS:
5868 offset = GET2(cc, 1 + LINK_SIZE);
5869 cbraprivptr = OVECTOR_PRIV(offset);
5870 offset <<= 1;
5871 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5872 break;
5873
5874 default:
5875 SLJIT_ASSERT_STOP();
5876 break;
5877 }
5878
5879 framesize = get_framesize(common, cc, FALSE);
5880 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
5881 if (framesize < 0)
5882 {
5883 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
5884 if (!zero)
5885 stacksize++;
5886 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5887 allocate_stack(common, stacksize);
5888 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5889
5890 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5891 {
5892 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5893 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5894 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5895 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5896 }
5897 else
5898 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5899
5900 if (!zero)
5901 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
5902 }
5903 else
5904 {
5905 stacksize = framesize + 1;
5906 if (!zero)
5907 stacksize++;
5908 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5909 stacksize++;
5910 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5911 allocate_stack(common, stacksize);
5912
5913 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5914 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5915 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5916 stack = 0;
5917 if (!zero)
5918 {
5919 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5920 stack++;
5921 }
5922 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5923 {
5924 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
5925 stack++;
5926 }
5927 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
5928 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
5929 }
5930
5931 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5932 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5933
5934 loop = LABEL();
5935 while (*cc != OP_KETRPOS)
5936 {
5937 backtrack->top = NULL;
5938 backtrack->topbacktracks = NULL;
5939 cc += GET(cc, 1);
5940
5941 compile_matchingpath(common, ccbegin, cc, backtrack);
5942 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5943 return NULL;
5944
5945 if (framesize < 0)
5946 {
5947 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5948
5949 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5950 {
5951 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5952 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5953 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5954 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5955 }
5956 else
5957 {
5958 if (opcode == OP_SBRAPOS)
5959 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5960 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5961 }
5962
5963 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5964 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5965
5966 if (!zero)
5967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5968 }
5969 else
5970 {
5971 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5972 {
5973 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
5974 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
5975 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5976 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
5977 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
5978 }
5979 else
5980 {
5981 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5982 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
5983 if (opcode == OP_SBRAPOS)
5984 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
5985 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
5986 }
5987
5988 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
5989 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
5990
5991 if (!zero)
5992 {
5993 if (framesize < 0)
5994 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
5995 else
5996 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5997 }
5998 }
5999 JUMPTO(SLJIT_JUMP, loop);
6000 flush_stubs(common);
6001
6002 compile_backtrackingpath(common, backtrack->top);
6003 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6004 return NULL;
6005 set_jumps(backtrack->topbacktracks, LABEL());
6006
6007 if (framesize < 0)
6008 {
6009 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6010 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6011 else
6012 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6013 }
6014 else
6015 {
6016 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6017 {
6018 /* Last alternative. */
6019 if (*cc == OP_KETRPOS)
6020 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6021 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6022 }
6023 else
6024 {
6025 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6026 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6027 }
6028 }
6029
6030 if (*cc == OP_KETRPOS)
6031 break;
6032 ccbegin = cc + 1 + LINK_SIZE;
6033 }
6034
6035 backtrack->topbacktracks = NULL;
6036 if (!zero)
6037 {
6038 if (framesize < 0)
6039 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6040 else /* TMP2 is set to [private_data_ptr] above. */
6041 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
6042 }
6043
6044 /* None of them matched. */
6045 set_jumps(emptymatch, LABEL());
6046 decrease_call_count(common);
6047 return cc + 1 + LINK_SIZE;
6048 }
6049
6050 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6051 {
6052 int class_len;
6053
6054 *opcode = *cc;
6055 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6056 {
6057 cc++;
6058 *type = OP_CHAR;
6059 }
6060 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6061 {
6062 cc++;
6063 *type = OP_CHARI;
6064 *opcode -= OP_STARI - OP_STAR;
6065 }
6066 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6067 {
6068 cc++;
6069 *type = OP_NOT;
6070 *opcode -= OP_NOTSTAR - OP_STAR;
6071 }
6072 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6073 {
6074 cc++;
6075 *type = OP_NOTI;
6076 *opcode -= OP_NOTSTARI - OP_STAR;
6077 }
6078 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6079 {
6080 cc++;
6081 *opcode -= OP_TYPESTAR - OP_STAR;
6082 *type = 0;
6083 }
6084 else
6085 {
6086 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6087 *type = *opcode;
6088 cc++;
6089 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6090 *opcode = cc[class_len - 1];
6091 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6092 {
6093 *opcode -= OP_CRSTAR - OP_STAR;
6094 if (end != NULL)
6095 *end = cc + class_len;
6096 }
6097 else
6098 {
6099 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6100 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6101 *arg2 = GET2(cc, class_len);
6102
6103 if (*arg2 == 0)
6104 {
6105 SLJIT_ASSERT(*arg1 != 0);
6106 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6107 }
6108 if (*arg1 == *arg2)
6109 *opcode = OP_EXACT;
6110
6111 if (end != NULL)
6112 *end = cc + class_len + 2 * IMM2_SIZE;
6113 }
6114 return cc;
6115 }
6116
6117 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6118 {
6119 *arg1 = GET2(cc, 0);
6120 cc += IMM2_SIZE;
6121 }
6122
6123 if (*type == 0)
6124 {
6125 *type = *cc;
6126 if (end != NULL)
6127 *end = next_opcode(common, cc);
6128 cc++;
6129 return cc;
6130 }
6131
6132 if (end != NULL)
6133 {
6134 *end = cc + 1;
6135 #ifdef SUPPORT_UTF
6136 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6137 #endif
6138 }
6139 return cc;
6140 }
6141
/* Emits the matching path of a single-item iterator (repeat) opcode at cc.
The opcode is decoded by get_iterator_parameters(): "type" is the item that
compile_char1_matchingpath() matches, "opcode" selects the repeat strategy
handled by the second switch below. PUSH_BACKTRACK (defined outside this
function) sets up "backtrack" with room for an iterator_backtrack; the label
stored in its matchingpath member is where the backtracking path re-enters.
Returns the "end" address reported by get_iterator_parameters(). */
6142 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6143 {
6144 DEFINE_COMPILER;
6145 backtrack_common *backtrack;
6146 pcre_uchar opcode;
6147 pcre_uchar type;
6148 int arg1 = -1, arg2 = -1;
6149 pcre_uchar* end;
6150 jump_list *nomatch = NULL;
6151 struct sljit_jump *jump = NULL;
6152 struct sljit_label *label;
6153 int private_data_ptr = PRIVATE_DATA(cc);
/* The iterator keeps two words of state. Without a private data slot
(private_data_ptr == 0) they are STACK(0) and STACK(1) of the machine stack,
otherwise private_data_ptr and the following word of the local frame. */
6154 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6155 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6156 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_w);
6157 int tmp_base, tmp_offset;
6158
6159 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6160
6161 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6162
/* Select the scratch location (tmp_base, tmp_offset) used by the OP_EXACT
and possessive cases: the TMP3 register for the first group of types, the
POSSESSIVE0 local for the second group. */
6163 switch (type)
6164 {
6165 case OP_NOT_DIGIT:
6166 case OP_DIGIT:
6167 case OP_NOT_WHITESPACE:
6168 case OP_WHITESPACE:
6169 case OP_NOT_WORDCHAR:
6170 case OP_WORDCHAR:
6171 case OP_ANY:
6172 case OP_ALLANY:
6173 case OP_ANYBYTE:
6174 case OP_ANYNL:
6175 case OP_NOT_HSPACE:
6176 case OP_HSPACE:
6177 case OP_NOT_VSPACE:
6178 case OP_VSPACE:
6179 case OP_CHAR:
6180 case OP_CHARI:
6181 case OP_NOT:
6182 case OP_NOTI:
6183 case OP_CLASS:
6184 case OP_NCLASS:
6185 tmp_base = TMP3;
6186 tmp_offset = 0;
6187 break;
6188
6189 default:
6190 SLJIT_ASSERT_STOP();
6191 /* Fall through. */
6192
6193 case OP_EXTUNI:
6194 case OP_XCLASS:
6195 case OP_NOTPROP:
6196 case OP_PROP:
6197 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6198 tmp_offset = POSSESSIVE0;
6199 break;
6200 }
6201
6202 switch(opcode)
6203 {
/* Greedy repeats. */
6204 case OP_STAR:
6205 case OP_PLUS:
6206 case OP_UPTO:
6207 case OP_CRRANGE:
6208 if (type == OP_ANYNL || type == OP_EXTUNI)
6209 {
/* For these two types every position reached is pushed as a separate
stack word. A zero word is stored first; the backtracking path of this
iterator pops words and stops when it reads that zero. */
6210 SLJIT_ASSERT(private_data_ptr == 0);
6211 if (opcode == OP_STAR || opcode == OP_UPTO)
6212 {
/* Zero items is acceptable, so the start position is pushed too. */
6213 allocate_stack(common, 2);
6214 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6215 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6216 }
6217 else
6218 {
6219 allocate_stack(common, 1);
6220 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6221 }
6222
/* Counted variants keep the number of matched items in POSSESSIVE0. */
6223 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6224 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6225
6226 label = LABEL();
6227 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6228 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6229 {
6230 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6231 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
/* While fewer than arg2 items were matched, loop again without pushing a
position. Note that this jump is taken before the incremented counter is
written back to POSSESSIVE0. */
6232 if (opcode == OP_CRRANGE && arg2 > 0)
6233 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
/* Leave the loop once arg1 items were matched. */
6234 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6235 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6236 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6237 }
6238
6239 /* We cannot use TMP3 because of this allocate_stack. */
6240 allocate_stack(common, 1);
6241 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6242 JUMPTO(SLJIT_JUMP, label);
6243 if (jump != NULL)
6244 JUMPHERE(jump);
6245 }
6246 else
6247 {
/* Other types: offset0 holds the position after the last matched item.
offset1 holds the start position for OP_STAR / OP_PLUS, or an item counter
that starts at 1 for the counted variants. */
6248 if (opcode == OP_PLUS)
6249 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6250 if (private_data_ptr == 0)
6251 allocate_stack(common, 2);
6252 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6253 if (opcode <= OP_PLUS)
6254 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6255 else
6256 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6257 label = LABEL();
6258 compile_char1_matchingpath(common, type, cc, &nomatch);
6259 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6260 if (opcode <= OP_PLUS)
6261 JUMPTO(SLJIT_JUMP, label);
6262 else if (opcode == OP_CRRANGE && arg1 == 0)
6263 {
/* arg1 == 0: only count, the loop ends when the item stops matching. */
6264 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6265 JUMPTO(SLJIT_JUMP, label);
6266 }
6267 else
6268 {
/* Continue while the counter (which started at 1) is at most arg1. */
6269 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6270 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6271 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6272 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6273 }
6274 set_jumps(nomatch, LABEL());
/* A range fails when the counter shows fewer than arg2 matched items. */
6275 if (opcode == OP_CRRANGE)
6276 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
/* Continue from the position after the last successfully matched item. */
6277 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6278 }
6279 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6280 break;
6281
/* Minimizing repeats only record the current position here; further items
are matched by the code generated for the backtracking path. */
6282 case OP_MINSTAR:
6283 case OP_MINPLUS:
6284 if (opcode == OP_MINPLUS)
6285 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6286 if (private_data_ptr == 0)
6287 allocate_stack(common, 1);
6288 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6289 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6290 break;
6291
6292 case OP_MINUPTO:
6293 case OP_CRMINRANGE:
6294 if (private_data_ptr == 0)
6295 allocate_stack(common, 2);
/* offset0: current position, offset1: counter starting at 1. */
6296 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6297 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
/* A minimizing class range jumps unconditionally to the backtracking path
before the matchingpath label. */
6298 if (opcode == OP_CRMINRANGE)
6299 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6300 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6301 break;
6302
/* Optional item: save the position, then try the item first (OP_QUERY) or
continue without it (OP_MINQUERY). */
6303 case OP_QUERY:
6304 case OP_MINQUERY:
6305 if (private_data_ptr == 0)
6306 allocate_stack(common, 1);
6307 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6308 if (opcode == OP_QUERY)
6309 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6310 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6311 break;
6312
/* Exact count: count down from arg1 in the scratch location. No matchingpath
label is recorded for this case. */
6313 case OP_EXACT:
6314 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6315 label = LABEL();
6316 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6317 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6318 JUMPTO(SLJIT_C_NOT_ZERO, label);
6319 break;
6320
/* Possessive repeats: the scratch location remembers the position after the
last matched item; when the item fails, STR_PTR is restored from it and
matching simply continues. */
6321 case OP_POSSTAR:
6322 case OP_POSPLUS:
6323 case OP_POSUPTO:
6324 if (opcode == OP_POSPLUS)
6325 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
/* OP_POSUPTO counts down from arg1 in POSSESSIVE1. */
6326 if (opcode == OP_POSUPTO)
6327 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6328 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6329 label = LABEL();
6330 compile_char1_matchingpath(common, type, cc, &nomatch);
6331 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6332 if (opcode != OP_POSUPTO)
6333 JUMPTO(SLJIT_JUMP, label);
6334 else
6335 {
6336 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6337 JUMPTO(SLJIT_C_NOT_ZERO, label);
6338 }
6339 set_jumps(nomatch, LABEL());
6340 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6341 break;
6342
/* Possessive optional item: one attempt, position restored on failure. */
6343 case OP_POSQUERY:
6344 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6345 compile_char1_matchingpath(common, type, cc, &nomatch);
6346 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6347 set_jumps(nomatch, LABEL());
6348 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6349 break;
6350
6351 default:
6352 SLJIT_ASSERT_STOP();
6353 break;
6354 }
6355
6356 decrease_call_count(common);
6357 return end;
6358 }
6359
6360 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6361 {
6362 DEFINE_COMPILER;
6363 backtrack_common *backtrack;
6364
6365 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6366
6367 if (*cc == OP_FAIL)
6368 {
6369 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6370 return cc + 1;
6371 }
6372
6373 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6374 {
6375 /* No need to check notempty conditions. */
6376 if (common->acceptlabel == NULL)
6377 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6378 else
6379 JUMPTO(SLJIT_JUMP, common->acceptlabel);
6380 return cc + 1;
6381 }
6382
6383 if (common->acceptlabel == NULL)
6384 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6385 else
6386 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
6387 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6388 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6389 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6390 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6391 if (common->acceptlabel == NULL)
6392 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6393 else
6394 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
6395 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6396 if (common->acceptlabel == NULL)
6397 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6398 else
6399 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
6400 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6401 return cc + 1;
6402 }
6403
6404 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
6405 {
6406 DEFINE_COMPILER;
6407 int offset = GET2(cc, 1);
6408
6409 /* Data will be discarded anyway... */
6410 if (common->currententry != NULL)
6411 return cc + 1 + IMM2_SIZE;
6412
6413 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6414 offset <<= 1;
6415 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6416 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6417 return cc + 1 + IMM2_SIZE;
6418 }
6419
/* Emits the matching path for the opcodes in the range [cc, ccend). Each
opcode is dispatched to the helper that generates its code; the helper returns
the address of the next opcode. Generation stops early when a helper returns
NULL or when an unexpected opcode is found.

Helpers that take a jump list receive the nextbacktracks list of the most
recently pushed backtrack (parent->top), or parent->topbacktracks when nothing
has been pushed yet. That expression is evaluated again at every call, because
the cases using PUSH_BACKTRACK_NOVALUE, and the helpers that receive parent,
appear to push new entries on parent. */
6420 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6421 {
6422 DEFINE_COMPILER;
6423 backtrack_common *backtrack;
6424
6425 while (cc < ccend)
6426 {
6427 switch(*cc)
6428 {
/* Opcodes handled entirely by compile_char1_matchingpath(). */
6429 case OP_SOD:
6430 case OP_SOM:
6431 case OP_NOT_WORD_BOUNDARY:
6432 case OP_WORD_BOUNDARY:
6433 case OP_NOT_DIGIT:
6434 case OP_DIGIT:
6435 case OP_NOT_WHITESPACE:
6436 case OP_WHITESPACE:
6437 case OP_NOT_WORDCHAR:
6438 case OP_WORDCHAR:
6439 case OP_ANY:
6440 case OP_ALLANY:
6441 case OP_ANYBYTE:
6442 case OP_NOTPROP:
6443 case OP_PROP:
6444 case OP_ANYNL:
6445 case OP_NOT_HSPACE:
6446 case OP_HSPACE:
6447 case OP_NOT_VSPACE:
6448 case OP_VSPACE:
6449 case OP_EXTUNI:
6450 case OP_EODN:
6451 case OP_EOD:
6452 case OP_CIRC:
6453 case OP_CIRCM:
6454 case OP_DOLL:
6455 case OP_DOLLM:
6456 case OP_NOT:
6457 case OP_NOTI:
6458 case OP_REVERSE:
6459 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6460 break;
6461
/* The previous OVECTOR(0) is saved on the stack and replaced by the current
position. */
6462 case OP_SET_SOM:
6463 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6464 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6465 allocate_stack(common, 1);
6466 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6467 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6468 cc++;
6469 break;
6470
/* The multi-character generator is used only when common->mode is
JIT_COMPILE; in other modes characters are emitted one at a time. */
6471 case OP_CHAR:
6472 case OP_CHARI:
6473 if (common->mode == JIT_COMPILE)
6474 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6475 else
6476 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6477 break;
6478
/* All single-item repeat opcodes share one generator. */
6479 case OP_STAR:
6480 case OP_MINSTAR:
6481 case OP_PLUS:
6482 case OP_MINPLUS:
6483 case OP_QUERY:
6484 case OP_MINQUERY:
6485 case OP_UPTO:
6486 case OP_MINUPTO:
6487 case OP_EXACT:
6488 case OP_POSSTAR:
6489 case OP_POSPLUS:
6490 case OP_POSQUERY:
6491 case OP_POSUPTO:
6492 case OP_STARI:
6493 case OP_MINSTARI:
6494 case OP_PLUSI:
6495 case OP_MINPLUSI:
6496 case OP_QUERYI:
6497 case OP_MINQUERYI:
6498 case OP_UPTOI:
6499 case OP_MINUPTOI:
6500 case OP_EXACTI:
6501 case OP_POSSTARI:
6502 case OP_POSPLUSI:
6503 case OP_POSQUERYI:
6504 case OP_POSUPTOI:
6505 case OP_NOTSTAR:
6506 case OP_NOTMINSTAR:
6507 case OP_NOTPLUS:
6508 case OP_NOTMINPLUS:
6509 case OP_NOTQUERY:
6510 case OP_NOTMINQUERY:
6511 case OP_NOTUPTO:
6512 case OP_NOTMINUPTO:
6513 case OP_NOTEXACT:
6514 case OP_NOTPOSSTAR:
6515 case OP_NOTPOSPLUS:
6516 case OP_NOTPOSQUERY:
6517 case OP_NOTPOSUPTO:
6518 case OP_NOTSTARI:
6519 case OP_NOTMINSTARI:
6520 case OP_NOTPLUSI:
6521 case OP_NOTMINPLUSI:
6522 case OP_NOTQUERYI:
6523 case OP_NOTMINQUERYI:
6524 case OP_NOTUPTOI:
6525 case OP_NOTMINUPTOI:
6526 case OP_NOTEXACTI:
6527 case OP_NOTPOSSTARI:
6528 case OP_NOTPOSPLUSI:
6529 case OP_NOTPOSQUERYI:
6530 case OP_NOTPOSUPTOI:
6531 case OP_TYPESTAR:
6532 case OP_TYPEMINSTAR:
6533 case OP_TYPEPLUS:
6534 case OP_TYPEMINPLUS:
6535 case OP_TYPEQUERY:
6536 case OP_TYPEMINQUERY:
6537 case OP_TYPEUPTO:
6538 case OP_TYPEMINUPTO:
6539 case OP_TYPEEXACT:
6540 case OP_TYPEPOSSTAR:
6541 case OP_TYPEPOSPLUS:
6542 case OP_TYPEPOSQUERY:
6543 case OP_TYPEPOSUPTO:
6544 cc = compile_iterator_matchingpath(common, cc, parent);
6545 break;
6546
/* A class is an iterator when the unit following its fixed-size data is one
of the OP_CRSTAR .. OP_CRMINRANGE repeat opcodes. */
6547 case OP_CLASS:
6548 case OP_NCLASS:
6549 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
6550 cc = compile_iterator_matchingpath(common, cc, parent);
6551 else
6552 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6553 break;
6554
/* Same test for an extended class; its length is read with GET(cc, 1). */
6555 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
6556 case OP_XCLASS:
6557 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
6558 cc = compile_iterator_matchingpath(common, cc, parent);
6559 else
6560 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6561 break;
6562 #endif
6563
/* Back references, with or without a following repeat opcode. */
6564 case OP_REF:
6565 case OP_REFI:
6566 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
6567 cc = compile_ref_iterator_matchingpath(common, cc, parent);
6568 else
6569 cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
6570 break;
6571
6572 case OP_RECURSE:
6573 cc = compile_recurse_matchingpath(common, cc, parent);
6574 break;
6575
6576 case OP_ASSERT:
6577 case OP_ASSERT_NOT:
6578 case OP_ASSERTBACK:
6579 case OP_ASSERTBACK_NOT:
6580 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6581 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6582 break;
6583
/* The bracket that follows is skipped here (cc moves to its end); only the
current position is pushed. When the bracket ends with OP_KETRMIN an extra
zero word is stored as well. */
6584 case OP_BRAMINZERO:
6585 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
6586 cc = bracketend(cc + 1);
6587 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
6588 {
6589 allocate_stack(common, 1);
6590 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6591 }
6592 else
6593 {
6594 allocate_stack(common, 2);
6595 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6596 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
6597 }
6598 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
6599 if (cc[1] > OP_ASSERTBACK_NOT)
6600 decrease_call_count(common);
6601 break;
6602
6603 case OP_ONCE:
6604 case OP_ONCE_NC:
6605 case OP_BRA:
6606 case OP_CBRA:
6607 case OP_COND:
6608 case OP_SBRA:
6609 case OP_SCBRA:
6610 case OP_SCOND:
6611 cc = compile_bracket_matchingpath(common, cc, parent);
6612 break;
6613
/* OP_BRAZERO is followed either by a bracket or by an assertion; the opcode
after it decides which generator is used. */
6614 case OP_BRAZERO:
6615 if (cc[1] > OP_ASSERTBACK_NOT)
6616 cc = compile_bracket_matchingpath(common, cc, parent);
6617 else
6618 {
6619 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6620 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6621 }
6622 break;
6623
6624 case OP_BRAPOS:
6625 case OP_CBRAPOS:
6626 case OP_SBRAPOS:
6627 case OP_SCBRAPOS:
6628 case OP_BRAPOSZERO:
6629 cc = compile_bracketpos_matchingpath(common, cc, parent);
6630 break;
6631
/* The previous mark pointer is saved on the stack; cc + 2 is then stored both
in the local mark slot and in the mark_ptr member of jit_arguments. The
opcode is skipped using the length stored in cc[1]. */
6632 case OP_MARK:
6633 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6634 SLJIT_ASSERT(common->mark_ptr != 0);
6635 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6636 allocate_stack(common, 1);
6637 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6638 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6639 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
6640 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
6641 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
6642 cc += 1 + 2 + cc[1];
6643 break;
6644
/* No matching path code is emitted for OP_COMMIT; only a backtrack entry is
pushed. */
6645 case OP_COMMIT:
6646 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6647 cc += 1;
6648 break;
6649
6650 case OP_FAIL:
6651 case OP_ACCEPT:
6652 case OP_ASSERT_ACCEPT:
6653 cc = compile_fail_accept_matchingpath(common, cc, parent);
6654 break;
6655
6656 case OP_CLOSE:
6657 cc = compile_close_matchingpath(common, cc);
6658 break;
6659
/* The bracket that follows OP_SKIPZERO is skipped without emitting code. */
6660 case OP_SKIPZERO:
6661 cc = bracketend(cc + 1);
6662 break;
6663
6664 default:
6665 SLJIT_ASSERT_STOP();
6666 return;
6667 }
/* A NULL result from a helper ends code generation for this range. */
6668 if (cc == NULL)
6669 return;
6670 }
6671 SLJIT_ASSERT(cc == ccend);
6672 }
6673
/* The helper macros of the matching path generators are dropped from here
on. */
6674 #undef PUSH_BACKTRACK
6675 #undef PUSH_BACKTRACK_NOVALUE
6676 #undef BACKTRACK_AS
6677
/* Generates the backtracking path of "current" and returns from the enclosing
function when the compiler reports an error. The expansion contains a plain
"return;" and uses the names "common" and "compiler", so it can only be used
inside functions returning void where both names are in scope. */
6678 #define COMPILE_BACKTRACKINGPATH(current) \
6679 do \
6680 { \
6681 compile_backtrackingpath(common, (current)); \
6682 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6683 return; \
6684 } \
6685 while (0)
6686
/* Casts the variable named "current" to a pointer to the given type. */
6687 #define CURRENT_AS(type) ((type *)current)
6688
6689 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
6690 {
6691 DEFINE_COMPILER;
6692 pcre_uchar *cc = current->cc;
6693 pcre_uchar opcode;
6694 pcre_uchar type;
6695 int arg1 = -1, arg2 = -1;
6696 struct sljit_label *label = NULL;
6697 struct sljit_jump *jump = NULL;
6698 jump_list *jumplist = NULL;
6699 int private_data_ptr = PRIVATE_DATA(cc);
6700 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6701 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6702 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_w);
6703
6704 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
6705
6706 switch(opcode)
6707 {
6708 case OP_STAR:
6709 case OP_PLUS:
6710 case OP_UPTO:
6711 case OP_CRRANGE:
6712 if (type == OP_ANYNL || type == OP_EXTUNI)
6713 {
6714 SLJIT_ASSERT(private_data_ptr == 0);
6715 set_jumps(current->topbacktracks, LABEL());
6716 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6717 free_stack(common, 1);
6718 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(iterator_backtrack)->matchingpath);
6719 }
6720 else
6721 {
6722 if (opcode == OP_UPTO)
6723 arg2 = 0;
6724 if (opcode <= OP_PLUS)
6725 {
6726 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6727 jump = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, base, offset1);
6728 }
6729 else
6730 {
6731 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6732 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6733 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);
6734 OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
6735 }
6736 skip_char_back(common);
6737 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6738 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
6739 if (opcode == OP_CRRANGE)
6740 set_jumps(current->topbacktracks, LABEL());
6741 JUMPHERE(jump);
6742 if (private_data_ptr == 0)
6743 free_stack(common, 2);
6744 if (opcode == OP_PLUS)
6745 set_jumps(current->topbacktracks, LABEL());
6746 }
6747 break;
6748
6749 case OP_MINSTAR:
6750 case OP_MINPLUS:
6751 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6752 compile_char1_matchingpath(common, type, cc, &jumplist);
6753 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6754 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
6755 set_jumps(jumplist, LABEL());
6756 if (private_data_ptr == 0)
6757 free_stack(common, 1);
6758 if (opcode == OP_MINPLUS)
6759 set_jumps(current->topbacktracks, LABEL());
6760 break;
6761
6762 case OP_MINUPTO:
6763 case OP_CRMINRANGE:
6764 if (opcode == OP_CRMINRANGE)
6765 {
6766 label = LABEL();
6767 set_jumps(current->topbacktracks, label);
6768 }
6769 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6770 compile_char1_matchingpath(common, type, cc, &jumplist);
6771
6772 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6773 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6774 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6775 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6776
6777 if (opcode == OP_CRMINRANGE)
6778 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
6779
6780 if (opcode == OP_CRMINRANGE && arg1 == 0)
6781 JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
6782 else
6783 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
6784