/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1009 - (show annotations)
Wed Aug 22 12:01:22 2012 UTC (7 years, 2 months ago) by zherczeg
File MIME type: text/plain
File size: 255681 byte(s)
Improve the matching speed of capturing brackets.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Size of the regex stack that is allocated on the real machine stack.
Fast, but limited in size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only active when sljit debugging is switched on. */
#if defined(SLJIT_DEBUG) && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
80
/*
Short summary about the backtracking mechanism employed by the jit code generator:

The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).

  'ab' - 'a' and 'b' regexps are concatenated
  'a+' - 'a' is the sub-expression of the '+' operator

The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
98
99 Greedy star operator (*) :
100 Matching path: match happens.
101 Backtrack path: match failed.
102 Non-greedy star operator (*?) :
103 Matching path: no need to perform a match.
104 Backtrack path: match is required.
105
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
suppose we have the following regular expression:
109
110 A(B|C)D
111
112 The generated code will be the following:
113
114 A matching path
115 '(' matching path (pushing arguments to the stack)
116 B matching path
117 ')' matching path (pushing arguments to the stack)
118 D matching path
119 return with successful match
120
121 D backtrack path
122 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
123 B backtrack path
124 C expected path
125 jump to D matching path
126 C backtrack path
127 A backtrack path
128
Notice that the order of the backtrack code paths is the opposite of the fast
code paths. In this way the topmost value on the stack always belongs
to the current backtrack code path. The backtrack path must check
whether there is a next alternative. If so, it needs to jump back to
the matching path eventually. Otherwise it needs to clear out its own stack
frame and continue the execution on the backtrack code paths.
*/
136
/*
Saved stack frames:

Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.

The stack frames are stored in a chained list, and have the following format:
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]

Thus we can restore the private data to a particular point in the stack.
*/
150
151 typedef struct jit_arguments {
152 /* Pointers first. */
153 struct sljit_stack *stack;
154 const pcre_uchar *str;
155 const pcre_uchar *begin;
156 const pcre_uchar *end;
157 int *offsets;
158 pcre_uchar *uchar_ptr;
159 pcre_uchar *mark_ptr;
160 /* Everything else after. */
161 int offsetcount;
162 int calllimit;
163 pcre_uint8 notbol;
164 pcre_uint8 noteol;
165 pcre_uint8 notempty;
166 pcre_uint8 notempty_atstart;
167 } jit_arguments;
168
169 typedef struct executable_functions {
170 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
171 PUBL(jit_callback) callback;
172 void *userdata;
173 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
174 } executable_functions;
175
/* Singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  /* Next list item. */
  struct jump_list *next;
} jump_list;
180
181 enum stub_types { stack_alloc };
182
183 typedef struct stub_list {
184 enum stub_types type;
185 int data;
186 struct sljit_jump *start;
187 struct sljit_label *quit;
188 struct stub_list *next;
189 } stub_list;
190
191 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
192
193 /* The following structure is the key data type for the recursive
194 code generator. It is allocated by compile_matchingpath, and contains
195 the aguments for compile_backtrackingpath. Must be the first member
196 of its descendants. */
197 typedef struct backtrack_common {
198 /* Concatenation stack. */
199 struct backtrack_common *prev;
200 jump_list *nextbacktracks;
201 /* Internal stack (for component operators). */
202 struct backtrack_common *top;
203 jump_list *topbacktracks;
204 /* Opcode pointer. */
205 pcre_uchar *cc;
206 } backtrack_common;
207
208 typedef struct assert_backtrack {
209 backtrack_common common;
210 jump_list *condfailed;
211 /* Less than 0 (-1) if a frame is not needed. */
212 int framesize;
213 /* Points to our private memory word on the stack. */
214 int private_data_ptr;
215 /* For iterators. */
216 struct sljit_label *matchingpath;
217 } assert_backtrack;
218
219 typedef struct bracket_backtrack {
220 backtrack_common common;
221 /* Where to coninue if an alternative is successfully matched. */
222 struct sljit_label *alternative_matchingpath;
223 /* For rmin and rmax iterators. */
224 struct sljit_label *recursive_matchingpath;
225 /* For greedy ? operator. */
226 struct sljit_label *zero_matchingpath;
227 /* Contains the branches of a failed condition. */
228 union {
229 /* Both for OP_COND, OP_SCOND. */
230 jump_list *condfailed;
231 assert_backtrack *assert;
232 /* For OP_ONCE. -1 if not needed. */
233 int framesize;
234 } u;
235 /* Points to our private memory word on the stack. */
236 int private_data_ptr;
237 } bracket_backtrack;
238
239 typedef struct bracketpos_backtrack {
240 backtrack_common common;
241 /* Points to our private memory word on the stack. */
242 int private_data_ptr;
243 /* Reverting stack is needed. */
244 int framesize;
245 /* Allocated stack size. */
246 int stacksize;
247 } bracketpos_backtrack;
248
249 typedef struct braminzero_backtrack {
250 backtrack_common common;
251 struct sljit_label *matchingpath;
252 } braminzero_backtrack;
253
254 typedef struct iterator_backtrack {
255 backtrack_common common;
256 /* Next iteration. */
257 struct sljit_label *matchingpath;
258 } iterator_backtrack;
259
260 typedef struct recurse_entry {
261 struct recurse_entry *next;
262 /* Contains the function entry. */
263 struct sljit_label *entry;
264 /* Collects the calls until the function is not created. */
265 jump_list *calls;
266 /* Points to the starting opcode. */
267 int start;
268 } recurse_entry;
269
270 typedef struct recurse_backtrack {
271 backtrack_common common;
272 } recurse_backtrack;
273
274 #define MAX_RANGE_SIZE 6
275
276 typedef struct compiler_common {
277 struct sljit_compiler *compiler;
278 pcre_uchar *start;
279
280 /* Maps private data offset to each opcode. */
281 int *private_data_ptrs;
282 /* Tells whether the capturing bracket is optimized. */
283 pcre_uint8 *optimized_cbracket;
284 /* Starting offset of private data for capturing brackets. */
285 int cbraptr;
286 /* OVector starting point. Must be divisible by 2. */
287 int ovector_start;
288 /* Last known position of the requested byte. */
289 int req_char_ptr;
290 /* Head of the last recursion. */
291 int recursive_head;
292 /* First inspected character for partial matching. */
293 int start_used_ptr;
294 /* Starting pointer for partial soft matches. */
295 int hit_start;
296 /* End pointer of the first line. */
297 int first_line_end;
298 /* Points to the marked string. */
299 int mark_ptr;
300
301 /* Flipped and lower case tables. */
302 const pcre_uint8 *fcc;
303 sljit_w lcc;
304 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
305 int mode;
306 /* Newline control. */
307 int nltype;
308 int newline;
309 int bsr_nltype;
310 /* Dollar endonly. */
311 int endonly;
312 BOOL has_set_som;
313 /* Tables. */
314 sljit_w ctypes;
315 int digits[2 + MAX_RANGE_SIZE];
316 /* Named capturing brackets. */
317 sljit_uw name_table;
318 sljit_w name_count;
319 sljit_w name_entry_size;
320
321 /* Labels and jump lists. */
322 struct sljit_label *partialmatchlabel;
323 struct sljit_label *quitlabel;
324 struct sljit_label *acceptlabel;
325 stub_list *stubs;
326 recurse_entry *entries;
327 recurse_entry *currententry;
328 jump_list *partialmatch;
329 jump_list *quit;
330 jump_list *accept;
331 jump_list *calllimit;
332 jump_list *stackalloc;
333 jump_list *revertframes;
334 jump_list *wordboundary;
335 jump_list *anynewline;
336 jump_list *hspace;
337 jump_list *vspace;
338 jump_list *casefulcmp;
339 jump_list *caselesscmp;
340 BOOL jscript_compat;
341 #ifdef SUPPORT_UTF
342 BOOL utf;
343 #ifdef SUPPORT_UCP
344 BOOL use_ucp;
345 #endif
346 jump_list *utfreadchar;
347 #ifdef COMPILE_PCRE8
348 jump_list *utfreadtype8;
349 #endif
350 #endif /* SUPPORT_UTF */
351 #ifdef SUPPORT_UCP
352 jump_list *getucd;
353 #endif
354 } compiler_common;
355
/* Context for byte_sequence_compare. The two unions (c and oc) have the
same layout and only exist when unaligned access is available. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } c;
  union {
    sljit_i asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#endif
  } oc;
#endif
} compare_context;
389
/* Marker values stored in saved stack frames. They are zero or negative. */
enum {
  frame_end = 0,          /* Terminates a frame. */
  frame_setstrbegin = -1, /* Followed by the saved OVECTOR(0) value. */
  frame_setmark = -2      /* Followed by the saved mark pointer value. */
};
395
/* Undefine sljit macros; CMP gets its own definition among the shortcuts. */
#undef CMP

/* Used for accessing the elements of the stack: item i lives at the
negative byte offset -(i + 1) * sizeof(sljit_w). */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
401
/* Register allocation: symbolic names for the sljit registers used by
the generated code. */
#define TMP1          SLJIT_TEMPORARY_REG1
#define TMP2          SLJIT_TEMPORARY_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_TEMPORARY_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
412
/* Local space layout (byte offsets, in units of sizeof(sljit_w)). */

/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_w))
#define LOCALS1          (1 * sizeof(sljit_w))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_w))
#define POSSESSIVE1      (3 * sizeof(sljit_w))
/* Max limit of recursions. */
#define CALL_LIMIT       (4 * sizeof(sljit_w))

/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second holds
the start pointers while the end of the capturing group has not yet
been reached. All of these macros expect a variable named common
(a compiler_common pointer) to be in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_w))
#define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_w))
/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
430
431 #ifdef COMPILE_PCRE8
432 #define MOV_UCHAR SLJIT_MOV_UB
433 #define MOVU_UCHAR SLJIT_MOVU_UB
434 #else
435 #ifdef COMPILE_PCRE16
436 #define MOV_UCHAR SLJIT_MOV_UH
437 #define MOVU_UCHAR SLJIT_MOVU_UH
438 #else
439 #error Unsupported compiling mode
440 #endif
441 #endif
442
/* Shortcuts for the sljit emitter calls. DEFINE_COMPILER expects a variable
named common in scope; every other macro expects the compiler variable
that DEFINE_COMPILER declares. */
#define DEFINE_COMPILER struct sljit_compiler *compiler = common->compiler

#define OP1(op, dst, dstw, src, srcw) sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))

/* Emits a label at the current position. */
#define LABEL() sljit_emit_label(compiler)
/* Emits a jump whose target is set later. */
#define JUMP(type) sljit_emit_jump(compiler, (type))
/* Emits a jump to an already existing label. */
#define JUMPTO(type, label) sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a label here and makes it the target of a previously emitted jump. */
#define JUMPHERE(jump) sljit_set_label((jump), sljit_emit_label(compiler))

/* Compare-and-jump, with the target set later / set to an existing label. */
#define CMP(type, src1, src1w, src2, src2w) sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

#define COND_VALUE(op, dst, dstw, type) sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) sljit_get_local_base(compiler, (dst), (dstw), (offset))
466
467 static pcre_uchar* bracketend(pcre_uchar* cc)
468 {
469 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
470 do cc += GET(cc, 1); while (*cc == OP_ALT);
471 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
472 cc += 1 + LINK_SIZE;
473 return cc;
474 }
475
/* Functions which might need modification whenever a new opcode is supported:
  next_opcode
  get_private_data_length
  set_private_data_ptrs
  get_framesize
  init_frame
  get_private_data_length_for_copy
  copy_private_data
  compile_matchingpath
  compile_backtrackingpath
*/
487
488 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
489 {
490 SLJIT_UNUSED_ARG(common);
491 switch(*cc)
492 {
493 case OP_SOD:
494 case OP_SOM:
495 case OP_SET_SOM:
496 case OP_NOT_WORD_BOUNDARY:
497 case OP_WORD_BOUNDARY:
498 case OP_NOT_DIGIT:
499 case OP_DIGIT:
500 case OP_NOT_WHITESPACE:
501 case OP_WHITESPACE:
502 case OP_NOT_WORDCHAR:
503 case OP_WORDCHAR:
504 case OP_ANY:
505 case OP_ALLANY:
506 case OP_ANYNL:
507 case OP_NOT_HSPACE:
508 case OP_HSPACE:
509 case OP_NOT_VSPACE:
510 case OP_VSPACE:
511 case OP_EXTUNI:
512 case OP_EODN:
513 case OP_EOD:
514 case OP_CIRC:
515 case OP_CIRCM:
516 case OP_DOLL:
517 case OP_DOLLM:
518 case OP_TYPESTAR:
519 case OP_TYPEMINSTAR:
520 case OP_TYPEPLUS:
521 case OP_TYPEMINPLUS:
522 case OP_TYPEQUERY:
523 case OP_TYPEMINQUERY:
524 case OP_TYPEPOSSTAR:
525 case OP_TYPEPOSPLUS:
526 case OP_TYPEPOSQUERY:
527 case OP_CRSTAR:
528 case OP_CRMINSTAR:
529 case OP_CRPLUS:
530 case OP_CRMINPLUS:
531 case OP_CRQUERY:
532 case OP_CRMINQUERY:
533 case OP_DEF:
534 case OP_BRAZERO:
535 case OP_BRAMINZERO:
536 case OP_BRAPOSZERO:
537 case OP_COMMIT:
538 case OP_FAIL:
539 case OP_ACCEPT:
540 case OP_ASSERT_ACCEPT:
541 case OP_SKIPZERO:
542 return cc + 1;
543
544 case OP_ANYBYTE:
545 #ifdef SUPPORT_UTF
546 if (common->utf) return NULL;
547 #endif
548 return cc + 1;
549
550 case OP_CHAR:
551 case OP_CHARI:
552 case OP_NOT:
553 case OP_NOTI:
554 case OP_STAR:
555 case OP_MINSTAR:
556 case OP_PLUS:
557 case OP_MINPLUS:
558 case OP_QUERY:
559 case OP_MINQUERY:
560 case OP_POSSTAR:
561 case OP_POSPLUS:
562 case OP_POSQUERY:
563 case OP_STARI:
564 case OP_MINSTARI:
565 case OP_PLUSI:
566 case OP_MINPLUSI:
567 case OP_QUERYI:
568 case OP_MINQUERYI:
569 case OP_POSSTARI:
570 case OP_POSPLUSI:
571 case OP_POSQUERYI:
572 case OP_NOTSTAR:
573 case OP_NOTMINSTAR:
574 case OP_NOTPLUS:
575 case OP_NOTMINPLUS:
576 case OP_NOTQUERY:
577 case OP_NOTMINQUERY:
578 case OP_NOTPOSSTAR:
579 case OP_NOTPOSPLUS:
580 case OP_NOTPOSQUERY:
581 case OP_NOTSTARI:
582 case OP_NOTMINSTARI:
583 case OP_NOTPLUSI:
584 case OP_NOTMINPLUSI:
585 case OP_NOTQUERYI:
586 case OP_NOTMINQUERYI:
587 case OP_NOTPOSSTARI:
588 case OP_NOTPOSPLUSI:
589 case OP_NOTPOSQUERYI:
590 cc += 2;
591 #ifdef SUPPORT_UTF
592 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
593 #endif
594 return cc;
595
596 case OP_UPTO:
597 case OP_MINUPTO:
598 case OP_EXACT:
599 case OP_POSUPTO:
600 case OP_UPTOI:
601 case OP_MINUPTOI:
602 case OP_EXACTI:
603 case OP_POSUPTOI:
604 case OP_NOTUPTO:
605 case OP_NOTMINUPTO:
606 case OP_NOTEXACT:
607 case OP_NOTPOSUPTO:
608 case OP_NOTUPTOI:
609 case OP_NOTMINUPTOI:
610 case OP_NOTEXACTI:
611 case OP_NOTPOSUPTOI:
612 cc += 2 + IMM2_SIZE;
613 #ifdef SUPPORT_UTF
614 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
615 #endif
616 return cc;
617
618 case OP_NOTPROP:
619 case OP_PROP:
620 return cc + 1 + 2;
621
622 case OP_TYPEUPTO:
623 case OP_TYPEMINUPTO:
624 case OP_TYPEEXACT:
625 case OP_TYPEPOSUPTO:
626 case OP_REF:
627 case OP_REFI:
628 case OP_CREF:
629 case OP_NCREF:
630 case OP_RREF:
631 case OP_NRREF:
632 case OP_CLOSE:
633 cc += 1 + IMM2_SIZE;
634 return cc;
635
636 case OP_CRRANGE:
637 case OP_CRMINRANGE:
638 return cc + 1 + 2 * IMM2_SIZE;
639
640 case OP_CLASS:
641 case OP_NCLASS:
642 return cc + 1 + 32 / sizeof(pcre_uchar);
643
644 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
645 case OP_XCLASS:
646 return cc + GET(cc, 1);
647 #endif
648
649 case OP_RECURSE:
650 case OP_ASSERT:
651 case OP_ASSERT_NOT:
652 case OP_ASSERTBACK:
653 case OP_ASSERTBACK_NOT:
654 case OP_REVERSE:
655 case OP_ONCE:
656 case OP_ONCE_NC:
657 case OP_BRA:
658 case OP_BRAPOS:
659 case OP_COND:
660 case OP_SBRA:
661 case OP_SBRAPOS:
662 case OP_SCOND:
663 case OP_ALT:
664 case OP_KET:
665 case OP_KETRMAX:
666 case OP_KETRMIN:
667 case OP_KETRPOS:
668 return cc + 1 + LINK_SIZE;
669
670 case OP_CBRA:
671 case OP_CBRAPOS:
672 case OP_SCBRA:
673 case OP_SCBRAPOS:
674 return cc + 1 + LINK_SIZE + IMM2_SIZE;
675
676 case OP_MARK:
677 return cc + 1 + 2 + cc[1];
678
679 default:
680 return NULL;
681 }
682 }
683
/* Case label groups shared by get_private_data_length and
set_private_data_ptrs. The _1 groups get one private data word there,
the _2A / _2B groups get two. */

/* Single character iterators, one word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Single character iterators, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: case OP_NOTSTARI: case OP_NOTPLUSI:

/* Single character iterators that carry an immediate, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Type iterators, one word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Type iterators, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Type iterators that carry an immediate, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
735
736 static int get_class_iterator_size(pcre_uchar *cc)
737 {
738 switch(*cc)
739 {
740 case OP_CRSTAR:
741 case OP_CRPLUS:
742 return 2;
743
744 case OP_CRMINSTAR:
745 case OP_CRMINPLUS:
746 case OP_CRQUERY:
747 case OP_CRMINQUERY:
748 return 1;
749
750 case OP_CRRANGE:
751 case OP_CRMINRANGE:
752 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
753 return 0;
754 return 2;
755
756 default:
757 return 0;
758 }
759 }
760
761 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
762 {
763 int private_data_length = 0;
764 pcre_uchar *alternative;
765 pcre_uchar *name;
766 pcre_uchar *end = NULL;
767 int space, size, bracketlen, i;
768
769 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
770 while (cc < ccend)
771 {
772 space = 0;
773 size = 0;
774 bracketlen = 0;
775 switch(*cc)
776 {
777 case OP_SET_SOM:
778 common->has_set_som = TRUE;
779 cc += 1;
780 break;
781
782 case OP_REF:
783 case OP_REFI:
784 common->optimized_cbracket[GET2(cc, 1)] = 0;
785 cc += 1 + IMM2_SIZE;
786 break;
787
788 case OP_ASSERT:
789 case OP_ASSERT_NOT:
790 case OP_ASSERTBACK:
791 case OP_ASSERTBACK_NOT:
792 case OP_ONCE:
793 case OP_ONCE_NC:
794 case OP_BRAPOS:
795 case OP_SBRA:
796 case OP_SBRAPOS:
797 private_data_length += sizeof(sljit_w);
798 bracketlen = 1 + LINK_SIZE;
799 break;
800
801 case OP_CBRAPOS:
802 case OP_SCBRAPOS:
803 private_data_length += sizeof(sljit_w);
804 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
805 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
806 break;
807
808 case OP_COND:
809 case OP_SCOND:
810 bracketlen = cc[1 + LINK_SIZE];
811 if (bracketlen == OP_CREF)
812 {
813 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
814 common->optimized_cbracket[bracketlen] = 0;
815 }
816 else if (bracketlen == OP_NCREF)
817 {
818 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
819 name = (pcre_uchar *)common->name_table;
820 alternative = name;
821 for (i = 0; i < common->name_count; i++)
822 {
823 if (GET2(name, 0) == bracketlen) break;
824 name += common->name_entry_size;
825 }
826 SLJIT_ASSERT(i != common->name_count);
827
828 for (i = 0; i < common->name_count; i++)
829 {
830 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
831 common->optimized_cbracket[GET2(alternative, 0)] = 0;
832 alternative += common->name_entry_size;
833 }
834 }
835
836 if (*cc == OP_COND)
837 {
838 /* Might be a hidden SCOND. */
839 alternative = cc + GET(cc, 1);
840 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
841 private_data_length += sizeof(sljit_w);
842 }
843 else
844 private_data_length += sizeof(sljit_w);
845 bracketlen = 1 + LINK_SIZE;
846 break;
847
848 case OP_BRA:
849 bracketlen = 1 + LINK_SIZE;
850 break;
851
852 case OP_CBRA:
853 case OP_SCBRA:
854 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
855 break;
856
857 CASE_ITERATOR_PRIVATE_DATA_1
858 space = 1;
859 size = -2;
860 break;
861
862 CASE_ITERATOR_PRIVATE_DATA_2A
863 space = 2;
864 size = -2;
865 break;
866
867 CASE_ITERATOR_PRIVATE_DATA_2B
868 space = 2;
869 size = -(2 + IMM2_SIZE);
870 break;
871
872 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
873 space = 1;
874 size = 1;
875 break;
876
877 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
878 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
879 space = 2;
880 size = 1;
881 break;
882
883 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
884 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
885 space = 2;
886 size = 1 + IMM2_SIZE;
887 break;
888
889 case OP_CLASS:
890 case OP_NCLASS:
891 size += 1 + 32 / sizeof(pcre_uchar);
892 space = get_class_iterator_size(cc + size);
893 break;
894
895 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
896 case OP_XCLASS:
897 size = GET(cc, 1);
898 space = get_class_iterator_size(cc + size);
899 break;
900 #endif
901
902 case OP_RECURSE:
903 alternative = common->start + GET(cc, 1);
904 if (alternative != common->start)
905 common->optimized_cbracket[GET2(alternative, 1 + LINK_SIZE)] = 0;
906 /* Set its value only once. */
907 if (common->recursive_head == 0)
908 {
909 common->recursive_head = common->ovector_start;
910 common->ovector_start += sizeof(sljit_w);
911 }
912 cc += 1 + LINK_SIZE;
913 break;
914
915 case OP_MARK:
916 if (common->mark_ptr == 0)
917 {
918 common->mark_ptr = common->ovector_start;
919 common->ovector_start += sizeof(sljit_w);
920 }
921 cc += 1 + 2 + cc[1];
922 break;
923
924 default:
925 cc = next_opcode(common, cc);
926 if (cc == NULL)
927 return -1;
928 break;
929 }
930
931 if (space > 0 && cc >= end)
932 private_data_length += sizeof(sljit_w) * space;
933
934 if (size != 0)
935 {
936 if (size < 0)
937 {
938 cc += -size;
939 #ifdef SUPPORT_UTF
940 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
941 #endif
942 }
943 else
944 cc += size;
945 }
946
947 if (bracketlen > 0)
948 {
949 if (cc >= end)
950 {
951 end = bracketend(cc);
952 if (end[-1 - LINK_SIZE] == OP_KET)
953 end = NULL;
954 }
955 cc += bracketlen;
956 }
957 }
958 return private_data_length;
959 }
960
961 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
962 {
963 pcre_uchar *cc = common->start;
964 pcre_uchar *alternative;
965 pcre_uchar *end = NULL;
966 int space, size, bracketlen;
967
968 while (cc < ccend)
969 {
970 space = 0;
971 size = 0;
972 bracketlen = 0;
973 switch(*cc)
974 {
975 case OP_ASSERT:
976 case OP_ASSERT_NOT:
977 case OP_ASSERTBACK:
978 case OP_ASSERTBACK_NOT:
979 case OP_ONCE:
980 case OP_ONCE_NC:
981 case OP_BRAPOS:
982 case OP_SBRA:
983 case OP_SBRAPOS:
984 case OP_SCOND:
985 common->private_data_ptrs[cc - common->start] = private_data_ptr;
986 private_data_ptr += sizeof(sljit_w);
987 bracketlen = 1 + LINK_SIZE;
988 break;
989
990 case OP_CBRAPOS:
991 case OP_SCBRAPOS:
992 common->private_data_ptrs[cc - common->start] = private_data_ptr;
993 private_data_ptr += sizeof(sljit_w);
994 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
995 break;
996
997 case OP_COND:
998 /* Might be a hidden SCOND. */
999 alternative = cc + GET(cc, 1);
1000 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1001 {
1002 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1003 private_data_ptr += sizeof(sljit_w);
1004 }
1005 bracketlen = 1 + LINK_SIZE;
1006 break;
1007
1008 case OP_BRA:
1009 bracketlen = 1 + LINK_SIZE;
1010 break;
1011
1012 case OP_CBRA:
1013 case OP_SCBRA:
1014 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1015 break;
1016
1017 CASE_ITERATOR_PRIVATE_DATA_1
1018 space = 1;
1019 size = -2;
1020 break;
1021
1022 CASE_ITERATOR_PRIVATE_DATA_2A
1023 space = 2;
1024 size = -2;
1025 break;
1026
1027 CASE_ITERATOR_PRIVATE_DATA_2B
1028 space = 2;
1029 size = -(2 + IMM2_SIZE);
1030 break;
1031
1032 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1033 space = 1;
1034 size = 1;
1035 break;
1036
1037 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1038 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1039 space = 2;
1040 size = 1;
1041 break;
1042
1043 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1044 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1045 space = 2;
1046 size = 1 + IMM2_SIZE;
1047 break;
1048
1049 case OP_CLASS:
1050 case OP_NCLASS:
1051 size += 1 + 32 / sizeof(pcre_uchar);
1052 space = get_class_iterator_size(cc + size);
1053 break;
1054
1055 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1056 case OP_XCLASS:
1057 size = GET(cc, 1);
1058 space = get_class_iterator_size(cc + size);
1059 break;
1060 #endif
1061
1062 default:
1063 cc = next_opcode(common, cc);
1064 SLJIT_ASSERT(cc != NULL);
1065 break;
1066 }
1067
1068 if (space > 0 && cc >= end)
1069 {
1070 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1071 private_data_ptr += sizeof(sljit_w) * space;
1072 }
1073
1074 if (size != 0)
1075 {
1076 if (size < 0)
1077 {
1078 cc += -size;
1079 #ifdef SUPPORT_UTF
1080 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1081 #endif
1082 }
1083 else
1084 cc += size;
1085 }
1086
1087 if (bracketlen > 0)
1088 {
1089 if (cc >= end)
1090 {
1091 end = bracketend(cc);
1092 if (end[-1 - LINK_SIZE] == OP_KET)
1093 end = NULL;
1094 }
1095 cc += bracketlen;
1096 }
1097 }
1098 }
1099
1100 /* Returns with -1 if no need for frame. */
1101 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1102 {
1103 pcre_uchar *ccend = bracketend(cc);
1104 int length = 0;
1105 BOOL possessive = FALSE;
1106 BOOL setsom_found = recursive;
1107 BOOL setmark_found = recursive;
1108
1109 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1110 {
1111 length = 3;
1112 possessive = TRUE;
1113 }
1114
1115 cc = next_opcode(common, cc);
1116 SLJIT_ASSERT(cc != NULL);
1117 while (cc < ccend)
1118 switch(*cc)
1119 {
1120 case OP_SET_SOM:
1121 SLJIT_ASSERT(common->has_set_som);
1122 if (!setsom_found)
1123 {
1124 length += 2;
1125 setsom_found = TRUE;
1126 }
1127 cc += 1;
1128 break;
1129
1130 case OP_MARK:
1131 SLJIT_ASSERT(common->mark_ptr != 0);
1132 if (!setmark_found)
1133 {
1134 length += 2;
1135 setmark_found = TRUE;
1136 }
1137 cc += 1 + 2 + cc[1];
1138 break;
1139
1140 case OP_RECURSE:
1141 if (common->has_set_som && !setsom_found)
1142 {
1143 length += 2;
1144 setsom_found = TRUE;
1145 }
1146 if (common->mark_ptr != 0 && !setmark_found)
1147 {
1148 length += 2;
1149 setmark_found = TRUE;
1150 }
1151 cc += 1 + LINK_SIZE;
1152 break;
1153
1154 case OP_CBRA:
1155 case OP_CBRAPOS:
1156 case OP_SCBRA:
1157 case OP_SCBRAPOS:
1158 length += 3;
1159 cc += 1 + LINK_SIZE + IMM2_SIZE;
1160 break;
1161
1162 default:
1163 cc = next_opcode(common, cc);
1164 SLJIT_ASSERT(cc != NULL);
1165 break;
1166 }
1167
1168 /* Possessive quantifiers can use a special case. */
1169 if (SLJIT_UNLIKELY(possessive) && length == 3)
1170 return -1;
1171
1172 if (length > 0)
1173 return length + 1;
1174 return -1;
1175 }
1176
1177 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1178 {
1179 DEFINE_COMPILER;
1180 pcre_uchar *ccend = bracketend(cc);
1181 BOOL setsom_found = recursive;
1182 BOOL setmark_found = recursive;
1183 int offset;
1184
1185 /* >= 1 + shortest item size (2) */
1186 SLJIT_UNUSED_ARG(stacktop);
1187 SLJIT_ASSERT(stackpos >= stacktop + 2);
1188
1189 stackpos = STACK(stackpos);
1190 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1191 cc = next_opcode(common, cc);
1192 SLJIT_ASSERT(cc != NULL);
1193 while (cc < ccend)
1194 switch(*cc)
1195 {
1196 case OP_SET_SOM:
1197 SLJIT_ASSERT(common->has_set_som);
1198 if (!setsom_found)
1199 {
1200 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1201 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1202 stackpos += (int)sizeof(sljit_w);
1203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1204 stackpos += (int)sizeof(sljit_w);
1205 setsom_found = TRUE;
1206 }
1207 cc += 1;
1208 break;
1209
1210 case OP_MARK:
1211 SLJIT_ASSERT(common->mark_ptr != 0);
1212 if (!setmark_found)
1213 {
1214 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1215 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1216 stackpos += (int)sizeof(sljit_w);
1217 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1218 stackpos += (int)sizeof(sljit_w);
1219 setmark_found = TRUE;
1220 }
1221 cc += 1 + 2 + cc[1];
1222 break;
1223
1224 case OP_RECURSE:
1225 if (common->has_set_som && !setsom_found)
1226 {
1227 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1229 stackpos += (int)sizeof(sljit_w);
1230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1231 stackpos += (int)sizeof(sljit_w);
1232 setsom_found = TRUE;
1233 }
1234 if (common->mark_ptr != 0 && !setmark_found)
1235 {
1236 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1237 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1238 stackpos += (int)sizeof(sljit_w);
1239 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1240 stackpos += (int)sizeof(sljit_w);
1241 setmark_found = TRUE;
1242 }
1243 cc += 1 + LINK_SIZE;
1244 break;
1245
1246 case OP_CBRA:
1247 case OP_CBRAPOS:
1248 case OP_SCBRA:
1249 case OP_SCBRAPOS:
1250 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1251 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1252 stackpos += (int)sizeof(sljit_w);
1253 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1254 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1255 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1256 stackpos += (int)sizeof(sljit_w);
1257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1258 stackpos += (int)sizeof(sljit_w);
1259
1260 cc += 1 + LINK_SIZE + IMM2_SIZE;
1261 break;
1262
1263 default:
1264 cc = next_opcode(common, cc);
1265 SLJIT_ASSERT(cc != NULL);
1266 break;
1267 }
1268
1269 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1270 SLJIT_ASSERT(stackpos == STACK(stacktop));
1271 }
1272
1273 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1274 {
1275 int private_data_length = 2;
1276 int size;
1277 pcre_uchar *alternative;
1278 /* Calculate the sum of the private machine words. */
1279 while (cc < ccend)
1280 {
1281 size = 0;
1282 switch(*cc)
1283 {
1284 case OP_ASSERT:
1285 case OP_ASSERT_NOT:
1286 case OP_ASSERTBACK:
1287 case OP_ASSERTBACK_NOT:
1288 case OP_ONCE:
1289 case OP_ONCE_NC:
1290 case OP_BRAPOS:
1291 case OP_SBRA:
1292 case OP_SBRAPOS:
1293 case OP_SCOND:
1294 private_data_length++;
1295 cc += 1 + LINK_SIZE;
1296 break;
1297
1298 case OP_CBRA:
1299 case OP_SCBRA:
1300 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1301 private_data_length++;
1302 cc += 1 + LINK_SIZE + IMM2_SIZE;
1303 break;
1304
1305 case OP_CBRAPOS:
1306 case OP_SCBRAPOS:
1307 SLJIT_ASSERT(common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0);
1308 private_data_length += 2;
1309 cc += 1 + LINK_SIZE + IMM2_SIZE;
1310 break;
1311
1312 case OP_COND:
1313 /* Might be a hidden SCOND. */
1314 alternative = cc + GET(cc, 1);
1315 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1316 private_data_length++;
1317 cc += 1 + LINK_SIZE;
1318 break;
1319
1320 CASE_ITERATOR_PRIVATE_DATA_1
1321 if (PRIVATE_DATA(cc))
1322 private_data_length++;
1323 cc += 2;
1324 #ifdef SUPPORT_UTF
1325 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1326 #endif
1327 break;
1328
1329 CASE_ITERATOR_PRIVATE_DATA_2A
1330 if (PRIVATE_DATA(cc))
1331 private_data_length += 2;
1332 cc += 2;
1333 #ifdef SUPPORT_UTF
1334 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1335 #endif
1336 break;
1337
1338 CASE_ITERATOR_PRIVATE_DATA_2B
1339 if (PRIVATE_DATA(cc))
1340 private_data_length += 2;
1341 cc += 2 + IMM2_SIZE;
1342 #ifdef SUPPORT_UTF
1343 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1344 #endif
1345 break;
1346
1347 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1348 if (PRIVATE_DATA(cc))
1349 private_data_length++;
1350 cc += 1;
1351 break;
1352
1353 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1354 if (PRIVATE_DATA(cc))
1355 private_data_length += 2;
1356 cc += 1;
1357 break;
1358
1359 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1360 if (PRIVATE_DATA(cc))
1361 private_data_length += 2;
1362 cc += 1 + IMM2_SIZE;
1363 break;
1364
1365 case OP_CLASS:
1366 case OP_NCLASS:
1367 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1368 case OP_XCLASS:
1369 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1370 #else
1371 size = 1 + 32 / (int)sizeof(pcre_uchar);
1372 #endif
1373 if (PRIVATE_DATA(cc))
1374 private_data_length += get_class_iterator_size(cc + size);
1375 cc += size;
1376 break;
1377
1378 default:
1379 cc = next_opcode(common, cc);
1380 SLJIT_ASSERT(cc != NULL);
1381 break;
1382 }
1383 }
1384 SLJIT_ASSERT(cc == ccend);
1385 return private_data_length;
1386 }
1387
/* Emits code that copies the private data words used by the opcodes in
[cc, ccend) between the local area (addressed through SLJIT_LOCALS_REG) and
the stack (addressed through STACK_TOP).

save == TRUE:  locals are loaded and stored to the stack; the very first word
               saved is common->recursive_head.
save == FALSE: words are loaded from the stack and stored back to the locals.

The copy alternates between TMP1 and TMP2 (tmp1next selects which one is used
next), so the store of one word is emitted after the load of a later word.
The tmp1empty / tmp2empty flags record whether a register still holds a word
which has not been written to its destination.

stackptr and stacktop are stack slot indexes on entry; they are converted
with STACK() to the offsets of slot (stackptr - 2) and slot (stacktop - 1).
The closing assertion requires that the whole opcode range was consumed and
that the stack offset reached stacktop exactly. */
1388 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1389 BOOL save, int stackptr, int stacktop)
1390 {
1391 DEFINE_COMPILER;
/* Local offsets of the (at most two) words belonging to the current opcode. */
1392 int srcw[2];
1393 int count, size;
1394 BOOL tmp1next = TRUE;
1395 BOOL tmp1empty = TRUE;
1396 BOOL tmp2empty = TRUE;
1397 pcre_uchar *alternative;
1398 enum {
1399 start,
1400 loop,
1401 end
1402 } status;
1403
/* Only the save pass goes through the start state (recursive_head). */
1404 status = save ? start : loop;
1405 stackptr = STACK(stackptr - 2);
1406 stacktop = STACK(stacktop - 1);
1407
1408 if (!save)
1409 {
/* The first slot is skipped when restoring.
NOTE(review): presumably this is the slot where the save pass stored
recursive_head, which is not restored here - confirm against the callers. */
1410 stackptr += sizeof(sljit_w);
/* Preload up to two words, so both registers are ready for the loop below. */
1411 if (stackptr < stacktop)
1412 {
1413 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1414 stackptr += sizeof(sljit_w);
1415 tmp1empty = FALSE;
1416 }
1417 if (stackptr < stacktop)
1418 {
1419 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1420 stackptr += sizeof(sljit_w);
1421 tmp2empty = FALSE;
1422 }
1423 /* The tmp1next must be TRUE in either way. */
1424 }
1425
/* Each iteration selects the words of one opcode (count and srcw[]), then
the inner while loop at the bottom emits the copy for them. */
1426 while (status != end)
1427 {
1428 count = 0;
1429 switch(status)
1430 {
1431 case start:
1432 SLJIT_ASSERT(save && common->recursive_head != 0);
1433 count = 1;
1434 srcw[0] = common->recursive_head;
1435 status = loop;
1436 break;
1437
1438 case loop:
1439 if (cc >= ccend)
1440 {
1441 status = end;
1442 break;
1443 }
1444
/* The opcode walk handles the same opcodes as
get_private_data_length_for_copy(). */
1445 switch(*cc)
1446 {
1447 case OP_ASSERT:
1448 case OP_ASSERT_NOT:
1449 case OP_ASSERTBACK:
1450 case OP_ASSERTBACK_NOT:
1451 case OP_ONCE:
1452 case OP_ONCE_NC:
1453 case OP_BRAPOS:
1454 case OP_SBRA:
1455 case OP_SBRAPOS:
1456 case OP_SCOND:
1457 count = 1;
1458 srcw[0] = PRIVATE_DATA(cc);
1459 SLJIT_ASSERT(srcw[0] != 0);
1460 cc += 1 + LINK_SIZE;
1461 break;
1462
1463 case OP_CBRA:
1464 case OP_SCBRA:
/* Only capturing brackets which are not optimized have a word to copy. */
1465 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1466 {
1467 count = 1;
1468 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1469 }
1470 cc += 1 + LINK_SIZE + IMM2_SIZE;
1471 break;
1472
1473 case OP_CBRAPOS:
1474 case OP_SCBRAPOS:
/* Two words: the private data of the bracket and its OVECTOR_PRIV word. */
1475 count = 2;
1476 srcw[0] = PRIVATE_DATA(cc);
1477 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1478 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1479 cc += 1 + LINK_SIZE + IMM2_SIZE;
1480 break;
1481
1482 case OP_COND:
1483 /* Might be a hidden SCOND. */
1484 alternative = cc + GET(cc, 1);
1485 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1486 {
1487 count = 1;
1488 srcw[0] = PRIVATE_DATA(cc);
1489 SLJIT_ASSERT(srcw[0] != 0);
1490 }
1491 cc += 1 + LINK_SIZE;
1492 break;
1493
1494 CASE_ITERATOR_PRIVATE_DATA_1
1495 if (PRIVATE_DATA(cc))
1496 {
1497 count = 1;
1498 srcw[0] = PRIVATE_DATA(cc);
1499 }
1500 cc += 2;
1501 #ifdef SUPPORT_UTF
1502 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1503 #endif
1504 break;
1505
1506 CASE_ITERATOR_PRIVATE_DATA_2A
1507 if (PRIVATE_DATA(cc))
1508 {
/* The second word is the one directly after the first in the local area. */
1509 count = 2;
1510 srcw[0] = PRIVATE_DATA(cc);
1511 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1512 }
1513 cc += 2;
1514 #ifdef SUPPORT_UTF
1515 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1516 #endif
1517 break;
1518
1519 CASE_ITERATOR_PRIVATE_DATA_2B
1520 if (PRIVATE_DATA(cc))
1521 {
1522 count = 2;
1523 srcw[0] = PRIVATE_DATA(cc);
1524 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1525 }
1526 cc += 2 + IMM2_SIZE;
1527 #ifdef SUPPORT_UTF
1528 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1529 #endif
1530 break;
1531
1532 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1533 if (PRIVATE_DATA(cc))
1534 {
1535 count = 1;
1536 srcw[0] = PRIVATE_DATA(cc);
1537 }
1538 cc += 1;
1539 break;
1540
1541 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1542 if (PRIVATE_DATA(cc))
1543 {
1544 count = 2;
1545 srcw[0] = PRIVATE_DATA(cc);
1546 srcw[1] = srcw[0] + sizeof(sljit_w);
1547 }
1548 cc += 1;
1549 break;
1550
1551 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1552 if (PRIVATE_DATA(cc))
1553 {
1554 count = 2;
1555 srcw[0] = PRIVATE_DATA(cc);
1556 srcw[1] = srcw[0] + sizeof(sljit_w);
1557 }
1558 cc += 1 + IMM2_SIZE;
1559 break;
1560
1561 case OP_CLASS:
1562 case OP_NCLASS:
1563 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1564 case OP_XCLASS:
1565 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1566 #else
1567 size = 1 + 32 / (int)sizeof(pcre_uchar);
1568 #endif
/* The number of words depends on the iterator following the class;
only sizes 1 and 2 are expected here. */
1569 if (PRIVATE_DATA(cc))
1570 switch(get_class_iterator_size(cc + size))
1571 {
1572 case 1:
1573 count = 1;
1574 srcw[0] = PRIVATE_DATA(cc);
1575 break;
1576
1577 case 2:
1578 count = 2;
1579 srcw[0] = PRIVATE_DATA(cc);
1580 srcw[1] = srcw[0] + sizeof(sljit_w);
1581 break;
1582
1583 default:
1584 SLJIT_ASSERT_STOP();
1585 break;
1586 }
1587 cc += size;
1588 break;
1589
1590 default:
1591 cc = next_opcode(common, cc);
1592 SLJIT_ASSERT(cc != NULL);
1593 break;
1594 }
1595 break;
1596
1597 case end:
1598 SLJIT_ASSERT_STOP();
1599 break;
1600 }
1601
/* Copy the selected words. count is decremented before it is used as an
index, so srcw[1] is processed before srcw[0]. */
1602 while (count > 0)
1603 {
1604 count--;
1605 if (save)
1606 {
/* Save: write out the word previously held by the chosen register (if
any), then load the next local into that register. */
1607 if (tmp1next)
1608 {
1609 if (!tmp1empty)
1610 {
1611 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1612 stackptr += sizeof(sljit_w);
1613 }
1614 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1615 tmp1empty = FALSE;
1616 tmp1next = FALSE;
1617 }
1618 else
1619 {
1620 if (!tmp2empty)
1621 {
1622 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1623 stackptr += sizeof(sljit_w);
1624 }
1625 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1626 tmp2empty = FALSE;
1627 tmp1next = TRUE;
1628 }
1629 }
1630 else
1631 {
/* Restore: store the preloaded register to the local, then refill the
register from the stack unless the end of the saved area was reached. */
1632 if (tmp1next)
1633 {
1634 SLJIT_ASSERT(!tmp1empty);
1635 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1636 tmp1empty = stackptr >= stacktop;
1637 if (!tmp1empty)
1638 {
1639 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1640 stackptr += sizeof(sljit_w);
1641 }
1642 tmp1next = FALSE;
1643 }
1644 else
1645 {
1646 SLJIT_ASSERT(!tmp2empty);
1647 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1648 tmp2empty = stackptr >= stacktop;
1649 if (!tmp2empty)
1650 {
1651 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1652 stackptr += sizeof(sljit_w);
1653 }
1654 tmp1next = TRUE;
1655 }
1656 }
1657 }
1658 }
1659
/* Save pass: flush the words still held in the registers. The register
which would be used next holds the older word, so it is written first. */
1660 if (save)
1661 {
1662 if (tmp1next)
1663 {
1664 if (!tmp1empty)
1665 {
1666 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1667 stackptr += sizeof(sljit_w);
1668 }
1669 if (!tmp2empty)
1670 {
1671 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1672 stackptr += sizeof(sljit_w);
1673 }
1674 }
1675 else
1676 {
1677 if (!tmp2empty)
1678 {
1679 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1680 stackptr += sizeof(sljit_w);
1681 }
1682 if (!tmp1empty)
1683 {
1684 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1685 stackptr += sizeof(sljit_w);
1686 }
1687 }
1688 }
/* Everything was consumed, and a restore pass left nothing in the registers. */
1689 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1690 }
1691
1692 #undef CASE_ITERATOR_PRIVATE_DATA_1
1693 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1694 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1695 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1696 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1697 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1698
1699 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1700 {
1701 return (value & (value - 1)) == 0;
1702 }
1703
1704 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1705 {
1706 while (list)
1707 {
1708 /* sljit_set_label is clever enough to do nothing
1709 if either the jump or the label is NULL. */
1710 sljit_set_label(list->jump, label);
1711 list = list->next;
1712 }
1713 }
1714
1715 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1716 {
1717 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1718 if (list_item)
1719 {
1720 list_item->next = *list;
1721 list_item->jump = jump;
1722 *list = list_item;
1723 }
1724 }
1725
1726 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1727 {
1728 DEFINE_COMPILER;
1729 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1730
1731 if (list_item)
1732 {
1733 list_item->type = type;
1734 list_item->data = data;
1735 list_item->start = start;
1736 list_item->quit = LABEL();
1737 list_item->next = common->stubs;
1738 common->stubs = list_item;
1739 }
1740 }
1741
1742 static void flush_stubs(compiler_common *common)
1743 {
1744 DEFINE_COMPILER;
1745 stub_list* list_item = common->stubs;
1746
1747 while (list_item)
1748 {
1749 JUMPHERE(list_item->start);
1750 switch(list_item->type)
1751 {
1752 case stack_alloc:
1753 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1754 break;
1755 }
1756 JUMPTO(SLJIT_JUMP, list_item->quit);
1757 list_item = list_item->next;
1758 }
1759 common->stubs = NULL;
1760 }
1761
1762 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1763 {
1764 DEFINE_COMPILER;
1765
1766 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1767 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1768 }
1769
1770 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1771 {
1772 /* May destroy all locals and registers except TMP2. */
1773 DEFINE_COMPILER;
1774
1775 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1776 #ifdef DESTROY_REGISTERS
1777 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1778 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1779 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1780 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1781 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1782 #endif
1783 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1784 }
1785
1786 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1787 {
1788 DEFINE_COMPILER;
1789 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1790 }
1791
1792 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1793 {
1794 DEFINE_COMPILER;
1795 struct sljit_label *loop;
1796 int i;
1797 /* At this point we can freely use all temporary registers. */
1798 /* TMP1 returns with begin - 1. */
1799 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1800 if (length < 8)
1801 {
1802 for (i = 0; i < length; i++)
1803 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1804 }
1805 else
1806 {
1807 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1808 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1809 loop = LABEL();
1810 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1811 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1812 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1813 }
1814 }
1815
/* Emits the code which publishes the match result to the caller:
- the end of the match (STR_PTR) is stored into OVECTOR(1), after the
  previous content of OVECTOR(1) has been kept in SLJIT_SAVED_REG3;
- when common->mark_ptr is non-zero, the mark local is copied into
  jit_arguments.mark_ptr;
- offsetcount words of the local ovector area are converted to offsets
  relative to jit_arguments.begin and stored as ints into
  jit_arguments.offsets;
- SLJIT_RETURN_REG receives the return value (see the last part). */
1816 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1817 {
1818 DEFINE_COMPILER;
1819 struct sljit_label *loop;
1820 struct sljit_jump *earlyexit;
1821
1822 /* At this point we can freely use all registers. */
/* NOTE(review): the old OVECTOR(1) value kept here is presumably the "unset"
marker written by reset_ovector - confirm. It is compared against the start
entries in the return value loop below. */
1823 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1824 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1825
1826 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1827 if (common->mark_ptr != 0)
1828 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1829 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1830 if (common->mark_ptr != 0)
1831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
/* The destination pointer starts one int before the offsets array, since
each store in the loop uses an offset of sizeof(int). */
1832 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1833 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1834 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1835 /* Unlikely, but possible */
1836 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
/* Loop: SLJIT_TEMPORARY_REG2 counts the remaining entries,
SLJIT_TEMPORARY_REG1 holds begin, SLJIT_SAVED_REG1 walks the local area. */
1837 loop = LABEL();
1838 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1839 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1840 /* Copy the integer value to the output buffer */
1841 #ifdef COMPILE_PCRE16
/* The difference is halved in the 16 bit library before it is stored. */
1842 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1843 #endif
1844 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1845 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1846 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1847 JUMPHERE(earlyexit);
1848
1849 /* Calculate the return value, which is the maximum ovector value. */
1850 if (topbracket > 1)
1851 {
/* The counter starts at topbracket + 1 and the scan starts at the pair of
the top bracket. Each step reads the first word of a pair, moving two words
downwards, and decrements the counter; the scan continues while the word
read equals the value kept in SLJIT_SAVED_REG3. */
1852 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1853 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1854
1855 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1856 loop = LABEL();
1857 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1858 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1859 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1860 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1861 }
1862 else
1863 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1864 }
1865
/* Emits the code which returns with PCRE_ERROR_PARTIAL through the quit
label. If jit_arguments.offsetcount is at least 2, the first two entries of
jit_arguments.offsets are filled before leaving:
  offsets[0] = (start_used_ptr local in JIT_PARTIAL_HARD_COMPILE mode,
                hit_start local otherwise) - begin
  offsets[1] = STR_END - begin
In the 16 bit library both differences are halved before they are stored. */
1866 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1867 {
1868 DEFINE_COMPILER;
1869
/* STR_END is overwritten below (it is the same register as SLJIT_SAVED_REG2). */
1870 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1871 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1872
1873 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1874 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1875 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
/* No room for the two offsets: return without storing anything. */
1876 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, quit);
1877
1878 /* Store match begin and end. */
1879 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1880 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1881 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
/* offsets[1]: distance of the subject end from begin. */
1882 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1883 #ifdef COMPILE_PCRE16
1884 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1885 #endif
1886 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1887
/* offsets[0]: distance of the recorded start position from begin. */
1888 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1889 #ifdef COMPILE_PCRE16
1890 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1891 #endif
1892 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1893
1894 JUMPTO(SLJIT_JUMP, quit);
1895 }
1896
1897 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1898 {
1899 /* May destroy TMP1. */
1900 DEFINE_COMPILER;
1901 struct sljit_jump *jump;
1902
1903 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1904 {
1905 /* The value of -1 must be kept for start_used_ptr! */
1906 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1907 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1908 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1909 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1910 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1911 JUMPHERE(jump);
1912 }
1913 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1914 {
1915 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1916 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1917 JUMPHERE(jump);
1918 }
1919 }
1920
/* Returns TRUE if the character at cc has an other case which differs from
the character itself.

In UTF mode the character is decoded with GETCHAR. Characters above 127 are
looked up with UCD_OTHERCASE when SUPPORT_UCP is defined, and are reported as
having no other case otherwise. All other characters are looked up in the
common->fcc table; characters for which MAX_255() fails have no other case. */
1921 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1922 {
1923 /* Detects if the character has an othercase. */
1924 unsigned int c;
1925
1926 #ifdef SUPPORT_UTF
1927 if (common->utf)
1928 {
1929 GETCHAR(c, cc);
1930 if (c > 127)
1931 {
1932 #ifdef SUPPORT_UCP
1933 return c != UCD_OTHERCASE(c);
1934 #else
1935 return FALSE;
1936 #endif
1937 }
/* Here c <= 127. The non-8-bit build returns directly; the 8 bit build
continues with the common return statement at the end. */
1938 #ifndef COMPILE_PCRE8
1939 return common->fcc[c] != c;
1940 #endif
1941 }
1942 else
1943 #endif
1944 c = *cc;
1945 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1946 }
1947
1948 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1949 {
1950 /* Returns with the othercase. */
1951 #ifdef SUPPORT_UTF
1952 if (common->utf && c > 127)
1953 {
1954 #ifdef SUPPORT_UCP
1955 return UCD_OTHERCASE(c);
1956 #else
1957 return c;
1958 #endif
1959 }
1960 #endif
1961 return TABLE_GET(c, common->fcc, c);
1962 }
1963
/* Checks whether the character at cc and its other case differ in exactly
one bit. The caller must ensure that the character has an other case (this is
only asserted here).

Returns 0 if more than one bit differs. Otherwise the result is
(index << 8) | mask, where mask is the differing bit shifted into the low
8 bits of the result.
NOTE(review): index presumably selects the byte of the encoded character
which contains the differing bit - confirm against the users of the result. */
1964 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1965 {
1966 /* Detects if the character and its othercase has only 1 bit difference. */
1967 unsigned int c, oc, bit;
1968 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1969 int n;
1970 #endif
1971
/* Fetch the character (c) and its other case (oc). */
1972 #ifdef SUPPORT_UTF
1973 if (common->utf)
1974 {
1975 GETCHAR(c, cc);
1976 if (c <= 127)
1977 oc = common->fcc[c];
1978 else
1979 {
1980 #ifdef SUPPORT_UCP
1981 oc = UCD_OTHERCASE(c);
1982 #else
1983 oc = c;
1984 #endif
1985 }
1986 }
1987 else
1988 {
1989 c = *cc;
1990 oc = TABLE_GET(c, common->fcc, c);
1991 }
1992 #else
1993 c = *cc;
1994 oc = TABLE_GET(c, common->fcc, c);
1995 #endif
1996
1997 SLJIT_ASSERT(c != oc);
1998
1999 bit = c ^ oc;
2000 /* Optimized for English alphabet. */
2001 if (c <= 127 && bit == 0x20)
2002 return (0 << 8) | 0x20;
2003
2004 /* Since c != oc, they must have at least 1 bit difference. */
2005 if (!ispowerof2(bit))
2006 return 0;
2007
2008 #ifdef COMPILE_PCRE8
2009
2010 #ifdef SUPPORT_UTF
2011 if (common->utf && c > 127)
2012 {
/* n starts as GET_EXTRALEN of the first byte and is decremented for every
group of 6 bits which is shifted out of the (non-zero) mask. */
2013 n = GET_EXTRALEN(*cc);
2014 while ((bit & 0x3f) == 0)
2015 {
2016 n--;
2017 bit >>= 6;
2018 }
2019 return (n << 8) | bit;
2020 }
2021 #endif /* SUPPORT_UTF */
2022 return (0 << 8) | bit;
2023
2024 #else /* COMPILE_PCRE8 */
2025
2026 #ifdef COMPILE_PCRE16
2027 #ifdef SUPPORT_UTF
2028 if (common->utf && c > 65535)
2029 {
/* Characters above 65535: a difference at bit 10 or above is shifted down
by 10 and handled by the common return below (index 0 or 1); a lower one is
returned with index 2 or 3. */
2030 if (bit >= (1 << 10))
2031 bit >>= 10;
2032 else
2033 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2034 }
2035 #endif /* SUPPORT_UTF */
2036 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2037 #endif /* COMPILE_PCRE16 */
2038
2039 #endif /* COMPILE_PCRE8 */
2040 }
2041
2042 static void check_partial(compiler_common *common, BOOL force)
2043 {
2044 /* Checks whether a partial matching is occured. Does not modify registers. */
2045 DEFINE_COMPILER;
2046 struct sljit_jump *jump = NULL;
2047
2048 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2049
2050 if (common->mode == JIT_COMPILE)
2051 return;
2052
2053 if (!force)
2054 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2055 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2056 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2057
2058 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2059 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2060 else
2061 {
2062 if (common->partialmatchlabel != NULL)
2063 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2064 else
2065 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2066 }
2067
2068 if (jump != NULL)
2069 JUMPHERE(jump);
2070 }
2071
2072 static struct sljit_jump *check_str_end(compiler_common *common)
2073 {
2074 /* Does not affect registers. Usually used in a tight spot. */
2075 DEFINE_COMPILER;
2076 struct sljit_jump *jump;
2077 struct sljit_jump *nohit;
2078 struct sljit_jump *return_value;
2079
2080 if (common->mode == JIT_COMPILE)
2081 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2082
2083 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2084 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2085 {
2086 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2087 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2088 JUMPHERE(nohit);
2089 return_value = JUMP(SLJIT_JUMP);
2090 }
2091 else
2092 {
2093 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2094 if (common->partialmatchlabel != NULL)
2095 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2096 else
2097 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2098 }
2099 JUMPHERE(jump);
2100 return return_value;
2101 }
2102
2103 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2104 {
2105 DEFINE_COMPILER;
2106 struct sljit_jump *jump;
2107
2108 if (common->mode == JIT_COMPILE)
2109 {
2110 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2111 return;
2112 }
2113
2114 /* Partial matching mode. */
2115 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2116 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2117 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2118 {
2119 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2120 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2121 }
2122 else
2123 {
2124 if (common->partialmatchlabel != NULL)
2125 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2126 else
2127 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2128 }
2129 JUMPHERE(jump);
2130 }
2131
/* Emits code which reads the next character of the subject and advances
STR_PTR past it. */
2132 static void read_char(compiler_common *common)
2133 {
2134 /* Reads the character into TMP1, updates STR_PTR.
2135 Does not check STR_END. TMP2 Destroyed. */
2136 DEFINE_COMPILER;
2137 #ifdef SUPPORT_UTF
2138 struct sljit_jump *jump;
2139 #endif
2140
2141 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2142 #ifdef SUPPORT_UTF
2143 if (common->utf)
2144 {
/* Code units below 0xc0 (8 bit) or 0xd800 (16 bit) are used as they are;
for everything else the utfreadchar routine is called.
NOTE(review): the routine presumably decodes the whole character into TMP1
and advances STR_PTR over the additional code units - confirm. */
2145 #ifdef COMPILE_PCRE8
2146 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2147 #else
2148 #ifdef COMPILE_PCRE16
2149 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2150 #endif
2151 #endif /* COMPILE_PCRE8 */
2152 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2153 JUMPHERE(jump);
2154 }
2155 #endif
/* Step over the first code unit. */
2156 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2157 }
2158
/* Emits code which reads the next character of the subject without
consuming it. */
2159 static void peek_char(compiler_common *common)
2160 {
2161 /* Reads the character into TMP1, keeps STR_PTR.
2162 Does not check STR_END. TMP2 Destroyed. */
2163 DEFINE_COMPILER;
2164 #ifdef SUPPORT_UTF
2165 struct sljit_jump *jump;
2166 #endif
2167
2168 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2169 #ifdef SUPPORT_UTF
2170 if (common->utf)
2171 {
/* Same dispatch as in read_char: only code units at or above 0xc0 (8 bit)
or 0xd800 (16 bit) go through the utfreadchar routine. */
2172 #ifdef COMPILE_PCRE8
2173 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2174 #else
2175 #ifdef COMPILE_PCRE16
2176 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2177 #endif
2178 #endif /* COMPILE_PCRE8 */
2179 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
/* Undo the STR_PTR change of the routine.
NOTE(review): this assumes that utfreadchar leaves in TMP2 the amount by
which it advanced STR_PTR - confirm. */
2180 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2181 JUMPHERE(jump);
2182 }
2183 #endif
2184 }
2185
/* Emits code which fetches the type of the next character from the
common->ctypes table and advances STR_PTR. The code unit itself is loaded
into TMP2, so TMP2 is destroyed as well. */
2186 static void read_char8_type(compiler_common *common)
2187 {
2188 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2189 DEFINE_COMPILER;
2190 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2191 struct sljit_jump *jump;
2192 #endif
2193
2194 #ifdef SUPPORT_UTF
2195 if (common->utf)
2196 {
2197 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2198 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2199 #ifdef COMPILE_PCRE8
2200 /* This can be an extra read in some situations, but hopefully
2201 it is needed in most cases. */
2202 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
/* Bytes at or above 0xc0 are handled by the utfreadtype8 routine. */
2203 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2204 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2205 JUMPHERE(jump);
2206 #else
2207 #ifdef COMPILE_PCRE16
/* The type is 0 for code units above 255, since the table lookup is
skipped for them. */
2208 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2209 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2210 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2211 JUMPHERE(jump);
2212 /* Skip low surrogate if necessary. */
/* The code unit just read is tested: (unit & 0xfc00) == 0xd800.
NOTE(review): COND_VALUE presumably sets TMP2 to 1 when the compare was
equal and to 0 otherwise; the shift turns this into a byte count of 2 or 0
which is added to STR_PTR - confirm. */
2213 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2214 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2215 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2216 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2217 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2218 #endif
2219 #endif /* COMPILE_PCRE8 */
2220 return;
2221 }
2222 #endif
/* Non-UTF mode: one code unit is one character. */
2223 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2224 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2225 #ifdef COMPILE_PCRE16
2226 /* The ctypes array contains only 256 values. */
2227 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2228 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2229 #endif
2230 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2231 #ifdef COMPILE_PCRE16
2232 JUMPHERE(jump);
2233 #endif
2234 }
2235
/* Emits code which moves STR_PTR back by one character. Outside UTF mode
this is a single code unit; in UTF mode the additional code units of the
character are skipped as well. */
2236 static void skip_char_back(compiler_common *common)
2237 {
2238 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2239 DEFINE_COMPILER;
2240 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2241 struct sljit_label *label;
2242
2243 if (common->utf)
2244 {
/* Step back one byte at a time, and repeat while the byte stepped over has
the form 10xxxxxx, i.e. (byte & 0xc0) == 0x80. */
2245 label = LABEL();
2246 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2247 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2248 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2249 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2250 return;
2251 }
2252 #endif
2253 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2254 if (common->utf)
2255 {
2256 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2257 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2258 /* Skip low surrogate if necessary. */
/* The code unit stepped over is tested: (unit & 0xfc00) == 0xdc00.
NOTE(review): COND_VALUE presumably sets TMP1 to 1 when the compare was
equal and to 0 otherwise; the shift turns this into a byte count of 2 or 0
which is subtracted from STR_PTR - confirm. */
2259 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2260 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2261 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2262 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2263 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2264 return;
2265 }
2266 #endif
2267 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2268 }
2269
/* Emits a newline test for the character held in TMP1 and appends the
   resulting conditional jump to *backtracks.

   nltype      one of NLTYPE_ANY, NLTYPE_ANYCRLF or NLTYPE_FIXED.
   backtracks  jump list that receives the emitted jump.
   jumpiftrue  when TRUE the jump is taken if the character IS a newline,
               when FALSE it is taken if the character is NOT a newline. */
2270 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2271 {
2272 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2273 DEFINE_COMPILER;
2274
2275 if (nltype == NLTYPE_ANY)
2276 {
/* Delegates to the shared common->anynewline helper (generated elsewhere)
   and branches on the zero / not-zero condition it leaves behind. */
2277 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2278 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2279 }
2280 else if (nltype == NLTYPE_ANYCRLF)
2281 {
/* TMP2 = (TMP1 == CR) | (TMP1 == NL); the final OR also sets the flags that
   the jump below consumes. */
2282 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2283 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2284 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2285 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2286 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2287 }
2288 else
2289 {
/* Fixed single-unit newline: one direct compare against common->newline.
   Two-unit newlines (values >= 256) are rejected by the assert. */
2290 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2291 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2292 }
2293 }
2294
2295 #ifdef SUPPORT_UTF
2296
2297 #ifdef COMPILE_PCRE8
/* Emits the shared fast-call helper that finishes decoding a multi-byte
   UTF-8 character (8-bit build). The continuation bytes are loaded from
   STR_PTR + 1 onwards and STR_PTR is advanced by the number of continuation
   bytes only. No code is emitted to validate the continuation bytes. */
2298 static void do_utfreadchar(compiler_common *common)
2299 {
2300 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2301 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2302 DEFINE_COMPILER;
2303 struct sljit_jump *jump;
2304
2305 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2306 /* Searching for the first zero. */
/* Bit 0x20 of the lead byte clear => 110xxxxx, i.e. a two byte sequence. */
2307 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2308 jump = JUMP(SLJIT_C_NOT_ZERO);
2309 /* Two byte sequence. */
/* TMP1 = ((lead & 0x1f) << 6) | (byte1 & 0x3f) */
2310 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2311 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2312 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2313 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2314 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2315 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2316 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2317 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2318 JUMPHERE(jump);
2319
/* Bit 0x10 of the lead byte clear => 1110xxxx, i.e. a three byte sequence. */
2320 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2321 jump = JUMP(SLJIT_C_NOT_ZERO);
2322 /* Three byte sequence. */
/* TMP1 = ((lead & 0x0f) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f) */
2323 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2324 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2325 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2326 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2327 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2328 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2329 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2330 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2331 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2332 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2333 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2334 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2335 JUMPHERE(jump);
2336
2337 /* Four byte sequence. */
/* Every remaining lead byte is handled as a four byte sequence:
   TMP1 = ((lead & 0x07) << 18) | ((byte1 & 0x3f) << 12)
        | ((byte2 & 0x3f) << 6) | (byte3 & 0x3f) */
2338 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2339 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2340 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2341 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2342 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2343 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2344 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2345 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2346 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2347 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2348 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2349 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2350 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2351 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2352 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2353 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2354 }
2355
/* Emits the shared fast-call helper that produces the ctypes entry of a
   multi-byte UTF-8 character (8-bit build). Only two byte sequences are
   actually decoded, because only they can yield a value below 256; for
   everything else the helper just skips the remaining bytes and returns 0.
   The first continuation byte is loaded from offset 0, so STR_PTR is
   expected to point just past the lead byte on entry. */
2356 static void do_utfreadtype8(compiler_common *common)
2357 {
2358 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2359 of the character (>= 0xc0). Return value in TMP1. */
2360 DEFINE_COMPILER;
2361 struct sljit_jump *jump;
2362 struct sljit_jump *compare;
2363
2364 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2365
/* Bit 0x20 set in the lead byte => three or more bytes, handled at the end. */
2366 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2367 jump = JUMP(SLJIT_C_NOT_ZERO);
2368 /* Two byte sequence. */
/* TMP2 = ((lead & 0x1f) << 6) | (byte1 & 0x3f); STR_PTR moves past byte1. */
2369 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2370 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2371 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2372 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2373 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2374 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* Decoded value within 0..255: return the byte at common->ctypes + value. */
2375 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2376 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2377 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2378
/* Decoded value above 255: there is no table entry, the type is 0. */
2379 JUMPHERE(compare);
2380 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2381 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2382 JUMPHERE(jump);
2383
2384 /* We only have types for characters less than 256. */
/* Advance STR_PTR by utf8_table4[lead - 0xc0] (presumably the number of
   continuation bytes - the table is defined elsewhere) and return type 0. */
2385 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
2386 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2387 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2388 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2389 }
2390
2391 #else /* COMPILE_PCRE8 */
2392
2393 #ifdef COMPILE_PCRE16
/* Emits the shared fast-call helper that finishes decoding a UTF-16
   character (16-bit build). A first unit below 0xdc00 is combined with the
   unit that follows it; any other value is returned as it came in. */
2394 static void do_utfreadchar(compiler_common *common)
2395 {
2396 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2397 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2398 DEFINE_COMPILER;
2399 struct sljit_jump *jump;
2400
2401 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2402 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2403 /* Do nothing, only return. */
/* NOTE(review): TMP2 is not written on this early-return path, although the
   header comment promises "length - 1 in TMP2" - confirm callers do not rely
   on it here. */
2404 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2405
2406 JUMPHERE(jump);
2407 /* Combine two 16 bit characters. */
/* TMP1 = (((first & 0x3ff) << 10) | (second & 0x3ff)) + 0x10000;
   STR_PTR advances by one code unit and TMP2 is set to IN_UCHARS(1). */
2408 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2409 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2410 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2411 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2412 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2413 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2414 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2415 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2416 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2417 }
2418 #endif /* COMPILE_PCRE16 */
2419
2420 #endif /* COMPILE_PCRE8 */
2421
2422 #endif /* SUPPORT_UTF */
2423
2424 #ifdef SUPPORT_UCP
2425
2426 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2427 #define UCD_BLOCK_MASK 127
2428 #define UCD_BLOCK_SHIFT 7
2429
/* Emits the shared fast-call helper that performs the two-stage Unicode
   property table lookup for the character in TMP1:
     block  = ucd_stage1[c >> UCD_BLOCK_SHIFT]          (byte load)
     index  = ucd_stage2[(block << UCD_BLOCK_SHIFT) + (c & UCD_BLOCK_MASK)]
                                                        (16-bit load)
     result = chartype byte of ucd_records[index]
   The trailing 1 and 3 passed with SLJIT_MEM2 are presumably index shifts
   (x2 for the 16-bit stage2 entries, x8 for the records); the x8 agrees with
   the sizeof(ucd_record) == 8 assertion below. */
2430 static void do_getucd(compiler_common *common)
2431 {
2432 /* Search the UCD record for the character comes in TMP1.
2433 Returns chartype in TMP1 and UCD offset in TMP2. */
2434 DEFINE_COMPILER;
2435
2436 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2437
2438 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Stage 1: block number of the character. */
2439 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2440 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
/* Stage 2: position inside the block plus the block base. */
2441 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2442 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2443 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2444 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
2445 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* Record lookup: TMP2 keeps the record index, TMP1 gets the chartype byte. */
2446 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2447 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2448 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2449 }
2450 #endif
2451
/* Emits the entry sequence of the main matching loop and returns the label
   (mainloop) that the caller jumps back to in order to advance STR_PTR by
   one character and retry.

   Layout of the generated code:
     1. when firstline is set, a scan that records the end of the first line
        in the local slot common->first_line_end (STR_PTR is preserved
        through TMP3);
     2. an unconditional jump (start) over everything below, so that falling
        into this code for the first time does not advance STR_PTR;
     3. when newlinecheck is set, a side block (newlinelabel) that steps
        over a two-unit newline;
     4. the mainloop label followed by the "advance one character" code.

   hascrorlf  newlinecheck is only enabled when this and firstline are both
              FALSE.
   firstline  emit the first-line-end scan of step 1. */
2452 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2453 {
2454 DEFINE_COMPILER;
2455 struct sljit_label *mainloop;
2456 struct sljit_label *newlinelabel = NULL;
2457 struct sljit_jump *start;
2458 struct sljit_jump *end = NULL;
2459 struct sljit_jump *nl = NULL;
2460 #ifdef SUPPORT_UTF
2461 struct sljit_jump *singlechar;
2462 #endif
2463 jump_list *newline = NULL;
2464 BOOL newlinecheck = FALSE;
2465 BOOL readuchar = FALSE;
2466
/* The two-unit newline handling is needed for the ANY and ANYCRLF
   conventions and for a fixed newline whose value is above 255. */
2467 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2468 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2469 newlinecheck = TRUE;
2470
2471 if (firstline)
2472 {
2473 /* Search for the end of the first line. */
2474 SLJIT_ASSERT(common->first_line_end != 0);
2475 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2476
2477 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2478 {
/* Fixed two-unit newline: advance one unit per iteration and compare the
   units at STR_PTR - 1 and STR_PTR with the high and low byte of
   common->newline. Afterwards STR_PTR - 1 unit is stored as the end. */
2479 mainloop = LABEL();
2480 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2481 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2482 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2483 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2484 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2485 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2486 JUMPHERE(end);
2487 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2488 }
2489 else
2490 {
/* Other conventions: the position is stored before each character is read,
   so when a newline is found (the jumps collected in `newline`) the slot
   still holds the position of that newline. If the subject ends first, the
   final STR_PTR is stored instead. */
2491 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2492 mainloop = LABEL();
2493 /* Continual stores does not cause data dependency. */
2494 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2495 read_char(common);
2496 check_newlinechar(common, common->nltype, &newline, TRUE);
2497 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2498 JUMPHERE(end);
2499 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2500 set_jumps(newline, LABEL());
2501 }
2502
/* Restore the starting position saved above. */
2503 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2504 }
2505
2506 start = JUMP(SLJIT_JUMP);
2507
2508 if (newlinecheck)
2509 {
/* Entered from the main loop when the unit at STR_PTR equals the high byte
   of common->newline: step over it and, unless the end was reached, over the
   next unit as well when that one equals the low byte. */
2510 newlinelabel = LABEL();
2511 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2512 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2513 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2514 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2515 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2516 #ifdef COMPILE_PCRE16
2517 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2518 #endif
2519 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2520 nl = JUMP(SLJIT_JUMP);
2521 }
2522
2523 mainloop = LABEL();
2524
2525 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit only has to be loaded when it is inspected below, i.e.
   in UTF mode or when the two-unit newline check is active. */
2526 #ifdef SUPPORT_UTF
2527 if (common->utf) readuchar = TRUE;
2528 #endif
2529 if (newlinecheck) readuchar = TRUE;
2530
2531 if (readuchar)
2532 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2533
2534 if (newlinecheck)
2535 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2536
2537 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2538 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2539 if (common->utf)
2540 {
/* UTF-8: for a lead byte >= 0xc0 also add utf8_table4[lead - 0xc0]
   (presumably the number of continuation bytes). */
2541 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2542 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2543 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2544 JUMPHERE(singlechar);
2545 }
2546 #endif
2547 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2548 if (common->utf)
2549 {
/* UTF-16: for a unit >= 0xd800 add one more unit when
   (unit & 0xfc00) == 0xd800, i.e. when it is a high surrogate. */
2550 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2551 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2552 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2553 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2554 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2555 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2556 JUMPHERE(singlechar);
2557 }
2558 #endif
/* The initial jump and both exits of the newline side block land here,
   after the advance code. */
2559 JUMPHERE(start);
2560
2561 if (newlinecheck)
2562 {
2563 JUMPHERE(end);
2564 JUMPHERE(nl);
2565 }
2566
2567 return mainloop;
2568 }
2569
/* Tries to emit a fast-forward loop that searches the subject for the first
   two code units of the pattern at once.

   The compiled pattern is walked from common->start + 1 + IMM2_SIZE. Zero
   width assertions are skipped, OP_CHAR / OP_CHARI and the PLUS / EXACT
   repeat opcodes (with their caseless variants) contribute code units, and
   any other opcode makes the function give up. Patterns whose first bracket
   has an alternative (OP_ALT) are rejected as well.

   chars[] receives two (value, mask) pairs: chars[0] / chars[2] are the code
   units with the case bit already set, chars[1] / chars[3] are the case bits
   that have to be ORed into the subject before comparing (0 when the unit is
   case sensitive).

   Returns TRUE when the loop was emitted, FALSE when nothing was emitted.
   firstline restricts the search to the first line (common->first_line_end);
   the real STR_END is kept in TMP3 meanwhile. */
2570 static SLJIT_INLINE BOOL fast_forward_first_two_chars(compiler_common *common, BOOL firstline)
2571 {
2572 DEFINE_COMPILER;
2573 struct sljit_label *start;
2574 struct sljit_jump *quit;
2575 struct sljit_jump *found;
2576 pcre_int32 chars[4];
2577 pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
2578 int location = 0;
2579 pcre_int32 len, c, bit, caseless;
2580 BOOL must_end;
2581
/* Used to build the combined two-unit immediate with the host byte order. */
2582 #ifdef COMPILE_PCRE8
2583 union {
2584 sljit_uh ascombined;
2585 sljit_ub asuchars[2];
2586 } pair;
2587 #else
2588 union {
2589 sljit_ui ascombined;
2590 sljit_uh asuchars[2];
2591 } pair;
2592 #endif
2593
2594 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2595 return FALSE;
2596
2597 while (TRUE)
2598 {
2599 caseless = 0;
/* must_end stays TRUE for the repeat opcodes: if they do not supply both
   units, the scan cannot continue with whatever follows them. */
2600 must_end = TRUE;
2601 switch(*cc)
2602 {
2603 case OP_CHAR:
2604 must_end = FALSE;
2605 cc++;
2606 break;
2607
2608 case OP_CHARI:
2609 caseless = 1;
2610 must_end = FALSE;
2611 cc++;
2612 break;
2613
2614 case OP_SOD:
2615 case OP_SOM:
2616 case OP_SET_SOM:
2617 case OP_NOT_WORD_BOUNDARY:
2618 case OP_WORD_BOUNDARY:
2619 case OP_EODN:
2620 case OP_EOD:
2621 case OP_CIRC:
2622 case OP_CIRCM:
2623 case OP_DOLL:
2624 case OP_DOLLM:
2625 /* Zero width assertions. */
2626 cc++;
2627 continue;
2628
2629 case OP_PLUS:
2630 case OP_MINPLUS:
2631 case OP_POSPLUS:
2632 cc++;
2633 break;
2634
2635 case OP_EXACT:
2636 cc += 1 + IMM2_SIZE;
2637 break;
2638
2639 case OP_PLUSI:
2640 case OP_MINPLUSI:
2641 case OP_POSPLUSI:
2642 caseless = 1;
2643 cc++;
2644 break;
2645
2646 case OP_EXACTI:
2647 caseless = 1;
2648 cc += 1 + IMM2_SIZE;
2649 break;
2650
2651 default:
2652 return FALSE;
2653 }
2654
/* Number of code units that make up the character at cc. */
2655 len = 1;
2656 #ifdef SUPPORT_UTF
2657 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2658 #endif
2659
2660 if (caseless && char_has_othercase(common, cc))
2661 {
/* Repack the helper's result as (bit mask << 8) | countdown, where the
   countdown is the value `len` has in the loop below when the unit carrying
   the case bit is reached. A zero result means the two cases do not differ
   in a single bit, which this optimisation cannot handle. */
2662 caseless = char_get_othercase_bit(common, cc);
2663 if (caseless == 0)
2664 return FALSE;
2665 #ifdef COMPILE_PCRE8
2666 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2667 #else
2668 if ((caseless & 0x100) != 0)
2669 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2670 else
2671 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2672 #endif
2673 }
2674 else
2675 caseless = 0;
2676
2677 while (len > 0 && location < 2 * 2)
2678 {
2679 c = *cc;
2680 bit = 0;
2681 if (len == (caseless & 0xff))
2682 {
2683 bit = caseless >> 8;
2684 c |= bit;
2685 }
2686
2687 chars[location] = c;
2688 chars[location + 1] = bit;
2689
2690 len--;
2691 location += 2;
2692 cc++;
2693 }
2694
2695 if (location == 2 * 2)
2696 break;
2697 else if (must_end)
2698 return FALSE;
2699 }
2700
/* Two code units are loaded per iteration, so the loop has to stop one
   whole code unit before the end. The distance must be IN_UCHARS(1) bytes:
   a literal 1 is only correct in the 8-bit build and lets the 16-bit build
   read one code unit past the end of the subject. */
2701 if (firstline)
2702 {
2703 SLJIT_ASSERT(common->first_line_end != 0);
2704 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2705 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS(1));
2706 }
2707 else
2708 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(1));
2709
2710 start = LABEL();
2711 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* Load both code units into TMP1: with a single wide load where unaligned
   access is available, otherwise with two loads merged in the order that
   matches the in-memory layout of `pair`. */
2712 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2713 #ifdef COMPILE_PCRE8
2714 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2715 #else /* COMPILE_PCRE8 */
2716 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2717 #endif
2718
2719 #else /* SLJIT_UNALIGNED */
2720
2721 #if defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN
2722 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2723 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2724 #else /* SLJIT_BIG_ENDIAN */
2725 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2726 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2727 #endif /* SLJIT_BIG_ENDIAN */
2728
2729 #ifdef COMPILE_PCRE8
2730 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 8);
2731 #else /* COMPILE_PCRE8 */
2732 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
2733 #endif
2734 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2735
2736 #endif
2737
/* Fold the case bits into the subject units before comparing. */
2738 if (chars[1] != 0 || chars[3] != 0)
2739 {
2740 pair.asuchars[0] = chars[1];
2741 pair.asuchars[1] = chars[3];
2742 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, pair.ascombined);
2743 }
2744
2745 pair.asuchars[0] = chars[0];
2746 pair.asuchars[1] = chars[2];
2747 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, pair.ascombined);
2748
2749 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2750 JUMPTO(SLJIT_JUMP, start);
2751 JUMPHERE(found);
2752 JUMPHERE(quit);
2753
/* Undo the STR_END adjustment made before the loop. */
2754 if (firstline)
2755 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2756 else
2757 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(1));
2758 return TRUE;
2759 }
2760
/* Emits a loop that advances STR_PTR one code unit at a time until the unit
   at STR_PTR matches first_char (or its other case when caseless is set) or
   STR_PTR reaches STR_END.

   first_char  the code unit to search for.
   caseless    also accept the other case of first_char.
   firstline   limit the search to the first line: STR_END is temporarily
               replaced by the value stored at common->first_line_end and the
               real STR_END is kept in TMP3. */
2761 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2762 {
2763 DEFINE_COMPILER;
2764 struct sljit_label *start;
2765 struct sljit_jump *quit;
2766 struct sljit_jump *found;
2767 pcre_uchar oc, bit;
2768
2769 if (firstline)
2770 {
2771 SLJIT_ASSERT(common->first_line_end != 0);
2772 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2773 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2774 }
2775
2776 start = LABEL();
2777 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2778 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2779
/* oc is the other case of first_char, or first_char itself when there is
   none (or when the search is case sensitive). */
2780 oc = first_char;
2781 if (caseless)
2782 {
2783 oc = TABLE_GET(first_char, common->fcc, first_char);
2784 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2785 if (first_char > 127 && common->utf)
2786 oc = UCD_OTHERCASE(first_char);
2787 #endif
2788 }
2789 if (first_char == oc)
2790 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2791 else
2792 {
2793 bit = first_char ^ oc;
2794 if (ispowerof2(bit))
2795 {
/* The two cases differ in one bit only: force that bit on in a copy of the
   subject unit and do a single compare. */
2796 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2797 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2798 }
2799 else
2800 {
/* General case: TMP2 = (unit == first_char) | (unit == oc). */
2801 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2802 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2803 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2804 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2805 found = JUMP(SLJIT_C_NOT_ZERO);
2806 }
2807 }
2808
/* No match at this position: try the next code unit. */
2809 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2810 JUMPTO(SLJIT_JUMP, start);
2811 JUMPHERE(found);
2812 JUMPHERE(quit);
2813
2814 if (firstline)
2815 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2816 }
2817
/* Emits code that advances STR_PTR to the position just after the next
   newline, unless STR_PTR is at (or before) the `str` field of
   jit_arguments, in which case nothing is skipped.

   Two code paths are generated:
   - a fixed two-unit newline (common->newline > 255) is searched with a
     dedicated loop that compares the two units before STR_PTR;
   - every other convention steps one character back, then reads characters
     until check_newlinechar() accepts one; for ANY / ANYCRLF a CR that is
     directly followed by NL is consumed as one newline.

   firstline limits the search to the first line: STR_END is temporarily
   replaced by the value stored at common->first_line_end and the real
   STR_END is kept in TMP3 until either exit path restores it. */
2818 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2819 {
2820 DEFINE_COMPILER;
2821 struct sljit_label *loop;
2822 struct sljit_jump *lastchar;
2823 struct sljit_jump *firstchar;
2824 struct sljit_jump *quit;
2825 struct sljit_jump *foundcr = NULL;
2826 struct sljit_jump *notfoundnl;
2827 jump_list *newline = NULL;
2828
2829 if (firstline)
2830 {
2831 SLJIT_ASSERT(common->first_line_end != 0);
2832 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2833 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2834 }
2835
2836 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2837 {
2838 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2839 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2840 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2841 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2842 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2843
/* Step back one unit when STR_PTR is at least two units past `begin`, so
   that the first iteration below (which advances again and inspects the two
   units before STR_PTR) never reads in front of `begin`. */
2844 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2845 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2846 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2847 #ifdef COMPILE_PCRE16
2848 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2849 #endif
2850 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2851
2852 loop = LABEL();
2853 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2854 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2855 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2856 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2857 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2858 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2859
2860 JUMPHERE(quit);
2861 JUMPHERE(firstchar);
2862 JUMPHERE(lastchar);
2863
/* Restore the real STR_END from TMP3, where it was saved on entry. This
   path only uses TMP1 and TMP2, so TMP3 is still intact. Restoring from any
   other location would load a value this function never stored. */
2864 if (firstline)
2865 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2866 return;
2867 }
2868
2869 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2870 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2871 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
/* Re-examine the character before STR_PTR first: if it already is a
   newline, STR_PTR is at a line start and no skipping is required. */
2872 skip_char_back(common);
2873
2874 loop = LABEL();
2875 read_char(common);
2876 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2877 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2878 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
/* jumpiftrue == FALSE: the collected jumps are taken for non-newline
   characters and are bound back to the loop head. */
2879 check_newlinechar(common, common->nltype, &newline, FALSE);
2880 set_jumps(newline, loop);
2881
2882 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2883 {
2884 quit = JUMP(SLJIT_JUMP);
/* A CR was read: if the next unit exists and is NL, step over it as well. */
2885 JUMPHERE(foundcr);
2886 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2887 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2888 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2889 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2890 #ifdef COMPILE_PCRE16
2891 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2892 #endif
2893 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2894 JUMPHERE(notfoundnl);
2895 JUMPHERE(quit);
2896 }
2897 JUMPHERE(lastchar);
2898 JUMPHERE(firstchar);
2899
2900 if (firstline)
2901 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2902 }
2903
/* Emits a loop that advances STR_PTR character by character until the code
   unit at STR_PTR has its bit set in the bitmap located at start_bits, or
   until STR_PTR reaches STR_END.

   start_bits  address of the bitmap; byte (c >> 3), bit (c & 7) is tested.
   firstline   limit the search to the first line: STR_END is temporarily
               replaced by the value stored at common->first_line_end. The
               real STR_END is parked in RETURN_ADDR here, because TMP3 is
               used to keep the code unit in UTF mode. */
2904 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2905 {
2906 DEFINE_COMPILER;
2907 struct sljit_label *start;
2908 struct sljit_jump *quit;
2909 struct sljit_jump *found;
2910 #ifndef COMPILE_PCRE8
2911 struct sljit_jump *jump;
2912 #endif
2913
2914 if (firstline)
2915 {
2916 SLJIT_ASSERT(common->first_line_end != 0);
2917 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
2918 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2919 }
2920
2921 start = LABEL();
2922 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2923 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* The bit test below destroys TMP1; keep the unit for the UTF length
   computation that follows a failed test. */
2924 #ifdef SUPPORT_UTF
2925 if (common->utf)
2926 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2927 #endif
/* Outside the 8-bit build every unit that is not below 255 is tested as
   255, which keeps the bitmap index within 0..255. */
2928 #ifndef COMPILE_PCRE8
2929 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2930 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2931 JUMPHERE(jump);
2932 #endif
/* found if (bitmap[c >> 3] & (1 << (c & 7))) != 0 */
2933 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2934 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2935 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2936 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2937 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2938 found = JUMP(SLJIT_C_NOT_ZERO);
2939
2940 #ifdef SUPPORT_UTF
2941 if (common->utf)
2942 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2943 #endif
2944 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2945 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2946 if (common->utf)
2947 {
/* UTF-8: a unit below 0xc0 loops immediately; otherwise the additional
   length from utf8_table4[unit - 0xc0] is skipped too. */
2948 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2949 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2950 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2951 }
2952 #endif
2953 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2954 if (common->utf)
2955 {
/* UTF-16: a unit below 0xd800 loops immediately; otherwise one more unit is
   skipped when (unit & 0xfc00) == 0xd800. */
2956 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2957 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2958 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2959 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2960 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2961 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2962 }
2963 #endif
2964 JUMPTO(SLJIT_JUMP, start);
2965 JUMPHERE(found);
2966 JUMPHERE(quit);
2967
2968 if (firstline)
2969 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
2970 }
2971
/* Emits a forward scan for req_char (or its other case when caseless is
   set) between STR_PTR and STR_END and returns the jump that is taken when
   the scan reaches STR_END without a hit; the caller has to bind it.

   The scan is skipped entirely when
   - STR_PTR + REQ_BYTE_MAX is still below STR_END (toolong), or
   - STR_PTR is below the pointer cached in the local slot
     common->req_char_ptr (alreadyfound).
   On a hit, the position of the hit is stored in that slot.

   has_firstchar  start scanning one code unit after STR_PTR instead of at
                  STR_PTR. */
2972 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2973 {
2974 DEFINE_COMPILER;
2975 struct sljit_label *loop;
2976 struct sljit_jump *toolong;
2977 struct sljit_jump *alreadyfound;
2978 struct sljit_jump *found;
2979 struct sljit_jump *foundoc = NULL;
2980 struct sljit_jump *notfound;
2981 pcre_uchar oc, bit;
2982
2983 SLJIT_ASSERT(common->req_char_ptr != 0);
2984 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2985 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2986 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2987 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2988
/* TMP1 is the scan cursor; STR_PTR itself is left untouched. */
2989 if (has_firstchar)
2990 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2991 else
2992 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2993
2994 loop = LABEL();
2995 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2996
2997 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* oc is the other case of req_char, or req_char itself when there is none
   (or when the search is case sensitive). */
2998 oc = req_char;
2999 if (caseless)
3000 {
3001 oc = TABLE_GET(req_char, common->fcc, req_char);
3002 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3003 if (req_char > 127 && common->utf)
3004 oc = UCD_OTHERCASE(req_char);
3005 #endif
3006 }
3007 if (req_char == oc)
3008 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3009 else
3010 {
3011 bit = req_char ^ oc;
3012 if (ispowerof2(bit))
3013 {
/* Cases differ in one bit: force it on and compare once. */
3014 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3015 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3016 }
3017 else
3018 {
/* Otherwise compare against both cases separately. */
3019 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3020 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3021 }
3022 }
3023 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3024 JUMPTO(SLJIT_JUMP, loop);
3025
3026 JUMPHERE(found);
3027 if (foundoc)
3028 JUMPHERE(foundoc);
/* Remember where the character was seen so that later calls with a smaller
   STR_PTR can skip the scan. */
3029 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3030 JUMPHERE(alreadyfound);
3031 JUMPHERE(toolong);
3032 return notfound;
3033 }
3034
/* Emits the shared fast-call helper that walks the frame entries stored
   from STACK_TOP upwards and undoes what they describe. TMP1 is the read
   cursor (STACK_TOP itself is not modified here) and TMP3 holds the local
   base obtained with GET_LOCAL_BASE.

   The first word of every entry selects its meaning:
   - greater (signed) than frame_end: it is an offset from the local base;
     the next two words are copied to that location (entry size: 3 words);
   - frame_end: stop and return;
   - frame_setstrbegin: the next word is stored in OVECTOR(0) (2 words);
   - frame_setmark (only when common->mark_ptr != 0): the next word is stored
     in the local slot common->mark_ptr (2 words);
   - anything else: skipped as a 2 word entry. */
3035 static void do_revertframes(compiler_common *common)
3036 {
3037 DEFINE_COMPILER;
3038 struct sljit_jump *jump;
3039 struct sljit_label *mainloop;
3040
3041 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3042 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3043 GET_LOCAL_BASE(TMP3, 0, 0);
3044
3045 /* Drop frames until we reach STACK_TOP. */
3046 mainloop = LABEL();
3047 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3048 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
/* Offset entry: restore two consecutive words at local base + offset. */
3049 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3050 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3051 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
3052 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
3053 JUMPTO(SLJIT_JUMP, mainloop);
3054
3055 JUMPHERE(jump);
3056 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3057 /* End of dropping frames. */
3058 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3059
3060 JUMPHERE(jump);
3061 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
3062 /* Set string begin. */
3063 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3064 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3065 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3066 JUMPTO(SLJIT_JUMP, mainloop);
3067
3068 JUMPHERE(jump);
/* The mark handler is only generated when a mark slot was allocated. */
3069 if (common->mark_ptr != 0)
3070 {
3071 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3072 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3073 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3074 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3075 JUMPTO(SLJIT_JUMP, mainloop);
3076
3077 JUMPHERE(jump);
3078 }
3079
3080 /* Unknown command. */
3081 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3082 JUMPTO(SLJIT_JUMP, mainloop);
3083 }
3084
/* Emits the shared fast-call helper behind the word boundary tests.

   It computes a 0/1 "is a word character" flag for the character before
   STR_PTR (kept in the local slot LOCALS1) and for the character at STR_PTR
   (kept in TMP2), then XORs the two with SLJIT_SET_E, so the answer is
   handed back through the CPU flags. A missing neighbour (STR_PTR at
   `begin`, or the check_str_end() jump being taken) counts as 0.

   Word test:
   - with common->use_ucp: underscore, or a chartype from getucd in the
     ranges ucp_Ll..ucp_Lu or ucp_Nd..ucp_No;
   - otherwise: bit 4 (ctype_word, see the compile-time assert) of the
     common->ctypes entry; characters above 255 are treated as non-word where
     such characters are possible.

   The return address lives in LOCALS0 because the helper itself performs
   fast calls. */
3085 static void check_wordboundary(compiler_common *common)
3086 {
3087 DEFINE_COMPILER;
3088 struct sljit_jump *skipread;
3089 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3090 struct sljit_jump *jump;
3091 #endif
3092
3093 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3094
3095 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3096 /* Get type of the previous char, and put it to LOCALS1. */
3097 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3098 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3099 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3100 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
/* Step back, then read the previous character forwards again (read_char is
   defined elsewhere; presumably it leaves STR_PTR where it was before the
   step back). */
3101 skip_char_back(common);
3102 check_start_used_ptr(common);
3103 read_char(common);
3104
3105 /* Testing char type. */
3106 #ifdef SUPPORT_UCP
3107 if (common->use_ucp)
3108 {
/* TMP2 is preset to 1 for the underscore shortcut. The two range tests use
   the unsigned "(x - low) <= (high - low)" form. */
3109 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3110 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3111 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3112 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3113 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3114 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3115 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3116 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3117 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3118 JUMPHERE(jump);
3119 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3120 }
3121 else
3122 #endif
3123 {
/* Table based test. When the jump below is taken (character above 255),
   LOCALS1 keeps the 0 stored at the top of the function. */
3124 #ifndef COMPILE_PCRE8
3125 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3126 #elif defined SUPPORT_UTF
3127 /* Here LOCALS1 has already been zeroed. */
3128 jump = NULL;
3129 if (common->utf)
3130 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3131 #endif /* COMPILE_PCRE8 */
3132 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3133 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3134 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3135 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3136 #ifndef COMPILE_PCRE8
3137 JUMPHERE(jump);
3138 #elif defined SUPPORT_UTF
3139 if (jump != NULL)
3140 JUMPHERE(jump);
3141 #endif /* COMPILE_PCRE8 */
3142 }
3143 JUMPHERE(skipread);
3144
/* Second half: the same test for the character at STR_PTR, result in TMP2.
   TMP2 = 0 is the answer when check_str_end() takes its jump. */
3145 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3146 skipread = check_str_end(common);
3147 peek_char(common);
3148
3149 /* Testing char type. This is a code duplication. */
3150 #ifdef SUPPORT_UCP
3151 if (common->use_ucp)
3152 {
3153 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3154 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3155 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3156 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3157 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3158 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3159 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3160 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3161 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3162 JUMPHERE(jump);
3163 }
3164 else
3165 #endif
3166 {
3167 #ifndef COMPILE_PCRE8
3168 /* TMP2 may be destroyed by peek_char. */
3169 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3170 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3171 #elif defined SUPPORT_UTF
3172 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3173 jump = NULL;
3174 if (common->utf)
3175 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3176 #endif
3177 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3178 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3179 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3180 #ifndef COMPILE_PCRE8
3181 JUMPHERE(jump);
3182 #elif defined SUPPORT_UTF
3183 if (jump != NULL)
3184 JUMPHERE(jump);
3185 #endif /* COMPILE_PCRE8 */
3186 }
3187 JUMPHERE(skipread);
3188
/* Flags only: the XOR result itself is discarded (SLJIT_UNUSED). */
3189 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3190 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3191 }
3192
3193 /*
3194 range format:
3195
3196 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3197 ranges[1] = first bit (0 or 1)
3198 ranges[2 .. length+1] = positions where the bit changes (the first character whose bit differs from the previous one)
3199 */
3200
3201 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3202 {
3203 DEFINE_COMPILER;
3204 struct sljit_jump *jump;
3205
3206 if (ranges[0] < 0)
3207 return FALSE;
3208
3209 switch(ranges[0])
3210 {
3211 case 1:
3212 if (readch)
3213 read_char(common);
3214 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3215 return TRUE;
3216
3217 case 2:
3218 if (readch)
3219 read_char(common);
3220 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3221 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3222 return TRUE;
3223
3224 case 4:
3225 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3226 {
3227 if (readch)
3228 read_char(common);
3229 if (ranges[1] != 0)
3230 {
3231 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3232 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3233 }
3234 else
3235 {
3236 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3237 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3238 JUMPHERE(jump);
3239 }
3240 return TRUE;
3241 }
3242 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && ispowerof2(ranges[4] - ranges[2]))
3243 {
3244 if (readch)
3245 read_char(common);
3246 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3247 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3248 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3249 return TRUE;
3250 }
3251 return FALSE;
3252
3253 default:
3254 return FALSE;
3255 }
3256 }
3257
3258 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3259 {
3260 int i, bit, length;
3261 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3262
3263 bit = ctypes[0] & flag;
3264 ranges[0] = -1;
3265 ranges[1] = bit != 0 ? 1 : 0;
3266 length = 0;
3267
3268 for (i = 1; i < 256; i++)
3269 if ((ctypes[i] & flag) != bit)
3270 {
3271 if (length >= MAX_RANGE_SIZE)
3272 return;
3273 ranges[2 + length] = i;
3274 length++;
3275 bit ^= flag;
3276 }
3277
3278 if (bit != 0)
3279 {
3280 if (length >= MAX_RANGE_SIZE)
3281 return;
3282 ranges[2 + length] = 256;
3283 length++;
3284 }
3285 ranges[0] = length;
3286 }
3287
3288 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3289 {
3290 int ranges[2 + MAX_RANGE_SIZE];
3291 pcre_uint8 bit, cbit, all;
3292 int i, byte, length = 0;
3293
3294 bit = bits[0] & 0x1;
3295 ranges[1] = bit;
3296 /* Can be 0 or 255. */
3297 all = -bit;
3298
3299 for (i = 0; i < 256; )
3300 {
3301 byte = i >> 3;
3302 if ((i & 0x7) == 0 && bits[byte] == all)
3303 i += 8;
3304 else
3305 {
3306 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3307 if (cbit != bit)
3308 {
3309 if (length >= MAX_RANGE_SIZE)
3310 return FALSE;
3311 ranges[2 + length] = i;
3312 length++;
3313 bit = cbit;
3314 all = -cbit;
3315 }
3316 i++;
3317 }
3318 }
3319
3320 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3321 {
3322 if (length >= MAX_RANGE_SIZE)
3323 return FALSE;
3324 ranges[2 + length] = 256;
3325 length++;
3326 }
3327 ranges[0] = length;
3328
3329 return check_ranges(common, ranges, backtracks, FALSE);
3330 }
3331
/* Emits a fast-call subroutine that tests whether the character in TMP1 is
one of 0x0a-0x0d or 0x85 and, in 16 bit mode or when common->utf is set,
0x2028 or 0x2029. The result is accumulated in TMP2 and the final
COND_VALUE also sets the equality flags from it. TMP1 is modified by the
emitted code as well. */
3332 static void check_anynewline(compiler_common *common)
3333 {
3334 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3335 DEFINE_COMPILER;
3336
3337 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3338
/* Rebase to 0x0a so that one unsigned comparison covers 0x0a-0x0d. */
3339 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3340 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3341 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3342 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3343 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3344 #ifdef COMPILE_PCRE8
3345 if (common->utf)
3346 {
3347 #endif
3348 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit lets a single comparison accept both 0x2028 and
0x2029 (the value is still rebased by 0x0a). */
3349 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3350 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3351 #ifdef COMPILE_PCRE8
3352 }
3353 #endif
3354 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the result of the last comparison and set the flags for the caller. */
3355 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3356 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3357 }
3358
/* Emits a fast-call subroutine that tests whether the character in TMP1 is a
horizontal space: 0x09, 0x20 or 0xa0 and, in 16 bit mode or when common->utf
is set, 0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f or 0x3000. The result
is accumulated in TMP2 and the final COND_VALUE also sets the equality flags
from it. In the wide character path TMP1 is rebased by 0x2000. */
3359 static void check_hspace(compiler_common *common)
3360 {
3361 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed. */
3362 DEFINE_COMPILER;
3363
3364 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3365
3366 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3367 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3368 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3369 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3370 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3371 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3372 #ifdef COMPILE_PCRE8
3373 if (common->utf)
3374 {
3375 #endif
3376 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3377 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3378 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3379 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3380 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is rebased to 0x2000, hence the "- 0x2000" immediates. */
3381 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3382 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3383 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3384 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3385 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3386 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3387 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3388 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3389 #ifdef COMPILE_PCRE8
3390 }
3391 #endif
3392 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the result of the last comparison and set the flags for the caller. */
3393 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3394
3395 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3396 }
3397
/* Emits a fast-call subroutine that tests whether the character in TMP1 is a
vertical space: 0x0a-0x0d or 0x85 and, in 16 bit mode or when common->utf is
set, 0x2028 or 0x2029. The result is accumulated in TMP2 and the final
COND_VALUE also sets the equality flags from it. TMP1 is modified by the
emitted code as well. */
3398 static void check_vspace(compiler_common *common)
3399 {
3400 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed. */
3401 DEFINE_COMPILER;
3402
3403 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3404
/* Rebase to 0x0a so that one unsigned comparison covers 0x0a-0x0d. */
3405 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3406 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3407 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3408 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3409 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3410 #ifdef COMPILE_PCRE8
3411 if (common->utf)
3412 {
3413 #endif
3414 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit lets a single comparison accept both 0x2028 and
0x2029 (the value is still rebased by 0x0a). */
3415 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3416 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3417 #ifdef COMPILE_PCRE8
3418 }
3419 #endif
3420 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* Merge the result of the last comparison and set the flags for the caller. */
3421 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3422
3423 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3424 }
3425
/* The comparison loops below borrow the STR_END and STACK_TOP registers for
the two characters being compared; both are saved on entry and restored
before returning. */
3426 #define CHAR1 STR_END
3427 #define CHAR2 STACK_TOP
3428
/* Emits a fast-call subroutine that compares two sequences of code units for
exact equality. As used by the emitted code: TMP1 points to the first
sequence, TMP2 holds the length in bytes, and the second sequence is the one
that ends at STR_PTR on entry (STR_PTR is moved back by TMP2 first).
The loop stops at the first mismatch or when TMP2 reaches zero, so TMP2 is
zero on exit only when every unit compared equal (assuming TMP2 was a
positive multiple of IN_UCHARS(1) on entry). TMP3 and LOCALS0 are used as
scratch storage. */
3429 static void do_casefulcmp(compiler_common *common)
3430 {
3431 DEFINE_COMPILER;
3432 struct sljit_jump *jump;
3433 struct sljit_label *label;
3434
3435 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3436 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the borrowed registers. */
3437 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3438 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved one unit back because the loads in the loop use
an offset of one unit (MOVU_UCHAR presumably also updates the base
register - NOTE(review): confirm against the macro definition). */
3439 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3440 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3441
3442 label = LABEL();
3443 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3444 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3445 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3446 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3447 JUMPTO(SLJIT_C_NOT_ZERO, label);
3448
/* Common exit for both mismatch and completion: undo the initial one unit
adjustment of STR_PTR and restore the borrowed registers. */
3449 JUMPHERE(jump);
3450 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3451 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3452 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3453 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3454 }
3455
/* The caseless loop additionally borrows STACK_LIMIT to hold the address of
the lower casing table; it is saved in TMP3 and restored before returning. */
3456 #define LCC_TABLE STACK_LIMIT
3457
/* Emits a fast-call subroutine with the same register usage as
do_casefulcmp(), except that both code units are translated through the
common->lcc table before being compared. In non 8 bit builds units above 255
are compared untranslated. TMP3, LOCALS0 and LOCALS1 are used as scratch
storage. TMP2 is zero on exit only when every unit compared equal (assuming
TMP2 was a positive multiple of IN_UCHARS(1) on entry). */
3458 static void do_caselesscmp(compiler_common *common)
3459 {
3460 DEFINE_COMPILER;
3461 struct sljit_jump *jump;
3462 struct sljit_label *label;
3463
3464 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3465 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3466
/* Save the borrowed registers, then load the table address. */
3467 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3468 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3469 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3470 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3471 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3472 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3473
3474 label = LABEL();
3475 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3476 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is skipped for values above 255 in non 8 bit builds. */
3477 #ifndef COMPILE_PCRE8
3478 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3479 #endif
3480 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3481 #ifndef COMPILE_PCRE8
3482 JUMPHERE(jump);
3483 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3484 #endif
3485 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3486 #ifndef COMPILE_PCRE8
3487 JUMPHERE(jump);
3488 #endif
3489 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3490 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3491 JUMPTO(SLJIT_C_NOT_ZERO, label);
3492
/* Common exit for both mismatch and completion: undo the initial one unit
adjustment of STR_PTR and restore the borrowed registers. */
3493 JUMPHERE(jump);
3494 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3495 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3496 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3497 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3498 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3499 }
3500
3501 #undef LCC_TABLE
3502 #undef CHAR1
3503 #undef CHAR2
3504
#if defined SUPPORT_UTF && defined SUPPORT_UCP

/* Caseless comparison of two UTF strings, called from the generated code
(doing this at JIT level would be ineffective).

Arguments:
  src1   start of the first string
  args   the second string starts at args->uchar_ptr and must not be read
         at or beyond args->end
  end1   end of the first string

Returns the position reached in the second string when the whole first
string has been matched, NULL on a mismatch, and (pcre_uchar*)1 when the
second string ends before the first one. */

static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
{
const pcre_uchar *src2 = args->uchar_ptr;
const pcre_uchar *end2 = args->end;
int c1, c2;

for (;;)
  {
  /* The first string is exhausted: everything has matched. */
  if (src1 >= end1)
    return src2;

  /* The second string ran out first. */
  if (src2 >= end2)
    return (pcre_uchar*)1;

  GETCHARINC(c1, src1);
  GETCHARINC(c2, src2);
  if (c1 == c2)
    continue;
  if (c1 != UCD_OTHERCASE(c2))
    return NULL;
  }
}

#endif /* SUPPORT_UTF && SUPPORT_UCP */
3526
/* Emits the comparison of one pattern character (all of its code units in
UTF mode) against the subject. The subject units are loaded from negative
offsets relative to STR_PTR, using context->length as the distance still to
be compared, and a mismatch jumps to 'backtracks'.

Arguments:
  common      compiler state
  caseless    TRUE when the character may also match in its other case
  cc          points to the pattern character
  context     state shared by the calls that compare one literal sequence:
              remaining length, the register holding the most recently
              loaded data and, for unaligned reads, the collected units
  backtracks  list receiving the mismatch jumps

Returns the pattern pointer advanced past the character. */
3527 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3528 compare_context* context, jump_list **backtracks)
3529 {
3530 DEFINE_COMPILER;
3531 unsigned int othercasebit = 0;
3532 pcre_uchar *othercasechar = NULL;
3533 #ifdef SUPPORT_UTF
3534 int utflength;
3535 #endif
3536
3537 if (caseless && char_has_othercase(common, cc))
3538 {
3539 othercasebit = char_get_othercase_bit(common, cc);
3540 SLJIT_ASSERT(othercasebit);
3541 /* Extracting bit difference info. */
/* The upper part of the returned value selects the code unit that differs
between the two cases (othercasechar), the low byte is the differing bit.
In 16 bit mode bit 0x100 moves the bit into the upper byte of the unit. */
3542 #ifdef COMPILE_PCRE8
3543 othercasechar = cc + (othercasebit >> 8);
3544 othercasebit &= 0xff;
3545 #else
3546 #ifdef COMPILE_PCRE16
3547 othercasechar = cc + (othercasebit >> 9);
3548 if ((othercasebit & 0x100) != 0)
3549 othercasebit = (othercasebit & 0xff) << 8;
3550 else
3551 othercasebit &= 0xff;
3552 #endif
3553 #endif
3554 }
3555
/* First call for this sequence: load the first piece of subject data into
TMP1. The next load goes to TMP2, so loading and comparing alternate
between the two registers. */
3556 if (context->sourcereg == -1)
3557 {
3558 #ifdef COMPILE_PCRE8
3559 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3560 if (context->length >= 4)
3561 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3562 else if (context->length >= 2)
3563 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3564 else
3565 #endif
3566 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3567 #else
3568 #ifdef COMPILE_PCRE16
3569 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3570 if (context->length >= 4)
3571 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3572 else
3573 #endif
3574 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3575 #endif
3576 #endif /* COMPILE_PCRE8 */
3577 context->sourcereg = TMP2;
3578 }
3579
/* In UTF mode every code unit of the character is processed by the loop. */
3580 #ifdef SUPPORT_UTF
3581 utflength = 1;
3582 if (common->utf && HAS_EXTRALEN(*cc))
3583 utflength += GET_EXTRALEN(*cc);
3584
3585 do
3586 {
3587 #endif
3588
3589 context->length -= IN_UCHARS(1);
3590 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3591
3592 /* Unaligned read is supported. */
/* Units are collected in context->c (expected value) and context->oc (case
bit mask) and compared together once a full group has been gathered or the
sequence ends. */
3593 if (othercasebit != 0 && othercasechar == cc)
3594 {
3595 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3596 context->oc.asuchars[context->ucharptr] = othercasebit;
3597 }
3598 else
3599 {
3600 context->c.asuchars[context->ucharptr] = *cc;
3601 context->oc.asuchars[context->ucharptr] = 0;
3602 }
3603 context->ucharptr++;
3604
3605 #ifdef COMPILE_PCRE8
3606 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3607 #else
3608 if (context->ucharptr >= 2 || context->length == 0)
3609 #endif
3610 {
/* Load the data for the next group before comparing the current one. */
3611 if (context->length >= 4)
3612 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3613 #ifdef COMPILE_PCRE8
3614 else if (context->length >= 2)
3615 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3616 else if (context->length >= 1)
3617 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3618 #else
3619 else if (context->length >= 2)
3620 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3621 #endif
/* Switch to the register that holds the previously loaded data. */
3622 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3623
/* The case bits are forced on in the subject data, then the whole group is
compared with the expected value in one step. */
3624 switch(context->ucharptr)
3625 {
3626 case 4 / sizeof(pcre_uchar):
3627 if (context->oc.asint != 0)
3628 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3629 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3630 break;
3631
3632 case 2 / sizeof(pcre_uchar):
3633 if (context->oc.asushort != 0)
3634 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3635 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3636 break;
3637
3638 #ifdef COMPILE_PCRE8
3639 case 1:
3640 if (context->oc.asbyte != 0)
3641 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3642 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3643 break;
3644 #endif
3645
3646 default:
3647 SLJIT_ASSERT_STOP();
3648 break;
3649 }
3650 context->ucharptr = 0;
3651 }
3652
3653 #else
3654
3655 /* Unaligned read is unsupported. */
/* One code unit is compared at a time: the next unit is loaded first, then
the previously loaded one is compared. */
3656 #ifdef COMPILE_PCRE8
3657 if (context->length > 0)
3658 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3659 #else
3660 if (context->length > 0)
3661 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3662 #endif
3663 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3664
3665 if (othercasebit != 0 && othercasechar == cc)
3666 {
3667 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3668 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3669 }
3670 else
3671 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3672
3673 #endif
3674
3675 cc++;
3676 #ifdef SUPPORT_UTF
3677 utflength--;
3678 }
3679 while (utflength > 0);
3680 #endif
3681
3682 return cc;
3683 }
3684
3685 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3686
3687 #define SET_TYPE_OFFSET(value) \
3688 if ((value) != typeoffset) \
3689 { \
3690 if ((value) > typeoffset) \
3691 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3692 else \
3693 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3694 } \
3695 typeoffset = (value);
3696
3697 #define SET_CHAR_OFFSET(value) \
3698 if ((value) != charoffset) \
3699 { \
3700 if ((value) > charoffset) \
3701 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
3702 else \
3703 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
3704 } \
3705 charoffset = (value);
3706
3707 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3708 {
3709 DEFINE_COMPILER;
3710 jump_list *found = NULL;
3711 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3712 unsigned int c;
3713 int compares;
3714 struct sljit_jump *jump = NULL;
3715 pcre_uchar *ccbegin;
3716 #ifdef SUPPORT_UCP
3717 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3718 BOOL charsaved = FALSE;
3719 int typereg = TMP1, scriptreg = TMP1;
3720 unsigned int typeoffset;
3721 #endif
3722 int invertcmp, numberofcmps;
3723 unsigned int charoffset;
3724
3725 /* Although SUPPORT_UTF must be defined, we are
3726 not necessary in utf mode even in 8 bit mode. */
3727 detect_partial_match(common, backtracks);
3728 read_char(common);
3729
3730 if ((*cc++ & XCL_MAP) != 0)
3731 {
3732 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3733 #ifndef COMPILE_PCRE8
3734 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3735 #elif defined SUPPORT_UTF
3736 if (common->utf)
3737 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3738 #endif
3739
3740 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3741 {
3742 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3743 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3744 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3745 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3746 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3747 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3748 }
3749
3750 #ifndef COMPILE_PCRE8
3751 JUMPHERE(jump);
3752 #elif defined SUPPORT_UTF
3753 if (common->utf)
3754 JUMPHERE(jump);
3755 #endif
3756 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3757 #ifdef SUPPORT_UCP
3758 charsaved = TRUE;
3759 #endif
3760 cc += 32 / sizeof(pcre_uchar);
3761 }
3762
3763 /* Scanning the necessary info. */
3764 ccbegin = cc;
3765 compares = 0;
3766 while (*cc != XCL_END)
3767 {
3768 compares++;
3769 if (*cc == XCL_SINGLE)
3770 {
3771 cc += 2;
3772 #ifdef SUPPORT_UTF
3773 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3774 #endif
3775 #ifdef SUPPORT_UCP
3776 needschar = TRUE;
3777 #endif
3778 }
3779 else if (*cc == XCL_RANGE)
3780 {
3781 cc += 2;
3782 #ifdef SUPPORT_UTF
3783 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3784 #endif
3785 cc++;
3786 #ifdef SUPPORT_UTF
3787 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3788 #endif
3789 #ifdef SUPPORT_UCP
3790 needschar = TRUE;
3791 #endif
3792 }
3793 #ifdef SUPPORT_UCP
3794 else
3795 {
3796 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3797 cc++;
3798 switch(*cc)
3799 {
3800 case PT_ANY:
3801 break;
3802
3803 case PT_LAMP:
3804 case PT_GC:
3805 case PT_PC:
3806 case PT_ALNUM:
3807 needstype = TRUE;
3808 break;
3809
3810 case PT_SC:
3811 needsscript = TRUE;
3812 break;
3813
3814 case PT_SPACE:
3815 case PT_PXSPACE:
3816 case PT_WORD:
3817 needstype = TRUE;
3818 needschar = TRUE;
3819 break;
3820
3821 default:
3822 SLJIT_ASSERT_STOP();
3823 break;
3824 }
3825 cc += 2;
3826 }
3827 #endif
3828 }
3829
3830 #ifdef SUPPORT_UCP
3831 /* Simple register allocation. TMP1 is preferred if possible. */
3832 if (needstype || needsscript)
3833 {
3834 if (needschar && !charsaved)
3835 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3836 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3837 if (needschar)
3838 {
3839 if (needstype)
3840 {
3841 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3842 typereg = RETURN_ADDR;
3843 }
3844
3845 if (needsscript)
3846 scriptreg = TMP3;
3847 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3848 }
3849 else if (needstype && needsscript)
3850 scriptreg = TMP3;
3851 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3852
3853 if (needsscript)
3854 {
3855 if (scriptreg == TMP1)
3856 {
3857 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3858 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3859 }
3860 else
3861 {
3862 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3863 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3864 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3865 }
3866 }
3867 }
3868 #endif
3869
3870 /* Generating code. */
3871 cc = ccbegin;
3872 charoffset = 0;
3873 numberofcmps = 0;
3874 #ifdef SUPPORT_UCP
3875 typeoffset = 0;
3876 #endif
3877
3878 while (*cc != XCL_END)
3879 {
3880 compares--;
3881 invertcmp = (compares == 0 && list != backtracks);
3882 jump = NULL;
3883
3884 if (*cc == XCL_SINGLE)
3885 {
3886 cc ++;
3887 #ifdef SUPPORT_UTF
3888 if (common->utf)
3889 {
3890 GETCHARINC(c, cc);
3891 }
3892 else
3893 #endif
3894 c = *cc++;
3895
3896 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3897 {
3898 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3899 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3900 numberofcmps++;
3901 }
3902 else if (numberofcmps > 0)
3903 {
3904 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3905 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3906 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3907 numberofcmps = 0;
3908 }
3909 else
3910 {
3911 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3912 numberofcmps = 0;
3913 }
3914 }
3915 else if (*cc == XCL_RANGE)
3916 {
3917 cc ++;
3918 #ifdef SUPPORT_UTF
3919 if (common->utf)
3920 {
3921 GETCHARINC(c, cc);
3922 }
3923 else
3924 #endif
3925 c = *cc++;
3926 SET_CHAR_OFFSET(c);
3927 #ifdef SUPPORT_UTF
3928 if (common->utf)
3929 {
3930 GETCHARINC(c, cc);
3931 }
3932 else
3933 #endif
3934 c = *cc++;
3935 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3936 {
3937 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3938 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3939 numberofcmps++;
3940 }
3941 else if (numberofcmps > 0)
3942 {
3943 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3944 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3945 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3946 numberofcmps = 0;
3947 }
3948 else
3949 {
3950 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3951 numberofcmps = 0;
3952 }
3953 }
3954 #ifdef SUPPORT_UCP
3955 else
3956 {
3957 if (*cc == XCL_NOTPROP)
3958 invertcmp ^= 0x1;
3959 cc++;
3960 switch(*cc)
3961 {
3962 case PT_ANY:
3963 if (list != backtracks)
3964 {
3965 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3966 continue;
3967 }
3968 else if (cc[-1] == XCL_NOTPROP)
3969 continue;
3970 jump = JUMP(SLJIT_JUMP);
3971 break;
3972
3973 case PT_LAMP:
3974 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3975 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3976 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3977 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3978 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3979 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3980 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3981 break;
3982
3983 case PT_GC:
3984 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3985 SET_TYPE_OFFSET(c);
3986 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3987 break;
3988
3989 case PT_PC:
3990 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3991 break;
3992
3993 case PT_SC:
3994 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3995 break;
3996
3997 case PT_SPACE:
3998 case PT_PXSPACE:
3999 if (*cc == PT_SPACE)
4000 {
4001 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4002 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4003 }
4004 SET_CHAR_OFFSET(9);
4005 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4006 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
4007 if (*cc == PT_SPACE)
4008 JUMPHERE(jump);
4009
4010 SET_TYPE_OFFSET(ucp_Zl);
4011 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4012 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
4013 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4014 break;
4015
4016 case PT_WORD:
4017 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4018 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4019 /* ... fall through */
4020
4021 case PT_ALNUM:
4022 SET_TYPE_OFFSET(ucp_Ll);
4023 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4024 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
4025 SET_TYPE_OFFSET(ucp_Nd);
4026 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4027 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
4028 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4029 break;
4030 }
4031 cc += 2;
4032 }
4033 #endif
4034
4035 if (jump != NULL)
4036 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4037 }
4038
4039 if (found != NULL)
4040 set_jumps(found, LABEL());
4041 }
4042
4043 #undef SET_TYPE_OFFSET
4044 #undef SET_CHAR_OFFSET
4045
4046 #endif
4047
4048 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4049 {
4050 DEFINE_COMPILER;
4051 int length;
4052 unsigned int c, oc, bit;
4053 compare_context context;
4054 struct sljit_jump *jump[4];
4055 #ifdef SUPPORT_UTF
4056 struct sljit_label *label;
4057 #ifdef SUPPORT_UCP
4058 pcre_uchar propdata[5];
4059 #endif
4060 #endif
4061
4062 switch(type)
4063 {
4064 case OP_SOD:
4065 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4066 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4067 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4068 return cc;
4069
4070 case OP_SOM:
4071 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4072 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4073 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4074 return cc;
4075
4076 case OP_NOT_WORD_BOUNDARY:
4077 case OP_WORD_BOUNDARY:
4078 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4079 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4080 return cc;
4081
4082 case OP_NOT_DIGIT:
4083 case OP_DIGIT:
4084 /* Digits are usually 0-9, so it is worth to optimize them. */
4085 if (common->digits[0] == -2)
4086 get_ctype_ranges(common, ctype_digit, common->digits);
4087 detect_partial_match(common, backtracks);
4088 /* Flip the starting bit in the negative case. */
4089 if (type == OP_NOT_DIGIT)
4090 common->digits[1] ^= 1;
4091 if (!check_ranges(common, common->digits, backtracks, TRUE))
4092 {
4093 read_char8_type(common);
4094 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4095 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4096 }
4097 if (type == OP_NOT_DIGIT)
4098 common->digits[1] ^= 1;
4099 return cc;
4100
4101 case OP_NOT_WHITESPACE:
4102 case OP_WHITESPACE:
4103 detect_partial_match(common, backtracks);
4104 read_char8_type(common);
4105 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4106 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4107 return cc;
4108
4109 case OP_NOT_WORDCHAR:
4110 case OP_WORDCHAR:
4111 detect_partial_match(common, backtracks);
4112 read_char8_type(common);
4113 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4114 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4115 return cc;
4116
4117 case OP_ANY:
4118 detect_partial_match(common, backtracks);
4119 read_char(common);
4120 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4121 {
4122 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4123 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4124 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4125 else
4126 jump[1] = check_str_end(common);
4127
4128 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4129 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4130 if (jump[1] != NULL)
4131 JUMPHERE(jump[1]);
4132 JUMPHERE(jump[0]);
4133 }
4134 else
4135 check_newlinechar(common, common->nltype, backtracks, TRUE);
4136 return cc;
4137
4138 case OP_ALLANY:
4139 detect_partial_match(common, backtracks);
4140 #ifdef SUPPORT_UTF
4141 if (common->utf)
4142 {
4143 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4144 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4145 #ifdef COMPILE_PCRE8
4146 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4147 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4148 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4149 #else /* COMPILE_PCRE8 */
4150 #ifdef COMPILE_PCRE16
4151 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4152 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4153 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4154 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
4155 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4156 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4157 #endif /* COMPILE_PCRE16 */
4158 #endif /* COMPILE_PCRE8 */
4159 JUMPHERE(jump[0]);
4160 return cc;
4161 }
4162 #endif
4163 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4164 return cc;
4165
4166 case OP_ANYBYTE:
4167 detect_partial_match(common, backtracks);
4168 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4169 return cc;
4170
4171 #ifdef SUPPORT_UTF
4172 #ifdef SUPPORT_UCP
4173 case OP_NOTPROP:
4174 case OP_PROP:
4175 propdata[0] = 0;
4176 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4177 propdata[2] = cc[0];
4178 propdata[3] = cc[1];
4179 propdata[4] = XCL_END;
4180 compile_xclass_matchingpath(common, propdata, backtracks);
4181 return cc + 2;
4182 #endif
4183 #endif
4184
4185 case OP_ANYNL:
4186 detect_partial_match(common, backtracks);
4187 read_char(common);
4188 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4189 /* We don't need to handle soft partial matching case. */
4190 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4191 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4192 else
4193 jump[1] = check_str_end(common);
4194 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4195 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4196 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4197 jump[3] = JUMP(SLJIT_JUMP);
4198 JUMPHERE(jump[0]);
4199 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4200 JUMPHERE(jump[1]);
4201 JUMPHERE(jump[2]);
4202 JUMPHERE(jump[3]);
4203 return cc;
4204
4205 case OP_NOT_HSPACE:
4206 case OP_HSPACE:
4207 detect_partial_match(common, backtracks);
4208 read_char(common);
4209 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4210 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4211 return cc;
4212
4213 case OP_NOT_VSPACE:
4214 case OP_VSPACE:
4215 detect_partial_match(common, backtracks);
4216 read_char(common);
4217 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4218 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4219 return cc;
4220
4221 #ifdef SUPPORT_UCP
4222 case OP_EXTUNI:
4223 detect_partial_match(common, backtracks);
4224 read_char(common);
4225 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4226 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4227 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc));
4228
4229 label = LABEL();
4230 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4231 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4232 read_char(common);
4233 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4234 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Mc);
4235 CMPTO(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Mn - ucp_Mc, label);
4236
4237 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4238 JUMPHERE(jump[0]);
4239 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4240 {
4241 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4242 /* Since we successfully read a char above, partial matching must occur. */
4243 check_partial(common, TRUE);
4244 JUMPHERE(jump[0]);
4245 }
4246 return cc;
4247 #endif
4248
4249 case OP_EODN:
4250 /* Requires rather complex checks. */
4251 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4252 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4253 {
4254 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4255 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4256 if (common->mode == JIT_COMPILE)
4257 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4258 else
4259 {
4260 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4261 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4262 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
4263 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4264 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
4265 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4266 check_partial(common, TRUE);
4267 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4268 JUMPHERE(jump[1]);
4269 }
4270 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4271 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4272 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4273 }
4274 else if (common->nltype == NLTYPE_FIXED)
4275 {
4276 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4277 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4278 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4279 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4280 }
4281 else
4282 {
4283 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4284 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4285 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4286 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4287 jump[2] = JUMP(SLJIT_C_GREATER);
4288 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4289 /* Equal. */
4290 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4291 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4292 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4293
4294 JUMPHERE(jump[1]);
4295 if (common->nltype == NLTYPE_ANYCRLF)
4296 {
4297 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4298 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4299 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4300 }
4301 else
4302 {
4303 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4304 read_char(common);
4305 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4306 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4307 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4308 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4309 }
4310 JUMPHERE(jump[2]);
4311 JUMPHERE(jump[3]);
4312 }
4313 JUMPHERE(jump[0]);
4314 check_partial(common, FALSE);
4315 return cc;
4316
4317 case OP_EOD:
4318 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4319 check_partial(common, FALSE);
4320 return cc;
4321
4322 case OP_CIRC:
4323 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4324 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4325 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4326 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4327 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4328 return cc;
4329
4330 case OP_CIRCM:
4331 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4332 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4333 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4334 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4335 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4336 jump[0] = JUMP(SLJIT_JUMP);
4337 JUMPHERE(jump[1]);
4338
4339 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4340 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4341 {
4342 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4343 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4344 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4345 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4346 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4347 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4348 }
4349 else
4350 {
4351 skip_char_back(common);
4352 read_char(common);
4353 check_newlinechar(common, common->nltype, backtracks, FALSE);
4354 }
4355 JUMPHERE(jump[0]);
4356 return cc;
4357
4358 case OP_DOLL:
4359 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4360 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4361 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4362
4363 if (!common->endonly)
4364 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4365 else
4366 {
4367 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4368 check_partial(common, FALSE);
4369 }
4370 return cc;
4371
4372 case OP_DOLLM:
4373 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4374 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4375 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4376 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4377 check_partial(common, FALSE);
4378 jump[0] = JUMP(SLJIT_JUMP);
4379 JUMPHERE(jump[1]);
4380
4381 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4382 {
4383 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4384 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4385 if (common->mode == JIT_COMPILE)
4386 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4387 else
4388 {
4389 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4390 /* STR_PTR = STR_END - IN_UCHARS(1) */
4391 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4392 check_partial(common, TRUE);
4393 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4394 JUMPHERE(jump[1]);
4395 }
4396
4397 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4398 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4399 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4400 }
4401 else
4402 {
4403 peek_char(common);
4404 check_newlinechar(common, common->nltype, backtracks, FALSE);
4405 }
4406 JUMPHERE(jump[0]);
4407 return cc;
4408
4409 case OP_CHAR:
4410 case OP_CHARI:
4411 length = 1;
4412 #ifdef SUPPORT_UTF
4413 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4414 #endif
4415 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4416 {
4417 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4418 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4419
4420 context.length = IN_UCHARS(length);
4421 context.sourcereg = -1;
4422 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4423 context.ucharptr = 0;
4424 #endif
4425 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4426 }
4427 detect_partial_match(common, backtracks);
4428 read_char(common);
4429 #ifdef SUPPORT_UTF
4430 if (common->utf)
4431 {
4432 GETCHAR(c, cc);
4433 }
4434 else
4435 #endif
4436 c = *cc;
4437 if (type == OP_CHAR || !char_has_othercase(common, cc))
4438 {
4439 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4440 return cc + length;
4441 }
4442 oc = char_othercase(common, c);
4443 bit = c ^ oc;
4444 if (ispowerof2(bit))
4445 {
4446 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4447 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4448 return cc + length;
4449 }
4450 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4451 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4452 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4453 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4454 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4455 return cc + length;
4456
4457 case OP_NOT:
4458 case OP_NOTI:
4459 detect_partial_match(common, backtracks);
4460 length = 1;
4461 #ifdef SUPPORT_UTF
4462 if (common->utf)
4463 {
4464 #ifdef COMPILE_PCRE8
4465 c = *cc;
4466 if (c < 128)
4467 {
4468 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4469 if (type == OP_NOT || !char_has_othercase(common, cc))
4470 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4471 else
4472 {
4473 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4474 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4475 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4476 }
4477 /* Skip the variable-length character. */
4478 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4479 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4480 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4481 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4482 JUMPHERE(jump[0]);
4483 return cc + 1;
4484 }
4485 else
4486 #endif /* COMPILE_PCRE8 */
4487 {
4488 GETCHARLEN(c, cc, length);
4489 read_char(common);
4490 }
4491 }
4492 else
4493 #endif /* SUPPORT_UTF */
4494 {
4495 read_char(common);
4496 c = *cc;
4497 }
4498
4499 if (type == OP_NOT || !char_has_othercase(common, cc))
4500 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4501 else
4502 {
4503 oc = char_othercase(common, c);
4504 bit = c ^ oc;
4505 if (ispowerof2(bit))
4506 {
4507 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4508 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4509 }
4510 else
4511 {
4512 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4513 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4514 }
4515 }
4516 return cc + length;
4517
4518 case OP_CLASS:
4519 case OP_NCLASS:
4520 detect_partial_match(common, backtracks);
4521 read_char(common);
4522 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4523 return cc + 32 / sizeof(pcre_uchar);
4524
4525 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4526 jump[0] = NULL;
4527 #ifdef COMPILE_PCRE8
4528 /* This check only affects 8 bit mode. In other modes, we
4529 always need to compare the value with 255. */
4530 if (common->utf)
4531 #endif /* COMPILE_PCRE8 */
4532 {
4533 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4534 if (type == OP_CLASS)
4535 {
4536 add_jump(compiler, backtracks, jump[0]);
4537 jump[0] = NULL;
4538 }
4539 }
4540 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4541 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4542 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4543 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
4544 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4545 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4546 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4547 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4548 if (jump[0] != NULL)
4549 JUMPHERE(jump[0]);
4550 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4551 return cc + 32 / sizeof(pcre_uchar);
4552
4553 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
4554 case OP_XCLASS:
4555 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4556 return cc + GET(cc, 0) - 1;
4557 #endif
4558
4559 case OP_REVERSE:
4560 length = GET(cc, 0);
4561 if (length == 0)
4562 return cc + LINK_SIZE;
4563 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4564 #ifdef SUPPORT_UTF
4565 if (common->utf)
4566 {
4567 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4568 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4569 label = LABEL();
4570 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4571 skip_char_back(common);
4572 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4573 JUMPTO(SLJIT_C_NOT_ZERO, label);
4574 }
4575 else
4576 #endif
4577 {
4578 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4579 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4580 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4581 }
4582 check_start_used_ptr(common);
4583 return cc + LINK_SIZE;
4584 }
4585 SLJIT_ASSERT_STOP();
4586 return cc;
4587 }
4588
4589 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4590 {
4591 /* This function consumes at least one input character. */
4592 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4593 DEFINE_COMPILER;
4594 pcre_uchar *ccbegin = cc;
4595 compare_context context;
4596 int size;
4597
4598 context.length = 0;
4599 do
4600 {
4601 if (cc >= ccend)
4602 break;
4603
4604 if (*cc == OP_CHAR)
4605 {
4606 size = 1;
4607 #ifdef SUPPORT_UTF
4608 if (common->utf && HAS_EXTRALEN(cc[1]))
4609 size += GET_EXTRALEN(cc[1]);
4610 #endif
4611 }
4612 else if (*cc == OP_CHARI)
4613 {
4614 size = 1;
4615 #ifdef SUPPORT_UTF
4616 if (common->utf)
4617 {
4618 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4619 size = 0;
4620 else if (HAS_EXTRALEN(cc[1]))
4621 size += GET_EXTRALEN(cc[1]);
4622 }
4623 else
4624 #endif
4625 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4626 size = 0;
4627 }
4628 else
4629 size = 0;
4630
4631 cc += 1 + size;
4632 context.length += IN_UCHARS(size);
4633 }
4634 while (size > 0 && context.length <= 128);
4635
4636 cc = ccbegin;
4637 if (context.length > 0)
4638 {
4639 /* We have a fixed-length byte sequence. */
4640 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4641 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4642
4643 context.sourcereg = -1;
4644 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4645 context.ucharptr = 0;
4646 #endif
4647 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4648 return cc;
4649 }
4650
4651 /* A non-fixed length character will be checked if length == 0. */
4652 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4653 }
4654
4655 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4656 {
4657 DEFINE_COMPILER;
4658 int offset = GET2(cc, 1) << 1;
4659
4660 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4661 if (!common->jscript_compat)
4662 {
4663 if (backtracks == NULL)
4664 {
4665 /* OVECTOR(1) contains the "string begin - 1" constant. */
4666 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4667 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4668 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4669 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4670 return JUMP(SLJIT_C_NOT_ZERO);
4671 }
4672 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4673 }
4674 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4675 }
4676
4677 /* Forward declarations, so that the functions below can refer to these two before their definitions. */
4678 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4679 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4680
/* Allocates a zero-filled backtrack record of "size" bytes and pushes it on
the list headed by parent->top.

The macro expands in the caller's scope and uses the caller's variables
"backtrack" (receives the new record), "parent" (owner of the list) and
"compiler" (the sljit compiler used for the allocation). The new record's
"prev" is set to the previous parent->top and its "cc" to "ccstart".

If the compiler reports an error after the allocation, the macro executes
"return error;" in the ENCLOSING function, so it must only be used in
functions whose return type accepts "error".

Note that "size" is expanded twice (allocation and memset). */
4681 #define PUSH_BACKTRACK(size, ccstart, error) \
4682 do \
4683 { \
4684 backtrack = sljit_alloc_memory(compiler, (size)); \
4685 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4686 return error; \
4687 memset(backtrack, 0, size); \
4688 backtrack->prev = parent->top; \
4689 backtrack->cc = (ccstart); \
4690 parent->top = backtrack; \
4691 } \
4692 while (0)
4693
/* Same steps as PUSH_BACKTRACK, for use in functions returning void:
allocates "size" bytes into the caller's "backtrack" variable, zero-fills
them, links the record in front of parent->top and stores "ccstart" in its
"cc" field.

If the compiler reports an error after the allocation, the macro executes a
plain "return;" in the ENCLOSING function. It relies on "backtrack", "parent"
and "compiler" being in scope, and expands "size" twice. */
4694 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
4695 do \
4696 { \
4697 backtrack = sljit_alloc_memory(compiler, (size)); \
4698 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4699 return; \
4700 memset(backtrack, 0, size); \
4701 backtrack->prev = parent->top; \
4702 backtrack->cc = (ccstart); \
4703 parent->top = backtrack; \
4704 } \
4705 while (0)
4706
/* Casts the caller's "backtrack" variable to a pointer to the given record
type, e.g. BACKTRACK_AS(iterator_backtrack)->matchingpath. No check is made
that the record really has that type. */
4707 #define BACKTRACK_AS(type) ((type *)backtrack)
4708
/* Emits code that compares the subject at STR_PTR with the text recorded for
a capturing group (OP_REF, or the caseless OP_REFI).

Arguments:
  common      compiler state
  cc          the reference opcode; the group number is read with GET2(cc, 1)
  backtracks  list collecting the jumps taken when the match fails
  withchecks  when TRUE, the "start equals OVECTOR(1)" test (skipped in
              jscript_compat mode) and the "zero length" test are emitted here
  emptyfail   only meaningful with withchecks: when TRUE a zero-length
              reference is added to backtracks, otherwise the zero-length
              jump simply lands after the comparison code

Returns:      cc + 1 + IMM2_SIZE
*/
4709 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4710 {
4711 DEFINE_COMPILER;
/* OVECTOR(offset) holds the start and OVECTOR(offset + 1) the end of the group. */
4712 int offset = GET2(cc, 1) << 1;
/* Set only when withchecks is TRUE: the jump taken for a zero-length reference. */
4713 struct sljit_jump *jump = NULL;
4714 struct sljit_jump *partial;
4715 struct sljit_jump *nopartial;
4716
/* TMP1 = start of the referenced text. */
4717 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4718 /* OVECTOR(1) contains the "string begin - 1" constant. */
4719 if (withchecks && !common->jscript_compat)
4720 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4721
4722 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* Caseless reference in UTF mode: the comparison is done by the C function
do_utf_caselesscmp instead of the generated caselesscmp routine. */
4723 if (common->utf && *cc == OP_REFI)
4724 {
/* The call below relies on this exact register assignment. */
4725 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
/* TMP2 = end of the referenced text. */
4726 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4727 if (withchecks)
4728 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4729
4730 /* Needed to save important temporary registers. */
4731 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* SLJIT_TEMPORARY_REG2 (asserted above to be STACK_TOP) now carries the
jit_arguments pointer; the current STR_PTR is handed over in its uchar_ptr field. */
4732 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
4733 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4734 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4735 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are treated specially: in normal mode both are a
failure; in the partial modes 0 is a plain failure while 1 first goes through
check_partial(). Any other value becomes the new STR_PTR. */
4736 if (common->mode == JIT_COMPILE)
4737 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4738 else
4739 {
4740 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4741 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4742 check_partial(common, FALSE);
4743 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4744 JUMPHERE(nopartial);
4745 }
4746 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4747 }
4748 else
4749 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4750 {
/* TMP2 = end - start, i.e. the length of the referenced text. */
4751 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4752 if (withchecks)
4753 jump = JUMP(SLJIT_C_ZERO);
4754
/* Advance STR_PTR by the whole length first; "partial" is taken when that
runs past the end of the subject. */
4755 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4756 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4757 if (common->mode == JIT_COMPILE)
4758 add_jump(compiler, backtracks, partial);
4759
/* Call the shared comparison routine; the match fails if it leaves TMP2 non-zero. */
4760 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4761 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4762
4763 if (common->mode != JIT_COMPILE)
4764 {
/* Partial matching modes: the reference did not fit before STR_END, so only
the part of it that fits is compared, ending exactly at STR_END. */
4765 nopartial = JUMP(SLJIT_JUMP);
4766 JUMPHERE(partial);
4767 /* TMP2 -= STR_PTR - STR_END (the amount by which STR_PTR overshot). */
4768 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4769 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* Nothing left to compare: go straight to the partial check. */
4770 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4771 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4772 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4773 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4774 JUMPHERE(partial);
4775 check_partial(common, FALSE);
/* Falling through check_partial() is still a failure of this path. */
4776 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4777 JUMPHERE(nopartial);
4778 }
4779 }
4780
/* Resolve the zero-length jump created under withchecks. */
4781 if (jump != NULL)
4782 {
4783 if (emptyfail)
4784 add_jump(compiler, backtracks, jump);
4785 else
4786 JUMPHERE(jump);
4787 }
4788 return cc + 1 + IMM2_SIZE;
4789 }
4790
/* Emits the matching path of a back reference that is followed by a repeat
opcode (OP_CRSTAR ... OP_CRMINRANGE).

An iterator_backtrack record is pushed on parent. The repeat limits are
decoded into min and max; max == 0 is the value used for the quantifiers
without an upper limit (the STAR and PLUS forms). The greedy and the
minimizing forms are generated by two separate code paths below.

Arguments:
  common      compiler state
  cc          the reference opcode; the repeat opcode is at cc[1 + IMM2_SIZE]
  parent      backtrack record that receives the new iterator_backtrack

Returns:      the position after the repeat opcode and its operands, or NULL
              when PUSH_BACKTRACK fails
*/
4791 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4792 {
4793 DEFINE_COMPILER;
4794 backtrack_common *backtrack;
4795 pcre_uchar type;
4796 struct sljit_label *label;
4797 struct sljit_jump *zerolength;
4798 struct sljit_jump *jump = NULL;
4799 pcre_uchar *ccbegin = cc;
4800 int min = 0, max = 0;
4801 BOOL minimize;
4802
4803 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4804
/* The repeat opcode follows the reference; its lowest bit is used to tell
the minimizing variants apart. */
4805 type = cc[1 + IMM2_SIZE];
4806 minimize = (type & 0x1) != 0;
4807 switch(type)
4808 {
4809 case OP_CRSTAR:
4810 case OP_CRMINSTAR:
4811 min = 0;
4812 max = 0;
4813 cc += 1 + IMM2_SIZE + 1;
4814 break;
4815 case OP_CRPLUS:
4816 case OP_CRMINPLUS:
4817 min = 1;
4818 max = 0;
4819 cc += 1 + IMM2_SIZE + 1;
4820 break;
4821 case OP_CRQUERY:
4822 case OP_CRMINQUERY:
4823 min = 0;
4824 max = 1;
4825 cc += 1 + IMM2_SIZE + 1;
4826 break;
4827 case OP_CRRANGE:
4828 case OP_CRMINRANGE:
/* Explicit limits are stored as two IMM2 values after the repeat opcode. */
4829 min = GET2(cc, 1 + IMM2_SIZE + 1);
4830 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4831 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4832 break;
4833 default:
4834 SLJIT_ASSERT_STOP();
4835 break;
4836 }
4837
/* ---- Greedy variants ---- */
4838 if (!minimize)
4839 {
4840 if (min == 0)
4841 {
/* Two slots: the current STR_PTR and a 0 value below it. */
4842 allocate_stack(common, 2);
4843 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4844 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4845 /* Temporary release of STR_PTR. */
4846 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
/* With a NULL list the returned jump covers both cases checked by
compile_ref_checks(); it is bound after the loop, see JUMPHERE(zerolength). */
4847 zerolength = compile_ref_checks(common, ccbegin, NULL);
4848 /* Restore if not zero length. */
4849 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4850 }
4851 else
4852 {
/* At least one match is required: only a 0 value is pushed, and the
OVECTOR(1) check of compile_ref_checks() goes to the backtrack list. */
4853 allocate_stack(common, 1);
4854 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4855 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4856 }
4857
/* A counter is kept in the POSSESSIVE0 local only when a limit above 1 exists. */
4858 if (min > 1 || max > 1)
4859 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4860
/* Loop head: one occurrence of the reference is matched per iteration. */
4861 label = LABEL();
4862 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4863
4864 if (min > 1 || max > 1)
4865 {
/* counter++ */
4866 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4867 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4868 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: loop again without pushing STR_PTR. */
4869 if (min > 1)
4870 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4871 if (max > 1)
4872 {
/* Leave the loop once max is reached; otherwise push STR_PTR and repeat. */
4873 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4874 allocate_stack(common, 1);
4875 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4876 JUMPTO(SLJIT_JUMP, label);
4877 JUMPHERE(jump);
4878 }
4879 }
4880
4881 if (max == 0)
4882 {
4883 /* Includes min > 1 case as well. */
/* No upper limit: push STR_PTR and always loop; the loop is only left
through the jumps collected in backtrack->topbacktracks. */
4884 allocate_stack(common, 1);
4885 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4886 JUMPTO(SLJIT_JUMP, label);
4887 }
4888
4889 JUMPHERE(zerolength);
/* The label recorded here is stored in the backtrack record for later use. */
4890 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4891
4892 decrease_call_count(common);
4893 return cc;
4894 }
4895
/* ---- Minimizing variants ---- */
/* Two slots: STACK(0) is initialised to 0 and later receives STR_PTR;
STACK(1) is the repeat counter (not initialised for OP_CRMINSTAR, which
never reads it because min == 0 and max == 0). */
4896 allocate_stack(common, 2);
4897 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4898 if (type != OP_CRMINSTAR)
4899 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4900
4901 if (min == 0)
4902 {
/* Zero occurrences are acceptable: store STR_PTR and skip the first match. */
4903 zerolength = compile_ref_checks(common, ccbegin, NULL);
4904 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4905 jump = JUMP(SLJIT_JUMP);
4906 }
4907 else
4908 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4909
/* The label stored here is where one more occurrence is matched. */
4910 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Fail when the counter has already reached max. */
4911 if (max > 0)
4912 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4913
/* withchecks and emptyfail are both TRUE here, so a zero-length reference
is added to the backtrack list. */
4914 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4915 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4916
4917 if (min > 1)
4918 {
/* counter++, and keep matching until the minimum is reached. */
4919 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4920 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4921 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4922 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
4923 }
4924 else if (max > 0)
/* Only the upper limit matters: count the occurrence in place. */
4925 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4926
4927 if (jump != NULL)
4928 JUMPHERE(jump);
4929 JUMPHERE(zerolength);
4930
4931 decrease_call_count(common);
4932 return cc;
4933 }
4934
4935 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4936 {
4937 DEFINE_COMPILER;
4938 backtrack_common *backtrack;
4939 recurse_entry *entry = common->entries;
4940 recurse_entry *prev = NULL;
4941 int start = GET(cc, 1);
4942
4943 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
4944 while (entry != NULL)
4945 {
4946 if (entry->start == start)
4947 break;
4948 prev = entry;
4949 entry = entry->next;
4950 }
4951
4952 if (entry == NULL)
4953 {
4954 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4955 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4956 return NULL;
4957 entry->next = NULL;
4958 entry->entry = NULL;
4959 entry->calls = NULL;
4960 entry->start = start;
4961
4962 if (prev != NULL)
4963 prev->next = entry;
4964 else
4965 common->entries = entry;
4966 }
4967
4968 if (common->has_set_som && common->mark_ptr != 0)
4969 {
4970 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4971 allocate_stack(common, 2);
4972 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
4973 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4974 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4975 }
4976 else if (common->has_set_som || common->mark_ptr != 0)
4977 {
4978 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
4979 allocate_stack(common, 1);
4980 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4981 }
4982
4983 if (entry->entry == NULL)
4984 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4985 else
4986 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4987 /* Leave if the match is failed. */
4988 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4989 return cc + 1 + LINK_SIZE;
4990 }
4991
/* Generates the matching path of an assertion group. After an optional
   OP_BRAZERO / OP_BRAMINZERO prefix, the opcode at cc is asserted to lie in
   the range OP_ASSERT .. OP_ASSERTBACK_NOT. Each alternative is compiled with
   compile_matchingpath() followed immediately by compile_backtrackingpath(),
   using a local backtrack_common (altbacktrack).

   common      - shared compiler state; its quitlabel, acceptlabel, quit and
                 accept members are saved on entry and restored before every
                 return from this function.
   cc          - points to the assertion opcode, or to its zero-repeat prefix.
   backtrack   - receives framesize and private_data_ptr. matchingpath is set
                 here for OP_BRAZERO; for OP_BRAMINZERO it is only jumped to,
                 so it is expected to be set already (presumably by the
                 caller - confirm).
   conditional - when TRUE, failure jumps are collected in
                 backtrack->condfailed instead of
                 backtrack->common.topbacktracks.

   Returns the address just after the closing opcode of the assertion
   (cc + 1 + LINK_SIZE), or NULL when sljit reports a compiler error. */
4992 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
4993 {
4994 DEFINE_COMPILER;
4995 int framesize;
4996 int private_data_ptr;
4997 backtrack_common altbacktrack;
4998 pcre_uchar *ccbegin;
4999 pcre_uchar opcode;
5000 pcre_uchar bra = OP_BRA;
5001 jump_list *tmp = NULL;
5002 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5003 jump_list **found;
5004 /* Saving previous accept variables. */
5005 struct sljit_label *save_quitlabel = common->quitlabel;
5006 struct sljit_label *save_acceptlabel = common->acceptlabel;
5007 jump_list *save_quit = common->quit;
5008 jump_list *save_accept = common->accept;
5009 struct sljit_jump *jump;
5010 struct sljit_jump *brajump = NULL;
5011
/* A zero-repeat prefix is only accepted for non-conditional assertions. */
5012 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5013 {
5014 SLJIT_ASSERT(!conditional);
5015 bra = *cc;
5016 cc++;
5017 }
5018 private_data_ptr = PRIVATE_DATA(cc);
5019 SLJIT_ASSERT(private_data_ptr != 0);
5020 framesize = get_framesize(common, cc, FALSE);
5021 backtrack->framesize = framesize;
5022 backtrack->private_data_ptr = private_data_ptr;
5023 opcode = *cc;
5024 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* The jump emitted after an alternative has matched goes to 'found'.
   Positive assertions collect these jumps in the local 'tmp' list, negative
   assertions add them directly to the failure target. */
5025 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5026 ccbegin = cc;
/* cc now points to the end of the first alternative. */
5027 cc += GET(cc, 1);
5028
5029 if (bra == OP_BRAMINZERO)
5030 {
5031 /* This is a braminzero backtrack path. */
5032 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5033 free_stack(common, 1);
/* A zero value popped from the stack skips the whole assertion (see
   JUMPHERE(brajump) below). */
5034 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5035 }
5036
5037 if (framesize < 0)
5038 {
/* No frame: store STACK_TOP in the private data slot, then push STR_PTR. */
5039 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5040 allocate_stack(common, 1);
5041 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5042 }
5043 else
5044 {
/* With a frame: STACK(0) holds STR_PTR, STACK(1) the previous value of the
   private data slot, and init_frame() fills the remaining framesize slots. */
5045 allocate_stack(common, framesize + 2);
5046 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5047 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5048 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5049 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5050 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5051 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5052 }
5053
5054 memset(&altbacktrack, 0, sizeof(backtrack_common));
/* The quit list is shared by all alternatives and is resolved after the loop;
   the accept list is reset and resolved once per alternative. */
5055 common->quitlabel = NULL;
5056 common->quit = NULL;
5057 while (1)
5058 {
5059 common->acceptlabel = NULL;
5060 common->accept = NULL;
5061 altbacktrack.top = NULL;
5062 altbacktrack.topbacktracks = NULL;
5063
/* Every alternative after the first one restarts from the STR_PTR saved at
   STACK(0). */
5064 if (*ccbegin == OP_ALT)
5065 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5066
5067 altbacktrack.cc = ccbegin;
5068 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5069 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5070 {
/* Restore the saved quit/accept state before reporting the failure. */
5071 common->quitlabel = save_quitlabel;
5072 common->acceptlabel = save_acceptlabel;
5073 common->quit = save_quit;
5074 common->accept = save_accept;
5075 return NULL;
5076 }
/* Accept jumps recorded while compiling this alternative land here. */
5077 common->acceptlabel = LABEL();
5078 if (common->accept != NULL)
5079 set_jumps(common->accept, common->acceptlabel);
5080
5081 /* Reset stack. */
5082 if (framesize < 0)
5083 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5084 else {
5085 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5086 {
5087 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5088 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5089 }
5090 else
5091 {
5092 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5093 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5094 }
5095 }
5096
5097 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5098 {
5099 /* We know that STR_PTR was stored on the top of the stack. */
5100 if (conditional)
5101 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5102 else if (bra == OP_BRAZERO)
5103 {
5104 if (framesize < 0)
5105 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5106 else
5107 {
5108 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5109 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
5110 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5111 }
5112 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5113 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5114 }
5115 else if (framesize >= 0)
5116 {
5117 /* For OP_BRA and OP_BRAMINZERO. */
5118 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5119 }
5120 }
/* The alternative matched: leave through 'found'. */
5121 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5122
/* Emit the backtracking code of this alternative right after its matching
   path. */
5123 compile_backtrackingpath(common, altbacktrack.top);
5124 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5125 {
5126 common->quitlabel = save_quitlabel;
5127 common->acceptlabel = save_acceptlabel;
5128 common->quit = save_quit;
5129 common->accept = save_accept;
5130 return NULL;
5131 }
5132 set_jumps(altbacktrack.topbacktracks, LABEL());
5133
5134 if (*cc != OP_ALT)
5135 break;
5136
/* Step to the next alternative. */
5137 ccbegin = cc;
5138 cc += GET(cc, 1);
5139 }
5140 /* None of them matched. */
5141 if (common->quit != NULL)
5142 set_jumps(common->quit, LABEL());
5143
5144 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5145 {
5146 /* Assert is failed. */
5147 if (conditional || bra == OP_BRAZERO)
5148 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5149
5150 if (framesize < 0)
5151 {
5152 /* The topmost item should be 0. */
5153 if (bra == OP_BRAZERO)
5154 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5155 else
5156 free_stack(common, 1);
5157 }
5158 else
5159 {
5160 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5161 /* The topmost item should be 0. */
5162 if (bra == OP_BRAZERO)
5163 {
5164 free_stack(common, framesize + 1);
5165 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5166 }
5167 else
5168 free_stack(common, framesize + 2);
5169 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5170 }
/* For OP_BRAZERO this jump is bound to backtrack->matchingpath below; in all
   other cases it is added to the failure target list. */
5171 jump = JUMP(SLJIT_JUMP);
5172 if (bra != OP_BRAZERO)
5173 add_jump(compiler, target, jump);
5174
5175 /* Assert is successful. */
5176 set_jumps(tmp, LABEL());
5177 if (framesize < 0)
5178 {
5179 /* We know that STR_PTR was stored on the top of the stack. */
5180 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5181 /* Keep the STR_PTR on the top of the stack. */
5182 if (bra == OP_BRAZERO)
5183 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5184 else if (bra == OP_BRAMINZERO)
5185 {
5186 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5187 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5188 }
5189 }
5190 else
5191 {
5192 if (bra == OP_BRA)
5193 {
5194 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5195 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5196 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5197 }
5198 else
5199 {
5200 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5201 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
5202 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* The top slot receives STR_PTR for OP_BRAZERO and the immediate 0 otherwise
   (i.e. for OP_BRAMINZERO). */
5203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5204 }
5205 }
5206
5207 if (bra == OP_BRAZERO)
5208 {
5209 backtrack->matchingpath = LABEL();
5210 sljit_set_label(jump, backtrack->matchingpath);
5211 }
5212 else if (bra == OP_BRAMINZERO)
5213 {
5214 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
/* Landing point of the "zero was popped" test emitted at the top. */
5215 JUMPHERE(brajump);
5216 if (framesize >= 0)
5217 {
5218 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5219 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5220 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5221 }
5222 set_jumps(backtrack->common.topbacktracks, LABEL());
5223 }
5224 }
5225 else
5226 {
5227 /* AssertNot is successful. */
5228 if (framesize < 0)
5229 {
5230 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5231 if (bra != OP_BRA)
5232 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5233 else
5234 free_stack(common, 1);
5235 }
5236 else
5237 {
5238 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5239 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5240 /* The topmost item should be 0. */
5241 if (bra != OP_BRA)
5242 {
5243 free_stack(common, framesize + 1);
5244 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5245 }
5246 else
5247 free_stack(common, framesize + 2);
5248 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5249 }
5250
5251 if (bra == OP_BRAZERO)
5252 backtrack->matchingpath = LABEL();
5253 else if (bra == OP_BRAMINZERO)
5254 {
5255 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5256 JUMPHERE(brajump);
5257 }
5258
5259 if (bra != OP_BRA)
5260 {
/* With a zero-repeat prefix the collected jumps are resolved here and the
   list is cleared. */
5261 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5262 set_jumps(backtrack->common.topbacktracks, LABEL());
5263 backtrack->common.topbacktracks = NULL;
5264 }
5265 }
5266
/* Restore the quit/accept state saved on entry. */
5267 common->quitlabel = save_quitlabel;
5268 common->acceptlabel = save_acceptlabel;
5269 common->quit = save_quit;
5270 common->accept = save_accept;
5271 return cc + 1 + LINK_SIZE;
5272 }
5273
5274 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
5275 {
5276 int condition = FALSE;
5277 pcre_uchar *slotA = name_table;
5278 pcre_uchar *slotB;
5279 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5280 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5281 sljit_w no_capture;
5282 int i;
5283
5284 locals += refno & 0xff;
5285 refno >>= 8;
5286 no_capture = locals[1];
5287
5288 for (i = 0; i < name_count; i++)
5289 {
5290 if (GET2(slotA, 0) == refno) break;
5291 slotA += name_entry_size;
5292 }
5293
5294 if (i < name_count)
5295 {
5296 /* Found a name for the number - there can be only one; duplicate names
5297 for different numbers are allowed, but not vice versa. First scan down
5298 for duplicates. */
5299
5300 slotB = slotA;
5301 while (slotB > name_table)
5302 {
5303 slotB -= name_entry_size;
5304 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5305 {
5306 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5307 if (condition) break;
5308 }
5309 else break;
5310 }
5311
5312 /* Scan up for duplicates */
5313 if (!condition)
5314 {
5315 slotB = slotA;
5316 for (i++; i < name_count; i++)
5317 {
5318 slotB += name_entry_size;
5319 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5320 {
5321 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5322 if (condition) break;
5323 }
5324 else break;
5325 }
5326 }
5327 }
5328 return condition;
5329 }
5330
5331 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
5332 {
5333 int condition = FALSE;
5334 pcre_uchar *slotA = name_table;
5335 pcre_uchar *slotB;
5336 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5337 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5338 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
5339 int i;
5340
5341 for (i = 0; i < name_count; i++)
5342 {
5343 if (GET2(slotA, 0) == recno) break;
5344 slotA += name_entry_size;
5345 }
5346
5347 if (i < name_count)
5348 {
5349 /* Found a name for the number - there can be only one; duplicate
5350 names for different numbers are allowed, but not vice versa. First
5351 scan down for duplicates. */
5352
5353 slotB = slotA;
5354 while (slotB > name_table)
5355 {
5356 slotB -= name_entry_size;
5357 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5358 {
5359 condition = GET2(slotB, 0) == group_num;
5360 if (condition) break;
5361 }
5362 else break;
5363 }
5364
5365 /* Scan up for duplicates */
5366 if (!condition)
5367 {
5368 slotB = slotA;
5369 for (i++; i < name_count; i++)
5370 {
5371 slotB += name_entry_size;
5372 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5373 {
5374 condition = GET2(slotB, 0) == group_num;
5375 if (condition) break;
5376 }
5377 else break;
5378 }
5379 }
5380 }
5381 return condition;
5382 }
5383
5384 /*
5385 Handling bracketed expressions is probably the most complex part.
5386
5387 Stack layout naming characters:
5388 S - Push the current STR_PTR
5389 0 - Push a 0 (NULL)
5390 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5391 before the next alternative. Not pushed if there are no alternatives.
5392 M - Any values pushed by the current alternative. Can be empty, or anything.
5393 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5394 L - Push the previous local (pointed by localptr) to the stack
5395 () - optional values stored on the stack
5396 ()* - optional, can be stored multiple times
5397
5398 The following list shows the regular expression templates, their PCRE byte codes
5399 and stack layout supported by pcre-sljit.
5400
5401 (?:) OP_BRA | OP_KET A M
5402 () OP_CBRA | OP_KET C M
5403 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5404 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5405 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5406 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5407 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5408 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5409 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5410 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5411 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5412 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5413 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5414 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5415 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5416 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5417 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5418 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5419 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5420 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5421 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5422 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5423
5424
5425 Stack layout naming characters:
5426 A - Push the alternative index (starting from 0) on the stack.
5427 Not pushed if there are no alternatives.
5428 M - Any values pushed by the current alternative. Can be empty, or anything.
5429
5430 The next list shows the possible content of a bracket:
5431 (|) OP_*BRA | OP_ALT ... M A
5432 (?()|) OP_*COND | OP_ALT M A
5433 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5434 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5435 Or nothing, if trace is unnecessary
5436 */
5437
5438 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5439 {
5440 DEFINE_COMPILER;
5441 backtrack_common *backtrack;
5442 pcre_uchar opcode;
5443 int private_data_ptr = 0;
5444 int offset = 0;
5445 int stacksize;
5446 pcre_uchar *ccbegin;
5447 pcre_uchar *matchingpath;
5448 pcre_uchar bra = OP_BRA;
5449 pcre_uchar ket;
5450 assert_backtrack *assert;
5451 BOOL has_alternatives;
5452 struct sljit_jump *jump;
5453 struct sljit_jump *skip;
5454 struct sljit_label *rmaxlabel = NULL;
5455 struct sljit_jump *braminzerojump = NULL;
5456
5457 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5458
5459 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5460 {
5461 bra = *cc;
5462 cc++;
5463 opcode = *cc;
5464 }
5465
5466 opcode = *cc;
5467 ccbegin = cc;
5468 matchingpath = ccbegin + 1 + LINK_SIZE;
5469
5470 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5471 {
5472 /* Drop this bracket_backtrack. */
5473 parent->top = backtrack->prev;
5474 return bracketend(cc);
5475 }
5476
5477 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5478 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5479 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5480 cc += GET(cc, 1);
5481
5482 has_alternatives = *cc == OP_ALT;
5483 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5484 {
5485 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5486 if (*matchingpath == OP_NRREF)
5487 {
5488 stacksize = GET2(matchingpath, 1);
5489 if (common->currententry == NULL || stacksize == RREF_ANY)
5490 has_alternatives = FALSE;
5491 else if (common->currententry->start == 0)
5492 has_alternatives = stacksize != 0;
5493 else
5494 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5495 }
5496 }
5497
5498 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5499 opcode = OP_SCOND;
5500 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5501 opcode = OP_ONCE;
5502
5503 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5504 {
5505 /* Capturing brackets has a pre-allocated space. */
5506 offset = GET2(ccbegin, 1 + LINK_SIZE);
5507 if (common->optimized_cbracket[offset] == 0)
5508 {
5509 private_data_ptr = OVECTOR_PRIV(offset);
5510 offset <<= 1;
5511 }
5512 else
5513 {
5514 offset <<= 1;
5515 private_data_ptr = OVECTOR(offset);
5516 }
5517 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5518 matchingpath += IMM2_SIZE;
5519 }
5520 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5521 {
5522 /* Other brackets simply allocate the next entry. */
5523 private_data_ptr = PRIVATE_DATA(ccbegin);
5524 SLJIT_ASSERT(private_data_ptr != 0);
5525 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5526 if (opcode == OP_ONCE)
5527 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5528 }
5529
5530 /* Instructions before the first alternative. */
5531 stacksize = 0;
5532 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5533 stacksize++;
5534 if (bra == OP_BRAZERO)
5535 stacksize++;
5536
5537 if (stacksize > 0)
5538 allocate_stack(common, stacksize);
5539
5540 stacksize = 0;
5541 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5542 {
5543 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5544 stacksize++;
5545 }
5546
5547 if (bra == OP_BRAZERO)
5548 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5549
5550 if (bra == OP_BRAMINZERO)
5551 {
5552 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5553 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5554 if (ket != OP_KETRMIN)
5555 {
5556 free_stack(common, 1);
5557 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5558 }
5559 else
5560 {
5561 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5562 {
5563 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5564 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5565 /* Nothing stored during the first run. */
5566 skip = JUMP(SLJIT_JUMP);
5567 JUMPHERE(jump);
5568 /* Checking zero-length iteration. */
5569 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5570 {
5571 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5572 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5573 }
5574 else
5575 {
5576 /* Except when the whole stack frame must be saved. */
5577 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5578 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
5579 }
5580 JUMPHERE(skip);
5581 }
5582 else
5583 {
5584 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5585 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5586 JUMPHERE(jump);
5587 }
5588 }
5589 }
5590
5591 if (ket == OP_KETRMIN)
5592 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5593
5594 if (ket == OP_KETRMAX)
5595 {
5596 rmaxlabel = LABEL();
5597 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5598 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5599 }
5600
5601 /* Handling capturing brackets and alternatives. */
5602 if (opcode == OP_ONCE)
5603 {
5604 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5605 {
5606 /* Neither capturing brackets nor recursions are not found in the block. */
5607 if (ket == OP_KETRMIN)
5608 {
5609 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5610 allocate_stack(common, 2);
5611 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5612 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5613 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5614 }
5615 else if (ket == OP_KETRMAX || has_alternatives)
5616 {
5617 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5618 allocate_stack(common, 1);
5619 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5620 }
5621 else
5622 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5623 }
5624 else
5625 {
5626 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5627 {
5628 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5629 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5630 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5631 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5632 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5633 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5634 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5635 }
5636 else
5637 {
5638 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5639 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5640 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5642 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5643 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5644 }
5645 }
5646 }
5647 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5648 {
5649 /* Saving the previous values. */
5650 if (common->optimized_cbracket[offset >> 1] == 0)
5651 {
5652 allocate_stack(common, 3);
5653 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5654 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5655 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5656 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5657 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5658 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5659 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5660 }
5661 else
5662 {
5663 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5664 allocate_stack(common, 2);
5665 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5666 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_w));
5667 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5668 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5669 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5670 }
5671 }
5672 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5673 {
5674 /* Saving the previous value. */
5675 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5676 allocate_stack(common, 1);
5677 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5678 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5679 }
5680 else if (has_alternatives)
5681 {
5682 /* Pushing the starting string pointer. */
5683 allocate_stack(common, 1);
5684 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5685 }
5686
5687 /* Generating code for the first alternative. */
5688 if (opcode == OP_COND || opcode == OP_SCOND)
5689 {
5690 if (*matchingpath == OP_CREF)
5691 {
5692 SLJIT_ASSERT(has_alternatives);
5693 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5694 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5695 matchingpath += 1 + IMM2_SIZE;
5696 }
5697 else if (*matchingpath == OP_NCREF)
5698 {
5699 SLJIT_ASSERT(has_alternatives);
5700 stacksize = GET2(matchingpath, 1);
5701 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5702
5703 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5704 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5705 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5706 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
5707 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5708 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5709 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5710 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5711 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5712
5713 JUMPHERE(jump);
5714 matchingpath += 1 + IMM2_SIZE;
5715 }
5716 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
5717 {
5718 /* Never has other case. */
5719 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5720
5721 stacksize = GET2(matchingpath, 1);
5722 if (common->currententry == NULL)
5723 stacksize = 0;
5724 else if (stacksize == RREF_ANY)
5725 stacksize = 1;
5726 else if (common->currententry->start == 0)
5727 stacksize = stacksize == 0;
5728 else
5729 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5730
5731 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
5732 {
5733 SLJIT_ASSERT(!has_alternatives);
5734 if (stacksize != 0)
5735 matchingpath += 1 + IMM2_SIZE;
5736 else
5737 {
5738 if (*cc == OP_ALT)
5739 {
5740 matchingpath = cc + 1 + LINK_SIZE;
5741 cc += GET(cc, 1);
5742 }
5743 else
5744 matchingpath = cc;
5745 }
5746 }
5747 else
5748 {
5749 SLJIT_ASSERT(has_alternatives);
5750
5751 stacksize = GET2(matchingpath, 1);
5752 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5753 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5754 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5755 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5756 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
5757 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5758 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5759 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5760 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5761 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5762 matchingpath += 1 + IMM2_SIZE;
5763 }
5764 }
5765 else
5766 {
5767 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
5768 /* Similar code as PUSH_BACKTRACK macro. */
5769 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5770 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5771 return NULL;
5772 memset(assert, 0, sizeof(assert_backtrack));
5773 assert->common.cc = matchingpath;
5774 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5775 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
5776 }
5777 }
5778
5779 compile_matchingpath(common, matchingpath, cc, backtrack);
5780 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5781 return NULL;
5782
5783 if (opcode == OP_ONCE)
5784 {
5785 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5786 {
5787 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5788 /* TMP2 which is set here used by OP_KETRMAX below. */
5789 if (ket == OP_KETRMAX)
5790 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5791 else if (ket == OP_KETRMIN)
5792 {
5793 /* Move the STR_PTR to the private_data_ptr. */
5794 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5795 }
5796 }
5797 else
5798 {
5799 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5800 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
5801 if (ket == OP_KETRMAX)
5802 {
5803 /* TMP2 which is set here used by OP_KETRMAX below. */
5804 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5805 }
5806 }
5807 }
5808
5809 stacksize = 0;
5810 if (ket != OP_KET || bra != OP_BRA)
5811 stacksize++;
5812 if (has_alternatives && opcode != OP_ONCE)
5813 stacksize++;
5814
5815 if (stacksize > 0)
5816 allocate_stack(common, stacksize);
5817
5818 stacksize = 0;
5819 if (ket != OP_KET)
5820 {
5821 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5822 stacksize++;
5823 }
5824 else if (bra != OP_BRA)
5825 {
5826 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5827 stacksize++;
5828 }
5829
5830 if (has_alternatives)
5831 {
5832 if (opcode != OP_ONCE)
5833 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5834 if (ket != OP_KETRMAX)
5835 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5836 }
5837
5838 /* Must be after the matchingpath label. */
5839 if (offset != 0)
5840 {
5841 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5842 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5843 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5844 }
5845
5846 if (ket == OP_KETRMAX)
5847 {
5848 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5849 {
5850 if (has_alternatives)
5851 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5852 /* Checking zero-length iteration. */
5853 if (opcode != OP_ONCE)
5854 {
5855 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
5856 /* Drop STR_PTR for greedy plus quantifier. */
5857 if (bra != OP_BRAZERO)
5858 free_stack(common, 1);
5859 }
5860 else
5861 /* TMP2 must contain the starting STR_PTR. */
5862 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5863 }
5864 else
5865 JUMPTO(SLJIT_JUMP, rmaxlabel);
5866 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5867 }
5868
5869 if (bra == OP_BRAZERO)
5870 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
5871
5872 if (bra == OP_BRAMINZERO)
5873 {
5874 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5875 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
5876 if (braminzerojump != NULL)
5877 {
5878 JUMPHERE(braminzerojump);
5879 /* We need to release the end pointer to perform the
5880 backtrack for the zero-length iteration. When
5881 framesize is < 0, OP_ONCE will do the release itself. */
5882 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5883 {
5884 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5885 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5886 }
5887 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5888 free_stack(common, 1);
5889 }
5890 /* Continue to the normal backtrack. */
5891 }
5892
5893 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5894 decrease_call_count(common);
5895
5896 /* Skip the other alternatives. */
5897 while (*cc == OP_ALT)
5898 cc += GET(cc, 1);
5899 cc += 1 + LINK_SIZE;
5900 return cc;
5901 }
5902
5903 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5904 {
5905 DEFINE_COMPILER;
5906 backtrack_common *backtrack;
5907 pcre_uchar opcode;
5908 int private_data_ptr;
5909 int cbraprivptr = 0;
5910 int framesize;
5911 int stacksize;
5912 int offset = 0;
5913 BOOL zero = FALSE;
5914 pcre_uchar *ccbegin = NULL;
5915 int stack;
5916 struct sljit_label *loop = NULL;
5917 struct jump_list *emptymatch = NULL;
5918
5919 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
5920 if (*cc == OP_BRAPOSZERO)
5921 {
5922 zero = TRUE;
5923 cc++;
5924 }
5925
5926 opcode = *cc;
5927 private_data_ptr = PRIVATE_DATA(cc);
5928 SLJIT_ASSERT(private_data_ptr != 0);
5929 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
5930 switch(opcode)
5931 {
5932 case OP_BRAPOS:
5933 case OP_SBRAPOS:
5934 ccbegin = cc + 1 + LINK_SIZE;
5935 break;
5936
5937 case OP_CBRAPOS:
5938 case OP_SCBRAPOS:
5939 offset = GET2(cc, 1 + LINK_SIZE);
5940 cbraprivptr = OVECTOR_PRIV(offset);
5941 offset <<= 1;
5942 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5943 break;
5944
5945 default:
5946 SLJIT_ASSERT_STOP();
5947 break;
5948 }
5949
5950 framesize = get_framesize(common, cc, FALSE);
5951 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
5952 if (framesize < 0)
5953 {
5954 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
5955 if (!zero)
5956 stacksize++;
5957 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5958 allocate_stack(common, stacksize);
5959 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5960
5961 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5962 {
5963 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5964 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5965 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5966 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5967 }
5968 else
5969 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5970
5971 if (!zero)
5972 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
5973 }
5974 else
5975 {
5976 stacksize = framesize + 1;
5977 if (!zero)
5978 stacksize++;
5979 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5980 stacksize++;
5981 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5982 allocate_stack(common, stacksize);
5983
5984 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5985 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5986 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5987 stack = 0;
5988 if (!zero)
5989 {
5990 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5991 stack++;
5992 }
5993 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5994 {
5995 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
5996 stack++;
5997 }
5998 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
5999 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6000 }
6001
6002 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6003 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6004
6005 loop = LABEL();
6006 while (*cc != OP_KETRPOS)
6007 {
6008 backtrack->top = NULL;
6009 backtrack->topbacktracks = NULL;
6010 cc += GET(cc, 1);
6011
6012 compile_matchingpath(common, ccbegin, cc, backtrack);
6013 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6014 return NULL;
6015
6016 if (framesize < 0)
6017 {
6018 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6019
6020 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6021 {
6022 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6023 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6024 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6025 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6026 }
6027 else
6028 {
6029 if (opcode == OP_SBRAPOS)
6030 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6031 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6032 }
6033
6034 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6035 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6036
6037 if (!zero)
6038 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6039 }
6040 else
6041 {
6042 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6043 {
6044 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
6045 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6047 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6048 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6049 }
6050 else
6051 {
6052 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6053 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
6054 if (opcode == OP_SBRAPOS)
6055 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6056 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
6057 }
6058
6059 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6060 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6061
6062 if (!zero)
6063 {
6064 if (framesize < 0)
6065 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6066 else
6067 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6068 }
6069 }
6070 JUMPTO(SLJIT_JUMP, loop);
6071 flush_stubs(common);
6072
6073 compile_backtrackingpath(common, backtrack->top);
6074 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6075 return NULL;
6076 set_jumps(backtrack->topbacktracks, LABEL());
6077
6078 if (framesize < 0)
6079 {
6080 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6081 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6082 else
6083 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6084 }
6085 else
6086 {
6087 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6088 {
6089 /* Last alternative. */
6090 if (*cc == OP_KETRPOS)
6091 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6092 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6093 }
6094 else
6095 {
6096 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6097 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6098 }
6099 }
6100
6101 if (*cc == OP_KETRPOS)
6102 break;
6103 ccbegin = cc + 1 + LINK_SIZE;
6104 }
6105
6106 backtrack->topbacktracks = NULL;
6107 if (!zero)
6108 {
6109 if (framesize < 0)
6110 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6111 else /* TMP2 is set to [private_data_ptr] above. */
6112 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
6113 }
6114
6115 /* None of them matched. */
6116 set_jumps(emptymatch, LABEL());
6117 decrease_call_count(common);
6118 return cc + 1 + LINK_SIZE;
6119 }
6120
6121 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6122 {
6123 int class_len;
6124
6125 *opcode = *cc;
6126 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6127 {
6128 cc++;
6129 *type = OP_CHAR;
6130 }
6131 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6132 {
6133 cc++;
6134 *type = OP_CHARI;
6135 *opcode -= OP_STARI - OP_STAR;
6136 }
6137 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6138 {
6139 cc++;
6140 *type = OP_NOT;
6141 *opcode -= OP_NOTSTAR - OP_STAR;
6142 }
6143 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6144 {
6145 cc++;
6146 *type = OP_NOTI;
6147 *opcode -= OP_NOTSTARI - OP_STAR;
6148 }
6149 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6150 {
6151 cc++;
6152 *opcode -= OP_TYPESTAR - OP_STAR;
6153 *type = 0;
6154 }
6155 else
6156 {
6157 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6158 *type = *opcode;
6159 cc++;
6160 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6161 *opcode = cc[class_len - 1];
6162 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6163 {
6164 *opcode -= OP_CRSTAR - OP_STAR;
6165 if (end != NULL)
6166 *end = cc + class_len;
6167 }
6168 else
6169 {
6170 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6171 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6172 *arg2 = GET2(cc, class_len);
6173
6174 if (*arg2 == 0)
6175 {
6176 SLJIT_ASSERT(*arg1 != 0);
6177 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6178 }
6179 if (*arg1 == *arg2)
6180 *opcode = OP_EXACT;
6181
6182 if (end != NULL)
6183 *end = cc + class_len + 2 * IMM2_SIZE;
6184 }
6185 return cc;
6186 }
6187
6188 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6189 {
6190 *arg1 = GET2(cc, 0);
6191 cc += IMM2_SIZE;
6192 }
6193
6194 if (*type == 0)
6195 {
6196 *type = *cc;
6197 if (end != NULL)
6198 *end = next_opcode(common, cc);
6199 cc++;
6200 return cc;
6201 }
6202
6203 if (end != NULL)
6204 {
6205 *end = cc + 1;
6206 #ifdef SUPPORT_UTF
6207 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6208 #endif
6209 }
6210 return cc;
6211 }
6212
6213 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6214 {
6215 DEFINE_COMPILER;
6216 backtrack_common *backtrack;
6217 pcre_uchar opcode;
6218 pcre_uchar type;
6219 int arg1 = -1, arg2 = -1;
6220 pcre_uchar* end;
6221 jump_list *nomatch = NULL;
6222 struct sljit_jump *jump = NULL;
6223 struct sljit_label *label;
6224 int private_data_ptr = PRIVATE_DATA(cc);
6225 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6226 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6227 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_w);
6228 int tmp_base, tmp_offset;
6229
6230 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6231
6232 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6233
6234 switch (type)
6235 {
6236 case OP_NOT_DIGIT:
6237 case OP_DIGIT:
6238 case OP_NOT_WHITESPACE:
6239 case OP_WHITESPACE:
6240 case OP_NOT_WORDCHAR:
6241 case OP_WORDCHAR:
6242 case OP_ANY:
6243 case OP_ALLANY:
6244 case OP_ANYBYTE:
6245 case OP_ANYNL:
6246 case OP_NOT_HSPACE:
6247 case OP_HSPACE:
6248 case OP_NOT_VSPACE:
6249 case OP_VSPACE:
6250 case OP_CHAR:
6251 case OP_CHARI:
6252 case OP_NOT:
6253 case OP_NOTI:
6254 case OP_CLASS:
6255 case OP_NCLASS:
6256 tmp_base = TMP3;
6257 tmp_offset = 0;
6258 break;
6259
6260 default:
6261 SLJIT_ASSERT_STOP();
6262 /* Fall through. */
6263
6264 case OP_EXTUNI:
6265 case OP_XCLASS:
6266 case OP_NOTPROP:
6267 case OP_PROP:
6268 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6269 tmp_offset = POSSESSIVE0;
6270 break;
6271 }
6272
6273 switch(opcode)
6274 {
6275 case OP_STAR:
6276 case OP_PLUS:
6277 case OP_UPTO:
6278 case OP_CRRANGE:
6279 if (type == OP_ANYNL || type == OP_EXTUNI)
6280 {
6281 SLJIT_ASSERT(private_data_ptr == 0);
6282 if (opcode == OP_STAR || opcode == OP_UPTO)
6283 {
6284 allocate_stack(common, 2);
6285 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6286 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6287 }
6288 else
6289 {
6290 allocate_stack(common, 1);
6291 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6292 }
6293
6294 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6295 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6296
6297 label = LABEL();
6298 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6299 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6300 {
6301 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6302 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6303 if (opcode == OP_CRRANGE && arg2 > 0)
6304 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6305 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6306 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6307 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6308 }
6309
6310 /* We cannot use TMP3 because of this allocate_stack. */
6311 allocate_stack(common, 1);
6312 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6313 JUMPTO(SLJIT_JUMP, label);
6314 if (jump != NULL)
6315 JUMPHERE(jump);
6316 }
6317 else
6318 {
6319 if (opcode == OP_PLUS)
6320 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6321 if (private_data_ptr == 0)
6322 allocate_stack(common, 2);
6323 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6324 if (opcode <= OP_PLUS)
6325 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6326 else
6327 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6328 label = LABEL();
6329 compile_char1_matchingpath(common, type, cc, &nomatch);
6330 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6331 if (opcode <= OP_PLUS)
6332 JUMPTO(SLJIT_JUMP, label);
6333 else if (opcode == OP_CRRANGE && arg1 == 0)
6334 {
6335 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6336 JUMPTO(SLJIT_JUMP, label);
6337 }
6338 else
6339 {
6340 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6341 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6342 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6343 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6344 }
6345 set_jumps(nomatch, LABEL());
6346 if (opcode == OP_CRRANGE)
6347 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6348 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6349 }
6350 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6351 break;
6352
6353 case OP_MINSTAR:
6354 case OP_MINPLUS:
6355 if (opcode == OP_MINPLUS)
6356 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6357 if (private_data_ptr == 0)
6358 allocate_stack(common, 1);
6359 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6360 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6361 break;
6362
6363 case OP_MINUPTO:
6364 case OP_CRMINRANGE:
6365 if (private_data_ptr == 0)
6366 allocate_stack(common, 2);
6367 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6368 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6369 if (opcode == OP_CRMINRANGE)
6370 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6371 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6372 break;
6373
6374 case OP_QUERY:
6375 case OP_MINQUERY:
6376 if (private_data_ptr == 0)
6377 allocate_stack(common, 1);
6378 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6379 if (opcode == OP_QUERY)
6380 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6381 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6382 break;
6383
6384 case OP_EXACT:
6385 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6386 label = LABEL();
6387 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6388 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6389 JUMPTO(SLJIT_C_NOT_ZERO, label);
6390 break;
6391
6392 case OP_POSSTAR:
6393 case OP_POSPLUS:
6394 case OP_POSUPTO:
6395 if (opcode == OP_POSPLUS)
6396 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6397 if (opcode == OP_POSUPTO)
6398 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6399 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6400 label = LABEL();
6401 compile_char1_matchingpath(common, type, cc, &nomatch);
6402 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6403 if (opcode != OP_POSUPTO)
6404 JUMPTO(SLJIT_JUMP, label);
6405 else
6406 {
6407 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6408 JUMPTO(SLJIT_C_NOT_ZERO, label);
6409 }
6410 set_jumps(nomatch, LABEL());
6411 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6412 break;
6413
6414 case OP_POSQUERY:
6415 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6416 compile_char1_matchingpath(common, type, cc, &nomatch);
6417 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6418 set_jumps(nomatch, LABEL());
6419 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6420 break;
6421
6422 default:
6423 SLJIT_ASSERT_STOP();
6424 break;
6425 }
6426
6427 decrease_call_count(common);
6428 return end;
6429 }
6430
6431 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6432 {
6433 DEFINE_COMPILER;
6434 backtrack_common *backtrack;
6435
6436 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6437
6438 if (*cc == OP_FAIL)
6439 {
6440 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6441 return cc + 1;
6442 }
6443
6444 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6445 {
6446 /* No need to check notempty conditions. */
6447 if (common->acceptlabel == NULL)
6448 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6449 else
6450 JUMPTO(SLJIT_JUMP, common->acceptlabel);
6451 return cc + 1;
6452 }
6453
6454 if (common->acceptlabel == NULL)
6455 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6456 else
6457 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
6458 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6459 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6460 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6461 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6462 if (common->acceptlabel == NULL)
6463 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6464 else
6465 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
6466 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6467 if (common->acceptlabel == NULL)
6468 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6469 else
6470 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
6471 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6472 return cc + 1;
6473 }
6474
6475 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
6476 {
6477 DEFINE_COMPILER;
6478 int offset = GET2(cc, 1);
6479 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
6480
6481 /* Data will be discarded anyway... */
6482 if (common->currententry != NULL)
6483 return cc + 1 + IMM2_SIZE;
6484
6485 if (!optimized_cbracket)
6486 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6487 offset <<= 1;
6488 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6489 if (!optimized_cbracket)
6490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6491 return cc + 1 + IMM2_SIZE;
6492 }
6493
6494 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6495 {
6496 DEFINE_COMPILER;
6497 backtrack_common *backtrack;
6498
6499 while (cc < ccend)
6500 {
6501 switch(*cc)
6502 {
6503 case OP_SOD:
6504 case OP_SOM:
6505 case OP_NOT_WORD_BOUNDARY:
6506 case OP_WORD_BOUNDARY:
6507 case OP_NOT_DIGIT:
6508 case OP_DIGIT:
6509 case OP_NOT_WHITESPACE:
6510 case OP_WHITESPACE:
6511 case OP_NOT_WORDCHAR:
6512 case OP_WORDCHAR:
6513 case OP_ANY:
6514 case OP_ALLANY:
6515 case OP_ANYBYTE:
6516 case OP_NOTPROP:
6517 case OP_PROP:
6518 case OP_ANYNL:
6519 case OP_NOT_HSPACE:
6520 case OP_HSPACE:
6521 case OP_NOT_VSPACE:
6522 case OP_VSPACE:
6523 case OP_EXTUNI:
6524 case OP_EODN:
6525 case OP_EOD:
6526 case OP_CIRC:
6527 case OP_CIRCM:
6528 case OP_DOLL:
6529 case OP_DOLLM:
6530 case OP_NOT:
6531 case OP_NOTI:
6532 case OP_REVERSE:
6533 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6534 break;
6535
6536 case OP_SET_SOM:
6537 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6538 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6539 allocate_stack(common, 1);
6540 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6541 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6542 cc++;
6543 break;
6544
6545 case OP_CHAR:
6546 case OP_CHARI:
6547 if (common->mode == JIT_COMPILE)
6548 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6549 else
6550 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6551 break;
6552
6553 case OP_STAR:
6554 case OP_MINSTAR:
6555 case OP_PLUS:
6556 case OP_MINPLUS:
6557 case OP_QUERY:
6558 case OP_MINQUERY:
6559 case OP_UPTO:
6560 case OP_MINUPTO:
6561 case OP_EXACT:
6562 case OP_POSSTAR:
6563 case OP_POSPLUS:
6564 case OP_POSQUERY:
6565 case OP_POSUPTO:
6566 case OP_STARI:
6567 case OP_MINSTARI:
6568 case OP_PLUSI:
6569 case OP_MINPLUSI:
6570 case OP_QUERYI:
6571 case OP_MINQUERYI:
6572 case OP_UPTOI:
6573 case OP_MINUPTOI:
6574 case OP_EXACTI:
6575 case OP_POSSTARI:
6576 case OP_POSPLUSI:
6577 case OP_POSQUERYI:
6578 case OP_POSUPTOI:
6579 case OP_NOTSTAR:
6580 case OP_NOTMINSTAR:
6581 case OP_NOTPLUS:
6582 case OP_NOTMINPLUS:
6583 case OP_NOTQUERY:
6584 case OP_NOTMINQUERY:
6585 case OP_NOTUPTO:
6586 case OP_NOTMINUPTO:
6587 case OP_NOTEXACT:
6588 case OP_NOTPOSSTAR:
6589 case OP_NOTPOSPLUS:
6590 case OP_NOTPOSQUERY:
6591 case OP_NOTPOSUPTO:
6592 case OP_NOTSTARI:
6593 case OP_NOTMINSTARI:
6594 case OP_NOTPLUSI:
6595 case OP_NOTMINPLUSI:
6596 case OP_NOTQUERYI:
6597 case OP_NOTMINQUERYI:
6598 case OP_NOTUPTOI:
6599 case OP_NOTMINUPTOI:
6600 case OP_NOTEXACTI:
6601 case OP_NOTPOSSTARI:
6602 case OP_NOTPOSPLUSI:
6603 case OP_NOTPOSQUERYI:
6604 case OP_NOTPOSUPTOI:
6605 case OP_TYPESTAR:
6606 case OP_TYPEMINSTAR:
6607 case OP_TYPEPLUS:
6608 case OP_TYPEMINPLUS:
6609 case OP_TYPEQUERY:
6610 case OP_TYPEMINQUERY:
6611 case OP_TYPEUPTO:
6612 case OP_TYPEMINUPTO:
6613 case OP_TYPEEXACT:
6614 case OP_TYPEPOSSTAR:
6615 case OP_TYPEPOSPLUS:
6616 case OP_TYPEPOSQUERY:
6617 case OP_TYPEPOSUPTO:
6618 cc = compile_iterator_matchingpath(common, cc, parent);
6619 break;
6620
6621 case OP_CLASS:
6622 case OP_NCLASS:
6623 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
6624 cc = compile_iterator_matchingpath(common, cc, parent);
6625 else
6626 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6627 break;
6628
6629 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
6630 case OP_XCLASS:
6631 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
6632 cc = compile_iterator_matchingpath(common, cc, parent);
6633 else
6634 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6635 break;
6636 #endif
6637
6638 case OP_REF:
6639 case OP_REFI:
6640 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
6641 cc = compile_ref_iterator_matchingpath(common, cc, parent);
6642 else
6643 cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
6644 break;
6645
6646 case OP_RECURSE:
6647 cc = compile_recurse_matchingpath(common, cc, parent);
6648 break;
6649
6650 case OP_ASSERT:
6651 case OP_ASSERT_NOT:
6652 case OP_ASSERTBACK:
6653 case OP_ASSERTBACK_NOT:
6654 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6655 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6656 break;
6657
6658 case OP_BRAMINZERO:
6659 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
6660 cc = bracketend(cc + 1);
6661 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
6662 {
6663 allocate_stack(common, 1);
6664 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6665 }
6666 else
6667 {
6668 allocate_stack(common, 2);
6669 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6670 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
6671 }
6672 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
6673 if (cc[1] > OP_ASSERTBACK_NOT)
6674 decrease_call_count(common);
6675 break;
6676
6677 case OP_ONCE:
6678 case OP_ONCE_NC:
6679 case OP_BRA:
6680 case OP_CBRA:
6681 case OP_COND:
6682 case OP_SBRA:
6683 case OP_SCBRA:
6684 case OP_SCOND:
6685 cc = compile_bracket_matchingpath(common, cc, parent);
6686 break;
6687
6688 case OP_BRAZERO:
6689 if (cc[1] > OP_ASSERTBACK_NOT)
6690 cc = compile_bracket_matchingpath(common, cc, parent);
6691 else
6692 {
6693 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6694 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6695 }
6696 break;
6697
6698 case OP_BRAPOS:
6699 case OP_CBRAPOS:
6700 case OP_SBRAPOS:
6701 case OP_SCBRAPOS:
6702 case OP_BRAPOSZERO:
6703 cc = compile_bracketpos_matchingpath(common, cc, parent);
6704 break;
6705
6706 case OP_MARK:
6707 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6708 SLJIT_ASSERT(common->mark_ptr != 0);
6709 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6710 allocate_stack(common, 1);
6711 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6712 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6713 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
6714 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
6715 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
6716 cc += 1 + 2 + cc[1];
6717 break;
6718
6719 case OP_COMMIT:
6720 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6721 cc += 1;
6722 break;
6723
6724 case OP_FAIL:
6725 case OP_ACCEPT:
6726 case OP_ASSERT_ACCEPT:
6727 cc = compile_fail_accept_matchingpath(common, cc, parent);
6728 break;
6729
6730 case OP_CLOSE:
6731 cc = compile_close_matchingpath(common, cc);
6732 break;
6733
6734 case OP_SKIPZERO:
6735 cc = bracketend(cc + 1);
6736 break;
6737
6738 default:
6739 SLJIT_ASSERT_STOP();
6740 return;
6741 }
6742 if (cc == NULL)
6743 return;
6744 }
6745 SLJIT_ASSERT(cc == ccend);
6746 }
6747
/* The helper macros of the matching path generators end here. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Compiles the backtracking path of the given record and returns from the
enclosing (void) function when the sljit compiler reports an error. Expects
the names common and compiler to be in scope. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Views the variable named current as a pointer to the given record type. */
#define CURRENT_AS(type) ((type *)current)
6762
6763 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
6764 {
6765 DEFINE_COMPILER;
6766 pcre_uchar *cc = current->cc;
6767 pcre_uchar opcode;
6768 pcre_uchar type;
6769 int arg1 = -1, arg2 = -1;
6770 struct sljit_label *label = NULL;
6771 struct sljit_jump *jump = NULL;
6772 jump_list *jumplist = NULL;
6773 int private_data_ptr = PRIVATE_DATA(cc);
6774 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6775 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6776 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_w);
6777
6778 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
6779
6780 switch(opcode)
6781 {
6782 case OP_STAR:
6783 case OP_PLUS: