/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1012 - (show annotations)
Sat Aug 25 15:34:13 2012 UTC (7 years, 2 months ago) by zherczeg
File MIME type: text/plain
File size: 255969 byte(s)
JIT support for extended grapheme cluster.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #ifdef SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Allocate memory for the regex stack on the real machine stack.
69 Fast, but limited size. */
70 #define MACHINE_STACK_SIZE 32768
71
72 /* Growth rate for stack allocated by the OS. Should be a multiple
73 of the page size. */
74 #define STACK_GROWTH_RATE 8192
75
76 /* Enable to check that the allocation could destroy temporaries. */
77 #if defined SLJIT_DEBUG && SLJIT_DEBUG
78 #define DESTROY_REGISTERS 1
79 #endif
80
81 /*
82 Short summary about the backtracking mechanism employed by the jit code generator:
83
84 The code generator follows the recursive nature of the PERL compatible regular
85 expressions. The basic blocks of regular expressions are condition checkers
86 which execute different commands depending on the result of the condition check.
87 The relationship between the operators can be horizontal (concatenation) and
88 vertical (sub-expression) (See struct backtrack_common for more details).
89
90 'ab' - 'a' and 'b' regexps are concatenated
91 'a+' - 'a' is the sub-expression of the '+' operator
92
93 The condition checkers are boolean (true/false) checkers. Machine code is generated
94 for the checker itself and for the actions depending on the result of the checker.
95 The 'true' case is called the matching path (expected path), and the other is called
96 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
97 branches on the matching path.
98
99 Greedy star operator (*) :
100 Matching path: match happens.
101 Backtrack path: match failed.
102 Non-greedy star operator (*?) :
103 Matching path: no need to perform a match.
104 Backtrack path: match is required.
105
106 The following example shows how the code is generated for a capturing bracket
107 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
108 suppose we have the following regular expression:
109
110 A(B|C)D
111
112 The generated code will be the following:
113
114 A matching path
115 '(' matching path (pushing arguments to the stack)
116 B matching path
117 ')' matching path (pushing arguments to the stack)
118 D matching path
119 return with successful match
120
121 D backtrack path
122 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
123 B backtrack path
124 C expected path
125 jump to D matching path
126 C backtrack path
127 A backtrack path
128
129 Notice that the order of the backtrack code paths is the opposite of the fast
130 code paths. In this way the topmost value on the stack always belongs
131 to the current backtrack code path. The backtrack path must check
132 whether there is a next alternative. If so, it needs to jump back to
133 the matching path eventually. Otherwise it needs to clear out its own stack
134 frame and continue the execution on the backtrack code paths.
135 */
136
137 /*
138 Saved stack frames:
139
140 Atomic blocks and asserts require reloading the values of private data
141 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
142 are not necessarily known at compile time, thus we need a dynamic restore
143 mechanism.
144
145 The stack frames are stored in a chain list, and have the following format:
146 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
147
148 Thus we can restore the private data to a particular point in the stack.
149 */
150
151 typedef struct jit_arguments {
152 /* Pointers first. */
153 struct sljit_stack *stack;
154 const pcre_uchar *str;
155 const pcre_uchar *begin;
156 const pcre_uchar *end;
157 int *offsets;
158 pcre_uchar *uchar_ptr;
159 pcre_uchar *mark_ptr;
160 /* Everything else after. */
161 int offsetcount;
162 int calllimit;
163 pcre_uint8 notbol;
164 pcre_uint8 noteol;
165 pcre_uint8 notempty;
166 pcre_uint8 notempty_atstart;
167 } jit_arguments;
168
169 typedef struct executable_functions {
170 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
171 PUBL(jit_callback) callback;
172 void *userdata;
173 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
174 } executable_functions;
175
176 typedef struct jump_list {
177 struct sljit_jump *jump;
178 struct jump_list *next;
179 } jump_list;
180
181 enum stub_types { stack_alloc };
182
183 typedef struct stub_list {
184 enum stub_types type;
185 int data;
186 struct sljit_jump *start;
187 struct sljit_label *quit;
188 struct stub_list *next;
189 } stub_list;
190
191 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
192
193 /* The following structure is the key data type for the recursive
194 code generator. It is allocated by compile_matchingpath, and contains
195 the arguments for compile_backtrackingpath. Must be the first member
196 of its descendants. */
197 typedef struct backtrack_common {
198 /* Concatenation stack. */
199 struct backtrack_common *prev;
200 jump_list *nextbacktracks;
201 /* Internal stack (for component operators). */
202 struct backtrack_common *top;
203 jump_list *topbacktracks;
204 /* Opcode pointer. */
205 pcre_uchar *cc;
206 } backtrack_common;
207
208 typedef struct assert_backtrack {
209 backtrack_common common;
210 jump_list *condfailed;
211 /* Less than 0 (-1) if a frame is not needed. */
212 int framesize;
213 /* Points to our private memory word on the stack. */
214 int private_data_ptr;
215 /* For iterators. */
216 struct sljit_label *matchingpath;
217 } assert_backtrack;
218
219 typedef struct bracket_backtrack {
220 backtrack_common common;
221 /* Where to continue if an alternative is successfully matched. */
222 struct sljit_label *alternative_matchingpath;
223 /* For rmin and rmax iterators. */
224 struct sljit_label *recursive_matchingpath;
225 /* For greedy ? operator. */
226 struct sljit_label *zero_matchingpath;
227 /* Contains the branches of a failed condition. */
228 union {
229 /* Both for OP_COND, OP_SCOND. */
230 jump_list *condfailed;
231 assert_backtrack *assert;
232 /* For OP_ONCE. -1 if not needed. */
233 int framesize;
234 } u;
235 /* Points to our private memory word on the stack. */
236 int private_data_ptr;
237 } bracket_backtrack;
238
239 typedef struct bracketpos_backtrack {
240 backtrack_common common;
241 /* Points to our private memory word on the stack. */
242 int private_data_ptr;
243 /* Reverting stack is needed. */
244 int framesize;
245 /* Allocated stack size. */
246 int stacksize;
247 } bracketpos_backtrack;
248
249 typedef struct braminzero_backtrack {
250 backtrack_common common;
251 struct sljit_label *matchingpath;
252 } braminzero_backtrack;
253
254 typedef struct iterator_backtrack {
255 backtrack_common common;
256 /* Next iteration. */
257 struct sljit_label *matchingpath;
258 } iterator_backtrack;
259
260 typedef struct recurse_entry {
261 struct recurse_entry *next;
262 /* Contains the function entry. */
263 struct sljit_label *entry;
264 /* Collects the calls until the function is not created. */
265 jump_list *calls;
266 /* Points to the starting opcode. */
267 int start;
268 } recurse_entry;
269
270 typedef struct recurse_backtrack {
271 backtrack_common common;
272 } recurse_backtrack;
273
274 #define MAX_RANGE_SIZE 6
275
276 typedef struct compiler_common {
277 struct sljit_compiler *compiler;
278 pcre_uchar *start;
279
280 /* Maps private data offset to each opcode. */
281 int *private_data_ptrs;
282 /* Tells whether the capturing bracket is optimized. */
283 pcre_uint8 *optimized_cbracket;
284 /* Starting offset of private data for capturing brackets. */
285 int cbraptr;
286 /* OVector starting point. Must be divisible by 2. */
287 int ovector_start;
288 /* Last known position of the requested byte. */
289 int req_char_ptr;
290 /* Head of the last recursion. */
291 int recursive_head;
292 /* First inspected character for partial matching. */
293 int start_used_ptr;
294 /* Starting pointer for partial soft matches. */
295 int hit_start;
296 /* End pointer of the first line. */
297 int first_line_end;
298 /* Points to the marked string. */
299 int mark_ptr;
300
301 /* Flipped and lower case tables. */
302 const pcre_uint8 *fcc;
303 sljit_w lcc;
304 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
305 int mode;
306 /* Newline control. */
307 int nltype;
308 int newline;
309 int bsr_nltype;
310 /* Dollar endonly. */
311 int endonly;
312 BOOL has_set_som;
313 /* Tables. */
314 sljit_w ctypes;
315 int digits[2 + MAX_RANGE_SIZE];
316 /* Named capturing brackets. */
317 sljit_uw name_table;
318 sljit_w name_count;
319 sljit_w name_entry_size;
320
321 /* Labels and jump lists. */
322 struct sljit_label *partialmatchlabel;
323 struct sljit_label *quitlabel;
324 struct sljit_label *acceptlabel;
325 stub_list *stubs;
326 recurse_entry *entries;
327 recurse_entry *currententry;
328 jump_list *partialmatch;
329 jump_list *quit;
330 jump_list *accept;
331 jump_list *calllimit;
332 jump_list *stackalloc;
333 jump_list *revertframes;
334 jump_list *wordboundary;
335 jump_list *anynewline;
336 jump_list *hspace;
337 jump_list *vspace;
338 jump_list *casefulcmp;
339 jump_list *caselesscmp;
340 BOOL jscript_compat;
341 #ifdef SUPPORT_UTF
342 BOOL utf;
343 #ifdef SUPPORT_UCP
344 BOOL use_ucp;
345 #endif
346 jump_list *utfreadchar;
347 #ifdef COMPILE_PCRE8
348 jump_list *utfreadtype8;
349 #endif
350 #endif /* SUPPORT_UTF */
351 #ifdef SUPPORT_UCP
352 jump_list *getucd;
353 #endif
354 } compiler_common;
355
356 /* For byte_sequence_compare. */
357
358 typedef struct compare_context {
359 int length;
360 int sourcereg;
361 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
362 int ucharptr;
363 union {
364 sljit_i asint;
365 sljit_uh asushort;
366 #ifdef COMPILE_PCRE8
367 sljit_ub asbyte;
368 sljit_ub asuchars[4];
369 #else
370 #ifdef COMPILE_PCRE16
371 sljit_uh asuchars[2];
372 #endif
373 #endif
374 } c;
375 union {
376 sljit_i asint;
377 sljit_uh asushort;
378 #ifdef COMPILE_PCRE8
379 sljit_ub asbyte;
380 sljit_ub asuchars[4];
381 #else
382 #ifdef COMPILE_PCRE16
383 sljit_uh asuchars[2];
384 #endif
385 #endif
386 } oc;
387 #endif
388 } compare_context;
389
390 enum {
391 frame_end = 0,
392 frame_setstrbegin = -1,
393 frame_setmark = -2
394 };
395
396 /* Undefine sljit macros. */
397 #undef CMP
398
399 /* Used for accessing the elements of the stack. */
400 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_w))
401
402 #define TMP1 SLJIT_TEMPORARY_REG1
403 #define TMP2 SLJIT_TEMPORARY_REG3
404 #define TMP3 SLJIT_TEMPORARY_EREG2
405 #define STR_PTR SLJIT_SAVED_REG1
406 #define STR_END SLJIT_SAVED_REG2
407 #define STACK_TOP SLJIT_TEMPORARY_REG2
408 #define STACK_LIMIT SLJIT_SAVED_REG3
409 #define ARGUMENTS SLJIT_SAVED_EREG1
410 #define CALL_COUNT SLJIT_SAVED_EREG2
411 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
412
413 /* Local space layout. */
414 /* These two locals can be used by the current opcode. */
415 #define LOCALS0 (0 * sizeof(sljit_w))
416 #define LOCALS1 (1 * sizeof(sljit_w))
417 /* Two local variables for possessive quantifiers (char1 cannot use them). */
418 #define POSSESSIVE0 (2 * sizeof(sljit_w))
419 #define POSSESSIVE1 (3 * sizeof(sljit_w))
420 /* Max limit of recursions. */
421 #define CALL_LIMIT (4 * sizeof(sljit_w))
422 /* The output vector is stored on the stack, and contains pointers
423 to characters. The vector data is divided into two groups: the first
424 group contains the start / end character pointers, and the second is
425 the start pointers when the end of the capturing group has not yet been reached. */
426 #define OVECTOR_START (common->ovector_start)
427 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_w))
428 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_w))
429 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
430
431 #ifdef COMPILE_PCRE8
432 #define MOV_UCHAR SLJIT_MOV_UB
433 #define MOVU_UCHAR SLJIT_MOVU_UB
434 #else
435 #ifdef COMPILE_PCRE16
436 #define MOV_UCHAR SLJIT_MOV_UH
437 #define MOVU_UCHAR SLJIT_MOVU_UH
438 #else
439 #error Unsupported compiling mode
440 #endif
441 #endif
442
443 /* Shortcuts. */
444 #define DEFINE_COMPILER \
445 struct sljit_compiler *compiler = common->compiler
446 #define OP1(op, dst, dstw, src, srcw) \
447 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
448 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
449 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
450 #define LABEL() \
451 sljit_emit_label(compiler)
452 #define JUMP(type) \
453 sljit_emit_jump(compiler, (type))
454 #define JUMPTO(type, label) \
455 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
456 #define JUMPHERE(jump) \
457 sljit_set_label((jump), sljit_emit_label(compiler))
458 #define CMP(type, src1, src1w, src2, src2w) \
459 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
460 #define CMPTO(type, src1, src1w, src2, src2w, label) \
461 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
462 #define COND_VALUE(op, dst, dstw, type) \
463 sljit_emit_cond_value(compiler, (op), (dst), (dstw), (type))
464 #define GET_LOCAL_BASE(dst, dstw, offset) \
465 sljit_get_local_base(compiler, (dst), (dstw), (offset))
466
467 static pcre_uchar* bracketend(pcre_uchar* cc)
468 {
469 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
470 do cc += GET(cc, 1); while (*cc == OP_ALT);
471 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
472 cc += 1 + LINK_SIZE;
473 return cc;
474 }
475
476 /* Functions which might need modification for every newly supported opcode:
477 next_opcode
478 get_private_data_length
479 set_private_data_ptrs
480 get_framesize
481 init_frame
482 get_private_data_length_for_copy
483 copy_private_data
484 compile_matchingpath
485 compile_backtrackingpath
486 */
487
488 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
489 {
490 SLJIT_UNUSED_ARG(common);
491 switch(*cc)
492 {
493 case OP_SOD:
494 case OP_SOM:
495 case OP_SET_SOM:
496 case OP_NOT_WORD_BOUNDARY:
497 case OP_WORD_BOUNDARY:
498 case OP_NOT_DIGIT:
499 case OP_DIGIT:
500 case OP_NOT_WHITESPACE:
501 case OP_WHITESPACE:
502 case OP_NOT_WORDCHAR:
503 case OP_WORDCHAR:
504 case OP_ANY:
505 case OP_ALLANY:
506 case OP_ANYNL:
507 case OP_NOT_HSPACE:
508 case OP_HSPACE:
509 case OP_NOT_VSPACE:
510 case OP_VSPACE:
511 case OP_EXTUNI:
512 case OP_EODN:
513 case OP_EOD:
514 case OP_CIRC:
515 case OP_CIRCM:
516 case OP_DOLL:
517 case OP_DOLLM:
518 case OP_TYPESTAR:
519 case OP_TYPEMINSTAR:
520 case OP_TYPEPLUS:
521 case OP_TYPEMINPLUS:
522 case OP_TYPEQUERY:
523 case OP_TYPEMINQUERY:
524 case OP_TYPEPOSSTAR:
525 case OP_TYPEPOSPLUS:
526 case OP_TYPEPOSQUERY:
527 case OP_CRSTAR:
528 case OP_CRMINSTAR:
529 case OP_CRPLUS:
530 case OP_CRMINPLUS:
531 case OP_CRQUERY:
532 case OP_CRMINQUERY:
533 case OP_DEF:
534 case OP_BRAZERO:
535 case OP_BRAMINZERO:
536 case OP_BRAPOSZERO:
537 case OP_COMMIT:
538 case OP_FAIL:
539 case OP_ACCEPT:
540 case OP_ASSERT_ACCEPT:
541 case OP_SKIPZERO:
542 return cc + 1;
543
544 case OP_ANYBYTE:
545 #ifdef SUPPORT_UTF
546 if (common->utf) return NULL;
547 #endif
548 return cc + 1;
549
550 case OP_CHAR:
551 case OP_CHARI:
552 case OP_NOT:
553 case OP_NOTI:
554 case OP_STAR:
555 case OP_MINSTAR:
556 case OP_PLUS:
557 case OP_MINPLUS:
558 case OP_QUERY:
559 case OP_MINQUERY:
560 case OP_POSSTAR:
561 case OP_POSPLUS:
562 case OP_POSQUERY:
563 case OP_STARI:
564 case OP_MINSTARI:
565 case OP_PLUSI:
566 case OP_MINPLUSI:
567 case OP_QUERYI:
568 case OP_MINQUERYI:
569 case OP_POSSTARI:
570 case OP_POSPLUSI:
571 case OP_POSQUERYI:
572 case OP_NOTSTAR:
573 case OP_NOTMINSTAR:
574 case OP_NOTPLUS:
575 case OP_NOTMINPLUS:
576 case OP_NOTQUERY:
577 case OP_NOTMINQUERY:
578 case OP_NOTPOSSTAR:
579 case OP_NOTPOSPLUS:
580 case OP_NOTPOSQUERY:
581 case OP_NOTSTARI:
582 case OP_NOTMINSTARI:
583 case OP_NOTPLUSI:
584 case OP_NOTMINPLUSI:
585 case OP_NOTQUERYI:
586 case OP_NOTMINQUERYI:
587 case OP_NOTPOSSTARI:
588 case OP_NOTPOSPLUSI:
589 case OP_NOTPOSQUERYI:
590 cc += 2;
591 #ifdef SUPPORT_UTF
592 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
593 #endif
594 return cc;
595
596 case OP_UPTO:
597 case OP_MINUPTO:
598 case OP_EXACT:
599 case OP_POSUPTO:
600 case OP_UPTOI:
601 case OP_MINUPTOI:
602 case OP_EXACTI:
603 case OP_POSUPTOI:
604 case OP_NOTUPTO:
605 case OP_NOTMINUPTO:
606 case OP_NOTEXACT:
607 case OP_NOTPOSUPTO:
608 case OP_NOTUPTOI:
609 case OP_NOTMINUPTOI:
610 case OP_NOTEXACTI:
611 case OP_NOTPOSUPTOI:
612 cc += 2 + IMM2_SIZE;
613 #ifdef SUPPORT_UTF
614 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
615 #endif
616 return cc;
617
618 case OP_NOTPROP:
619 case OP_PROP:
620 return cc + 1 + 2;
621
622 case OP_TYPEUPTO:
623 case OP_TYPEMINUPTO:
624 case OP_TYPEEXACT:
625 case OP_TYPEPOSUPTO:
626 case OP_REF:
627 case OP_REFI:
628 case OP_CREF:
629 case OP_NCREF:
630 case OP_RREF:
631 case OP_NRREF:
632 case OP_CLOSE:
633 cc += 1 + IMM2_SIZE;
634 return cc;
635
636 case OP_CRRANGE:
637 case OP_CRMINRANGE:
638 return cc + 1 + 2 * IMM2_SIZE;
639
640 case OP_CLASS:
641 case OP_NCLASS:
642 return cc + 1 + 32 / sizeof(pcre_uchar);
643
644 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
645 case OP_XCLASS:
646 return cc + GET(cc, 1);
647 #endif
648
649 case OP_RECURSE:
650 case OP_ASSERT:
651 case OP_ASSERT_NOT:
652 case OP_ASSERTBACK:
653 case OP_ASSERTBACK_NOT:
654 case OP_REVERSE:
655 case OP_ONCE:
656 case OP_ONCE_NC:
657 case OP_BRA:
658 case OP_BRAPOS:
659 case OP_COND:
660 case OP_SBRA:
661 case OP_SBRAPOS:
662 case OP_SCOND:
663 case OP_ALT:
664 case OP_KET:
665 case OP_KETRMAX:
666 case OP_KETRMIN:
667 case OP_KETRPOS:
668 return cc + 1 + LINK_SIZE;
669
670 case OP_CBRA:
671 case OP_CBRAPOS:
672 case OP_SCBRA:
673 case OP_SCBRAPOS:
674 return cc + 1 + LINK_SIZE + IMM2_SIZE;
675
676 case OP_MARK:
677 return cc + 1 + 2 + cc[1];
678
679 default:
680 return NULL;
681 }
682 }
683
684 #define CASE_ITERATOR_PRIVATE_DATA_1 \
685 case OP_MINSTAR: \
686 case OP_MINPLUS: \
687 case OP_QUERY: \
688 case OP_MINQUERY: \
689 case OP_MINSTARI: \
690 case OP_MINPLUSI: \
691 case OP_QUERYI: \
692 case OP_MINQUERYI: \
693 case OP_NOTMINSTAR: \
694 case OP_NOTMINPLUS: \
695 case OP_NOTQUERY: \
696 case OP_NOTMINQUERY: \
697 case OP_NOTMINSTARI: \
698 case OP_NOTMINPLUSI: \
699 case OP_NOTQUERYI: \
700 case OP_NOTMINQUERYI:
701
702 #define CASE_ITERATOR_PRIVATE_DATA_2A \
703 case OP_STAR: \
704 case OP_PLUS: \
705 case OP_STARI: \
706 case OP_PLUSI: \
707 case OP_NOTSTAR: \
708 case OP_NOTPLUS: \
709 case OP_NOTSTARI: \
710 case OP_NOTPLUSI:
711
712 #define CASE_ITERATOR_PRIVATE_DATA_2B \
713 case OP_UPTO: \
714 case OP_MINUPTO: \
715 case OP_UPTOI: \
716 case OP_MINUPTOI: \
717 case OP_NOTUPTO: \
718 case OP_NOTMINUPTO: \
719 case OP_NOTUPTOI: \
720 case OP_NOTMINUPTOI:
721
722 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
723 case OP_TYPEMINSTAR: \
724 case OP_TYPEMINPLUS: \
725 case OP_TYPEQUERY: \
726 case OP_TYPEMINQUERY:
727
728 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
729 case OP_TYPESTAR: \
730 case OP_TYPEPLUS:
731
732 #define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
733 case OP_TYPEUPTO: \
734 case OP_TYPEMINUPTO:
735
736 static int get_class_iterator_size(pcre_uchar *cc)
737 {
738 switch(*cc)
739 {
740 case OP_CRSTAR:
741 case OP_CRPLUS:
742 return 2;
743
744 case OP_CRMINSTAR:
745 case OP_CRMINPLUS:
746 case OP_CRQUERY:
747 case OP_CRMINQUERY:
748 return 1;
749
750 case OP_CRRANGE:
751 case OP_CRMINRANGE:
752 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
753 return 0;
754 return 2;
755
756 default:
757 return 0;
758 }
759 }
760
761 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
762 {
763 int private_data_length = 0;
764 pcre_uchar *alternative;
765 pcre_uchar *name;
766 pcre_uchar *end = NULL;
767 int space, size, bracketlen, i;
768
769 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
770 while (cc < ccend)
771 {
772 space = 0;
773 size = 0;
774 bracketlen = 0;
775 switch(*cc)
776 {
777 case OP_SET_SOM:
778 common->has_set_som = TRUE;
779 cc += 1;
780 break;
781
782 case OP_REF:
783 case OP_REFI:
784 common->optimized_cbracket[GET2(cc, 1)] = 0;
785 cc += 1 + IMM2_SIZE;
786 break;
787
788 case OP_ASSERT:
789 case OP_ASSERT_NOT:
790 case OP_ASSERTBACK:
791 case OP_ASSERTBACK_NOT:
792 case OP_ONCE:
793 case OP_ONCE_NC:
794 case OP_BRAPOS:
795 case OP_SBRA:
796 case OP_SBRAPOS:
797 private_data_length += sizeof(sljit_w);
798 bracketlen = 1 + LINK_SIZE;
799 break;
800
801 case OP_CBRAPOS:
802 case OP_SCBRAPOS:
803 private_data_length += sizeof(sljit_w);
804 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
805 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
806 break;
807
808 case OP_COND:
809 case OP_SCOND:
810 bracketlen = cc[1 + LINK_SIZE];
811 if (bracketlen == OP_CREF)
812 {
813 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
814 common->optimized_cbracket[bracketlen] = 0;
815 }
816 else if (bracketlen == OP_NCREF)
817 {
818 bracketlen = GET2(cc, 1 + LINK_SIZE + 1);
819 name = (pcre_uchar *)common->name_table;
820 alternative = name;
821 for (i = 0; i < common->name_count; i++)
822 {
823 if (GET2(name, 0) == bracketlen) break;
824 name += common->name_entry_size;
825 }
826 SLJIT_ASSERT(i != common->name_count);
827
828 for (i = 0; i < common->name_count; i++)
829 {
830 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
831 common->optimized_cbracket[GET2(alternative, 0)] = 0;
832 alternative += common->name_entry_size;
833 }
834 }
835
836 if (*cc == OP_COND)
837 {
838 /* Might be a hidden SCOND. */
839 alternative = cc + GET(cc, 1);
840 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
841 private_data_length += sizeof(sljit_w);
842 }
843 else
844 private_data_length += sizeof(sljit_w);
845 bracketlen = 1 + LINK_SIZE;
846 break;
847
848 case OP_BRA:
849 bracketlen = 1 + LINK_SIZE;
850 break;
851
852 case OP_CBRA:
853 case OP_SCBRA:
854 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
855 break;
856
857 CASE_ITERATOR_PRIVATE_DATA_1
858 space = 1;
859 size = -2;
860 break;
861
862 CASE_ITERATOR_PRIVATE_DATA_2A
863 space = 2;
864 size = -2;
865 break;
866
867 CASE_ITERATOR_PRIVATE_DATA_2B
868 space = 2;
869 size = -(2 + IMM2_SIZE);
870 break;
871
872 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
873 space = 1;
874 size = 1;
875 break;
876
877 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
878 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
879 space = 2;
880 size = 1;
881 break;
882
883 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
884 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
885 space = 2;
886 size = 1 + IMM2_SIZE;
887 break;
888
889 case OP_CLASS:
890 case OP_NCLASS:
891 size += 1 + 32 / sizeof(pcre_uchar);
892 space = get_class_iterator_size(cc + size);
893 break;
894
895 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
896 case OP_XCLASS:
897 size = GET(cc, 1);
898 space = get_class_iterator_size(cc + size);
899 break;
900 #endif
901
902 case OP_RECURSE:
903 /* Set its value only once. */
904 if (common->recursive_head == 0)
905 {
906 common->recursive_head = common->ovector_start;
907 common->ovector_start += sizeof(sljit_w);
908 }
909 cc += 1 + LINK_SIZE;
910 break;
911
912 case OP_MARK:
913 if (common->mark_ptr == 0)
914 {
915 common->mark_ptr = common->ovector_start;
916 common->ovector_start += sizeof(sljit_w);
917 }
918 cc += 1 + 2 + cc[1];
919 break;
920
921 default:
922 cc = next_opcode(common, cc);
923 if (cc == NULL)
924 return -1;
925 break;
926 }
927
928 if (space > 0 && cc >= end)
929 private_data_length += sizeof(sljit_w) * space;
930
931 if (size != 0)
932 {
933 if (size < 0)
934 {
935 cc += -size;
936 #ifdef SUPPORT_UTF
937 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
938 #endif
939 }
940 else
941 cc += size;
942 }
943
944 if (bracketlen > 0)
945 {
946 if (cc >= end)
947 {
948 end = bracketend(cc);
949 if (end[-1 - LINK_SIZE] == OP_KET)
950 end = NULL;
951 }
952 cc += bracketlen;
953 }
954 }
955 return private_data_length;
956 }
957
958 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
959 {
960 pcre_uchar *cc = common->start;
961 pcre_uchar *alternative;
962 pcre_uchar *end = NULL;
963 int space, size, bracketlen;
964
965 while (cc < ccend)
966 {
967 space = 0;
968 size = 0;
969 bracketlen = 0;
970 switch(*cc)
971 {
972 case OP_ASSERT:
973 case OP_ASSERT_NOT:
974 case OP_ASSERTBACK:
975 case OP_ASSERTBACK_NOT:
976 case OP_ONCE:
977 case OP_ONCE_NC:
978 case OP_BRAPOS:
979 case OP_SBRA:
980 case OP_SBRAPOS:
981 case OP_SCOND:
982 common->private_data_ptrs[cc - common->start] = private_data_ptr;
983 private_data_ptr += sizeof(sljit_w);
984 bracketlen = 1 + LINK_SIZE;
985 break;
986
987 case OP_CBRAPOS:
988 case OP_SCBRAPOS:
989 common->private_data_ptrs[cc - common->start] = private_data_ptr;
990 private_data_ptr += sizeof(sljit_w);
991 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
992 break;
993
994 case OP_COND:
995 /* Might be a hidden SCOND. */
996 alternative = cc + GET(cc, 1);
997 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
998 {
999 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1000 private_data_ptr += sizeof(sljit_w);
1001 }
1002 bracketlen = 1 + LINK_SIZE;
1003 break;
1004
1005 case OP_BRA:
1006 bracketlen = 1 + LINK_SIZE;
1007 break;
1008
1009 case OP_CBRA:
1010 case OP_SCBRA:
1011 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1012 break;
1013
1014 CASE_ITERATOR_PRIVATE_DATA_1
1015 space = 1;
1016 size = -2;
1017 break;
1018
1019 CASE_ITERATOR_PRIVATE_DATA_2A
1020 space = 2;
1021 size = -2;
1022 break;
1023
1024 CASE_ITERATOR_PRIVATE_DATA_2B
1025 space = 2;
1026 size = -(2 + IMM2_SIZE);
1027 break;
1028
1029 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1030 space = 1;
1031 size = 1;
1032 break;
1033
1034 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1035 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1036 space = 2;
1037 size = 1;
1038 break;
1039
1040 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1041 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1042 space = 2;
1043 size = 1 + IMM2_SIZE;
1044 break;
1045
1046 case OP_CLASS:
1047 case OP_NCLASS:
1048 size += 1 + 32 / sizeof(pcre_uchar);
1049 space = get_class_iterator_size(cc + size);
1050 break;
1051
1052 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1053 case OP_XCLASS:
1054 size = GET(cc, 1);
1055 space = get_class_iterator_size(cc + size);
1056 break;
1057 #endif
1058
1059 default:
1060 cc = next_opcode(common, cc);
1061 SLJIT_ASSERT(cc != NULL);
1062 break;
1063 }
1064
1065 if (space > 0 && cc >= end)
1066 {
1067 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1068 private_data_ptr += sizeof(sljit_w) * space;
1069 }
1070
1071 if (size != 0)
1072 {
1073 if (size < 0)
1074 {
1075 cc += -size;
1076 #ifdef SUPPORT_UTF
1077 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1078 #endif
1079 }
1080 else
1081 cc += size;
1082 }
1083
1084 if (bracketlen > 0)
1085 {
1086 if (cc >= end)
1087 {
1088 end = bracketend(cc);
1089 if (end[-1 - LINK_SIZE] == OP_KET)
1090 end = NULL;
1091 }
1092 cc += bracketlen;
1093 }
1094 }
1095 }
1096
1097 /* Returns with -1 if no need for frame. */
1098 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1099 {
1100 pcre_uchar *ccend = bracketend(cc);
1101 int length = 0;
1102 BOOL possessive = FALSE;
1103 BOOL setsom_found = recursive;
1104 BOOL setmark_found = recursive;
1105
1106 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1107 {
1108 length = 3;
1109 possessive = TRUE;
1110 }
1111
1112 cc = next_opcode(common, cc);
1113 SLJIT_ASSERT(cc != NULL);
1114 while (cc < ccend)
1115 switch(*cc)
1116 {
1117 case OP_SET_SOM:
1118 SLJIT_ASSERT(common->has_set_som);
1119 if (!setsom_found)
1120 {
1121 length += 2;
1122 setsom_found = TRUE;
1123 }
1124 cc += 1;
1125 break;
1126
1127 case OP_MARK:
1128 SLJIT_ASSERT(common->mark_ptr != 0);
1129 if (!setmark_found)
1130 {
1131 length += 2;
1132 setmark_found = TRUE;
1133 }
1134 cc += 1 + 2 + cc[1];
1135 break;
1136
1137 case OP_RECURSE:
1138 if (common->has_set_som && !setsom_found)
1139 {
1140 length += 2;
1141 setsom_found = TRUE;
1142 }
1143 if (common->mark_ptr != 0 && !setmark_found)
1144 {
1145 length += 2;
1146 setmark_found = TRUE;
1147 }
1148 cc += 1 + LINK_SIZE;
1149 break;
1150
1151 case OP_CBRA:
1152 case OP_CBRAPOS:
1153 case OP_SCBRA:
1154 case OP_SCBRAPOS:
1155 length += 3;
1156 cc += 1 + LINK_SIZE + IMM2_SIZE;
1157 break;
1158
1159 default:
1160 cc = next_opcode(common, cc);
1161 SLJIT_ASSERT(cc != NULL);
1162 break;
1163 }
1164
1165 /* Possessive quantifiers can use a special case. */
1166 if (SLJIT_UNLIKELY(possessive) && length == 3)
1167 return -1;
1168
1169 if (length > 0)
1170 return length + 1;
1171 return -1;
1172 }
1173
1174 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1175 {
1176 DEFINE_COMPILER;
1177 pcre_uchar *ccend = bracketend(cc);
1178 BOOL setsom_found = recursive;
1179 BOOL setmark_found = recursive;
1180 int offset;
1181
1182 /* >= 1 + shortest item size (2) */
1183 SLJIT_UNUSED_ARG(stacktop);
1184 SLJIT_ASSERT(stackpos >= stacktop + 2);
1185
1186 stackpos = STACK(stackpos);
1187 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1188 cc = next_opcode(common, cc);
1189 SLJIT_ASSERT(cc != NULL);
1190 while (cc < ccend)
1191 switch(*cc)
1192 {
1193 case OP_SET_SOM:
1194 SLJIT_ASSERT(common->has_set_som);
1195 if (!setsom_found)
1196 {
1197 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1198 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1199 stackpos += (int)sizeof(sljit_w);
1200 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1201 stackpos += (int)sizeof(sljit_w);
1202 setsom_found = TRUE;
1203 }
1204 cc += 1;
1205 break;
1206
1207 case OP_MARK:
1208 SLJIT_ASSERT(common->mark_ptr != 0);
1209 if (!setmark_found)
1210 {
1211 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1212 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1213 stackpos += (int)sizeof(sljit_w);
1214 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1215 stackpos += (int)sizeof(sljit_w);
1216 setmark_found = TRUE;
1217 }
1218 cc += 1 + 2 + cc[1];
1219 break;
1220
1221 case OP_RECURSE:
1222 if (common->has_set_som && !setsom_found)
1223 {
1224 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1225 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1226 stackpos += (int)sizeof(sljit_w);
1227 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1228 stackpos += (int)sizeof(sljit_w);
1229 setsom_found = TRUE;
1230 }
1231 if (common->mark_ptr != 0 && !setmark_found)
1232 {
1233 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1234 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1235 stackpos += (int)sizeof(sljit_w);
1236 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1237 stackpos += (int)sizeof(sljit_w);
1238 setmark_found = TRUE;
1239 }
1240 cc += 1 + LINK_SIZE;
1241 break;
1242
1243 case OP_CBRA:
1244 case OP_CBRAPOS:
1245 case OP_SCBRA:
1246 case OP_SCBRAPOS:
1247 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1248 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1249 stackpos += (int)sizeof(sljit_w);
1250 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1251 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1252 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1253 stackpos += (int)sizeof(sljit_w);
1254 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1255 stackpos += (int)sizeof(sljit_w);
1256
1257 cc += 1 + LINK_SIZE + IMM2_SIZE;
1258 break;
1259
1260 default:
1261 cc = next_opcode(common, cc);
1262 SLJIT_ASSERT(cc != NULL);
1263 break;
1264 }
1265
1266 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1267 SLJIT_ASSERT(stackpos == STACK(stacktop));
1268 }
1269
1270 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1271 {
1272 int private_data_length = 2;
1273 int size;
1274 pcre_uchar *alternative;
1275 /* Calculate the sum of the private machine words. */
1276 while (cc < ccend)
1277 {
1278 size = 0;
1279 switch(*cc)
1280 {
1281 case OP_ASSERT:
1282 case OP_ASSERT_NOT:
1283 case OP_ASSERTBACK:
1284 case OP_ASSERTBACK_NOT:
1285 case OP_ONCE:
1286 case OP_ONCE_NC:
1287 case OP_BRAPOS:
1288 case OP_SBRA:
1289 case OP_SBRAPOS:
1290 case OP_SCOND:
1291 private_data_length++;
1292 cc += 1 + LINK_SIZE;
1293 break;
1294
1295 case OP_CBRA:
1296 case OP_SCBRA:
1297 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1298 private_data_length++;
1299 cc += 1 + LINK_SIZE + IMM2_SIZE;
1300 break;
1301
1302 case OP_CBRAPOS:
1303 case OP_SCBRAPOS:
1304 private_data_length += 2;
1305 cc += 1 + LINK_SIZE + IMM2_SIZE;
1306 break;
1307
1308 case OP_COND:
1309 /* Might be a hidden SCOND. */
1310 alternative = cc + GET(cc, 1);
1311 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1312 private_data_length++;
1313 cc += 1 + LINK_SIZE;
1314 break;
1315
1316 CASE_ITERATOR_PRIVATE_DATA_1
1317 if (PRIVATE_DATA(cc))
1318 private_data_length++;
1319 cc += 2;
1320 #ifdef SUPPORT_UTF
1321 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1322 #endif
1323 break;
1324
1325 CASE_ITERATOR_PRIVATE_DATA_2A
1326 if (PRIVATE_DATA(cc))
1327 private_data_length += 2;
1328 cc += 2;
1329 #ifdef SUPPORT_UTF
1330 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1331 #endif
1332 break;
1333
1334 CASE_ITERATOR_PRIVATE_DATA_2B
1335 if (PRIVATE_DATA(cc))
1336 private_data_length += 2;
1337 cc += 2 + IMM2_SIZE;
1338 #ifdef SUPPORT_UTF
1339 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1340 #endif
1341 break;
1342
1343 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1344 if (PRIVATE_DATA(cc))
1345 private_data_length++;
1346 cc += 1;
1347 break;
1348
1349 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1350 if (PRIVATE_DATA(cc))
1351 private_data_length += 2;
1352 cc += 1;
1353 break;
1354
1355 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1356 if (PRIVATE_DATA(cc))
1357 private_data_length += 2;
1358 cc += 1 + IMM2_SIZE;
1359 break;
1360
1361 case OP_CLASS:
1362 case OP_NCLASS:
1363 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1364 case OP_XCLASS:
1365 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1366 #else
1367 size = 1 + 32 / (int)sizeof(pcre_uchar);
1368 #endif
1369 if (PRIVATE_DATA(cc))
1370 private_data_length += get_class_iterator_size(cc + size);
1371 cc += size;
1372 break;
1373
1374 default:
1375 cc = next_opcode(common, cc);
1376 SLJIT_ASSERT(cc != NULL);
1377 break;
1378 }
1379 }
1380 SLJIT_ASSERT(cc == ccend);
1381 return private_data_length;
1382 }
1383
/* Emits code which copies the private data words of the opcodes in the
[cc, ccend) range between the local area (addressed through SLJIT_LOCALS_REG)
and the stack (addressed through STACK_TOP).

When save is TRUE the locals are written to the stack, and the word at
common->recursive_head is the first one written. When save is FALSE the stack
words are written back to the locals. TMP1 and TMP2 are used alternately as
transfer registers. stackptr and stacktop are stack slot indexes which are
converted to offsets by STACK() below. */
1384 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1385 BOOL save, int stackptr, int stacktop)
1386 {
1387 DEFINE_COMPILER;
/* Local offsets of the words which belong to the current item. */
1388 int srcw[2];
/* count: number of valid entries in srcw; size: length of a class opcode. */
1389 int count, size;
/* tmp1next selects the register used by the next transfer, the *empty
flags tell whether a register currently holds a word in flight. */
1390 BOOL tmp1next = TRUE;
1391 BOOL tmp1empty = TRUE;
1392 BOOL tmp2empty = TRUE;
1393 pcre_uchar *alternative;
1394 enum {
1395 start,
1396 loop,
1397 end
1398 } status;
1399
/* Only the save direction goes through the 'start' state, which handles
common->recursive_head. */
1400 status = save ? start : loop;
1401 stackptr = STACK(stackptr - 2);
1402 stacktop = STACK(stacktop - 1);
1403
1404 if (!save)
1405 {
/* One stack word is skipped here. NOTE(review): presumably the slot which
received recursive_head in the save direction - confirm against callers. */
1406 stackptr += sizeof(sljit_w);
/* Preload up to two words from the stack, so the loop below can store
them into the locals. */
1407 if (stackptr < stacktop)
1408 {
1409 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1410 stackptr += sizeof(sljit_w);
1411 tmp1empty = FALSE;
1412 }
1413 if (stackptr < stacktop)
1414 {
1415 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1416 stackptr += sizeof(sljit_w);
1417 tmp2empty = FALSE;
1418 }
1419 /* The tmp1next must be TRUE in either way. */
1420 }
1421
/* Each iteration selects zero, one or two words (srcw / count), which are
transferred by the inner loop at the bottom. */
1422 while (status != end)
1423 {
1424 count = 0;
1425 switch(status)
1426 {
1427 case start:
1428 SLJIT_ASSERT(save && common->recursive_head != 0);
1429 count = 1;
1430 srcw[0] = common->recursive_head;
1431 status = loop;
1432 break;
1433
1434 case loop:
1435 if (cc >= ccend)
1436 {
1437 status = end;
1438 break;
1439 }
1440
/* The cases below mirror get_private_data_length_for_copy. */
1441 switch(*cc)
1442 {
1443 case OP_ASSERT:
1444 case OP_ASSERT_NOT:
1445 case OP_ASSERTBACK:
1446 case OP_ASSERTBACK_NOT:
1447 case OP_ONCE:
1448 case OP_ONCE_NC:
1449 case OP_BRAPOS:
1450 case OP_SBRA:
1451 case OP_SBRAPOS:
1452 case OP_SCOND:
1453 count = 1;
1454 srcw[0] = PRIVATE_DATA(cc);
1455 SLJIT_ASSERT(srcw[0] != 0);
1456 cc += 1 + LINK_SIZE;
1457 break;
1458
1459 case OP_CBRA:
1460 case OP_SCBRA:
1461 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1462 {
1463 count = 1;
1464 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1465 }
1466 cc += 1 + LINK_SIZE + IMM2_SIZE;
1467 break;
1468
1469 case OP_CBRAPOS:
1470 case OP_SCBRAPOS:
1471 count = 2;
1472 srcw[0] = PRIVATE_DATA(cc);
1473 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1474 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1475 cc += 1 + LINK_SIZE + IMM2_SIZE;
1476 break;
1477
1478 case OP_COND:
1479 /* Might be a hidden SCOND. */
1480 alternative = cc + GET(cc, 1);
1481 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1482 {
1483 count = 1;
1484 srcw[0] = PRIVATE_DATA(cc);
1485 SLJIT_ASSERT(srcw[0] != 0);
1486 }
1487 cc += 1 + LINK_SIZE;
1488 break;
1489
1490 CASE_ITERATOR_PRIVATE_DATA_1
1491 if (PRIVATE_DATA(cc))
1492 {
1493 count = 1;
1494 srcw[0] = PRIVATE_DATA(cc);
1495 }
1496 cc += 2;
1497 #ifdef SUPPORT_UTF
1498 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1499 #endif
1500 break;
1501
/* Two-word iterators: the second word directly follows the first one. */
1502 CASE_ITERATOR_PRIVATE_DATA_2A
1503 if (PRIVATE_DATA(cc))
1504 {
1505 count = 2;
1506 srcw[0] = PRIVATE_DATA(cc);
1507 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1508 }
1509 cc += 2;
1510 #ifdef SUPPORT_UTF
1511 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1512 #endif
1513 break;
1514
1515 CASE_ITERATOR_PRIVATE_DATA_2B
1516 if (PRIVATE_DATA(cc))
1517 {
1518 count = 2;
1519 srcw[0] = PRIVATE_DATA(cc);
1520 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_w);
1521 }
1522 cc += 2 + IMM2_SIZE;
1523 #ifdef SUPPORT_UTF
1524 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1525 #endif
1526 break;
1527
1528 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1529 if (PRIVATE_DATA(cc))
1530 {
1531 count = 1;
1532 srcw[0] = PRIVATE_DATA(cc);
1533 }
1534 cc += 1;
1535 break;
1536
1537 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1538 if (PRIVATE_DATA(cc))
1539 {
1540 count = 2;
1541 srcw[0] = PRIVATE_DATA(cc);
1542 srcw[1] = srcw[0] + sizeof(sljit_w);
1543 }
1544 cc += 1;
1545 break;
1546
1547 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1548 if (PRIVATE_DATA(cc))
1549 {
1550 count = 2;
1551 srcw[0] = PRIVATE_DATA(cc);
1552 srcw[1] = srcw[0] + sizeof(sljit_w);
1553 }
1554 cc += 1 + IMM2_SIZE;
1555 break;
1556
1557 case OP_CLASS:
1558 case OP_NCLASS:
1559 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1560 case OP_XCLASS:
1561 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1562 #else
1563 size = 1 + 32 / (int)sizeof(pcre_uchar);
1564 #endif
/* The number of words depends on the iterator which follows the class. */
1565 if (PRIVATE_DATA(cc))
1566 switch(get_class_iterator_size(cc + size))
1567 {
1568 case 1:
1569 count = 1;
1570 srcw[0] = PRIVATE_DATA(cc);
1571 break;
1572
1573 case 2:
1574 count = 2;
1575 srcw[0] = PRIVATE_DATA(cc);
1576 srcw[1] = srcw[0] + sizeof(sljit_w);
1577 break;
1578
1579 default:
1580 SLJIT_ASSERT_STOP();
1581 break;
1582 }
1583 cc += size;
1584 break;
1585
1586 default:
1587 cc = next_opcode(common, cc);
1588 SLJIT_ASSERT(cc != NULL);
1589 break;
1590 }
1591 break;
1592
1593 case end:
1594 SLJIT_ASSERT_STOP();
1595 break;
1596 }
1597
/* Transfer the selected words, srcw[count - 1] first. */
1598 while (count > 0)
1599 {
1600 count--;
1601 if (save)
1602 {
/* Save: write out the word held by the selected register (if any),
then reload the register from the local area. */
1603 if (tmp1next)
1604 {
1605 if (!tmp1empty)
1606 {
1607 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1608 stackptr += sizeof(sljit_w);
1609 }
1610 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1611 tmp1empty = FALSE;
1612 tmp1next = FALSE;
1613 }
1614 else
1615 {
1616 if (!tmp2empty)
1617 {
1618 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1619 stackptr += sizeof(sljit_w);
1620 }
1621 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1622 tmp2empty = FALSE;
1623 tmp1next = TRUE;
1624 }
1625 }
1626 else
1627 {
/* Restore: store the preloaded register into the local area, then
refill it from the stack while words remain below stacktop. */
1628 if (tmp1next)
1629 {
1630 SLJIT_ASSERT(!tmp1empty);
1631 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1632 tmp1empty = stackptr >= stacktop;
1633 if (!tmp1empty)
1634 {
1635 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1636 stackptr += sizeof(sljit_w);
1637 }
1638 tmp1next = FALSE;
1639 }
1640 else
1641 {
1642 SLJIT_ASSERT(!tmp2empty);
1643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1644 tmp2empty = stackptr >= stacktop;
1645 if (!tmp2empty)
1646 {
1647 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1648 stackptr += sizeof(sljit_w);
1649 }
1650 tmp1next = TRUE;
1651 }
1652 }
1653 }
1654 }
1655
/* Save: write out the words which are still held in the registers. The
register which would be reused next holds the older word, so it goes first. */
1656 if (save)
1657 {
1658 if (tmp1next)
1659 {
1660 if (!tmp1empty)
1661 {
1662 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1663 stackptr += sizeof(sljit_w);
1664 }
1665 if (!tmp2empty)
1666 {
1667 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1668 stackptr += sizeof(sljit_w);
1669 }
1670 }
1671 else
1672 {
1673 if (!tmp2empty)
1674 {
1675 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1676 stackptr += sizeof(sljit_w);
1677 }
1678 if (!tmp1empty)
1679 {
1680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1681 stackptr += sizeof(sljit_w);
1682 }
1683 }
1684 }
/* The whole range must be consumed, the stack area exactly used, and (when
restoring) no word may be left in the registers. */
1685 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1686 }
1687
1688 #undef CASE_ITERATOR_PRIVATE_DATA_1
1689 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1690 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1691 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1692 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1693 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1694
1695 static SLJIT_INLINE BOOL ispowerof2(unsigned int value)
1696 {
1697 return (value & (value - 1)) == 0;
1698 }
1699
1700 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1701 {
1702 while (list)
1703 {
1704 /* sljit_set_label is clever enough to do nothing
1705 if either the jump or the label is NULL. */
1706 sljit_set_label(list->jump, label);
1707 list = list->next;
1708 }
1709 }
1710
1711 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1712 {
1713 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1714 if (list_item)
1715 {
1716 list_item->next = *list;
1717 list_item->jump = jump;
1718 *list = list_item;
1719 }
1720 }
1721
1722 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1723 {
1724 DEFINE_COMPILER;
1725 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1726
1727 if (list_item)
1728 {
1729 list_item->type = type;
1730 list_item->data = data;
1731 list_item->start = start;
1732 list_item->quit = LABEL();
1733 list_item->next = common->stubs;
1734 common->stubs = list_item;
1735 }
1736 }
1737
1738 static void flush_stubs(compiler_common *common)
1739 {
1740 DEFINE_COMPILER;
1741 stub_list* list_item = common->stubs;
1742
1743 while (list_item)
1744 {
1745 JUMPHERE(list_item->start);
1746 switch(list_item->type)
1747 {
1748 case stack_alloc:
1749 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1750 break;
1751 }
1752 JUMPTO(SLJIT_JUMP, list_item->quit);
1753 list_item = list_item->next;
1754 }
1755 common->stubs = NULL;
1756 }
1757
1758 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1759 {
1760 DEFINE_COMPILER;
1761
1762 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1763 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1764 }
1765
1766 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1767 {
1768 /* May destroy all locals and registers except TMP2. */
1769 DEFINE_COMPILER;
1770
1771 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1772 #ifdef DESTROY_REGISTERS
1773 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1774 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1775 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1776 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1777 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1778 #endif
1779 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1780 }
1781
1782 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1783 {
1784 DEFINE_COMPILER;
1785 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_w));
1786 }
1787
1788 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1789 {
1790 DEFINE_COMPILER;
1791 struct sljit_label *loop;
1792 int i;
1793 /* At this point we can freely use all temporary registers. */
1794 /* TMP1 returns with begin - 1. */
1795 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1796 if (length < 8)
1797 {
1798 for (i = 0; i < length; i++)
1799 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_TEMPORARY_REG1, 0);
1800 }
1801 else
1802 {
1803 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, OVECTOR_START - sizeof(sljit_w));
1804 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, length);
1805 loop = LABEL();
1806 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(sljit_w), SLJIT_TEMPORARY_REG1, 0);
1807 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1808 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1809 }
1810 }
1811
1812 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1813 {
1814 DEFINE_COMPILER;
1815 struct sljit_label *loop;
1816 struct sljit_jump *earlyexit;
1817
1818 /* At this point we can freely use all registers. */
1819 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1820 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1821
1822 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, ARGUMENTS, 0);
1823 if (common->mark_ptr != 0)
1824 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1825 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1826 if (common->mark_ptr != 0)
1827 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_TEMPORARY_REG3, 0);
1828 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1829 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1830 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1831 /* Unlikely, but possible */
1832 earlyexit = CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 0);
1833 loop = LABEL();
1834 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_TEMPORARY_REG1, 0);
1835 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_w));
1836 /* Copy the integer value to the output buffer */
1837 #ifdef COMPILE_PCRE16
1838 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1839 #endif
1840 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1841 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1842 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1843 JUMPHERE(earlyexit);
1844
1845 /* Calculate the return value, which is the maximum ovector value. */
1846 if (topbracket > 1)
1847 {
1848 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_w));
1849 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, topbracket + 1);
1850
1851 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1852 loop = LABEL();
1853 OP1(SLJIT_MOVU, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG1), -(2 * (sljit_w)sizeof(sljit_w)));
1854 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG2, 0, SLJIT_TEMPORARY_REG2, 0, SLJIT_IMM, 1);
1855 CMPTO(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1856 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_TEMPORARY_REG2, 0);
1857 }
1858 else
1859 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1860 }
1861
1862 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1863 {
1864 DEFINE_COMPILER;
1865
1866 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1867 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1868
1869 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
1870 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1871 OP1(SLJIT_MOV_SI, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsetcount));
1872 CMPTO(SLJIT_C_LESS, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 2, quit);
1873
1874 /* Store match begin and end. */
1875 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1876 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1877 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1878 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1879 #ifdef COMPILE_PCRE16
1880 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, 1);
1881 #endif
1882 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1883
1884 OP2(SLJIT_SUB, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_SAVED_REG1, 0);
1885 #ifdef COMPILE_PCRE16
1886 OP2(SLJIT_ASHR, SLJIT_TEMPORARY_REG3, 0, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, 1);
1887 #endif
1888 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), 0, SLJIT_TEMPORARY_REG3, 0);
1889
1890 JUMPTO(SLJIT_JUMP, quit);
1891 }
1892
1893 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1894 {
1895 /* May destroy TMP1. */
1896 DEFINE_COMPILER;
1897 struct sljit_jump *jump;
1898
1899 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1900 {
1901 /* The value of -1 must be kept for start_used_ptr! */
1902 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1903 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1904 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1905 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1906 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1907 JUMPHERE(jump);
1908 }
1909 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1910 {
1911 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1912 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1913 JUMPHERE(jump);
1914 }
1915 }
1916
1917 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1918 {
1919 /* Detects if the character has an othercase. */
1920 unsigned int c;
1921
1922 #ifdef SUPPORT_UTF
1923 if (common->utf)
1924 {
1925 GETCHAR(c, cc);
1926 if (c > 127)
1927 {
1928 #ifdef SUPPORT_UCP
1929 return c != UCD_OTHERCASE(c);
1930 #else
1931 return FALSE;
1932 #endif
1933 }
1934 #ifndef COMPILE_PCRE8
1935 return common->fcc[c] != c;
1936 #endif
1937 }
1938 else
1939 #endif
1940 c = *cc;
1941 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1942 }
1943
1944 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1945 {
1946 /* Returns with the othercase. */
1947 #ifdef SUPPORT_UTF
1948 if (common->utf && c > 127)
1949 {
1950 #ifdef SUPPORT_UCP
1951 return UCD_OTHERCASE(c);
1952 #else
1953 return c;
1954 #endif
1955 }
1956 #endif
1957 return TABLE_GET(c, common->fcc, c);
1958 }
1959
1960 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1961 {
1962 /* Detects if the character and its othercase has only 1 bit difference. */
1963 unsigned int c, oc, bit;
1964 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1965 int n;
1966 #endif
1967
1968 #ifdef SUPPORT_UTF
1969 if (common->utf)
1970 {
1971 GETCHAR(c, cc);
1972 if (c <= 127)
1973 oc = common->fcc[c];
1974 else
1975 {
1976 #ifdef SUPPORT_UCP
1977 oc = UCD_OTHERCASE(c);
1978 #else
1979 oc = c;
1980 #endif
1981 }
1982 }
1983 else
1984 {
1985 c = *cc;
1986 oc = TABLE_GET(c, common->fcc, c);
1987 }
1988 #else
1989 c = *cc;
1990 oc = TABLE_GET(c, common->fcc, c);
1991 #endif
1992
1993 SLJIT_ASSERT(c != oc);
1994
1995 bit = c ^ oc;
1996 /* Optimized for English alphabet. */
1997 if (c <= 127 && bit == 0x20)
1998 return (0 << 8) | 0x20;
1999
2000 /* Since c != oc, they must have at least 1 bit difference. */
2001 if (!ispowerof2(bit))
2002 return 0;
2003
2004 #ifdef COMPILE_PCRE8
2005
2006 #ifdef SUPPORT_UTF
2007 if (common->utf && c > 127)
2008 {
2009 n = GET_EXTRALEN(*cc);
2010 while ((bit & 0x3f) == 0)
2011 {
2012 n--;
2013 bit >>= 6;
2014 }
2015 return (n << 8) | bit;
2016 }
2017 #endif /* SUPPORT_UTF */
2018 return (0 << 8) | bit;
2019
2020 #else /* COMPILE_PCRE8 */
2021
2022 #ifdef COMPILE_PCRE16
2023 #ifdef SUPPORT_UTF
2024 if (common->utf && c > 65535)
2025 {
2026 if (bit >= (1 << 10))
2027 bit >>= 10;
2028 else
2029 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2030 }
2031 #endif /* SUPPORT_UTF */
2032 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2033 #endif /* COMPILE_PCRE16 */
2034
2035 #endif /* COMPILE_PCRE8 */
2036 }
2037
2038 static void check_partial(compiler_common *common, BOOL force)
2039 {
2040 /* Checks whether a partial matching is occured. Does not modify registers. */
2041 DEFINE_COMPILER;
2042 struct sljit_jump *jump = NULL;
2043
2044 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2045
2046 if (common->mode == JIT_COMPILE)
2047 return;
2048
2049 if (!force)
2050 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2051 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2052 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2053
2054 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2055 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2056 else
2057 {
2058 if (common->partialmatchlabel != NULL)
2059 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2060 else
2061 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2062 }
2063
2064 if (jump != NULL)
2065 JUMPHERE(jump);
2066 }
2067
2068 static struct sljit_jump *check_str_end(compiler_common *common)
2069 {
2070 /* Does not affect registers. Usually used in a tight spot. */
2071 DEFINE_COMPILER;
2072 struct sljit_jump *jump;
2073 struct sljit_jump *nohit;
2074 struct sljit_jump *return_value;
2075
2076 if (common->mode == JIT_COMPILE)
2077 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2078
2079 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2080 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2081 {
2082 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2083 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2084 JUMPHERE(nohit);
2085 return_value = JUMP(SLJIT_JUMP);
2086 }
2087 else
2088 {
2089 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2090 if (common->partialmatchlabel != NULL)
2091 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2092 else
2093 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2094 }
2095 JUMPHERE(jump);
2096 return return_value;
2097 }
2098
2099 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2100 {
2101 DEFINE_COMPILER;
2102 struct sljit_jump *jump;
2103
2104 if (common->mode == JIT_COMPILE)
2105 {
2106 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2107 return;
2108 }
2109
2110 /* Partial matching mode. */
2111 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2112 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2113 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2114 {
2115 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2116 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2117 }
2118 else
2119 {
2120 if (common->partialmatchlabel != NULL)
2121 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2122 else
2123 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2124 }
2125 JUMPHERE(jump);
2126 }
2127
2128 static void read_char(compiler_common *common)
2129 {
2130 /* Reads the character into TMP1, updates STR_PTR.
2131 Does not check STR_END. TMP2 Destroyed. */
2132 DEFINE_COMPILER;
2133 #ifdef SUPPORT_UTF
2134 struct sljit_jump *jump;
2135 #endif
2136
2137 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2138 #ifdef SUPPORT_UTF
2139 if (common->utf)
2140 {
2141 #ifdef COMPILE_PCRE8
2142 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2143 #else
2144 #ifdef COMPILE_PCRE16
2145 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2146 #endif
2147 #endif /* COMPILE_PCRE8 */
2148 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2149 JUMPHERE(jump);
2150 }
2151 #endif
2152 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2153 }
2154
2155 static void peek_char(compiler_common *common)
2156 {
2157 /* Reads the character into TMP1, keeps STR_PTR.
2158 Does not check STR_END. TMP2 Destroyed. */
2159 DEFINE_COMPILER;
2160 #ifdef SUPPORT_UTF
2161 struct sljit_jump *jump;
2162 #endif
2163
2164 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2165 #ifdef SUPPORT_UTF
2166 if (common->utf)
2167 {
2168 #ifdef COMPILE_PCRE8
2169 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2170 #else
2171 #ifdef COMPILE_PCRE16
2172 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2173 #endif
2174 #endif /* COMPILE_PCRE8 */
2175 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2176 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2177 JUMPHERE(jump);
2178 }
2179 #endif
2180 }
2181
2182 static void read_char8_type(compiler_common *common)
2183 {
2184 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2185 DEFINE_COMPILER;
2186 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
2187 struct sljit_jump *jump;
2188 #endif
2189
2190 #ifdef SUPPORT_UTF
2191 if (common->utf)
2192 {
2193 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2194 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2195 #ifdef COMPILE_PCRE8
2196 /* This can be an extra read in some situations, but hopefully
2197 it is needed in most cases. */
2198 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2199 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2200 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2201 JUMPHERE(jump);
2202 #else
2203 #ifdef COMPILE_PCRE16
2204 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2205 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2206 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2207 JUMPHERE(jump);
2208 /* Skip low surrogate if necessary. */
2209 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2210 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2211 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2212 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2213 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2214 #endif
2215 #endif /* COMPILE_PCRE8 */
2216 return;
2217 }
2218 #endif
2219 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2220 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2221 #ifdef COMPILE_PCRE16
2222 /* The ctypes array contains only 256 values. */
2223 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2224 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2225 #endif
2226 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2227 #ifdef COMPILE_PCRE16
2228 JUMPHERE(jump);
2229 #endif
2230 }
2231
/* Emits code that moves STR_PTR back by one character. In UTF mode a
character may span several code units, so the emitted code steps over
continuation units as well. No bounds check against the subject start is
emitted; the caller has to guarantee that a previous character exists. */
2232 static void skip_char_back(compiler_common *common)
2233 {
2234 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2235 DEFINE_COMPILER;
2236 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2237 struct sljit_label *label;
2238
2239 if (common->utf)
2240 {
/* UTF-8: load the unit before STR_PTR, step back over it, and repeat while
that unit has the continuation form 10xxxxxx, i.e. (unit & 0xc0) == 0x80. */
2241 label = LABEL();
2242 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2243 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2244 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2245 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2246 return;
2247 }
2248 #endif
2249 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2250 if (common->utf)
2251 {
/* UTF-16: step back one unit unconditionally. */
2252 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2253 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2254 /* Skip low surrogate if necessary. */
/* (unit & 0xfc00) == 0xdc00 identifies a low surrogate. COND_VALUE appears
to materialise the equality flag as 0 or 1 in TMP1 (NOTE(review): confirm
against the macro definition); shifting left by one turns that unit count
into a byte count, which is then subtracted from STR_PTR. */
2255 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2256 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2257 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2258 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2259 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2260 return;
2261 }
2262 #endif
/* Non-UTF mode: every character is exactly one code unit. */
2263 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2264 }
2265
2266 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2267 {
2268 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2269 DEFINE_COMPILER;
2270
2271 if (nltype == NLTYPE_ANY)
2272 {
2273 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2274 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2275 }
2276 else if (nltype == NLTYPE_ANYCRLF)
2277 {
2278 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2279 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2280 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2281 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2282 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2283 }
2284 else
2285 {
2286 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2287 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2288 }
2289 }
2290
2291 #ifdef SUPPORT_UTF
2292
2293 #ifdef COMPILE_PCRE8
/* Emits the shared out-of-line helper that decodes a multi-byte UTF-8
character. The helper is entered and left with the SLJIT fast call
convention (return address kept in RETURN_ADDR). */
2294 static void do_utfreadchar(compiler_common *common)
2295 {
2296 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2297 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
/* The continuation bytes are read at offsets 1.. from STR_PTR, and STR_PTR
is advanced by the number of continuation bytes on each path.
NOTE(review): this implies STR_PTR still addresses the lead byte on entry;
confirm against the callers. */
2298 DEFINE_COMPILER;
2299 struct sljit_jump *jump;
2300
2301 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2302 /* Searching for the first zero. */
/* Bit 0x20 clear: the lead byte is treated as 110xxxxx (two bytes). */
2303 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2304 jump = JUMP(SLJIT_C_NOT_ZERO);
2305 /* Two byte sequence. */
/* value = (lead & 0x1f) << 6 | (byte1 & 0x3f) */
2306 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2307 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2308 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2309 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2310 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2311 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2312 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2313 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2314 JUMPHERE(jump);
2315
/* Bit 0x10 clear: the lead byte is treated as 1110xxxx (three bytes). */
2316 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2317 jump = JUMP(SLJIT_C_NOT_ZERO);
2318 /* Three byte sequence. */
/* value = (lead & 0x0f) << 12 | (byte1 & 0x3f) << 6 | (byte2 & 0x3f) */
2319 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2320 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2321 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2322 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2323 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2324 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2325 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2326 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2327 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2328 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2329 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2330 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2331 JUMPHERE(jump);
2332
/* Everything else is decoded as a four byte sequence; no longer forms are
handled by this helper. */
2333 /* Four byte sequence. */
/* value = (lead & 0x07) << 18 | (byte1 & 0x3f) << 12 | (byte2 & 0x3f) << 6
| (byte3 & 0x3f) */
2334 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2335 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2336 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2337 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2338 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2339 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2340 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2341 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2342 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2343 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2344 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2345 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2346 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2347 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2348 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2349 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2350 }
2351
/* Emits the shared out-of-line helper that yields the ctypes entry of a
multi-byte UTF-8 character and moves STR_PTR past it. Only two byte
sequences can produce a value below 256, so only those are decoded; longer
sequences are skipped and reported as type 0. */
2352 static void do_utfreadtype8(compiler_common *common)
2353 {
2354 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2355 of the character (>= 0xc0). Return value in TMP1. */
2356 DEFINE_COMPILER;
2357 struct sljit_jump *jump;
2358 struct sljit_jump *compare;
2359
2360 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2361
/* Bit 0x20 set means the sequence is longer than two bytes. */
2362 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2363 jump = JUMP(SLJIT_C_NOT_ZERO);
2364 /* Two byte sequence. */
/* The continuation byte is read at offset 0, unlike do_utfreadchar.
NOTE(review): this implies STR_PTR is already past the lead byte on entry;
confirm against the caller. */
2365 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2366 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2367 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2368 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2369 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2370 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
/* Decoded value above 255: no table entry, return type 0 instead. */
2371 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2372 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2373 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2374
2375 JUMPHERE(compare);
2376 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2377 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2378 JUMPHERE(jump);
2379
2380 /* We only have types for characters less than 256. */
/* Look up a byte in utf8_table4 indexed by (lead byte - 0xc0) and add it to
STR_PTR to step over the remaining bytes; the type returned is 0. */
2381 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(utf8_table4) - 0xc0);
2382 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2383 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2384 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2385 }
2386
2387 #else /* COMPILE_PCRE8 */
2388
2389 #ifdef COMPILE_PCRE16
/* Emits the shared out-of-line helper that decodes a UTF-16 character whose
first unit is in the surrogate area or above. Uses the SLJIT fast call
convention (return address kept in RETURN_ADDR). */
2390 static void do_utfreadchar(compiler_common *common)
2391 {
2392 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2393 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2394 DEFINE_COMPILER;
2395 struct sljit_jump *jump;
2396
2397 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* A first unit below 0xdc00 is a high surrogate and needs combining. Any
other value is returned unchanged; note that TMP2 is not written on that
path. */
2398 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2399 /* Do nothing, only return. */
2400 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2401
2402 JUMPHERE(jump);
2403 /* Combine two 16 bit characters. */
/* value = ((high & 0x3ff) << 10 | (low & 0x3ff)) + 0x10000. The second unit
is read at offset 1 and STR_PTR is advanced by one unit. */
2404 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2405 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2406 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2407 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2408 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2409 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2410 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2411 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2412 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2413 }
2414 #endif /* COMPILE_PCRE16 */
2415
2416 #endif /* COMPILE_PCRE8 */
2417
2418 #endif /* SUPPORT_UTF */
2419
2420 #ifdef SUPPORT_UCP
2421
2422 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
/* Mask and shift that split a character value into a block number
(value >> 7) and an offset inside the block (value & 127). */
2423 #define UCD_BLOCK_MASK 127
2424 #define UCD_BLOCK_SHIFT 7
2425
/* Emits the shared out-of-line helper that looks up the Unicode property
record of a character through the two stage ucd_stage1 / ucd_stage2 tables.
Uses the SLJIT fast call convention (return address kept in RETURN_ADDR). */
2426 static void do_getucd(compiler_common *common)
2427 {
2428 /* Search the UCD record for the character comes in TMP1.
2429 Returns chartype in TMP1 and UCD offset in TMP2. */
2430 DEFINE_COMPILER;
2431
/* The shift amounts used below (7 for the block, 3 for the record index)
are only valid for these sizes. */
2432 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2433
2434 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* Stage 1: TMP2 = ucd_stage1[ch >> 7] (byte load). */
2435 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2436 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_w)PRIV(ucd_stage1));
/* Stage 2 index: TMP1 = (ch & 127) + (stage1 value << 7). */
2437 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2438 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2439 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[index]; half-word load, index scaled by 1 << 1. */
2440 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_stage2));
2441 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = ucd_records[TMP2].chartype; record index scaled by 1 << 3, which
matches the asserted record size of 8 bytes. */
2442 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2443 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2444 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2445 }
2446 #endif
2447
/* Emits the code around the main matching loop entry.

common     compiler state
hascrorlf  together with firstline, disables the extra newline handling
           emitted when advancing (see newlinecheck below)
firstline  when TRUE, code is emitted first that locates the end of the
           first line and stores it in the first_line_end local

Returns the label that advances STR_PTR by one character. The very first
pass does not go through that label: an unconditional jump emitted before it
lands after the advance code. */
2448 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2449 {
2450 DEFINE_COMPILER;
2451 struct sljit_label *mainloop;
2452 struct sljit_label *newlinelabel = NULL;
2453 struct sljit_jump *start;
2454 struct sljit_jump *end = NULL;
2455 struct sljit_jump *nl = NULL;
2456 #ifdef SUPPORT_UTF
2457 struct sljit_jump *singlechar;
2458 #endif
2459 jump_list *newline = NULL;
2460 BOOL newlinecheck = FALSE;
2461 BOOL readuchar = FALSE;
2462
/* The extra newline handling is emitted only for the ANY and ANYCRLF
newline types and for a two unit fixed newline (newline > 255), and only
when neither hascrorlf nor firstline is set. */
2463 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2464 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2465 newlinecheck = TRUE;
2466
2467 if (firstline)
2468 {
2469 /* Search for the end of the first line. */
2470 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_PTR is saved in TMP3 for the duration of the scan and restored
afterwards. */
2471 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2472
2473 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2474 {
/* Two unit newline: advance one unit at a time and compare the previous
and the current unit with the high and low byte of common->newline. The
value stored is STR_PTR minus one unit. */
2475 mainloop = LABEL();
2476 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2477 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2478 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2479 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2480 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2481 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2482 JUMPHERE(end);
2483 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2484 }
2485 else
2486 {
/* Other newline types: the position before each character is stored, so
when a newline is found first_line_end holds the start of that newline.
When the subject end is reached instead, the current STR_PTR is stored. */
2487 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2488 mainloop = LABEL();
2489 /* Continual stores does not cause data dependency. */
2490 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2491 read_char(common);
2492 check_newlinechar(common, common->nltype, &newline, TRUE);
2493 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2494 JUMPHERE(end);
2495 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2496 set_jumps(newline, LABEL());
2497 }
2498
2499 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2500 }
2501
/* Skip the advance code on the first pass; resolved by JUMPHERE(start)
near the end of this function. */
2502 start = JUMP(SLJIT_JUMP);
2503
2504 if (newlinecheck)
2505 {
/* Reached from the main loop when the current unit equals the high byte of
common->newline: step over it and, unless the end was reached, step over
the next unit too if it equals the low byte. The 0/1 result is scaled to
bytes in the 16 bit build. */
2506 newlinelabel = LABEL();
2507 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2508 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2509 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2510 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2511 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2512 #ifdef COMPILE_PCRE16
2513 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2514 #endif
2515 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2516 nl = JUMP(SLJIT_JUMP);
2517 }
2518
/* This label is the function's return value. */
2519 mainloop = LABEL();
2520
2521 /* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current unit only has to be loaded when it is inspected below, i.e.
in UTF mode or when the newline check is active. */
2522 #ifdef SUPPORT_UTF
2523 if (common->utf) readuchar = TRUE;
2524 #endif
2525 if (newlinecheck) readuchar = TRUE;
2526
2527 if (readuchar)
2528 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2529
2530 if (newlinecheck)
2531 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2532
2533 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2534 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2535 if (common->utf)
2536 {
/* UTF-8: for a lead byte >= 0xc0 add the utf8_table4 entry indexed by
(lead byte - 0xc0) to step over the rest of the character. */
2537 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2538 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2539 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2540 JUMPHERE(singlechar);
2541 }
2542 #endif
2543 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2544 if (common->utf)
2545 {
/* UTF-16: when (unit & 0xfc00) == 0xd800 (high surrogate) skip one more
unit. */
2546 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2547 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2548 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2549 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2550 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2551 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2552 JUMPHERE(singlechar);
2553 }
2554 #endif
2555 JUMPHERE(start);
2556
2557 if (newlinecheck)
2558 {
/* Both exits of the newline handling code rejoin here. */
2559 JUMPHERE(end);
2560 JUMPHERE(nl);
2561 }
2562
2563 return mainloop;
2564 }
2565
2566 static SLJIT_INLINE BOOL fast_forward_first_two_chars(compiler_common *common, BOOL firstline)
2567 {
2568 DEFINE_COMPILER;
2569 struct sljit_label *start;
2570 struct sljit_jump *quit;
2571 struct sljit_jump *found;
2572 pcre_int32 chars[4];
2573 pcre_uchar *cc = common->start + 1 + IMM2_SIZE;
2574 int location = 0;
2575 pcre_int32 len, c, bit, caseless;
2576 BOOL must_end;
2577
2578 #ifdef COMPILE_PCRE8
2579 union {
2580 sljit_uh ascombined;
2581 sljit_ub asuchars[2];
2582 } pair;
2583 #else
2584 union {
2585 sljit_ui ascombined;
2586 sljit_uh asuchars[2];
2587 } pair;
2588 #endif
2589
2590 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2591 return FALSE;
2592
2593 while (TRUE)
2594 {
2595 caseless = 0;
2596 must_end = TRUE;
2597 switch(*cc)
2598 {
2599 case OP_CHAR:
2600 must_end = FALSE;
2601 cc++;
2602 break;
2603
2604 case OP_CHARI:
2605 caseless = 1;
2606 must_end = FALSE;
2607 cc++;
2608 break;
2609
2610 case OP_SOD:
2611 case OP_SOM:
2612 case OP_SET_SOM:
2613 case OP_NOT_WORD_BOUNDARY:
2614 case OP_WORD_BOUNDARY:
2615 case OP_EODN:
2616 case OP_EOD:
2617 case OP_CIRC:
2618 case OP_CIRCM:
2619 case OP_DOLL:
2620 case OP_DOLLM:
2621 /* Zero width assertions. */
2622 cc++;
2623 continue;
2624
2625 case OP_PLUS:
2626 case OP_MINPLUS:
2627 case OP_POSPLUS:
2628 cc++;
2629 break;
2630
2631 case OP_EXACT:
2632 cc += 1 + IMM2_SIZE;
2633 break;
2634
2635 case OP_PLUSI:
2636 case OP_MINPLUSI:
2637 case OP_POSPLUSI:
2638 caseless = 1;
2639 cc++;
2640 break;
2641
2642 case OP_EXACTI:
2643 caseless = 1;
2644 cc += 1 + IMM2_SIZE;
2645 break;
2646
2647 default:
2648 return FALSE;
2649 }
2650
2651 len = 1;
2652 #ifdef SUPPORT_UTF
2653 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2654 #endif
2655
2656 if (caseless && char_has_othercase(common, cc))
2657 {
2658 caseless = char_get_othercase_bit(common, cc);
2659 if (caseless == 0)
2660 return FALSE;
2661 #ifdef COMPILE_PCRE8
2662 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2663 #else
2664 if ((caseless & 0x100) != 0)
2665 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2666 else
2667 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2668 #endif
2669 }
2670 else
2671 caseless = 0;
2672
2673 while (len > 0 && location < 2 * 2)
2674 {
2675 c = *cc;
2676 bit = 0;
2677 if (len == (caseless & 0xff))
2678 {
2679 bit = caseless >> 8;
2680 c |= bit;
2681 }
2682
2683 chars[location] = c;
2684 chars[location + 1] = bit;
2685
2686 len--;
2687 location += 2;
2688 cc++;
2689 }
2690
2691 if (location == 2 * 2)
2692 break;
2693 else if (must_end)
2694 return FALSE;
2695 }
2696
2697 if (firstline)
2698 {
2699 SLJIT_ASSERT(common->first_line_end != 0);
2700 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2701 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, 1);
2702 }
2703 else
2704 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2705
2706 start = LABEL();
2707 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2708 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
2709 #ifdef COMPILE_PCRE8
2710 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2711 #else /* COMPILE_PCRE8 */
2712 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2713 #endif
2714
2715 #else /* SLJIT_UNALIGNED */
2716
2717 #if defined SLJIT_BIG_ENDIAN && SLJIT_BIG_ENDIAN
2718 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2719 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2720 #else /* SLJIT_BIG_ENDIAN */
2721 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2722 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2723 #endif /* SLJIT_BIG_ENDIAN */
2724
2725 #ifdef COMPILE_PCRE8
2726 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 8);
2727 #else /* COMPILE_PCRE8 */
2728 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 16);
2729 #endif
2730 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2731
2732 #endif
2733
2734 if (chars[1] != 0 || chars[3] != 0)
2735 {
2736 pair.asuchars[0] = chars[1];
2737 pair.asuchars[1] = chars[3];
2738 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, pair.ascombined);
2739 }
2740
2741 pair.asuchars[0] = chars[0];
2742 pair.asuchars[1] = chars[2];
2743 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, pair.ascombined);
2744
2745 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2746 JUMPTO(SLJIT_JUMP, start);
2747 JUMPHERE(found);
2748 JUMPHERE(quit);
2749
2750 if (firstline)
2751 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2752 else
2753 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, 1);
2754 return TRUE;
2755 }
2756
/* Emits a loop that advances STR_PTR one code unit at a time until the unit
at STR_PTR matches first_char (or its other case) or the scan limit is
reached.

common      compiler state
first_char  code unit every match has to start with
caseless    when TRUE the other case of first_char is accepted too
firstline   when TRUE the scan limit is the first_line_end local instead of
            STR_END; STR_END is kept in TMP3 meanwhile and restored at the
            end

The emitted code uses TMP1 and TMP2. */
2757 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2758 {
2759 DEFINE_COMPILER;
2760 struct sljit_label *start;
2761 struct sljit_jump *quit;
2762 struct sljit_jump *found;
2763 pcre_uchar oc, bit;
2764
2765 if (firstline)
2766 {
2767 SLJIT_ASSERT(common->first_line_end != 0);
2768 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2769 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2770 }
2771
2772 start = LABEL();
2773 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2774 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2775
/* Work out the other case at compile time: from the fcc table, or from the
Unicode data for values above 127 in UTF mode of the non 8 bit build. */
2776 oc = first_char;
2777 if (caseless)
2778 {
2779 oc = TABLE_GET(first_char, common->fcc, first_char);
2780 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2781 if (first_char > 127 && common->utf)
2782 oc = UCD_OTHERCASE(first_char);
2783 #endif
2784 }
2785 if (first_char == oc)
2786 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2787 else
2788 {
2789 bit = first_char ^ oc;
2790 if (ispowerof2(bit))
2791 {
/* The two cases differ in exactly one bit: set that bit in the subject
unit and compare once. */
2792 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2793 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2794 }
2795 else
2796 {
/* General case: TMP2 = (unit == first_char) | (unit == oc). */
2797 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2798 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
2799 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2800 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
2801 found = JUMP(SLJIT_C_NOT_ZERO);
2802 }
2803 }
2804
/* No match at this position: advance by one code unit and retry. */
2805 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2806 JUMPTO(SLJIT_JUMP, start);
2807 JUMPHERE(found);
2808 JUMPHERE(quit);
2809
2810 if (firstline)
2811 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2812 }
2813
2814 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2815 {
2816 DEFINE_COMPILER;
2817 struct sljit_label *loop;
2818 struct sljit_jump *lastchar;
2819 struct sljit_jump *firstchar;
2820 struct sljit_jump *quit;
2821 struct sljit_jump *foundcr = NULL;
2822 struct sljit_jump *notfoundnl;
2823 jump_list *newline = NULL;
2824
2825 if (firstline)
2826 {
2827 SLJIT_ASSERT(common->first_line_end != 0);
2828 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2829 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2830 }
2831
2832 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2833 {
2834 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2835 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2836 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2837 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2838 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2839
2840 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2841 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2842 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_GREATER_EQUAL);
2843 #ifdef COMPILE_PCRE16
2844 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2845 #endif
2846 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2847
2848 loop = LABEL();
2849 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2850 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2851 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2852 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2853 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2854 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2855
2856 JUMPHERE(quit);
2857 JUMPHERE(firstchar);
2858 JUMPHERE(lastchar);
2859
2860 if (firstline)
2861 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2862 return;
2863 }
2864
2865 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2866 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2867 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2868 skip_char_back(common);
2869
2870 loop = LABEL();
2871 read_char(common);
2872 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2873 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2874 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2875 check_newlinechar(common, common->nltype, &newline, FALSE);
2876 set_jumps(newline, loop);
2877
2878 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2879 {
2880 quit = JUMP(SLJIT_JUMP);
2881 JUMPHERE(foundcr);
2882 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2883 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2884 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2885 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2886 #ifdef COMPILE_PCRE16
2887 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2888 #endif
2889 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2890 JUMPHERE(notfoundnl);
2891 JUMPHERE(quit);
2892 }
2893 JUMPHERE(lastchar);
2894 JUMPHERE(firstchar);
2895
2896 if (firstline)
2897 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2898 }
2899
/* Emits a loop that advances STR_PTR character by character until the first
code unit at STR_PTR has its bit set in the starting bitmap, or the scan
limit is reached.

common      compiler state
start_bits  used as the displacement of the byte load from the bitmap, i.e.
            the address of the bitmap
firstline   when TRUE the scan limit is the first_line_end local instead of
            STR_END

The emitted code uses TMP1 and TMP2, TMP3 in UTF mode, and RETURN_ADDR when
firstline is set. */
2900 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2901 {
2902 DEFINE_COMPILER;
2903 struct sljit_label *start;
2904 struct sljit_jump *quit;
2905 struct sljit_jump *found;
2906 #ifndef COMPILE_PCRE8
2907 struct sljit_jump *jump;
2908 #endif
2909
2910 if (firstline)
2911 {
2912 SLJIT_ASSERT(common->first_line_end != 0);
/* STR_END is parked in RETURN_ADDR here, not in TMP3 as in the sibling
fast forward functions: TMP3 holds a copy of the current unit in UTF mode
(see below). */
2913 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
2914 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2915 }
2916
2917 start = LABEL();
2918 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2919 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2920 #ifdef SUPPORT_UTF
/* Keep the unmodified unit: the bitmap test below destroys TMP1, and the
unit is needed again to work out the character length. */
2921 if (common->utf)
2922 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2923 #endif
2924 #ifndef COMPILE_PCRE8
/* Units of 255 and above are all tested through bit 255 of the bitmap. */
2925 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2926 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2927 JUMPHERE(jump);
2928 #endif
/* Test bitmap[unit >> 3] & (1 << (unit & 7)). */
2929 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2930 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2931 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2932 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2933 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2934 found = JUMP(SLJIT_C_NOT_ZERO);
2935
/* Not a possible starting unit: advance to the next character. */
2936 #ifdef SUPPORT_UTF
2937 if (common->utf)
2938 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2939 #endif
2940 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2941 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2942 if (common->utf)
2943 {
/* UTF-8: for a lead byte >= 0xc0 add the utf8_table4 entry indexed by
(lead byte - 0xc0) to step over the rest of the character. */
2944 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2945 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
2946 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2947 }
2948 #endif
2949 #if defined SUPPORT_UTF && defined COMPILE_PCRE16
2950 if (common->utf)
2951 {
/* UTF-16: when (unit & 0xfc00) == 0xd800 (high surrogate) skip one more
unit. */
2952 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2953 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2954 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2955 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
2956 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2957 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2958 }
2959 #endif
2960 JUMPTO(SLJIT_JUMP, start);
2961 JUMPHERE(found);
2962 JUMPHERE(quit);
2963
2964 if (firstline)
2965 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
2966 }
2967
/* Emits code that checks whether a required code unit occurs in the rest of
the subject.

common         compiler state; the req_char_ptr local caches the position of
               the last occurrence found
req_char       the required code unit
caseless       when TRUE the other case of req_char is accepted too
has_firstchar  when TRUE the search starts one code unit after STR_PTR

Returns the jump that is taken when the scan reaches STR_END without finding
the unit; the caller has to bind it. The emitted code uses TMP1 and TMP2. */
2968 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2969 {
2970 DEFINE_COMPILER;
2971 struct sljit_label *loop;
2972 struct sljit_jump *toolong;
2973 struct sljit_jump *alreadyfound;
2974 struct sljit_jump *found;
2975 struct sljit_jump *foundoc = NULL;
2976 struct sljit_jump *notfound;
2977 pcre_uchar oc, bit;
2978
2979 SLJIT_ASSERT(common->req_char_ptr != 0);
/* Two shortcuts that skip the scan altogether: the end of the subject is
more than REQ_BYTE_MAX beyond STR_PTR, or STR_PTR is still before the
cached position of an earlier hit. */
2980 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
2981 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
2982 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
2983 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
2984
/* TMP1 is the scan pointer. */
2985 if (has_firstchar)
2986 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2987 else
2988 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
2989
2990 loop = LABEL();
2991 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
2992
2993 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Work out the other case at compile time: from the fcc table, or from the
Unicode data for values above 127 in UTF mode of the non 8 bit build. */
2994 oc = req_char;
2995 if (caseless)
2996 {
2997 oc = TABLE_GET(req_char, common->fcc, req_char);
2998 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2999 if (req_char > 127 && common->utf)
3000 oc = UCD_OTHERCASE(req_char);
3001 #endif
3002 }
3003 if (req_char == oc)
3004 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3005 else
3006 {
3007 bit = req_char ^ oc;
3008 if (ispowerof2(bit))
3009 {
/* The two cases differ in exactly one bit: set it and compare once. */
3010 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3011 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3012 }
3013 else
3014 {
3015 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3016 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3017 }
3018 }
3019 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3020 JUMPTO(SLJIT_JUMP, loop);
3021
3022 JUMPHERE(found);
3023 if (foundoc)
3024 JUMPHERE(foundoc);
/* Remember where the unit was found. The two shortcut jumps land after
this store, so they leave the cached position untouched. */
3025 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3026 JUMPHERE(alreadyfound);
3027 JUMPHERE(toolong);
3028 return notfound;
3029 }
3030
/* Emits the shared out-of-line helper that replays saved frame data,
starting at STACK_TOP, until a frame_end marker is read. Uses the SLJIT
fast call convention (return address kept in RETURN_ADDR). TMP1 is the read
pointer, TMP2 the current entry and TMP3 the base address of the locals;
STACK_TOP itself is not modified by this helper. */
3031 static void do_revertframes(compiler_common *common)
3032 {
3033 DEFINE_COMPILER;
3034 struct sljit_jump *jump;
3035 struct sljit_label *mainloop;
3036
3037 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3038 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3039 GET_LOCAL_BASE(TMP3, 0, 0);
3040
3041 /* Drop frames until we reach STACK_TOP. */
3042 mainloop = LABEL();
3043 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Entries that compare (signed) less than or equal to frame_end are
commands and are handled further down. Any other entry is an offset from
the locals base: the two words that follow it are copied to that location
and the next word, and the read pointer moves on by three words. */
3044 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3045 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3046 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3047 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_w), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_w));
3048 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_w));
3049 JUMPTO(SLJIT_JUMP, mainloop);
3050
3051 JUMPHERE(jump);
3052 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3053 /* End of dropping frames. */
3054 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3055
3056 JUMPHERE(jump);
3057 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
3058 /* Set string begin. */
/* The word after the command is stored to OVECTOR(0); the entry is two
words long. */
3059 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3060 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3061 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3062 JUMPTO(SLJIT_JUMP, mainloop);
3063
3064 JUMPHERE(jump);
/* The mark command is only recognised when the pattern has a mark local;
its payload word is stored there. */
3065 if (common->mark_ptr != 0)
3066 {
3067 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3068 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_w));
3069 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3070 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
3071 JUMPTO(SLJIT_JUMP, mainloop);
3072
3073 JUMPHERE(jump);
3074 }
3075
3076 /* Unknown command. */
/* Skipped as a two word entry. */
3077 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_w));
3078 JUMPTO(SLJIT_JUMP, mainloop);
3079 }
3080
/* Emits the shared out-of-line helper behind the word boundary assertions.
It classifies the character before STR_PTR and the character at STR_PTR as
word / non-word (a missing character counts as non-word) and XORs the two
results with the flags set, so the zero flag is set when both sides agree,
i.e. when STR_PTR is not at a word boundary.

The return address is kept in the LOCALS0 stack local rather than in
RETURN_ADDR, and LOCALS1 holds the result for the previous character. */
3081 static void check_wordboundary(compiler_common *common)
3082 {
3083 DEFINE_COMPILER;
3084 struct sljit_jump *skipread;
3085 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3086 struct sljit_jump *jump;
3087 #endif
3088
/* The non-UCP paths below extract the word bit with a shift by 4. */
3089 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3090
3091 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3092 /* Get type of the previous char, and put it to LOCALS1. */
/* LOCALS1 defaults to 0; the read is skipped when STR_PTR is at (or
before) the "begin" position of the match arguments. */
3093 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3094 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3095 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3096 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3097 skip_char_back(common);
3098 check_start_used_ptr(common);
3099 read_char(common);
3100
3101 /* Testing char type. */
3102 #ifdef SUPPORT_UCP
3103 if (common->use_ucp)
3104 {
/* Unicode properties: underscore is a word character (TMP2 preset to 1);
otherwise the chartype returned by the getucd helper is tested against the
ranges ucp_Ll..ucp_Lu and ucp_Nd..ucp_No, each with one subtract followed
by an unsigned compare. */
3105 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3106 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3107 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3108 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3109 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3110 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3111 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3112 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3113 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3114 JUMPHERE(jump);
3115 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3116 }
3117 else
3118 #endif
3119 {
/* Table lookup: characters above 255 have no ctypes entry and keep the
default of 0. In the 8 bit build that check is only needed in UTF mode. */
3120 #ifndef COMPILE_PCRE8
3121 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3122 #elif defined SUPPORT_UTF
3123 /* Here LOCALS1 has already been zeroed. */
3124 jump = NULL;
3125 if (common->utf)
3126 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3127 #endif /* COMPILE_PCRE8 */
3128 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3129 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3130 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3131 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3132 #ifndef COMPILE_PCRE8
3133 JUMPHERE(jump);
3134 #elif defined SUPPORT_UTF
3135 if (jump != NULL)
3136 JUMPHERE(jump);
3137 #endif /* COMPILE_PCRE8 */
3138 }
3139 JUMPHERE(skipread);
3140
/* Now the character at STR_PTR; its result is left in TMP2 (0 when the
subject end has been reached). */
3141 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3142 skipread = check_str_end(common);
3143 peek_char(common);
3144
3145 /* Testing char type. This is a code duplication. */
3146 #ifdef SUPPORT_UCP
3147 if (common->use_ucp)
3148 {
3149 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3150 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3151 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3152 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3153 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3154 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3155 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3156 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3157 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3158 JUMPHERE(jump);
3159 }
3160 else
3161 #endif
3162 {
3163 #ifndef COMPILE_PCRE8
3164 /* TMP2 may be destroyed by peek_char. */
3165 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3166 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3167 #elif defined SUPPORT_UTF
3168 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3169 jump = NULL;
3170 if (common->utf)
3171 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3172 #endif
3173 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3174 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3175 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3176 #ifndef COMPILE_PCRE8
3177 JUMPHERE(jump);
3178 #elif defined SUPPORT_UTF
3179 if (jump != NULL)
3180 JUMPHERE(jump);
3181 #endif /* COMPILE_PCRE8 */
3182 }
3183 JUMPHERE(skipread);
3184
/* Flags are set from (next is word) XOR (previous is word); the caller
tests them after the helper returns. */
3185 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3186 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3187 }
3188
3189 /*
3190 range format:
3191
3192 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3193 ranges[1] = first bit (0 or 1)
3194 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3195 */
3196
3197 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3198 {
3199 DEFINE_COMPILER;
3200 struct sljit_jump *jump;
3201
3202 if (ranges[0] < 0)
3203 return FALSE;
3204
3205 switch(ranges[0])
3206 {
3207 case 1:
3208 if (readch)
3209 read_char(common);
3210 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3211 return TRUE;
3212
3213 case 2:
3214 if (readch)
3215 read_char(common);
3216 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3217 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3218 return TRUE;
3219
3220 case 4:
3221 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3222 {
3223 if (readch)
3224 read_char(common);
3225 if (ranges[1] != 0)
3226 {
3227 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3228 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3229 }
3230 else
3231 {
3232 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3233 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3234 JUMPHERE(jump);
3235 }
3236 return TRUE;
3237 }
3238 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && ispowerof2(ranges[4] - ranges[2]))
3239 {
3240 if (readch)
3241 read_char(common);
3242 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3243 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3244 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3245 return TRUE;
3246 }
3247 return FALSE;
3248
3249 default:
3250 return FALSE;
3251 }
3252 }
3253
3254 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3255 {
3256 int i, bit, length;
3257 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3258
3259 bit = ctypes[0] & flag;
3260 ranges[0] = -1;
3261 ranges[1] = bit != 0 ? 1 : 0;
3262 length = 0;
3263
3264 for (i = 1; i < 256; i++)
3265 if ((ctypes[i] & flag) != bit)
3266 {
3267 if (length >= MAX_RANGE_SIZE)
3268 return;
3269 ranges[2 + length] = i;
3270 length++;
3271 bit ^= flag;
3272 }
3273
3274 if (bit != 0)
3275 {
3276 if (length >= MAX_RANGE_SIZE)
3277 return;
3278 ranges[2 + length] = 256;
3279 length++;
3280 }
3281 ranges[0] = length;
3282 }
3283
3284 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3285 {
3286 int ranges[2 + MAX_RANGE_SIZE];
3287 pcre_uint8 bit, cbit, all;
3288 int i, byte, length = 0;
3289
3290 bit = bits[0] & 0x1;
3291 ranges[1] = bit;
3292 /* Can be 0 or 255. */
3293 all = -bit;
3294
3295 for (i = 0; i < 256; )
3296 {
3297 byte = i >> 3;
3298 if ((i & 0x7) == 0 && bits[byte] == all)
3299 i += 8;
3300 else
3301 {
3302 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3303 if (cbit != bit)
3304 {
3305 if (length >= MAX_RANGE_SIZE)
3306 return FALSE;
3307 ranges[2 + length] = i;
3308 length++;
3309 bit = cbit;
3310 all = -cbit;
3311 }
3312 i++;
3313 }
3314 }
3315
3316 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3317 {
3318 if (length >= MAX_RANGE_SIZE)
3319 return FALSE;
3320 ranges[2 + length] = 256;
3321 length++;
3322 }
3323 ranges[0] = length;
3324
3325 return check_ranges(common, ranges, backtracks, FALSE);
3326 }
3327
3328 static void check_anynewline(compiler_common *common)
3329 {
3330 /* Check whether TMP1 contains a newline character. TMP2 destroyed.
The routine is emitted between sljit_emit_fast_enter and
sljit_emit_fast_return, with the return address kept in RETURN_ADDR.
Characters tested: 0x0a-0x0d, 0x85 and, in the UTF / 16 bit branch,
0x2028-0x2029. The partial results are collected in TMP2. Note that TMP1 is
modified as well: 0x0a is subtracted from it (and its lowest bit is set in the
UTF / 16 bit branch). */
3331 DEFINE_COMPILER;
3332
3333 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3334
/* After subtracting 0x0a the range 0x0a-0x0d becomes 0-3, which is tested by
a single compare. All later constants are relative to 0x0a as well. */
3335 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3336 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3337 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3338 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3339 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3340 #ifdef COMPILE_PCRE8
3341 if (common->utf)
3342 {
3343 #endif
/* Setting the lowest bit folds 0x2028 onto 0x2029 (both relative to 0x0a),
so one equality test covers both characters. */
3344 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3345 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3346 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3347 #ifdef COMPILE_PCRE8
3348 }
3349 #endif
3350 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* The last OR is issued with SLJIT_SET_E; presumably the caller branches on
the zero / not zero flag after the fast call (TODO confirm at the call sites). */
3351 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3352 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3353 }
3354
3355 static void check_hspace(compiler_common *common)
3356 {
3357 /* Check whether TMP1 contains a horizontal space character. TMP2 destroyed.
The routine is emitted between sljit_emit_fast_enter and
sljit_emit_fast_return, with the return address kept in RETURN_ADDR.
Characters tested: 0x09, 0x20, 0xa0 and, in the UTF / 16 bit branch, 0x1680,
0x180e, 0x2000-0x200a, 0x202f, 0x205f and 0x3000. The partial results are
collected in TMP2. In the UTF / 16 bit branch TMP1 is modified as well (0x2000
is subtracted from it). */
3358 DEFINE_COMPILER;
3359
3360 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3361
3362 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3363 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3364 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3365 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3366 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3367 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3368 #ifdef COMPILE_PCRE8
3369 if (common->utf)
3370 {
3371 #endif
3372 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3373 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3374 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3375 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3376 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is relative to 0x2000, so 0x2000-0x200a is tested by a
single compare and the remaining constants are given as differences. */
3377 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3378 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3379 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3380 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3381 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3382 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3383 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3384 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3385 #ifdef COMPILE_PCRE8
3386 }
3387 #endif
3388 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* The last OR is issued with SLJIT_SET_E: the OP_HSPACE / OP_NOT_HSPACE
handling branches on SLJIT_C_ZERO / SLJIT_C_NOT_ZERO right after the call. */
3389 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3390
3391 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3392 }
3393
3394 static void check_vspace(compiler_common *common)
3395 {
3396 /* Check whether TMP1 contains a vertical space character. TMP2 destroyed.
The routine is emitted between sljit_emit_fast_enter and
sljit_emit_fast_return, with the return address kept in RETURN_ADDR.
Characters tested: 0x0a-0x0d, 0x85 and, in the UTF / 16 bit branch,
0x2028-0x2029. The partial results are collected in TMP2. Note that TMP1 is
modified as well: 0x0a is subtracted from it (and its lowest bit is set in the
UTF / 16 bit branch). */
3397 DEFINE_COMPILER;
3398
3399 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3400
/* After subtracting 0x0a the range 0x0a-0x0d becomes 0-3, which is tested by
a single compare. All later constants are relative to 0x0a as well. */
3401 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3402 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3403 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
3404 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3405 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
3406 #ifdef COMPILE_PCRE8
3407 if (common->utf)
3408 {
3409 #endif
/* Setting the lowest bit folds 0x2028 onto 0x2029 (both relative to 0x0a),
so one equality test covers both characters. */
3410 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3411 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3412 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3413 #ifdef COMPILE_PCRE8
3414 }
3415 #endif
3416 #endif /* SUPPORT_UTF || COMPILE_PCRE16 */
/* The last OR is issued with SLJIT_SET_E: the OP_VSPACE / OP_NOT_VSPACE
handling branches on SLJIT_C_ZERO / SLJIT_C_NOT_ZERO right after the call. */
3417 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3418
3419 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3420 }
3421
3422 #define CHAR1 STR_END
3423 #define CHAR2 STACK_TOP
/* CHAR1 and CHAR2 are temporary names for the STR_END and STACK_TOP
registers. The compare routines below save the original contents of these
registers before using them and restore them before returning. */
3424
3425 static void do_casefulcmp(compiler_common *common)
3426 {
/* Emits the case sensitive sequence compare routine (entered by fast call,
return address in RETURN_ADDR). As used by the code below: TMP1 addresses one
sequence, STR_PTR is first moved back by TMP2 to address the other one, and
TMP2 is the remaining length, decreased by IN_UCHARS(1) after every equal
pair. The loop is left either by the "not equal" jump or when TMP2 reaches
zero, so TMP2 is zero on return only if every pair was equal (assuming the
caller passes a non-zero length - TODO confirm). */
3427 DEFINE_COMPILER;
3428 struct sljit_jump *jump;
3429 struct sljit_label *label;
3430
3431 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3432 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save STR_END (CHAR1) in TMP3 and STACK_TOP (CHAR2) in LOCALS0. */
3433 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3434 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Both pointers are moved back by one unit because the loads in the loop use
an IN_UCHARS(1) offset. MOVU_UCHAR is presumably a load which also updates
the base register - confirm against its definition. */
3435 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3436 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3437
3438 label = LABEL();
3439 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3440 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3441 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3442 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3443 JUMPTO(SLJIT_C_NOT_ZERO, label);
3444
/* Common exit: undo the initial STR_PTR adjustment and restore the borrowed
registers. */
3445 JUMPHERE(jump);
3446 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3447 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3448 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3449 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3450 }
3451
3452 #define LCC_TABLE STACK_LIMIT
/* LCC_TABLE is a temporary name for the STACK_LIMIT register, which holds the
address of the common->lcc table while the routine below runs. */
3453
3454 static void do_caselesscmp(compiler_common *common)
3455 {
/* Emits the caseless sequence compare routine (entered by fast call, return
address in RETURN_ADDR). It has the same structure as do_casefulcmp, but
both units are translated through the common->lcc table before they are
compared. When COMPILE_PCRE8 is not defined, units above 255 skip the table
lookup and are compared unchanged. TMP2 is the remaining length and is
decreased by IN_UCHARS(1) after every equal pair. */
3456 DEFINE_COMPILER;
3457 struct sljit_jump *jump;
3458 struct sljit_label *label;
3459
3460 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3461 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3462
/* Save the three borrowed registers: STACK_LIMIT (LCC_TABLE) in TMP3,
STR_END (CHAR1) in LOCALS0 and STACK_TOP (CHAR2) in LOCALS1. */
3463 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3464 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3465 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3466 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Both pointers are moved back by one unit because the loads in the loop use
an IN_UCHARS(1) offset. */
3467 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3468 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3469
3470 label = LABEL();
3471 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3472 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table is indexed by the unit value, so units above 255 must not be
looked up when the unit is wider than 8 bits. */
3473 #ifndef COMPILE_PCRE8
3474 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3475 #endif
3476 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3477 #ifndef COMPILE_PCRE8
3478 JUMPHERE(jump);
3479 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3480 #endif
3481 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3482 #ifndef COMPILE_PCRE8
3483 JUMPHERE(jump);
3484 #endif
3485 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3486 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3487 JUMPTO(SLJIT_C_NOT_ZERO, label);
3488
/* Common exit: undo the initial STR_PTR adjustment and restore the borrowed
registers. */
3489 JUMPHERE(jump);
3490 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3491 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3492 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3493 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3494 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3495 }
3496
3497 #undef LCC_TABLE
3498 #undef CHAR1
3499 #undef CHAR2
3500
3501 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3502
3503 static const pcre_uchar *SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3504 {
3505 /* This function would be ineffective to do in JIT level. */
3506 int c1, c2;
3507 const pcre_uchar *src2 = args->uchar_ptr;
3508 const pcre_uchar *end2 = args->end;
3509
3510 while (src1 < end1)
3511 {
3512 if (src2 >= end2)
3513 return (pcre_uchar*)1;
3514 GETCHARINC(c1, src1);
3515 GETCHARINC(c2, src2);
3516 if (c1 != c2 && c1 != UCD_OTHERCASE(c2)) return NULL;
3517 }
3518 return src2;
3519 }
3520
3521 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3522
3523 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3524 compare_context* context, jump_list **backtracks)
3525 {
/* Emits the compare code for the single character starting at cc (one or
more code units in UTF mode) and returns the position after it. The subject
data is read at negative offsets from STR_PTR; context->length is the number
of bytes not yet compared and is decreased by IN_UCHARS(1) for every code
unit. A mismatch jumps to 'backtracks'.

context->sourcereg is -1 before the first call for a sequence. The first
data is then loaded into TMP1 and sourcereg is set to TMP2: sourcereg always
names the register which receives the NEXT load, while the compare uses the
other register, which was loaded earlier.

Caseless compare is only done here through a single differing bit
(othercasebit): the bit is OR-ed into the loaded value and the result is
compared with the character which has the same bit set. */
3526 DEFINE_COMPILER;
3527 unsigned int othercasebit = 0;
3528 pcre_uchar *othercasechar = NULL;
3529 #ifdef SUPPORT_UTF
3530 int utflength;
3531 #endif
3532
3533 if (caseless && char_has_othercase(common, cc))
3534 {
3535 othercasebit = char_get_othercase_bit(common, cc);
3536 SLJIT_ASSERT(othercasebit);
3537 /* Extracting bit difference info. The value returned above combines the
differing bit (low 8 bits) with the index of the code unit that contains it
(upper bits); in 16 bit mode bit 0x100 selects the high byte of that unit. */
3538 #ifdef COMPILE_PCRE8
3539 othercasechar = cc + (othercasebit >> 8);
3540 othercasebit &= 0xff;
3541 #else
3542 #ifdef COMPILE_PCRE16
3543 othercasechar = cc + (othercasebit >> 9);
3544 if ((othercasebit & 0x100) != 0)
3545 othercasebit = (othercasebit & 0xff) << 8;
3546 else
3547 othercasebit &= 0xff;
3548 #endif
3549 #endif
3550 }
3551
/* First call for this sequence: load the first chunk into TMP1. With
unaligned access the widest load allowed by the remaining length is used. */
3552 if (context->sourcereg == -1)
3553 {
3554 #ifdef COMPILE_PCRE8
3555 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3556 if (context->length >= 4)
3557 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3558 else if (context->length >= 2)
3559 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3560 else
3561 #endif
3562 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3563 #else
3564 #ifdef COMPILE_PCRE16
3565 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3566 if (context->length >= 4)
3567 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3568 else
3569 #endif
3570 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3571 #endif
3572 #endif /* COMPILE_PCRE8 */
3573 context->sourcereg = TMP2;
3574 }
3575
/* In UTF mode one character may consist of several code units; the loop
below processes all of them. Without SUPPORT_UTF the body runs once. */
3576 #ifdef SUPPORT_UTF
3577 utflength = 1;
3578 if (common->utf && HAS_EXTRALEN(*cc))
3579 utflength += GET_EXTRALEN(*cc);
3580
3581 do
3582 {
3583 #endif
3584
3585 context->length -= IN_UCHARS(1);
3586 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3587
3588 /* Unaligned read is supported. The expected units (c) and their other case
bits (oc) are collected in the context until a whole chunk can be compared
with a single instruction. */
3589 if (othercasebit != 0 && othercasechar == cc)
3590 {
3591 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3592 context->oc.asuchars[context->ucharptr] = othercasebit;
3593 }
3594 else
3595 {
3596 context->c.asuchars[context->ucharptr] = *cc;
3597 context->oc.asuchars[context->ucharptr] = 0;
3598 }
3599 context->ucharptr++;
3600
/* A compare is emitted when the chunk is full or the sequence has ended. The
extra 8 bit condition (two units collected, one remaining) presumably matches
the 2 + 1 byte split selected by the loads. */
3601 #ifdef COMPILE_PCRE8
3602 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3603 #else
3604 if (context->ucharptr >= 2 || context->length == 0)
3605 #endif
3606 {
/* Load the next chunk (if any) into sourcereg before comparing the
previously loaded one. */
3607 if (context->length >= 4)
3608 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3609 #ifdef COMPILE_PCRE8
3610 else if (context->length >= 2)
3611 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3612 else if (context->length >= 1)
3613 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3614 #else
3615 else if (context->length >= 2)
3616 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3617 #endif
/* Swap: sourcereg now names the register which was loaded earlier. */
3618 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3619
/* The case labels are given in code units: 4 and 2 bytes divided by the
unit size. */
3620 switch(context->ucharptr)
3621 {
3622 case 4 / sizeof(pcre_uchar):
3623 if (context->oc.asint != 0)
3624 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3625 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3626 break;
3627
3628 case 2 / sizeof(pcre_uchar):
3629 if (context->oc.asushort != 0)
3630 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3631 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3632 break;
3633
3634 #ifdef COMPILE_PCRE8
3635 case 1:
3636 if (context->oc.asbyte != 0)
3637 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3638 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3639 break;
3640 #endif
3641
3642 default:
3643 SLJIT_ASSERT_STOP();
3644 break;
3645 }
3646 context->ucharptr = 0;
3647 }
3648
3649 #else
3650
3651 /* Unaligned read is unsupported. Every code unit is loaded and compared
separately; the next unit is loaded before the current one is compared. */
3652 #ifdef COMPILE_PCRE8
3653 if (context->length > 0)
3654 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3655 #else
3656 if (context->length > 0)
3657 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3658 #endif
3659 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3660
3661 if (othercasebit != 0 && othercasechar == cc)
3662 {
3663 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3664 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3665 }
3666 else
3667 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3668
3669 #endif
3670
3671 cc++;
3672 #ifdef SUPPORT_UTF
3673 utflength--;
3674 }
3675 while (utflength > 0);
3676 #endif
3677
3678 return cc;
3679 }
3680
3681 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3682
/* The character type (typereg) and the character itself (TMP1) are kept
relative to a compile time offset (typeoffset / charoffset), which allows
range checks with a single compare. These macros emit the instruction which
moves the register to a new offset (nothing is emitted when the offset is
unchanged) and record the new offset. They expect 'compiler' (used by OP2),
'typereg' and 'typeoffset' / 'charoffset' in the enclosing scope.

The bodies are wrapped in do { } while (0), so each macro expands to exactly
one statement and can be used safely in an unbraced if / else. The 'value'
argument is evaluated more than once and must not have side effects. */

#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)

#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
3702
3703 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3704 {
3705 DEFINE_COMPILER;
3706 jump_list *found = NULL;
3707 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3708 unsigned int c;
3709 int compares;
3710 struct sljit_jump *jump = NULL;
3711 pcre_uchar *ccbegin;
3712 #ifdef SUPPORT_UCP
3713 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3714 BOOL charsaved = FALSE;
3715 int typereg = TMP1, scriptreg = TMP1;
3716 unsigned int typeoffset;
3717 #endif
3718 int invertcmp, numberofcmps;
3719 unsigned int charoffset;
3720
3721 /* Although SUPPORT_UTF must be defined, we are
3722 not necessary in utf mode even in 8 bit mode. */
3723 detect_partial_match(common, backtracks);
3724 read_char(common);
3725
3726 if ((*cc++ & XCL_MAP) != 0)
3727 {
3728 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3729 #ifndef COMPILE_PCRE8
3730 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3731 #elif defined SUPPORT_UTF
3732 if (common->utf)
3733 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3734 #endif
3735
3736 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3737 {
3738 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3739 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3740 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
3741 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3742 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3743 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3744 }
3745
3746 #ifndef COMPILE_PCRE8
3747 JUMPHERE(jump);
3748 #elif defined SUPPORT_UTF
3749 if (common->utf)
3750 JUMPHERE(jump);
3751 #endif
3752 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3753 #ifdef SUPPORT_UCP
3754 charsaved = TRUE;
3755 #endif
3756 cc += 32 / sizeof(pcre_uchar);
3757 }
3758
3759 /* Scanning the necessary info. */
3760 ccbegin = cc;
3761 compares = 0;
3762 while (*cc != XCL_END)
3763 {
3764 compares++;
3765 if (*cc == XCL_SINGLE)
3766 {
3767 cc += 2;
3768 #ifdef SUPPORT_UTF
3769 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3770 #endif
3771 #ifdef SUPPORT_UCP
3772 needschar = TRUE;
3773 #endif
3774 }
3775 else if (*cc == XCL_RANGE)
3776 {
3777 cc += 2;
3778 #ifdef SUPPORT_UTF
3779 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3780 #endif
3781 cc++;
3782 #ifdef SUPPORT_UTF
3783 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3784 #endif
3785 #ifdef SUPPORT_UCP
3786 needschar = TRUE;
3787 #endif
3788 }
3789 #ifdef SUPPORT_UCP
3790 else
3791 {
3792 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3793 cc++;
3794 switch(*cc)
3795 {
3796 case PT_ANY:
3797 break;
3798
3799 case PT_LAMP:
3800 case PT_GC:
3801 case PT_PC:
3802 case PT_ALNUM:
3803 needstype = TRUE;
3804 break;
3805
3806 case PT_SC:
3807 needsscript = TRUE;
3808 break;
3809
3810 case PT_SPACE:
3811 case PT_PXSPACE:
3812 case PT_WORD:
3813 needstype = TRUE;
3814 needschar = TRUE;
3815 break;
3816
3817 default:
3818 SLJIT_ASSERT_STOP();
3819 break;
3820 }
3821 cc += 2;
3822 }
3823 #endif
3824 }
3825
3826 #ifdef SUPPORT_UCP
3827 /* Simple register allocation. TMP1 is preferred if possible. */
3828 if (needstype || needsscript)
3829 {
3830 if (needschar && !charsaved)
3831 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3832 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3833 if (needschar)
3834 {
3835 if (needstype)
3836 {
3837 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3838 typereg = RETURN_ADDR;
3839 }
3840
3841 if (needsscript)
3842 scriptreg = TMP3;
3843 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3844 }
3845 else if (needstype && needsscript)
3846 scriptreg = TMP3;
3847 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3848
3849 if (needsscript)
3850 {
3851 if (scriptreg == TMP1)
3852 {
3853 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3854 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3855 }
3856 else
3857 {
3858 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3859 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3860 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3861 }
3862 }
3863 }
3864 #endif
3865
3866 /* Generating code. */
3867 cc = ccbegin;
3868 charoffset = 0;
3869 numberofcmps = 0;
3870 #ifdef SUPPORT_UCP
3871 typeoffset = 0;
3872 #endif
3873
3874 while (*cc != XCL_END)
3875 {
3876 compares--;
3877 invertcmp = (compares == 0 && list != backtracks);
3878 jump = NULL;
3879
3880 if (*cc == XCL_SINGLE)
3881 {
3882 cc ++;
3883 #ifdef SUPPORT_UTF
3884 if (common->utf)
3885 {
3886 GETCHARINC(c, cc);
3887 }
3888 else
3889 #endif
3890 c = *cc++;
3891
3892 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3893 {
3894 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3895 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3896 numberofcmps++;
3897 }
3898 else if (numberofcmps > 0)
3899 {
3900 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3901 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3902 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3903 numberofcmps = 0;
3904 }
3905 else
3906 {
3907 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3908 numberofcmps = 0;
3909 }
3910 }
3911 else if (*cc == XCL_RANGE)
3912 {
3913 cc ++;
3914 #ifdef SUPPORT_UTF
3915 if (common->utf)
3916 {
3917 GETCHARINC(c, cc);
3918 }
3919 else
3920 #endif
3921 c = *cc++;
3922 SET_CHAR_OFFSET(c);
3923 #ifdef SUPPORT_UTF
3924 if (common->utf)
3925 {
3926 GETCHARINC(c, cc);
3927 }
3928 else
3929 #endif
3930 c = *cc++;
3931 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3932 {
3933 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3934 COND_VALUE(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
3935 numberofcmps++;
3936 }
3937 else if (numberofcmps > 0)
3938 {
3939 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3940 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
3941 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3942 numberofcmps = 0;
3943 }
3944 else
3945 {
3946 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3947 numberofcmps = 0;
3948 }
3949 }
3950 #ifdef SUPPORT_UCP
3951 else
3952 {
3953 if (*cc == XCL_NOTPROP)
3954 invertcmp ^= 0x1;
3955 cc++;
3956 switch(*cc)
3957 {
3958 case PT_ANY:
3959 if (list != backtracks)
3960 {
3961 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3962 continue;
3963 }
3964 else if (cc[-1] == XCL_NOTPROP)
3965 continue;
3966 jump = JUMP(SLJIT_JUMP);
3967 break;
3968
3969 case PT_LAMP:
3970 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3971 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
3972 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
3973 COND_VALUE(SLJIT_OR, TMP2, 0, SLJIT_C_EQUAL);
3974 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
3975 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
3976 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3977 break;
3978
3979 case PT_GC:
3980 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
3981 SET_TYPE_OFFSET(c);
3982 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
3983 break;
3984
3985 case PT_PC:
3986 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
3987 break;
3988
3989 case PT_SC:
3990 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
3991 break;
3992
3993 case PT_SPACE:
3994 case PT_PXSPACE:
3995 if (*cc == PT_SPACE)
3996 {
3997 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3998 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
3999 }
4000 SET_CHAR_OFFSET(9);
4001 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4002 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS_EQUAL);
4003 if (*cc == PT_SPACE)
4004 JUMPHERE(jump);
4005
4006 SET_TYPE_OFFSET(ucp_Zl);
4007 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4008 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
4009 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4010 break;
4011
4012 case PT_WORD:
4013 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4014 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4015 /* ... fall through */
4016
4017 case PT_ALNUM:
4018 SET_TYPE_OFFSET(ucp_Ll);
4019 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4020 COND_VALUE((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_C_LESS_EQUAL);
4021 SET_TYPE_OFFSET(ucp_Nd);
4022 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4023 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_LESS_EQUAL);
4024 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4025 break;
4026 }
4027 cc += 2;
4028 }
4029 #endif
4030
4031 if (jump != NULL)
4032 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4033 }
4034
4035 if (found != NULL)
4036 set_jumps(found, LABEL());
4037 }
4038
4039 #undef SET_TYPE_OFFSET
4040 #undef SET_CHAR_OFFSET
4041
4042 #endif
4043
4044 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4045 {
4046 DEFINE_COMPILER;
4047 int length;
4048 unsigned int c, oc, bit;
4049 compare_context context;
4050 struct sljit_jump *jump[4];
4051 #ifdef SUPPORT_UTF
4052 struct sljit_label *label;
4053 #ifdef SUPPORT_UCP
4054 pcre_uchar propdata[5];
4055 #endif
4056 #endif
4057
4058 switch(type)
4059 {
4060 case OP_SOD:
4061 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4062 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4063 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4064 return cc;
4065
4066 case OP_SOM:
4067 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4068 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4069 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4070 return cc;
4071
4072 case OP_NOT_WORD_BOUNDARY:
4073 case OP_WORD_BOUNDARY:
4074 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4075 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4076 return cc;
4077
4078 case OP_NOT_DIGIT:
4079 case OP_DIGIT:
4080 /* Digits are usually 0-9, so it is worth to optimize them. */
4081 if (common->digits[0] == -2)
4082 get_ctype_ranges(common, ctype_digit, common->digits);
4083 detect_partial_match(common, backtracks);
4084 /* Flip the starting bit in the negative case. */
4085 if (type == OP_NOT_DIGIT)
4086 common->digits[1] ^= 1;
4087 if (!check_ranges(common, common->digits, backtracks, TRUE))
4088 {
4089 read_char8_type(common);
4090 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4091 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4092 }
4093 if (type == OP_NOT_DIGIT)
4094 common->digits[1] ^= 1;
4095 return cc;
4096
4097 case OP_NOT_WHITESPACE:
4098 case OP_WHITESPACE:
4099 detect_partial_match(common, backtracks);
4100 read_char8_type(common);
4101 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4102 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4103 return cc;
4104
4105 case OP_NOT_WORDCHAR:
4106 case OP_WORDCHAR:
4107 detect_partial_match(common, backtracks);
4108 read_char8_type(common);
4109 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4110 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4111 return cc;
4112
4113 case OP_ANY:
4114 detect_partial_match(common, backtracks);
4115 read_char(common);
4116 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4117 {
4118 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4119 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4120 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4121 else
4122 jump[1] = check_str_end(common);
4123
4124 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4125 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4126 if (jump[1] != NULL)
4127 JUMPHERE(jump[1]);
4128 JUMPHERE(jump[0]);
4129 }
4130 else
4131 check_newlinechar(common, common->nltype, backtracks, TRUE);
4132 return cc;
4133
4134 case OP_ALLANY:
4135 detect_partial_match(common, backtracks);
4136 #ifdef SUPPORT_UTF
4137 if (common->utf)
4138 {
4139 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4140 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4141 #ifdef COMPILE_PCRE8
4142 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4143 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4144 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4145 #else /* COMPILE_PCRE8 */
4146 #ifdef COMPILE_PCRE16
4147 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4148 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4149 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4150 COND_VALUE(SLJIT_MOV, TMP1, 0, SLJIT_C_EQUAL);
4151 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4152 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4153 #endif /* COMPILE_PCRE16 */
4154 #endif /* COMPILE_PCRE8 */
4155 JUMPHERE(jump[0]);
4156 return cc;
4157 }
4158 #endif
4159 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4160 return cc;
4161
4162 case OP_ANYBYTE:
4163 detect_partial_match(common, backtracks);
4164 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4165 return cc;
4166
4167 #ifdef SUPPORT_UTF
4168 #ifdef SUPPORT_UCP
4169 case OP_NOTPROP:
4170 case OP_PROP:
4171 propdata[0] = 0;
4172 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4173 propdata[2] = cc[0];
4174 propdata[3] = cc[1];
4175 propdata[4] = XCL_END;
4176 compile_xclass_matchingpath(common, propdata, backtracks);
4177 return cc + 2;
4178 #endif
4179 #endif
4180
4181 case OP_ANYNL:
4182 detect_partial_match(common, backtracks);
4183 read_char(common);
4184 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4185 /* We don't need to handle soft partial matching case. */
4186 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4187 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4188 else
4189 jump[1] = check_str_end(common);
4190 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4191 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4192 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4193 jump[3] = JUMP(SLJIT_JUMP);
4194 JUMPHERE(jump[0]);
4195 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4196 JUMPHERE(jump[1]);
4197 JUMPHERE(jump[2]);
4198 JUMPHERE(jump[3]);
4199 return cc;
4200
4201 case OP_NOT_HSPACE:
4202 case OP_HSPACE:
4203 detect_partial_match(common, backtracks);
4204 read_char(common);
4205 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4206 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4207 return cc;
4208
4209 case OP_NOT_VSPACE:
4210 case OP_VSPACE:
4211 detect_partial_match(common, backtracks);
4212 read_char(common);
4213 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4214 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4215 return cc;
4216
4217 #ifdef SUPPORT_UCP
4218 case OP_EXTUNI:
4219 detect_partial_match(common, backtracks);
4220 read_char(common);
4221 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4222 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4223 OP1(SLJIT_MOV_UB, TMP3, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4224
4225 label = LABEL();
4226 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4227 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STR_PTR, 0);
4228 read_char(common);
4229 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4230 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_w)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4231 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4232
4233 OP2(SLJIT_MUL, TMP1, 0, TMP3, 0, SLJIT_IMM, ucp_gbCount);
4234 OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
4235 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4236 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(ucp_gbtable));
4237 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, label);
4238
4239 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4240 JUMPHERE(jump[0]);
4241 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4242 {
4243 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4244 /* Since we successfully read a char above, partial matching must occur. */
4245 check_partial(common, TRUE);
4246 JUMPHERE(jump[0]);
4247 }
4248 return cc;
4249 #endif
4250
4251 case OP_EODN:
4252 /* Requires rather complex checks. */
4253 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4254 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4255 {
4256 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4257 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4258 if (common->mode == JIT_COMPILE)
4259 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4260 else
4261 {
4262 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4263 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4264 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_LESS);
4265 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4266 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_NOT_EQUAL);
4267 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4268 check_partial(common, TRUE);
4269 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4270 JUMPHERE(jump[1]);
4271 }
4272 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4273 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4274 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4275 }
4276 else if (common->nltype == NLTYPE_FIXED)
4277 {
4278 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4279 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4280 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4281 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4282 }
4283 else
4284 {
4285 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4286 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4287 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4288 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4289 jump[2] = JUMP(SLJIT_C_GREATER);
4290 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4291 /* Equal. */
4292 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4293 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4294 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4295
4296 JUMPHERE(jump[1]);
4297 if (common->nltype == NLTYPE_ANYCRLF)
4298 {
4299 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4300 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4301 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4302 }
4303 else
4304 {
4305 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4306 read_char(common);
4307 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4308 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4309 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4310 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4311 }
4312 JUMPHERE(jump[2]);
4313 JUMPHERE(jump[3]);
4314 }
4315 JUMPHERE(jump[0]);
4316 check_partial(common, FALSE);
4317 return cc;
4318
4319 case OP_EOD:
4320 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4321 check_partial(common, FALSE);
4322 return cc;
4323
4324 case OP_CIRC:
4325 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4326 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4327 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4328 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4329 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4330 return cc;
4331
4332 case OP_CIRCM:
4333 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4334 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4335 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4336 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4337 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4338 jump[0] = JUMP(SLJIT_JUMP);
4339 JUMPHERE(jump[1]);
4340
4341 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4342 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4343 {
4344 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4345 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4346 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4347 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4348 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4349 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4350 }
4351 else
4352 {
4353 skip_char_back(common);
4354 read_char(common);
4355 check_newlinechar(common, common->nltype, backtracks, FALSE);
4356 }
4357 JUMPHERE(jump[0]);
4358 return cc;
4359
4360 case OP_DOLL:
4361 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4362 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4363 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4364
4365 if (!common->endonly)
4366 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4367 else
4368 {
4369 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4370 check_partial(common, FALSE);
4371 }
4372 return cc;
4373
4374 case OP_DOLLM:
4375 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4376 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4377 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4378 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4379 check_partial(common, FALSE);
4380 jump[0] = JUMP(SLJIT_JUMP);
4381 JUMPHERE(jump[1]);
4382
4383 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4384 {
4385 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4386 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4387 if (common->mode == JIT_COMPILE)
4388 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4389 else
4390 {
4391 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4392 /* STR_PTR = STR_END - IN_UCHARS(1) */
4393 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4394 check_partial(common, TRUE);
4395 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4396 JUMPHERE(jump[1]);
4397 }
4398
4399 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4400 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4401 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4402 }
4403 else
4404 {
4405 peek_char(common);
4406 check_newlinechar(common, common->nltype, backtracks, FALSE);
4407 }
4408 JUMPHERE(jump[0]);
4409 return cc;
4410
4411 case OP_CHAR:
4412 case OP_CHARI:
4413 length = 1;
4414 #ifdef SUPPORT_UTF
4415 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4416 #endif
4417 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4418 {
4419 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4420 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4421
4422 context.length = IN_UCHARS(length);
4423 context.sourcereg = -1;
4424 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4425 context.ucharptr = 0;
4426 #endif
4427 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4428 }
4429 detect_partial_match(common, backtracks);
4430 read_char(common);
4431 #ifdef SUPPORT_UTF
4432 if (common->utf)
4433 {
4434 GETCHAR(c, cc);
4435 }
4436 else
4437 #endif
4438 c = *cc;
4439 if (type == OP_CHAR || !char_has_othercase(common, cc))
4440 {
4441 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4442 return cc + length;
4443 }
4444 oc = char_othercase(common, c);
4445 bit = c ^ oc;
4446 if (ispowerof2(bit))
4447 {
4448 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4449 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4450 return cc + length;
4451 }
4452 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4453 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4454 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4455 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4456 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4457 return cc + length;
4458
4459 case OP_NOT:
4460 case OP_NOTI:
4461 detect_partial_match(common, backtracks);
4462 length = 1;
4463 #ifdef SUPPORT_UTF
4464 if (common->utf)
4465 {
4466 #ifdef COMPILE_PCRE8
4467 c = *cc;
4468 if (c < 128)
4469 {
4470 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4471 if (type == OP_NOT || !char_has_othercase(common, cc))
4472 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4473 else
4474 {
4475 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4476 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4477 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4478 }
4479 /* Skip the variable-length character. */
4480 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4481 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4482 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)PRIV(utf8_table4) - 0xc0);
4483 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4484 JUMPHERE(jump[0]);
4485 return cc + 1;
4486 }
4487 else
4488 #endif /* COMPILE_PCRE8 */
4489 {
4490 GETCHARLEN(c, cc, length);
4491 read_char(common);
4492 }
4493 }
4494 else
4495 #endif /* SUPPORT_UTF */
4496 {
4497 read_char(common);
4498 c = *cc;
4499 }
4500
4501 if (type == OP_NOT || !char_has_othercase(common, cc))
4502 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4503 else
4504 {
4505 oc = char_othercase(common, c);
4506 bit = c ^ oc;
4507 if (ispowerof2(bit))
4508 {
4509 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4510 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4511 }
4512 else
4513 {
4514 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4515 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4516 }
4517 }
4518 return cc + length;
4519
4520 case OP_CLASS:
4521 case OP_NCLASS:
4522 detect_partial_match(common, backtracks);
4523 read_char(common);
4524 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4525 return cc + 32 / sizeof(pcre_uchar);
4526
4527 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4528 jump[0] = NULL;
4529 #ifdef COMPILE_PCRE8
4530 /* This check only affects 8 bit mode. In other modes, we
4531 always need to compare the value with 255. */
4532 if (common->utf)
4533 #endif /* COMPILE_PCRE8 */
4534 {
4535 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4536 if (type == OP_CLASS)
4537 {
4538 add_jump(compiler, backtracks, jump[0]);
4539 jump[0] = NULL;
4540 }
4541 }
4542 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4543 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4544 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4545 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_w)cc);
4546 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4547 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4548 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4549 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4550 if (jump[0] != NULL)
4551 JUMPHERE(jump[0]);
4552 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4553 return cc + 32 / sizeof(pcre_uchar);
4554
4555 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
4556 case OP_XCLASS:
4557 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4558 return cc + GET(cc, 0) - 1;
4559 #endif
4560
4561 case OP_REVERSE:
4562 length = GET(cc, 0);
4563 if (length == 0)
4564 return cc + LINK_SIZE;
4565 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4566 #ifdef SUPPORT_UTF
4567 if (common->utf)
4568 {
4569 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4570 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4571 label = LABEL();
4572 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4573 skip_char_back(common);
4574 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4575 JUMPTO(SLJIT_C_NOT_ZERO, label);
4576 }
4577 else
4578 #endif
4579 {
4580 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4581 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4582 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4583 }
4584 check_start_used_ptr(common);
4585 return cc + LINK_SIZE;
4586 }
4587 SLJIT_ASSERT_STOP();
4588 return cc;
4589 }
4590
/* Emits the matching path for the opcode at cc, merging a run of literal
   characters into one fixed-length comparison where possible.

   The opcodes from cc up to (but not including) ccend are scanned first.
   OP_CHAR always joins the run; OP_CHARI joins it unless the character has
   another case and char_get_othercase_bit() returns 0 for it. Any other opcode
   ends the run. If the run is non-empty, STR_PTR is advanced by the whole run
   length, a single check against STR_END is emitted, and the characters are
   compared through byte_sequence_compare(). Otherwise the single opcode at cc
   is handed to compile_char1_matchingpath().

   common     - compiler state (utf flag is read here).
   cc         - first opcode to compile.
   ccend      - scan limit for the run.
   backtracks - jump list that receives the failure jumps.

   Returns the code pointer produced by the last byte_sequence_compare() call,
   or by compile_char1_matchingpath(). */
4591 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4592 {
4593 /* This function consumes at least one input character. */
4594 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4595 DEFINE_COMPILER;
4596 pcre_uchar *ccbegin = cc;
4597 compare_context context;
4598 int size;
4599
4600 context.length = 0;
/* Measuring pass: size is the number of code units of the current character,
   or 0 when the opcode cannot be part of the fixed-length run. */
4601 do
4602 {
/* Leaving through this break skips the loop condition, so size is never read
   uninitialized even if the very first test fails. */
4603 if (cc >= ccend)
4604 break;
4605
4606 if (*cc == OP_CHAR)
4607 {
4608 size = 1;
4609 #ifdef SUPPORT_UTF
/* cc[1] is the first code unit of the character that follows the opcode. */
4610 if (common->utf && HAS_EXTRALEN(cc[1]))
4611 size += GET_EXTRALEN(cc[1]);
4612 #endif
4613 }
4614 else if (*cc == OP_CHARI)
4615 {
4616 size = 1;
4617 #ifdef SUPPORT_UTF
4618 if (common->utf)
4619 {
/* A caseless character whose other-case bit is 0 ends the run. */
4620 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4621 size = 0;
4622 else if (HAS_EXTRALEN(cc[1]))
4623 size += GET_EXTRALEN(cc[1]);
4624 }
4625 else
4626 #endif
4627 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4628 size = 0;
4629 }
4630 else
4631 size = 0;
4632
4633 cc += 1 + size;
4634 context.length += IN_UCHARS(size);
4635 }
/* Continue while the last opcode joined the run and the accumulated
   context.length has not exceeded 128. */
4636 while (size > 0 && context.length <= 128);
4637
/* The pass above only measured the run; code generation restarts at the beginning. */
4638 cc = ccbegin;
4639 if (context.length > 0)
4640 {
4641 /* We have a fixed-length byte sequence. */
/* Advance first, then backtrack if the run would reach past STR_END. */
4642 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4643 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4644
4645 context.sourcereg = -1;
4646 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4647 context.ucharptr = 0;
4648 #endif
/* One call per opcode of the run. The loop ends when context.length reaches 0,
   so byte_sequence_compare() is presumably the one that decreases it
   (NOTE(review): confirm against its definition). */
4649 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4650 return cc;
4651 }
4652
4653 /* A non-fixed length character will be checked if length == 0. */
4654 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4655 }
4656
/* Emits the pre-checks for a back reference and returns a jump for the
   caller to resolve.

   The start value of the referenced capture, OVECTOR(offset), is loaded into
   TMP1, where offset is twice the number read from cc with GET2(cc, 1).

   - jscript_compat set: only the "start == OVECTOR(offset + 1)" test is
     emitted; the returned jump is taken when the two are equal.
   - jscript_compat clear, backtracks == NULL: the returned jump is taken when
     the start equals OVECTOR(1) or equals OVECTOR(offset + 1). TMP2 is used
     as scratch for combining the two tests.
   - jscript_compat clear, backtracks != NULL: a jump to *backtracks is added
     for "start == OVECTOR(1)", and the returned jump is taken when the start
     equals OVECTOR(offset + 1).

   OVECTOR(1) holds the "string begin - 1" constant (see the comment in the
   body); presumably this value marks a group that has not been set. */
4657 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4658 {
4659 DEFINE_COMPILER;
4660 int offset = GET2(cc, 1) << 1;
4661
4662 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4663 if (!common->jscript_compat)
4664 {
4665 if (backtracks == NULL)
4666 {
4667 /* OVECTOR(1) contains the "string begin - 1" constant. */
/* TMP2 = (TMP1 == OVECTOR(1)); then OR in (TMP1 == OVECTOR(offset + 1)). */
4668 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4669 COND_VALUE(SLJIT_MOV, TMP2, 0, SLJIT_C_EQUAL);
4670 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4671 COND_VALUE(SLJIT_OR | SLJIT_SET_E, TMP2, 0, SLJIT_C_EQUAL);
4672 return JUMP(SLJIT_C_NOT_ZERO);
4673 }
4674 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4675 }
/* Taken when the two capture values are equal, i.e. the captured text is empty. */
4676 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4677 }
4678
4679 /* Forward declarations: these two functions are used by the code below before their definitions appear. */
4680 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4681 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4682
/* Allocates a backtrack record of `size` bytes with sljit_alloc_memory(),
   zero-fills it, records `ccstart` in its cc field and pushes it on top of
   parent's list (the previous parent->top is kept in prev).
   If the compiler reports an error after the allocation, the ENCLOSING
   function returns `error`.
   The macro relies on variables named compiler, backtrack and parent being
   in scope at the point of use. */
4683 #define PUSH_BACKTRACK(size, ccstart, error) \
4684 do \
4685 { \
4686 backtrack = sljit_alloc_memory(compiler, (size)); \
4687 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4688 return error; \
4689 memset(backtrack, 0, size); \
4690 backtrack->prev = parent->top; \
4691 backtrack->cc = (ccstart); \
4692 parent->top = backtrack; \
4693 } \
4694 while (0)
4695
/* Same as PUSH_BACKTRACK, for use inside functions that return void:
   allocates a zero-filled backtrack record of `size` bytes, sets its cc field
   to `ccstart` and pushes it on parent->top. If the compiler reports an error
   after the allocation, the ENCLOSING function returns without a value.
   The macro relies on variables named compiler, backtrack and parent being
   in scope at the point of use. */
4696 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
4697 do \
4698 { \
4699 backtrack = sljit_alloc_memory(compiler, (size)); \
4700 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4701 return; \
4702 memset(backtrack, 0, size); \
4703 backtrack->prev = parent->top; \
4704 backtrack->cc = (ccstart); \
4705 parent->top = backtrack; \
4706 } \
4707 while (0)
4708
/* Views the local variable `backtrack` as a pointer to the more specific
   record type `type` (e.g. iterator_backtrack). No checking is performed;
   the caller must have allocated the record with the matching size. */
4709 #define BACKTRACK_AS(type) ((type *)backtrack)
4710
/* Emits code that matches the subject at STR_PTR against the text captured by
   the group referenced at cc (*cc is tested against OP_REF and OP_REFI below).

   common     - compiler state (jscript_compat, utf, mode and the shared
                casefulcmp / caselesscmp call lists are used).
   cc         - the reference opcode; the group number is read with GET2(cc, 1).
   backtracks - jump list that receives the failure jumps.
   withchecks - when TRUE, also emit the OVECTOR(1) test (skipped under
                jscript_compat) and the empty-capture test (start == end).
   emptyfail  - what happens to the empty-capture jump: TRUE adds it to
                backtracks, FALSE makes it land after the comparison code.

   Returns cc + 1 + IMM2_SIZE. */
4711 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4712 {
4713 DEFINE_COMPILER;
4714 int offset = GET2(cc, 1) << 1;
/* Empty-capture jump; stays NULL when withchecks is FALSE. */
4715 struct sljit_jump *jump = NULL;
4716 struct sljit_jump *partial;
4717 struct sljit_jump *nopartial;
4718
/* TMP1 = start of the captured text. */
4719 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4720 /* OVECTOR(1) contains the "string begin - 1" constant. */
4721 if (withchecks && !common->jscript_compat)
4722 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4723
4724 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* Caseless reference in UTF mode: the comparison is done by calling the C
   helper do_utf_caselesscmp() instead of the shared JIT compare routines. */
4725 if (common->utf && *cc == OP_REFI)
4726 {
/* The register assignment asserted here is what lets TMP1, the second
   temporary and TMP2 serve as the three arguments of the SLJIT_CALL3 below. */
4727 SLJIT_ASSERT(TMP1 == SLJIT_TEMPORARY_REG1 && STACK_TOP == SLJIT_TEMPORARY_REG2 && TMP2 == SLJIT_TEMPORARY_REG3);
/* TMP2 = end of the captured text. */
4728 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4729 if (withchecks)
4730 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4731
4732 /* Needed to save important temporary registers. */
/* STACK_TOP lives in SLJIT_TEMPORARY_REG2, which is overwritten with ARGUMENTS
   for the call, so it is parked in LOCALS0 and restored afterwards. */
4733 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4734 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG2, 0, ARGUMENTS, 0);
/* The current subject position is passed through jit_arguments.uchar_ptr. */
4735 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_TEMPORARY_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4736 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4737 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return values 0 and 1 are treated specially; any other value is used as the
   new STR_PTR. In non-partial mode both 0 and 1 backtrack. In partial modes
   0 backtracks, and 1 runs check_partial() and then backtracks. */
4738 if (common->mode == JIT_COMPILE)
4739 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4740 else
4741 {
4742 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4743 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4744 check_partial(common, FALSE);
4745 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4746 JUMPHERE(nopartial);
4747 }
4748 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4749 }
4750 else
4751 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4752 {
/* TMP2 = end - start, i.e. the length of the captured text; the flags record
   whether it is zero. */
4753 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4754 if (withchecks)
4755 jump = JUMP(SLJIT_C_ZERO);
4756
/* Advance past the would-be match first, then test against the subject end. */
4757 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4758 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4759 if (common->mode == JIT_COMPILE)
4760 add_jump(compiler, backtracks, partial);
4761
/* Shared compare routine; the check that follows treats a non-zero TMP2 after
   the call as a mismatch. */
4762 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4763 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4764
4765 if (common->mode != JIT_COMPILE)
4766 {
/* Partial matching modes: the captured text is longer than what is left of
   the subject. Compare only the part that fits, then report through
   check_partial() and backtrack. */
4767 nopartial = JUMP(SLJIT_JUMP);
4768 JUMPHERE(partial);
4769 /* TMP2 -= STR_PTR - STR_END (the two operations below compute TMP2 - STR_PTR + STR_END, removing the part of the length that lies beyond STR_END). */
4770 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4771 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
/* Nothing left to compare: skip the compare call. */
4772 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4773 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4774 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4775 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4776 JUMPHERE(partial);
4777 check_partial(common, FALSE);
4778 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4779 JUMPHERE(nopartial);
4780 }
4781 }
4782
/* Resolve the empty-capture jump: either a failure or a successful empty match. */
4783 if (jump != NULL)
4784 {
4785 if (emptyfail)
4786 add_jump(compiler, backtracks, jump);
4787 else
4788 JUMPHERE(jump);
4789 }
4790 return cc + 1 + IMM2_SIZE;
4791 }
4792
/* Emits the matching path for a repeated back reference: the reference
   opcode at cc followed by a repeat opcode at cc[1 + IMM2_SIZE].

   An iterator_backtrack record is pushed on parent first. The repeat opcode
   is then decoded into min and max, where max == 0 stands for "no upper
   limit". The low bit of the repeat opcode selects the minimizing variant.

   Greedy form: matches as often as allowed in a loop, saving STR_PTR on the
   stack where the code below pushes it; for counted repeats the iteration
   counter is kept in the POSSESSIVE0 local.
   Minimizing form: two stack slots are allocated; STACK(0) receives STR_PTR
   and STACK(1) is used as the iteration counter.

   In both forms the matchingpath label of the record is set.

   Returns the code pointer after the repeat opcode (and its range arguments),
   or NULL if pushing the backtrack record fails. */
4793 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4794 {
4795 DEFINE_COMPILER;
4796 backtrack_common *backtrack;
4797 pcre_uchar type;
4798 struct sljit_label *label;
4799 struct sljit_jump *zerolength;
4800 struct sljit_jump *jump = NULL;
4801 pcre_uchar *ccbegin = cc;
4802 int min = 0, max = 0;
4803 BOOL minimize;
4804
4805 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4806
/* The repeat opcode follows the reference opcode and its IMM2 argument. */
4807 type = cc[1 + IMM2_SIZE];
4808 minimize = (type & 0x1) != 0;
4809 switch(type)
4810 {
4811 case OP_CRSTAR:
4812 case OP_CRMINSTAR:
4813 min = 0;
4814 max = 0;
4815 cc += 1 + IMM2_SIZE + 1;
4816 break;
4817 case OP_CRPLUS:
4818 case OP_CRMINPLUS:
4819 min = 1;
4820 max = 0;
4821 cc += 1 + IMM2_SIZE + 1;
4822 break;
4823 case OP_CRQUERY:
4824 case OP_CRMINQUERY:
4825 min = 0;
4826 max = 1;
4827 cc += 1 + IMM2_SIZE + 1;
4828 break;
4829 case OP_CRRANGE:
4830 case OP_CRMINRANGE:
/* The two limits are stored as IMM2 values right after the repeat opcode. */
4831 min = GET2(cc, 1 + IMM2_SIZE + 1);
4832 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4833 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4834 break;
4835 default:
4836 SLJIT_ASSERT_STOP();
4837 break;
4838 }
4839
/* ---- Greedy repeat ---- */
4840 if (!minimize)
4841 {
4842 if (min == 0)
4843 {
/* Zero iterations are acceptable: save the starting STR_PTR in STACK(0) and
   store 0 in STACK(1). */
4844 allocate_stack(common, 2);
4845 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4846 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4847 /* Temporary release of STR_PTR. */
4848 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
/* With a NULL jump list the returned jump covers both "OVECTOR(1)" and "empty
   capture" cases (see compile_ref_checks); it is taken with STACK_TOP still
   adjusted by the instruction above. */
4849 zerolength = compile_ref_checks(common, ccbegin, NULL);
4850 /* Restore if not zero length. */
4851 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
4852 }
4853 else
4854 {
4855 allocate_stack(common, 1);
4856 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4857 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4858 }
4859
/* A counter is only needed when a limit greater than 1 has to be enforced. */
4860 if (min > 1 || max > 1)
4861 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4862
/* Loop head: one match of the referenced text per pass. */
4863 label = LABEL();
4864 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4865
4866 if (min > 1 || max > 1)
4867 {
4868 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4869 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4870 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: loop again without pushing STR_PTR. */
4871 if (min > 1)
4872 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4873 if (max > 1)
4874 {
/* Leave the loop once max iterations are reached; otherwise push the current
   STR_PTR and try one more. */
4875 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4876 allocate_stack(common, 1);
4877 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4878 JUMPTO(SLJIT_JUMP, label);
4879 JUMPHERE(jump);
4880 }
4881 }
4882
4883 if (max == 0)
4884 {
4885 /* Includes min > 1 case as well. */
/* No upper limit: push STR_PTR and loop unconditionally; the loop is left
   through the jumps collected in topbacktracks. */
4886 allocate_stack(common, 1);
4887 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4888 JUMPTO(SLJIT_JUMP, label);
4889 }
4890
4891 JUMPHERE(zerolength);
4892 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4893
4894 decrease_call_count(common);
4895 return cc;
4896 }
4897
/* ---- Minimizing repeat ---- */
/* STACK(0) will hold STR_PTR, STACK(1) the iteration counter (not initialized
   for OP_CRMINSTAR, which has neither min > 1 nor max > 0 and so never reads it). */
4898 allocate_stack(common, 2);
4899 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4900 if (type != OP_CRMINSTAR)
4901 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4902
4903 if (min == 0)
4904 {
/* Zero iterations allowed: record STR_PTR and jump over the first match. */
4905 zerolength = compile_ref_checks(common, ccbegin, NULL);
4906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4907 jump = JUMP(SLJIT_JUMP);
4908 }
4909 else
4910 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4911
/* Entry point for matching one more iteration. */
4912 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4913 if (max > 0)
4914 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
4915
/* withchecks and emptyfail are both TRUE here, so an empty capture fails. */
4916 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
4917 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4918
4919 if (min > 1)
4920 {
/* Keep iterating until the minimum count is reached. */
4921 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
4922 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4923 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4924 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
4925 }
4926 else if (max > 0)
4927 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
4928
4929 if (jump != NULL)
4930 JUMPHERE(jump);
4931 JUMPHERE(zerolength);
4932
4933 decrease_call_count(common);
4934 return cc;
4935 }
4936
/* Emits the matching path for a recursion opcode at cc.

   A recurse_backtrack record is pushed on parent. The recursion target is
   identified by start = GET(cc, 1); the list common->entries is searched for a
   recurse_entry with that start value and a new one is appended to the end of
   the list if none exists. Depending on common->has_set_som and
   common->mark_ptr, OVECTOR(0) and/or the local at mark_ptr are saved on the
   stack before the call. The call is emitted as a fast call: directly to
   entry->entry when that label is already set, otherwise the jump is queued on
   entry->calls. After the call, TMP1 == 0 is treated as a failed match and
   jumps to the record's topbacktracks.

   Returns cc + 1 + LINK_SIZE, or NULL if a memory allocation fails. */
4937 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4938 {
4939 DEFINE_COMPILER;
4940 backtrack_common *backtrack;
4941 recurse_entry *entry = common->entries;
4942 recurse_entry *prev = NULL;
4943 int start = GET(cc, 1);
4944
4945 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
/* Linear search; prev ends up at the last list node when nothing matches. */
4946 while (entry != NULL)
4947 {
4948 if (entry->start == start)
4949 break;
4950 prev = entry;
4951 entry = entry->next;
4952 }
4953
4954 if (entry == NULL)
4955 {
/* First reference to this target: create its entry with no label and no
   pending calls yet. */
4956 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
4957 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
4958 return NULL;
4959 entry->next = NULL;
4960 entry->entry = NULL;
4961 entry->calls = NULL;
4962 entry->start = start;
4963
4964 if (prev != NULL)
4965 prev->next = entry;
4966 else
4967 common->entries = entry;
4968 }
4969
/* Both values need saving: STACK(0) = OVECTOR(0), STACK(1) = mark local. */
4970 if (common->has_set_som && common->mark_ptr != 0)
4971 {
4972 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
4973 allocate_stack(common, 2);
4974 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
4975 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4976 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
4977 }
/* Exactly one of the two needs saving. */
4978 else if (common->has_set_som || common->mark_ptr != 0)
4979 {
4980 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
4981 allocate_stack(common, 1);
4982 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
4983 }
4984
/* If the target label is not known yet, the call is queued on entry->calls
   (presumably resolved when the target is compiled). */
4985 if (entry->entry == NULL)
4986 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
4987 else
4988 JUMPTO(SLJIT_FAST_CALL, entry->entry);
4989 /* Leave if the match is failed. */
4990 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4991 return cc + 1 + LINK_SIZE;
4992 }
4993
/* Emits the matching path of an assertion group. The assertion opcode must be
in the range OP_ASSERT .. OP_ASSERTBACK_NOT (checked by SLJIT_ASSERT below) and
may be preceded by OP_BRAZERO or OP_BRAMINZERO.

Arguments:
  common       shared compiler state; its quit / accept labels and jump lists
               are saved on entry and restored before every return
  cc           points to the assertion opcode, or to the OP_BRAZERO /
               OP_BRAMINZERO in front of it (not allowed when conditional is set)
  backtrack    assert_backtrack record; framesize and private_data_ptr are
               stored into it, and matchingpath is set for OP_BRAZERO
  conditional  when set, failure jumps are collected in backtrack->condfailed
               instead of backtrack->common.topbacktracks

Returns cc + 1 + LINK_SIZE after cc has been advanced over every alternative
(presumably the byte code following the closing ket), or NULL when the sljit
compiler reports an error. */
4994 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
4995 {
4996 DEFINE_COMPILER;
4997 int framesize;
4998 int private_data_ptr;
4999 backtrack_common altbacktrack;
5000 pcre_uchar *ccbegin;
5001 pcre_uchar opcode;
5002 pcre_uchar bra = OP_BRA;
5003 jump_list *tmp = NULL;
5004 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5005 jump_list **found;
5006 /* Saving previous accept variables. */
5007 struct sljit_label *save_quitlabel = common->quitlabel;
5008 struct sljit_label *save_acceptlabel = common->acceptlabel;
5009 jump_list *save_quit = common->quit;
5010 jump_list *save_accept = common->accept;
5011 struct sljit_jump *jump;
5012 struct sljit_jump *brajump = NULL;
5013
/* Remember an optional OP_BRAZERO / OP_BRAMINZERO prefix and step over it. */
5014 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5015 {
5016 SLJIT_ASSERT(!conditional);
5017 bra = *cc;
5018 cc++;
5019 }
5020 private_data_ptr = PRIVATE_DATA(cc);
5021 SLJIT_ASSERT(private_data_ptr != 0);
5022 framesize = get_framesize(common, cc, FALSE);
5023 backtrack->framesize = framesize;
5024 backtrack->private_data_ptr = private_data_ptr;
5025 opcode = *cc;
5026 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* The jump emitted after an alternative has matched goes to "found": the local
list tmp for positive assertions, the failure target for negative ones. */
5027 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5028 ccbegin = cc;
5029 cc += GET(cc, 1);
5030
5031 if (bra == OP_BRAMINZERO)
5032 {
5033 /* This is a braminzero backtrack path. */
5034 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5035 free_stack(common, 1);
5036 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5037 }
5038
/* Save the state needed to undo the assertion: without a frame only STACK_TOP
(into the private data slot) and STR_PTR are kept; otherwise STR_PTR, the
previous private data value and a frame set up by init_frame() are pushed. */
5039 if (framesize < 0)
5040 {
5041 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5042 allocate_stack(common, 1);
5043 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5044 }
5045 else
5046 {
5047 allocate_stack(common, framesize + 2);
5048 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5049 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5050 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5051 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5052 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5053 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5054 }
5055
5056 memset(&altbacktrack, 0, sizeof(backtrack_common));
5057 common->quitlabel = NULL;
5058 common->quit = NULL;
/* One iteration per alternative of the assertion. */
5059 while (1)
5060 {
5061 common->acceptlabel = NULL;
5062 common->accept = NULL;
5063 altbacktrack.top = NULL;
5064 altbacktrack.topbacktracks = NULL;
5065
/* Second and later alternatives restart from the saved STR_PTR. */
5066 if (*ccbegin == OP_ALT)
5067 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5068
5069 altbacktrack.cc = ccbegin;
5070 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5071 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5072 {
/* Restore the saved accept / quit state before bailing out. */
5073 common->quitlabel = save_quitlabel;
5074 common->acceptlabel = save_acceptlabel;
5075 common->quit = save_quit;
5076 common->accept = save_accept;
5077 return NULL;
5078 }
5079 common->acceptlabel = LABEL();
5080 if (common->accept != NULL)
5081 set_jumps(common->accept, common->acceptlabel);
5082
5083 /* Reset stack. */
5084 if (framesize < 0)
5085 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5086 else {
5087 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5088 {
5089 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5090 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5091 }
5092 else
5093 {
5094 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5095 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5096 }
5097 }
5098
5099 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5100 {
5101 /* We know that STR_PTR was stored on the top of the stack. */
5102 if (conditional)
5103 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5104 else if (bra == OP_BRAZERO)
5105 {
5106 if (framesize < 0)
5107 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5108 else
5109 {
5110 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5111 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_w));
5112 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5113 }
5114 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5115 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5116 }
5117 else if (framesize >= 0)
5118 {
5119 /* For OP_BRA and OP_BRAMINZERO. */
5120 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5121 }
5122 }
/* This alternative matched: leave through the "found" list chosen above. */
5123 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5124
5125 compile_backtrackingpath(common, altbacktrack.top);
5126 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5127 {
5128 common->quitlabel = save_quitlabel;
5129 common->acceptlabel = save_acceptlabel;
5130 common->quit = save_quit;
5131 common->accept = save_accept;
5132 return NULL;
5133 }
5134 set_jumps(altbacktrack.topbacktracks, LABEL());
5135
5136 if (*cc != OP_ALT)
5137 break;
5138
5139 ccbegin = cc;
5140 cc += GET(cc, 1);
5141 }
5142 /* None of them matched. */
5143 if (common->quit != NULL)
5144 set_jumps(common->quit, LABEL());
5145
5146 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5147 {
5148 /* Assert is failed. */
5149 if (conditional || bra == OP_BRAZERO)
5150 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5151
5152 if (framesize < 0)
5153 {
5154 /* The topmost item should be 0. */
5155 if (bra == OP_BRAZERO)
5156 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5157 else
5158 free_stack(common, 1);
5159 }
5160 else
5161 {
5162 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5163 /* The topmost item should be 0. */
5164 if (bra == OP_BRAZERO)
5165 {
5166 free_stack(common, framesize + 1);
5167 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5168 }
5169 else
5170 free_stack(common, framesize + 2);
5171 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5172 }
/* For OP_BRAZERO this jump is bound to backtrack->matchingpath further below;
in every other case it is added to the failure target list. */
5173 jump = JUMP(SLJIT_JUMP);
5174 if (bra != OP_BRAZERO)
5175 add_jump(compiler, target, jump);
5176
5177 /* Assert is successful. */
5178 set_jumps(tmp, LABEL());
5179 if (framesize < 0)
5180 {
5181 /* We know that STR_PTR was stored on the top of the stack. */
5182 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5183 /* Keep the STR_PTR on the top of the stack. */
5184 if (bra == OP_BRAZERO)
5185 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5186 else if (bra == OP_BRAMINZERO)
5187 {
5188 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5189 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5190 }
5191 }
5192 else
5193 {
5194 if (bra == OP_BRA)
5195 {
5196 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5197 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_w));
5198 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5199 }
5200 else
5201 {
5202 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5203 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_w));
5204 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5205 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5206 }
5207 }
5208
5209 if (bra == OP_BRAZERO)
5210 {
5211 backtrack->matchingpath = LABEL();
5212 sljit_set_label(jump, backtrack->matchingpath);
5213 }
5214 else if (bra == OP_BRAMINZERO)
5215 {
5216 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5217 JUMPHERE(brajump);
5218 if (framesize >= 0)
5219 {
5220 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5221 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5222 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_w));
5223 }
5224 set_jumps(backtrack->common.topbacktracks, LABEL());
5225 }
5226 }
5227 else
5228 {
5229 /* AssertNot is successful. */
5230 if (framesize < 0)
5231 {
5232 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5233 if (bra != OP_BRA)
5234 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5235 else
5236 free_stack(common, 1);
5237 }
5238 else
5239 {
5240 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5241 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5242 /* The topmost item should be 0. */
5243 if (bra != OP_BRA)
5244 {
5245 free_stack(common, framesize + 1);
5246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5247 }
5248 else
5249 free_stack(common, framesize + 2);
5250 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5251 }
5252
5253 if (bra == OP_BRAZERO)
5254 backtrack->matchingpath = LABEL();
5255 else if (bra == OP_BRAMINZERO)
5256 {
5257 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5258 JUMPHERE(brajump);
5259 }
5260
/* With a zero / minzero prefix the pending failure jumps are resolved right
here and the list is cleared. */
5261 if (bra != OP_BRA)
5262 {
5263 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5264 set_jumps(backtrack->common.topbacktracks, LABEL());
5265 backtrack->common.topbacktracks = NULL;
5266 }
5267 }
5268
/* Restore the accept / quit state saved on entry. */
5269 common->quitlabel = save_quitlabel;
5270 common->acceptlabel = save_acceptlabel;
5271 common->quit = save_quit;
5272 common->accept = save_accept;
5273 return cc + 1 + LINK_SIZE;
5274 }
5275
5276 static sljit_w SLJIT_CALL do_searchovector(sljit_w refno, sljit_w* locals, pcre_uchar *name_table)
5277 {
5278 int condition = FALSE;
5279 pcre_uchar *slotA = name_table;
5280 pcre_uchar *slotB;
5281 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5282 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5283 sljit_w no_capture;
5284 int i;
5285
5286 locals += refno & 0xff;
5287 refno >>= 8;
5288 no_capture = locals[1];
5289
5290 for (i = 0; i < name_count; i++)
5291 {
5292 if (GET2(slotA, 0) == refno) break;
5293 slotA += name_entry_size;
5294 }
5295
5296 if (i < name_count)
5297 {
5298 /* Found a name for the number - there can be only one; duplicate names
5299 for different numbers are allowed, but not vice versa. First scan down
5300 for duplicates. */
5301
5302 slotB = slotA;
5303 while (slotB > name_table)
5304 {
5305 slotB -= name_entry_size;
5306 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5307 {
5308 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5309 if (condition) break;
5310 }
5311 else break;
5312 }
5313
5314 /* Scan up for duplicates */
5315 if (!condition)
5316 {
5317 slotB = slotA;
5318 for (i++; i < name_count; i++)
5319 {
5320 slotB += name_entry_size;
5321 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5322 {
5323 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5324 if (condition) break;
5325 }
5326 else break;
5327 }
5328 }
5329 }
5330 return condition;
5331 }
5332
5333 static sljit_w SLJIT_CALL do_searchgroups(sljit_w recno, sljit_w* locals, pcre_uchar *name_table)
5334 {
5335 int condition = FALSE;
5336 pcre_uchar *slotA = name_table;
5337 pcre_uchar *slotB;
5338 sljit_w name_count = locals[LOCALS0 / sizeof(sljit_w)];
5339 sljit_w name_entry_size = locals[LOCALS1 / sizeof(sljit_w)];
5340 sljit_w group_num = locals[POSSESSIVE0 / sizeof(sljit_w)];
5341 int i;
5342
5343 for (i = 0; i < name_count; i++)
5344 {
5345 if (GET2(slotA, 0) == recno) break;
5346 slotA += name_entry_size;
5347 }
5348
5349 if (i < name_count)
5350 {
5351 /* Found a name for the number - there can be only one; duplicate
5352 names for different numbers are allowed, but not vice versa. First
5353 scan down for duplicates. */
5354
5355 slotB = slotA;
5356 while (slotB > name_table)
5357 {
5358 slotB -= name_entry_size;
5359 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5360 {
5361 condition = GET2(slotB, 0) == group_num;
5362 if (condition) break;
5363 }
5364 else break;
5365 }
5366
5367 /* Scan up for duplicates */
5368 if (!condition)
5369 {
5370 slotB = slotA;
5371 for (i++; i < name_count; i++)
5372 {
5373 slotB += name_entry_size;
5374 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5375 {
5376 condition = GET2(slotB, 0) == group_num;
5377 if (condition) break;
5378 }
5379 else break;
5380 }
5381 }
5382 }
5383 return condition;
5384 }
5385
5386 /*
5387 Handling bracketed expressions is probably the most complex part.
5388
5389 Stack layout naming characters:
5390 S - Push the current STR_PTR
5391 0 - Push a 0 (NULL)
5392 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5393 before the next alternative. Not pushed if there are no alternatives.
5394 M - Any values pushed by the current alternative. Can be empty, or anything.
5395 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5396 L - Push the previous local (pointed by localptr) to the stack
5397 () - optional values stored on the stack
5398 ()* - optional, can be stored multiple times
5399
5400 The following list shows the regular expression templates, their PCRE byte codes
5401 and stack layout supported by pcre-sljit.
5402
5403 (?:) OP_BRA | OP_KET A M
5404 () OP_CBRA | OP_KET C M
5405 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5406 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5407 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5408 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5409 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5410 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5411 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5412 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5413 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5414 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5415 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5416 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5417 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5418 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5419 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5420 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5421 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5422 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5423 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5424 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5425
5426
5427 Stack layout naming characters:
5428 A - Push the alternative index (starting from 0) on the stack.
5429 Not pushed if there are no alternatives.
5430 M - Any values pushed by the current alternative. Can be empty, or anything.
5431
5432 The next list shows the possible content of a bracket:
5433 (|) OP_*BRA | OP_ALT ... M A
5434 (?()|) OP_*COND | OP_ALT M A
5435 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5436 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5437 Or nothing, if trace is unnecessary
5438 */
5439
/* Emits the matching path of a bracketed group, optionally preceded by
OP_BRAZERO or OP_BRAMINZERO. The opcodes handled explicitly below are
OP_CBRA / OP_SCBRA, OP_ONCE / OP_ONCE_NC, OP_SBRA and OP_COND / OP_SCOND; any
other bracket falls through to the plain "has_alternatives" handling.

Arguments:
  common  shared compiler state
  cc      points to the bracket opcode, or to the OP_BRAZERO / OP_BRAMINZERO
          in front of it
  parent  backtrack record the new bracket_backtrack is pushed onto
          (presumably through parent->top, see the OP_DEF case below); for
          OP_BRAMINZERO it is also cast to braminzero_backtrack

Returns the address following the group: the alternatives are skipped and
1 + LINK_SIZE is added for the closing ket. For a condition group starting
with OP_DEF, bracketend(cc) is returned without emitting code for the group.
NULL is returned when the sljit compiler reports an error. */
5440 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5441 {
5442 DEFINE_COMPILER;
5443 backtrack_common *backtrack;
5444 pcre_uchar opcode;
5445 int private_data_ptr = 0;
5446 int offset = 0;
5447 int stacksize;
5448 pcre_uchar *ccbegin;
5449 pcre_uchar *matchingpath;
5450 pcre_uchar bra = OP_BRA;
5451 pcre_uchar ket;
5452 assert_backtrack *assert;
5453 BOOL has_alternatives;
5454 struct sljit_jump *jump;
5455 struct sljit_jump *skip;
5456 struct sljit_label *rmaxlabel = NULL;
5457 struct sljit_jump *braminzerojump = NULL;
5458
5459 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5460
5461 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5462 {
5463 bra = *cc;
5464 cc++;
/* Note: this assignment is redundant, opcode is loaded again right below. */
5465 opcode = *cc;
5466 }
5467
5468 opcode = *cc;
5469 ccbegin = cc;
5470 matchingpath = ccbegin + 1 + LINK_SIZE;
5471
/* A condition group whose condition is OP_DEF generates no code at all. */
5472 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5473 {
5474 /* Drop this bracket_backtrack. */
5475 parent->top = backtrack->prev;
5476 return bracketend(cc);
5477 }
5478
5479 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5480 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5481 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5482 cc += GET(cc, 1);
5483
5484 has_alternatives = *cc == OP_ALT;
5485 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5486 {
5487 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5488 if (*matchingpath == OP_NRREF)
5489 {
/* stacksize is used as a scratch variable here: it holds the GET2 operand
of OP_NRREF. */
5490 stacksize = GET2(matchingpath, 1);
5491 if (common->currententry == NULL || stacksize == RREF_ANY)
5492 has_alternatives = FALSE;
5493 else if (common->currententry->start == 0)
5494 has_alternatives = stacksize != 0;
5495 else
5496 has_alternatives = stacksize != GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5497 }
5498 }
5499
/* From here on a repeated OP_COND is handled as OP_SCOND, and OP_ONCE_NC is
handled as OP_ONCE. */
5500 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5501 opcode = OP_SCOND;
5502 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5503 opcode = OP_ONCE;
5504
5505 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5506 {
5507 /* Capturing brackets has a pre-allocated space. */
5508 offset = GET2(ccbegin, 1 + LINK_SIZE);
5509 if (common->optimized_cbracket[offset] == 0)
5510 {
5511 private_data_ptr = OVECTOR_PRIV(offset);
5512 offset <<= 1;
5513 }
5514 else
5515 {
5516 offset <<= 1;
5517 private_data_ptr = OVECTOR(offset);
5518 }
5519 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5520 matchingpath += IMM2_SIZE;
5521 }
5522 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5523 {
5524 /* Other brackets simply allocate the next entry. */
5525 private_data_ptr = PRIVATE_DATA(ccbegin);
5526 SLJIT_ASSERT(private_data_ptr != 0);
5527 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5528 if (opcode == OP_ONCE)
5529 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5530 }
5531
5532 /* Instructions before the first alternative. */
5533 stacksize = 0;
5534 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5535 stacksize++;
5536 if (bra == OP_BRAZERO)
5537 stacksize++;
5538
5539 if (stacksize > 0)
5540 allocate_stack(common, stacksize);
5541
5542 stacksize = 0;
5543 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5544 {
5545 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5546 stacksize++;
5547 }
5548
5549 if (bra == OP_BRAZERO)
5550 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5551
5552 if (bra == OP_BRAMINZERO)
5553 {
5554 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5555 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5556 if (ket != OP_KETRMIN)
5557 {
5558 free_stack(common, 1);
5559 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5560 }
5561 else
5562 {
5563 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5564 {
5565 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5566 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5567 /* Nothing stored during the first run. */
5568 skip = JUMP(SLJIT_JUMP);
5569 JUMPHERE(jump);
5570 /* Checking zero-length iteration. */
5571 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5572 {
5573 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5574 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5575 }
5576 else
5577 {
5578 /* Except when the whole stack frame must be saved. */
5579 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5580 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_w));
5581 }
5582 JUMPHERE(skip);
5583 }
5584 else
5585 {
5586 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5587 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5588 JUMPHERE(jump);
5589 }
5590 }
5591 }
5592
5593 if (ket == OP_KETRMIN)
5594 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5595
5596 if (ket == OP_KETRMAX)
5597 {
/* rmaxlabel marks the start of one iteration; the code emitted at the end
of the group jumps back to it. */
5598 rmaxlabel = LABEL();
5599 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5600 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5601 }
5602
5603 /* Handling capturing brackets and alternatives. */
5604 if (opcode == OP_ONCE)
5605 {
5606 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5607 {
5608 /* Neither capturing brackets nor recursions are found in the block. */
5609 if (ket == OP_KETRMIN)
5610 {
5611 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5612 allocate_stack(common, 2);
5613 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5614 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5615 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_w));
5616 }
5617 else if (ket == OP_KETRMAX || has_alternatives)
5618 {
5619 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5620 allocate_stack(common, 1);
5621 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5622 }
5623 else
5624 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5625 }
5626 else
5627 {
5628 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5629 {
5630 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5631 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5632 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5633 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5634 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5635 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5636 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5637 }
5638 else
5639 {
5640 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5641 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5642 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5644 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5645 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5646 }
5647 }
5648 }
5649 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5650 {
5651 /* Saving the previous values. */
5652 if (common->optimized_cbracket[offset >> 1] == 0)
5653 {
5654 allocate_stack(common, 3);
5655 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5656 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5657 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5658 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5659 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5660 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5661 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
5662 }
5663 else
5664 {
5665 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5666 allocate_stack(common, 2);
5667 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5668 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_w));
5669 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5670 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5671 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5672 }
5673 }
5674 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5675 {
5676 /* Saving the previous value. */
5677 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5678 allocate_stack(common, 1);
5679 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5680 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5681 }
5682 else if (has_alternatives)
5683 {
5684 /* Pushing the starting string pointer. */
5685 allocate_stack(common, 1);
5686 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5687 }
5688
5689 /* Generating code for the first alternative. */
5690 if (opcode == OP_COND || opcode == OP_SCOND)
5691 {
5692 if (*matchingpath == OP_CREF)
5693 {
5694 SLJIT_ASSERT(has_alternatives);
/* The condition fails when the start offset of the referenced group equals
OVECTOR(1). */
5695 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5696 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5697 matchingpath += 1 + IMM2_SIZE;
5698 }
5699 else if (*matchingpath == OP_NCREF)
5700 {
5701 SLJIT_ASSERT(has_alternatives);
5702 stacksize = GET2(matchingpath, 1);
/* Fast check on the referenced group itself; do_searchovector() is only
called when that group compares equal to OVECTOR(1). */
5703 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5704
/* STACK_TOP is saved in POSSESSIVE1 around the call and reloaded afterwards;
LOCALS0 / LOCALS1 pass the name count and the name entry size. */
5705 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5706 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5707 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5708 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_w)));
5709 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5710 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5711 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5712 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5713 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5714
5715 JUMPHERE(jump);
5716 matchingpath += 1 + IMM2_SIZE;
5717 }
5718 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
5719 {
5720 /* Never has other case. */
5721 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5722
/* stacksize is turned into a flag: non-zero when the condition is known to
hold at compile time. */
5723 stacksize = GET2(matchingpath, 1);
5724 if (common->currententry == NULL)
5725 stacksize = 0;
5726 else if (stacksize == RREF_ANY)
5727 stacksize = 1;
5728 else if (common->currententry->start == 0)
5729 stacksize = stacksize == 0;
5730 else
5731 stacksize = stacksize == GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5732
5733 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
5734 {
5735 SLJIT_ASSERT(!has_alternatives);
/* Decided at compile time: compile only the branch that is taken. */
5736 if (stacksize != 0)
5737 matchingpath += 1 + IMM2_SIZE;
5738 else
5739 {
5740 if (*cc == OP_ALT)
5741 {
5742 matchingpath = cc + 1 + LINK_SIZE;
5743 cc += GET(cc, 1);
5744 }
5745 else
5746 matchingpath = cc;
5747 }
5748 }
5749 else
5750 {
5751 SLJIT_ASSERT(has_alternatives);
5752
5753 stacksize = GET2(matchingpath, 1);
5754 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5755 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5756 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5757 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5758 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, stacksize);
5759 GET_LOCAL_BASE(SLJIT_TEMPORARY_REG2, 0, 0);
5760 OP1(SLJIT_MOV, SLJIT_TEMPORARY_REG3, 0, SLJIT_IMM, common->name_table);
5761 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5762 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5763 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_TEMPORARY_REG1, 0, SLJIT_IMM, 0));
5764 matchingpath += 1 + IMM2_SIZE;
5765 }
5766 }
5767 else
5768 {
/* The condition is an assertion: compile it with conditional set to TRUE. */
5769 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
5770 /* Similar code as PUSH_BACKTRACK macro. */
5771 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5772 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5773 return NULL;
5774 memset(assert, 0, sizeof(assert_backtrack));
5775 assert->common.cc = matchingpath;
5776 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5777 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
5778 }
5779 }
5780
5781 compile_matchingpath(common, matchingpath, cc, backtrack);
5782 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5783 return NULL;
5784
5785 if (opcode == OP_ONCE)
5786 {
5787 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5788 {
5789 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5790 /* TMP2 which is set here used by OP_KETRMAX below. */
5791 if (ket == OP_KETRMAX)
5792 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5793 else if (ket == OP_KETRMIN)
5794 {
5795 /* Move the STR_PTR to the private_data_ptr. */
5796 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5797 }
5798 }
5799 else
5800 {
5801 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5802 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_w));
5803 if (ket == OP_KETRMAX)
5804 {
5805 /* TMP2 which is set here used by OP_KETRMAX below. */
5806 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5807 }
5808 }
5809 }
5810
/* Values pushed after the alternative: one slot when the group is repeated or
has a zero / minzero prefix, plus one slot for the alternative index. */
5811 stacksize = 0;
5812 if (ket != OP_KET || bra != OP_BRA)
5813 stacksize++;
5814 if (has_alternatives && opcode != OP_ONCE)
5815 stacksize++;
5816
5817 if (stacksize > 0)
5818 allocate_stack(common, stacksize);
5819
5820 stacksize = 0;
5821 if (ket != OP_KET)
5822 {
5823 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5824 stacksize++;
5825 }
5826 else if (bra != OP_BRA)
5827 {
5828 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5829 stacksize++;
5830 }
5831
5832 if (has_alternatives)
5833 {
5834 if (opcode != OP_ONCE)
5835 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5836 if (ket != OP_KETRMAX)
5837 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5838 }
5839
5840 /* Must be after the matchingpath label. */
/* offset is non-zero only for OP_CBRA / OP_SCBRA: store the captured range. */
5841 if (offset != 0)
5842 {
5843 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5844 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
5845 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 0), TMP1, 0);
5846 }
5847
5848 if (ket == OP_KETRMAX)
5849 {
5850 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5851 {
5852 if (has_alternatives)
5853 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
5854 /* Checking zero-length iteration. */
5855 if (opcode != OP_ONCE)
5856 {
5857 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
5858 /* Drop STR_PTR for greedy plus quantifier. */
5859 if (bra != OP_BRAZERO)
5860 free_stack(common, 1);
5861 }
5862 else
5863 /* TMP2 must contain the starting STR_PTR. */
5864 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
5865 }
5866 else
5867 JUMPTO(SLJIT_JUMP, rmaxlabel);
5868 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5869 }
5870
5871 if (bra == OP_BRAZERO)
5872 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
5873
5874 if (bra == OP_BRAMINZERO)
5875 {
5876 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
5877 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
5878 if (braminzerojump != NULL)
5879 {
5880 JUMPHERE(braminzerojump);
5881 /* We need to release the end pointer to perform the
5882 backtrack for the zero-length iteration. When
5883 framesize is < 0, OP_ONCE will do the release itself. */
5884 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
5885 {
5886 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5887 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5888 }
5889 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
5890 free_stack(common, 1);
5891 }
5892 /* Continue to the normal backtrack. */
5893 }
5894
5895 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
5896 decrease_call_count(common);
5897
5898 /* Skip the other alternatives. */
5899 while (*cc == OP_ALT)
5900 cc += GET(cc, 1);
5901 cc += 1 + LINK_SIZE;
5902 return cc;
5903 }
5904
/* Emits the matching path for a bracket of the OP_BRAPOS family
(OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS, OP_SCBRAPOS), optionally preceded by
OP_BRAPOSZERO. Every alternative up to the closing OP_KETRPOS is compiled in
turn; the code generated for a successful alternative jumps back to `loop`.

Arguments:
  common   compiler state
  cc       points at OP_BRAPOSZERO or at the bracket opcode itself
  parent   enclosing backtrack node (presumably consumed by PUSH_BACKTRACK,
           whose definition is not visible here - confirm)

Returns:   the address following OP_KETRPOS and its link, or NULL when sljit
           reports a compiler error
*/
5905 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5906 {
5907 DEFINE_COMPILER;
5908 backtrack_common *backtrack;
5909 pcre_uchar opcode;
5910 int private_data_ptr;
5911 int cbraprivptr = 0;
5912 int framesize;
5913 int stacksize;
5914 int offset = 0;
5915 BOOL zero = FALSE;
5916 pcre_uchar *ccbegin = NULL;
5917 int stack;
5918 struct sljit_label *loop = NULL;
5919 struct jump_list *emptymatch = NULL;
5920
5921 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
/* An OP_BRAPOSZERO prefix is only remembered; cc then moves on to the
bracket opcode itself. */
5922 if (*cc == OP_BRAPOSZERO)
5923 {
5924 zero = TRUE;
5925 cc++;
5926 }
5927
5928 opcode = *cc;
5929 private_data_ptr = PRIVATE_DATA(cc);
5930 SLJIT_ASSERT(private_data_ptr != 0);
5931 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
/* Locate the first opcode of the first alternative. Capturing variants
carry an extra IMM2_SIZE group number after the link. */
5932 switch(opcode)
5933 {
5934 case OP_BRAPOS:
5935 case OP_SBRAPOS:
5936 ccbegin = cc + 1 + LINK_SIZE;
5937 break;
5938
5939 case OP_CBRAPOS:
5940 case OP_SCBRAPOS:
5941 offset = GET2(cc, 1 + LINK_SIZE);
5942 /* This case cannot be optimized in the same way as
5943 normal capturing brackets. */
5944 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
5945 cbraprivptr = OVECTOR_PRIV(offset);
/* From here on offset indexes the (start, end) pair in the ovector. */
5946 offset <<= 1;
5947 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
5948 break;
5949
5950 default:
5951 SLJIT_ASSERT_STOP();
5952 break;
5953 }
5954
5955 framesize = get_framesize(common, cc, FALSE);
5956 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
5957 if (framesize < 0)
5958 {
/* No frame: reserve two slots for a capturing bracket (saved ovector pair)
or one slot otherwise (saved STR_PTR), plus one flag slot when !zero. The
flag starts as 1 and is cleared to 0 once an alternative has matched. */
5959 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
5960 if (!zero)
5961 stacksize++;
5962 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5963 allocate_stack(common, stacksize);
5964 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5965
5966 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
5967 {
5968 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
5969 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
5970 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5971 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5972 }
5973 else
5974 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5975
5976 if (!zero)
5977 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
5978 }
5979 else
5980 {
/* A frame is needed: framesize slots for the frame, one for the previous
value of private_data_ptr, one flag slot when !zero and one STR_PTR slot for
the non-capturing variants. */
5981 stacksize = framesize + 1;
5982 if (!zero)
5983 stacksize++;
5984 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5985 stacksize++;
5986 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
5987 allocate_stack(common, stacksize);
5988
5989 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5990 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
5991 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5992 stack = 0;
5993 if (!zero)
5994 {
5995 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
5996 stack++;
5997 }
5998 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
5999 {
6000 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6001 stack++;
6002 }
6003 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6004 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6005 }
6006
6007 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6008 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6009
/* Every successfully matched alternative jumps back to this label. */
6010 loop = LABEL();
6011 while (*cc != OP_KETRPOS)
6012 {
/* Each alternative starts with an empty backtrack list; cc is advanced to
the end of the current alternative. */
6013 backtrack->top = NULL;
6014 backtrack->topbacktracks = NULL;
6015 cc += GET(cc, 1);
6016
6017 compile_matchingpath(common, ccbegin, cc, backtrack);
6018 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6019 return NULL;
6020
/* Code executed when the alternative matched: restore STACK_TOP, record
the new position (ovector pair or saved STR_PTR) and clear the flag. */
6021 if (framesize < 0)
6022 {
6023 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6024
6025 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6026 {
6027 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6028 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6029 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6030 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6031 }
6032 else
6033 {
6034 if (opcode == OP_SBRAPOS)
6035 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6036 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6037 }
6038
/* For the S* variants TMP1 holds the position saved before this iteration;
if STR_PTR did not move, leave the loop through emptymatch. */
6039 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6040 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6041
6042 if (!zero)
6043 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6044 }
6045 else
6046 {
6047 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6048 {
6049 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_w));
6050 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6051 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6052 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6054 }
6055 else
6056 {
6057 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6058 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_w));
6059 if (opcode == OP_SBRAPOS)
6060 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6061 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w), STR_PTR, 0);
6062 }
6063
6064 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6065 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6066
6067 if (!zero)
6068 {
/* NOTE(review): this code sits in the framesize >= 0 branch and framesize
is not modified after its assignment, so the first arm below can never be
taken; only the STACK(0) store is reachable. */
6069 if (framesize < 0)
6070 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6071 else
6072 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6073 }
6074 }
6075 JUMPTO(SLJIT_JUMP, loop);
6076 flush_stubs(common);
6077
/* Backtracking code of this alternative; all failure jumps collected for
it land on the label created right after. */
6078 compile_backtrackingpath(common, backtrack->top);
6079 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6080 return NULL;
6081 set_jumps(backtrack->topbacktracks, LABEL());
6082
/* The alternative failed: reload STR_PTR from the last recorded position. */
6083 if (framesize < 0)
6084 {
6085 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6086 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6087 else
6088 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6089 }
6090 else
6091 {
6092 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6093 {
6094 /* Last alternative. */
6095 if (*cc == OP_KETRPOS)
6096 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6097 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6098 }
6099 else
6100 {
6101 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6102 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_w));
6103 }
6104 }
6105
6106 if (*cc == OP_KETRPOS)
6107 break;
/* Next alternative begins right after the OP_ALT and its link. */
6108 ccbegin = cc + 1 + LINK_SIZE;
6109 }
6110
/* When !zero, a flag that is still non-zero means no alternative ever
matched, so the whole bracket has to backtrack. */
6111 backtrack->topbacktracks = NULL;
6112 if (!zero)
6113 {
6114 if (framesize < 0)
6115 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6116 else /* TMP2 is set to [private_data_ptr] above. */
6117 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_w), SLJIT_IMM, 0));
6118 }
6119
6120 /* None of them matched. */
6121 set_jumps(emptymatch, LABEL());
6122 decrease_call_count(common);
6123 return cc + 1 + LINK_SIZE;
6124 }
6125
6126 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6127 {
6128 int class_len;
6129
6130 *opcode = *cc;
6131 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6132 {
6133 cc++;
6134 *type = OP_CHAR;
6135 }
6136 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6137 {
6138 cc++;
6139 *type = OP_CHARI;
6140 *opcode -= OP_STARI - OP_STAR;
6141 }
6142 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6143 {
6144 cc++;
6145 *type = OP_NOT;
6146 *opcode -= OP_NOTSTAR - OP_STAR;
6147 }
6148 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6149 {
6150 cc++;
6151 *type = OP_NOTI;
6152 *opcode -= OP_NOTSTARI - OP_STAR;
6153 }
6154 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6155 {
6156 cc++;
6157 *opcode -= OP_TYPESTAR - OP_STAR;
6158 *type = 0;
6159 }
6160 else
6161 {
6162 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6163 *type = *opcode;
6164 cc++;
6165 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6166 *opcode = cc[class_len - 1];
6167 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6168 {
6169 *opcode -= OP_CRSTAR - OP_STAR;
6170 if (end != NULL)
6171 *end = cc + class_len;
6172 }
6173 else
6174 {
6175 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6176 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6177 *arg2 = GET2(cc, class_len);
6178
6179 if (*arg2 == 0)
6180 {
6181 SLJIT_ASSERT(*arg1 != 0);
6182 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6183 }
6184 if (*arg1 == *arg2)
6185 *opcode = OP_EXACT;
6186
6187 if (end != NULL)
6188 *end = cc + class_len + 2 * IMM2_SIZE;
6189 }
6190 return cc;
6191 }
6192
6193 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6194 {
6195 *arg1 = GET2(cc, 0);
6196 cc += IMM2_SIZE;
6197 }
6198
6199 if (*type == 0)
6200 {
6201 *type = *cc;
6202 if (end != NULL)
6203 *end = next_opcode(common, cc);
6204 cc++;
6205 return cc;
6206 }
6207
6208 if (end != NULL)
6209 {
6210 *end = cc + 1;
6211 #ifdef SUPPORT_UTF
6212 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6213 #endif
6214 }
6215 return cc;
6216 }
6217
/* Emits the matching path for a repeated single item (character, negated
character, character type or class). The repeat is first normalized by
get_iterator_parameters(); the switch on the normalized opcode then emits
the code for each repeat kind.

For class ranges arg1 is the upper and arg2 the lower bound, as filled in by
get_iterator_parameters().

Arguments:
  common   compiler state
  cc       points at the repeat opcode or at the class opcode
  parent   enclosing backtrack node (presumably consumed by PUSH_BACKTRACK,
           whose definition is not visible here - confirm)

Returns:   the `end` address reported by get_iterator_parameters()
*/
6218 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6219 {
6220 DEFINE_COMPILER;
6221 backtrack_common *backtrack;
6222 pcre_uchar opcode;
6223 pcre_uchar type;
6224 int arg1 = -1, arg2 = -1;
6225 pcre_uchar* end;
6226 jump_list *nomatch = NULL;
6227 struct sljit_jump *jump = NULL;
6228 struct sljit_label *label;
/* The two working slots live either on the stack (no private data assigned
to this opcode) or in the locals area at private_data_ptr. */
6229 int private_data_ptr = PRIVATE_DATA(cc);
6230 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6231 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6232 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_w);
6233 int tmp_base, tmp_offset;
6234
6235 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6236
6237 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6238
/* Select the temporary used by the OP_EXACT and OP_POS* cases: TMP3 for the
types listed first, the POSSESSIVE0 local for the others.
NOTE(review): presumably the second group may overwrite TMP3 while matching
one item - confirm against compile_char1_matchingpath. */
6239 switch (type)
6240 {
6241 case OP_NOT_DIGIT:
6242 case OP_DIGIT:
6243 case OP_NOT_WHITESPACE:
6244 case OP_WHITESPACE:
6245 case OP_NOT_WORDCHAR:
6246 case OP_WORDCHAR:
6247 case OP_ANY:
6248 case OP_ALLANY:
6249 case OP_ANYBYTE:
6250 case OP_ANYNL:
6251 case OP_NOT_HSPACE:
6252 case OP_HSPACE:
6253 case OP_NOT_VSPACE:
6254 case OP_VSPACE:
6255 case OP_CHAR:
6256 case OP_CHARI:
6257 case OP_NOT:
6258 case OP_NOTI:
6259 case OP_CLASS:
6260 case OP_NCLASS:
6261 tmp_base = TMP3;
6262 tmp_offset = 0;
6263 break;
6264
6265 default:
6266 SLJIT_ASSERT_STOP();
6267 /* Fall through. */
6268
6269 case OP_EXTUNI:
6270 case OP_XCLASS:
6271 case OP_NOTPROP:
6272 case OP_PROP:
6273 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6274 tmp_offset = POSSESSIVE0;
6275 break;
6276 }
6277
6278 switch(opcode)
6279 {
6280 case OP_STAR:
6281 case OP_PLUS:
6282 case OP_UPTO:
6283 case OP_CRRANGE:
6284 if (type == OP_ANYNL || type == OP_EXTUNI)
6285 {
/* For these two types every reached position is pushed on the stack (one
slot per iteration) instead of keeping only a start/end pair. */
6286 SLJIT_ASSERT(private_data_ptr == 0);
6287 if (opcode == OP_STAR || opcode == OP_UPTO)
6288 {
6289 allocate_stack(common, 2);
6290 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6291 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6292 }
6293 else
6294 {
6295 allocate_stack(common, 1);
6296 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6297 }
6298
/* Counted repeats keep the iteration counter in POSSESSIVE0. */
6299 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6300 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6301
6302 label = LABEL();
6303 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6304 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6305 {
6306 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6307 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6308 if (opcode == OP_CRRANGE && arg2 > 0)
6309 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6310 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6311 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6312 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6313 }
6314
6315 /* We cannot use TMP3 because of this allocate_stack. */
6316 allocate_stack(common, 1);
6317 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6318 JUMPTO(SLJIT_JUMP, label);
6319 if (jump != NULL)
6320 JUMPHERE(jump);
6321 }
6322 else
6323 {
/* General case: offset0 holds the furthest position reached so far;
offset1 holds the start position (OP_STAR / OP_PLUS) or an iteration
counter starting at 1 (OP_UPTO / OP_CRRANGE). */
6324 if (opcode == OP_PLUS)
6325 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6326 if (private_data_ptr == 0)
6327 allocate_stack(common, 2);
6328 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6329 if (opcode <= OP_PLUS)
6330 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6331 else
6332 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6333 label = LABEL();
6334 compile_char1_matchingpath(common, type, cc, &nomatch);
6335 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6336 if (opcode <= OP_PLUS)
6337 JUMPTO(SLJIT_JUMP, label);
6338 else if (opcode == OP_CRRANGE && arg1 == 0)
6339 {
/* arg1 == 0: no upper bound is checked, the counter is only incremented. */
6340 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6341 JUMPTO(SLJIT_JUMP, label);
6342 }
6343 else
6344 {
6345 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6346 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6347 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6348 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6349 }
6350 set_jumps(nomatch, LABEL());
/* A class range that stopped before reaching its lower bound fails. */
6351 if (opcode == OP_CRRANGE)
6352 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6353 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6354 }
6355 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6356 break;
6357
6358 case OP_MINSTAR:
6359 case OP_MINPLUS:
/* Only the mandatory item of OP_MINPLUS is matched here; the current
position is saved in offset0. */
6360 if (opcode == OP_MINPLUS)
6361 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6362 if (private_data_ptr == 0)
6363 allocate_stack(common, 1);
6364 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6365 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6366 break;
6367
6368 case OP_MINUPTO:
6369 case OP_CRMINRANGE:
/* Saves the position and a counter of 1. OP_CRMINRANGE additionally emits
an unconditional jump that is queued on the backtrack list. */
6370 if (private_data_ptr == 0)
6371 allocate_stack(common, 2);
6372 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6373 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6374 if (opcode == OP_CRMINRANGE)
6375 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6376 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6377 break;
6378
6379 case OP_QUERY:
6380 case OP_MINQUERY:
/* The position is saved first; only OP_QUERY tries to match the item on
the matching path. */
6381 if (private_data_ptr == 0)
6382 allocate_stack(common, 1);
6383 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6384 if (opcode == OP_QUERY)
6385 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6386 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6387 break;
6388
6389 case OP_EXACT:
/* Count down from arg1 in the temporary; any failed item backtracks. */
6390 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6391 label = LABEL();
6392 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6393 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6394 JUMPTO(SLJIT_C_NOT_ZERO, label);
6395 break;
6396
6397 case OP_POSSTAR:
6398 case OP_POSPLUS:
6399 case OP_POSUPTO:
/* The temporary tracks the last position reached; a failed item restores
STR_PTR from it. No matchingpath label is recorded for these cases.
OP_POSUPTO counts down from arg1 in POSSESSIVE1. */
6400 if (opcode == OP_POSPLUS)
6401 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6402 if (opcode == OP_POSUPTO)
6403 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6404 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6405 label = LABEL();
6406 compile_char1_matchingpath(common, type, cc, &nomatch);
6407 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6408 if (opcode != OP_POSUPTO)
6409 JUMPTO(SLJIT_JUMP, label);
6410 else
6411 {
6412 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6413 JUMPTO(SLJIT_C_NOT_ZERO, label);
6414 }
6415 set_jumps(nomatch, LABEL());
6416 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6417 break;
6418
6419 case OP_POSQUERY:
/* Try the item once; on failure STR_PTR is restored to the saved start. */
6420 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6421 compile_char1_matchingpath(common, type, cc, &nomatch);
6422 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6423 set_jumps(nomatch, LABEL());
6424 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6425 break;
6426
6427 default:
6428 SLJIT_ASSERT_STOP();
6429 break;
6430 }
6431
6432 decrease_call_count(common);
6433 return end;
6434 }
6435
/* Emits the matching path for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT.

Arguments:
  common   compiler state
  cc       points at the opcode
  parent   enclosing backtrack node (presumably consumed by PUSH_BACKTRACK,
           whose definition is not visible here - confirm)

Returns:   cc + 1
*/
6436 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6437 {
6438 DEFINE_COMPILER;
6439 backtrack_common *backtrack;
6440
/* NOTE(review): the node is sized as bracket_backtrack although this
function only touches backtrack->topbacktracks - confirm whether the
backtracking path needs the larger structure. */
6441 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
6442
/* OP_FAIL: an unconditional jump queued on the backtrack list. */
6443 if (*cc == OP_FAIL)
6444 {
6445 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6446 return cc + 1;
6447 }
6448
/* Throughout this function a jump to "accept" goes directly to
common->acceptlabel when that label already exists, and is queued on
common->accept otherwise. */
6449 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6450 {
6451 /* No need to check notempty conditions. */
6452 if (common->acceptlabel == NULL)
6453 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6454 else
6455 JUMPTO(SLJIT_JUMP, common->acceptlabel);
6456 return cc + 1;
6457 }
6458
/* Accept at once when STR_PTR differs from the value stored in OVECTOR(0). */
6459 if (common->acceptlabel == NULL)
6460 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6461 else
6462 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->acceptlabel);
/* Otherwise consult the run-time arguments: a non-zero notempty backtracks. */
6463 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6464 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6465 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
/* A zero notempty_atstart accepts. */
6466 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6467 if (common->acceptlabel == NULL)
6468 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6469 else
6470 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->acceptlabel);
/* With notempty_atstart set, accept only if STR_PTR differs from the str
field of the arguments; otherwise backtrack. */
6471 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6472 if (common->acceptlabel == NULL)
6473 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6474 else
6475 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->acceptlabel);
6476 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6477 return cc + 1;
6478 }
6479
6480 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
6481 {
6482 DEFINE_COMPILER;
6483 int offset = GET2(cc, 1);
6484 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
6485
6486 /* Data will be discarded anyway... */
6487 if (common->currententry != NULL)
6488 return cc + 1 + IMM2_SIZE;
6489
6490 if (!optimized_cbracket)
6491 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6492 offset <<= 1;
6493 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6494 if (!optimized_cbracket)
6495 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6496 return cc + 1 + IMM2_SIZE;
6497 }
6498
/* Emits the matching path for the opcodes in the range [cc, ccend). Each
opcode is dispatched to the helper that handles it; the helper returns the
address of the next opcode. Processing stops early when a helper returns
NULL or an unknown opcode is met.

Arguments:
  common   compiler state
  cc       first opcode to compile
  ccend    address just past the last opcode to compile
  parent   backtrack node whose lists collect the failure jumps
*/
6499 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6500 {
6501 DEFINE_COMPILER;
6502 backtrack_common *backtrack;
6503
6504 while (cc < ccend)
6505 {
6506 switch(*cc)
6507 {
/* Single items without a backtrack node of their own: failure jumps go to
the most recent child node when there is one, otherwise to the parent. */
6508 case OP_SOD:
6509 case OP_SOM:
6510 case OP_NOT_WORD_BOUNDARY:
6511 case OP_WORD_BOUNDARY:
6512 case OP_NOT_DIGIT:
6513 case OP_DIGIT:
6514 case OP_NOT_WHITESPACE:
6515 case OP_WHITESPACE:
6516 case OP_NOT_WORDCHAR:
6517 case OP_WORDCHAR:
6518 case OP_ANY:
6519 case OP_ALLANY:
6520 case OP_ANYBYTE:
6521 case OP_NOTPROP:
6522 case OP_PROP:
6523 case OP_ANYNL:
6524 case OP_NOT_HSPACE:
6525 case OP_HSPACE:
6526 case OP_NOT_VSPACE:
6527 case OP_VSPACE:
6528 case OP_EXTUNI:
6529 case OP_EODN:
6530 case OP_EOD:
6531 case OP_CIRC:
6532 case OP_CIRCM:
6533 case OP_DOLL:
6534 case OP_DOLLM:
6535 case OP_NOT:
6536 case OP_NOTI:
6537 case OP_REVERSE:
6538 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6539 break;
6540
6541 case OP_SET_SOM:
/* The previous OVECTOR(0) value is pushed on the stack and replaced by the
current subject position. */
6542 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6543 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6544 allocate_stack(common, 1);
6545 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), STR_PTR, 0);
6546 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6547 cc++;
6548 break;
6549
6550 case OP_CHAR:
6551 case OP_CHARI:
/* In JIT_COMPILE mode the charn helper is used (it also receives ccend);
other modes handle a single character at a time. */
6552 if (common->mode == JIT_COMPILE)
6553 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6554 else
6555 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6556 break;
6557
/* All single-item repeats are handled by the iterator helper. */
6558 case OP_STAR:
6559 case OP_MINSTAR:
6560 case OP_PLUS:
6561 case OP_MINPLUS:
6562 case OP_QUERY:
6563 case OP_MINQUERY:
6564 case OP_UPTO:
6565 case OP_MINUPTO:
6566 case OP_EXACT:
6567 case OP_POSSTAR:
6568 case OP_POSPLUS:
6569 case OP_POSQUERY:
6570 case OP_POSUPTO:
6571 case OP_STARI:
6572 case OP_MINSTARI:
6573 case OP_PLUSI:
6574 case OP_MINPLUSI:
6575 case OP_QUERYI:
6576 case OP_MINQUERYI:
6577 case OP_UPTOI:
6578 case OP_MINUPTOI:
6579 case OP_EXACTI:
6580 case OP_POSSTARI:
6581 case OP_POSPLUSI:
6582 case OP_POSQUERYI:
6583 case OP_POSUPTOI:
6584 case OP_NOTSTAR:
6585 case OP_NOTMINSTAR:
6586 case OP_NOTPLUS:
6587 case OP_NOTMINPLUS:
6588 case OP_NOTQUERY:
6589 case OP_NOTMINQUERY:
6590 case OP_NOTUPTO:
6591 case OP_NOTMINUPTO:
6592 case OP_NOTEXACT:
6593 case OP_NOTPOSSTAR:
6594 case OP_NOTPOSPLUS:
6595 case OP_NOTPOSQUERY:
6596 case OP_NOTPOSUPTO:
6597 case OP_NOTSTARI:
6598 case OP_NOTMINSTARI:
6599 case OP_NOTPLUSI:
6600 case OP_NOTMINPLUSI:
6601 case OP_NOTQUERYI:
6602 case OP_NOTMINQUERYI:
6603 case OP_NOTUPTOI:
6604 case OP_NOTMINUPTOI:
6605 case OP_NOTEXACTI:
6606 case OP_NOTPOSSTARI:
6607 case OP_NOTPOSPLUSI:
6608 case OP_NOTPOSQUERYI:
6609 case OP_NOTPOSUPTOI:
6610 case OP_TYPESTAR:
6611 case OP_TYPEMINSTAR:
6612 case OP_TYPEPLUS:
6613 case OP_TYPEMINPLUS:
6614 case OP_TYPEQUERY:
6615 case OP_TYPEMINQUERY:
6616 case OP_TYPEUPTO:
6617 case OP_TYPEMINUPTO:
6618 case OP_TYPEEXACT:
6619 case OP_TYPEPOSSTAR:
6620 case OP_TYPEPOSPLUS:
6621 case OP_TYPEPOSQUERY:
6622 case OP_TYPEPOSUPTO:
6623 cc = compile_iterator_matchingpath(common, cc, parent);
6624 break;
6625
6626 case OP_CLASS:
6627 case OP_NCLASS:
/* A class followed by a repeat opcode is compiled as an iterator, a bare
class as a single item. */
6628 if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
6629 cc = compile_iterator_matchingpath(common, cc, parent);
6630 else
6631 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6632 break;
6633
6634 #if defined SUPPORT_UTF || defined COMPILE_PCRE16
6635 case OP_XCLASS:
/* Same decision as for OP_CLASS; the opcode after the class is found
through the length stored at cc + 1. */
6636 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
6637 cc = compile_iterator_matchingpath(common, cc, parent);
6638 else
6639 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6640 break;
6641 #endif
6642
6643 case OP_REF:
6644 case OP_REFI:
/* A reference followed by a repeat opcode goes to the iterator variant. */
6645 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
6646 cc = compile_ref_iterator_matchingpath(common, cc, parent);
6647 else
6648 cc = compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
6649 break;
6650
6651 case OP_RECURSE:
6652 cc = compile_recurse_matchingpath(common, cc, parent);
6653 break;
6654
6655 case OP_ASSERT:
6656 case OP_ASSERT_NOT:
6657 case OP_ASSERTBACK:
6658 case OP_ASSERTBACK_NOT:
6659 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6660 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6661 break;
6662
6663 case OP_BRAMINZERO:
/* The bracket that follows is not compiled on the matching path: cc jumps
to its end and only the current position is saved (with an extra zero slot
when the bracket ends with OP_KETRMIN). */
6664 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
6665 cc = bracketend(cc + 1);
6666 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
6667 {
6668 allocate_stack(common, 1);
6669 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6670 }
6671 else
6672 {
6673 allocate_stack(common, 2);
6674 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6675 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
6676 }
6677 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
6678 if (cc[1] > OP_ASSERTBACK_NOT)
6679 decrease_call_count(common);
6680 break;
6681
6682 case OP_ONCE:
6683 case OP_ONCE_NC:
6684 case OP_BRA:
6685 case OP_CBRA:
6686 case OP_COND:
6687 case OP_SBRA:
6688 case OP_SCBRA:
6689 case OP_SCOND:
6690 cc = compile_bracket_matchingpath(common, cc, parent);
6691 break;
6692
6693 case OP_BRAZERO:
/* OP_BRAZERO is followed either by a bracket or by an assertion; the
opcode after it selects the helper. */
6694 if (cc[1] > OP_ASSERTBACK_NOT)
6695 cc = compile_bracket_matchingpath(common, cc, parent);
6696 else
6697 {
6698 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
6699 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
6700 }
6701 break;
6702
6703 case OP_BRAPOS:
6704 case OP_CBRAPOS:
6705 case OP_SBRAPOS:
6706 case OP_SCBRAPOS:
6707 case OP_BRAPOSZERO:
6708 cc = compile_bracketpos_matchingpath(common, cc, parent);
6709 break;
6710
6711 case OP_MARK:
/* The previous mark pointer is pushed on the stack; the address of the
name (cc + 2) becomes the new mark, both in the local mark slot and in the
mark_ptr field of the run-time arguments. */
6712 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6713 SLJIT_ASSERT(common->mark_ptr != 0);
6714 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
6715 allocate_stack(common, 1);
6716 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6717 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
6718 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_w)(cc + 2));
6719 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0);
6720 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
6721 cc += 1 + 2 + cc[1];
6722 break;
6723
6724 case OP_COMMIT:
/* Nothing is emitted on the matching path; only a backtrack node is made. */
6725 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6726 cc += 1;
6727 break;
6728
6729 case OP_FAIL:
6730 case OP_ACCEPT:
6731 case OP_ASSERT_ACCEPT:
6732 cc = compile_fail_accept_matchingpath(common, cc, parent);
6733 break;
6734
6735 case OP_CLOSE:
6736 cc = compile_close_matchingpath(common, cc);
6737 break;
6738
6739 case OP_SKIPZERO:
/* The bracket that follows is skipped entirely. */
6740 cc = bracketend(cc + 1);
6741 break;
6742
6743 default:
6744 SLJIT_ASSERT_STOP();
6745 return;
6746 }
/* A NULL result from a helper stops the compilation of this range. */
6747 if (cc == NULL)
6748 return;
6749 }
6750 SLJIT_ASSERT(cc == ccend);
6751 }
6752
/* The matching-path helper macros are removed here, before the
backtracking-path code that follows. */
6753 #undef PUSH_BACKTRACK
6754 #undef PUSH_BACKTRACK_NOVALUE
6755 #undef BACKTRACK_AS
6756
/* Compiles the backtracking path of `current` and returns from the
enclosing function when sljit reports a compiler error. It relies on
`common` and `compiler` being in scope, and its bare `return;` makes it
usable only inside functions returning void. */
6757 #define COMPILE_BACKTRACKINGPATH(current) \
6758 do \
6759 { \
6760 compile_backtrackingpath(common, (current)); \
6761 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
6762 return; \
6763 } \
6764 while (0)
6765
/* Casts the local variable `current` to a pointer to the given type. */
6766 #define CURRENT_AS(type) ((type *)current)
6767
6768 static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
6769 {
6770 DEFINE_COMPILER;
6771 pcre_uchar *cc = current->cc;
6772 pcre_uchar opcode;
6773 pcre_uchar type;
6774 int arg1 = -1, arg2 = -1;
6775 struct sljit_label *label = NULL;
6776 struct sljit_jump *jump = NULL;
6777 jump_list *jumplist = NULL;
6778 int private_data_ptr = PRIVATE_DATA(cc);
6779 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6780 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6781 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_w);
6782