/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1247 - (show annotations)
Mon Feb 11 21:37:46 2013 UTC (6 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 275235 byte(s)
Supporting callouts and recursions together. Removing an unnecessary length check.
<
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* Allocate memory for the regex stack on the real machine stack.
75 Fast, but limited size. */
76 #define MACHINE_STACK_SIZE 32768
77
78 /* Growth rate for stack allocated by the OS. Should be a multiple
79 of the page size. */
80 #define STACK_GROWTH_RATE 8192
81
82 /* Enable to check that the allocation could destroy temporaries. */
83 #if defined SLJIT_DEBUG && SLJIT_DEBUG
84 #define DESTROY_REGISTERS 1
85 #endif
86
87 /*
88 Short summary of the backtracking mechanism employed by the JIT code generator:
89
90 The code generator follows the recursive nature of the PERL compatible regular
91 expressions. The basic blocks of regular expressions are condition checkers
92 which execute different commands depending on the result of the condition check.
93 The relationship between the operators can be horizontal (concatenation) and
94 vertical (sub-expression) (See struct backtrack_common for more details).
95
96 'ab' - 'a' and 'b' regexps are concatenated
97 'a+' - 'a' is the sub-expression of the '+' operator
98
99 The condition checkers are boolean (true/false) checkers. Machine code is generated
100 for the checker itself and for the actions depending on the result of the checker.
101 The 'true' case is called the matching path (expected path), and the other is called
102 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
103 branches on the matching path.
104
105 Greedy star operator (*) :
106 Matching path: match happens.
107 Backtrack path: match failed.
108 Non-greedy star operator (*?) :
109 Matching path: no need to perform a match.
110 Backtrack path: match is required.
111
112 The following example shows the code generated for a capturing bracket
113 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
114 we have the following regular expression:
115
116 A(B|C)D
117
118 The generated code will be the following:
119
120 A matching path
121 '(' matching path (pushing arguments to the stack)
122 B matching path
123 ')' matching path (pushing arguments to the stack)
124 D matching path
125 return with successful match
126
127 D backtrack path
128 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
129 B backtrack path
130 C expected path
131 jump to D matching path
132 C backtrack path
133 A backtrack path
134
135 Notice that the order of the backtrack code paths is the opposite of the fast
136 code paths. In this way the topmost value on the stack always belongs
137 to the current backtrack code path. The backtrack path must check
138 whether there is a next alternative. If so, it needs to jump back to
139 the matching path eventually. Otherwise it needs to clear out its own stack
140 frame and continue the execution on the backtrack code paths.
141 */
142
143 /*
144 Saved stack frames:
145
146 Atomic blocks and asserts require reloading the values of private data
147 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
148 are not necessarily known at compile time, thus we need a dynamic restore
149 mechanism.
150
151 The stack frames are stored in a chain list, and have the following format:
152 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
153
154 Thus we can restore the private data to a particular point in the stack.
155 */
156
157 typedef struct jit_arguments {
158 /* Pointers first. */
159 struct sljit_stack *stack;
160 const pcre_uchar *str;
161 const pcre_uchar *begin;
162 const pcre_uchar *end;
163 int *offsets;
164 pcre_uchar *uchar_ptr;
165 pcre_uchar *mark_ptr;
166 void *callout_data;
167 /* Everything else after. */
168 int offset_count;
169 int call_limit;
170 pcre_uint8 notbol;
171 pcre_uint8 noteol;
172 pcre_uint8 notempty;
173 pcre_uint8 notempty_atstart;
174 } jit_arguments;
175
176 typedef struct executable_functions {
177 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
178 PUBL(jit_callback) callback;
179 void *userdata;
180 pcre_uint32 top_bracket;
181 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
182 } executable_functions;
183
/* Singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
188
189 enum stub_types { stack_alloc };
190
191 typedef struct stub_list {
192 enum stub_types type;
193 int data;
194 struct sljit_jump *start;
195 struct sljit_label *quit;
196 struct stub_list *next;
197 } stub_list;
198
199 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
200
201 /* The following structure is the key data type for the recursive
202 code generator. It is allocated by compile_matchingpath, and contains
203 the aguments for compile_backtrackingpath. Must be the first member
204 of its descendants. */
205 typedef struct backtrack_common {
206 /* Concatenation stack. */
207 struct backtrack_common *prev;
208 jump_list *nextbacktracks;
209 /* Internal stack (for component operators). */
210 struct backtrack_common *top;
211 jump_list *topbacktracks;
212 /* Opcode pointer. */
213 pcre_uchar *cc;
214 } backtrack_common;
215
216 typedef struct assert_backtrack {
217 backtrack_common common;
218 jump_list *condfailed;
219 /* Less than 0 (-1) if a frame is not needed. */
220 int framesize;
221 /* Points to our private memory word on the stack. */
222 int private_data_ptr;
223 /* For iterators. */
224 struct sljit_label *matchingpath;
225 } assert_backtrack;
226
227 typedef struct bracket_backtrack {
228 backtrack_common common;
229 /* Where to coninue if an alternative is successfully matched. */
230 struct sljit_label *alternative_matchingpath;
231 /* For rmin and rmax iterators. */
232 struct sljit_label *recursive_matchingpath;
233 /* For greedy ? operator. */
234 struct sljit_label *zero_matchingpath;
235 /* Contains the branches of a failed condition. */
236 union {
237 /* Both for OP_COND, OP_SCOND. */
238 jump_list *condfailed;
239 assert_backtrack *assert;
240 /* For OP_ONCE. -1 if not needed. */
241 int framesize;
242 } u;
243 /* Points to our private memory word on the stack. */
244 int private_data_ptr;
245 } bracket_backtrack;
246
247 typedef struct bracketpos_backtrack {
248 backtrack_common common;
249 /* Points to our private memory word on the stack. */
250 int private_data_ptr;
251 /* Reverting stack is needed. */
252 int framesize;
253 /* Allocated stack size. */
254 int stacksize;
255 } bracketpos_backtrack;
256
257 typedef struct braminzero_backtrack {
258 backtrack_common common;
259 struct sljit_label *matchingpath;
260 } braminzero_backtrack;
261
262 typedef struct iterator_backtrack {
263 backtrack_common common;
264 /* Next iteration. */
265 struct sljit_label *matchingpath;
266 } iterator_backtrack;
267
268 typedef struct recurse_entry {
269 struct recurse_entry *next;
270 /* Contains the function entry. */
271 struct sljit_label *entry;
272 /* Collects the calls until the function is not created. */
273 jump_list *calls;
274 /* Points to the starting opcode. */
275 int start;
276 } recurse_entry;
277
278 typedef struct recurse_backtrack {
279 backtrack_common common;
280 } recurse_backtrack;
281
282 #define MAX_RANGE_SIZE 6
283
284 typedef struct compiler_common {
285 struct sljit_compiler *compiler;
286 pcre_uchar *start;
287
288 /* Maps private data offset to each opcode. */
289 int *private_data_ptrs;
290 /* Tells whether the capturing bracket is optimized. */
291 pcre_uint8 *optimized_cbracket;
292 /* Starting offset of private data for capturing brackets. */
293 int cbraptr;
294 /* OVector starting point. Must be divisible by 2. */
295 int ovector_start;
296 /* Last known position of the requested byte. */
297 int req_char_ptr;
298 /* Head of the last recursion. */
299 int recursive_head_ptr;
300 /* First inspected character for partial matching. */
301 int start_used_ptr;
302 /* Starting pointer for partial soft matches. */
303 int hit_start;
304 /* End pointer of the first line. */
305 int first_line_end;
306 /* Points to the marked string. */
307 int mark_ptr;
308 /* Points to the last matched capture block index. */
309 int capture_last_ptr;
310
311 /* Flipped and lower case tables. */
312 const pcre_uint8 *fcc;
313 sljit_sw lcc;
314 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
315 int mode;
316 /* Newline control. */
317 int nltype;
318 int newline;
319 int bsr_nltype;
320 /* Dollar endonly. */
321 int endonly;
322 BOOL has_set_som;
323 /* Tables. */
324 sljit_sw ctypes;
325 int digits[2 + MAX_RANGE_SIZE];
326 /* Named capturing brackets. */
327 sljit_uw name_table;
328 sljit_sw name_count;
329 sljit_sw name_entry_size;
330
331 /* Labels and jump lists. */
332 struct sljit_label *partialmatchlabel;
333 struct sljit_label *quit_label;
334 struct sljit_label *forced_quit_label;
335 struct sljit_label *accept_label;
336 stub_list *stubs;
337 recurse_entry *entries;
338 recurse_entry *currententry;
339 jump_list *partialmatch;
340 jump_list *quit;
341 jump_list *forced_quit;
342 jump_list *accept;
343 jump_list *calllimit;
344 jump_list *stackalloc;
345 jump_list *revertframes;
346 jump_list *wordboundary;
347 jump_list *anynewline;
348 jump_list *hspace;
349 jump_list *vspace;
350 jump_list *casefulcmp;
351 jump_list *caselesscmp;
352 BOOL jscript_compat;
353 #ifdef SUPPORT_UTF
354 BOOL utf;
355 #ifdef SUPPORT_UCP
356 BOOL use_ucp;
357 #endif
358 #ifndef COMPILE_PCRE32
359 jump_list *utfreadchar;
360 #endif
361 #ifdef COMPILE_PCRE8
362 jump_list *utfreadtype8;
363 #endif
364 #endif /* SUPPORT_UTF */
365 #ifdef SUPPORT_UCP
366 jump_list *getucd;
367 #endif
368 } compiler_common;
369
/* Context used by byte_sequence_compare. When SLJIT_UNALIGNED is set it
also carries two unions (c and oc) that expose the same storage as an
int, a short or an array of character units. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
403
/* Drop the CMP macro inherited from sljit; this file defines its own
CMP shortcut further down. */
#undef CMP

/* Byte offset of the i-th element of the stack: element 0 lives at
-sizeof(sljit_sw), element 1 one machine word below it, and so on. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Symbolic names for the sljit registers used by the generated code. */
#define TMP1          SLJIT_SCRATCH_REG1
#define TMP2          SLJIT_SCRATCH_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_SCRATCH_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
420
/* Layout of the local space, in machine words. */
/* Words 0 and 1 are free for use by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Words 2 and 3 belong to possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Word 4 holds the maximum limit of recursions. */
#define CALL_LIMIT       (4 * sizeof(sljit_sw))
/* The output vector is kept on the stack and contains pointers to
characters. Its data is divided into two groups: the first holds the
start / end character pointers, the second holds the start pointers
of capturing groups whose end has not yet been reached. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbraptr + (i) * sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
438
439 #if defined COMPILE_PCRE8
440 #define MOV_UCHAR SLJIT_MOV_UB
441 #define MOVU_UCHAR SLJIT_MOVU_UB
442 #elif defined COMPILE_PCRE16
443 #define MOV_UCHAR SLJIT_MOV_UH
444 #define MOVU_UCHAR SLJIT_MOVU_UH
445 #elif defined COMPILE_PCRE32
446 #define MOV_UCHAR SLJIT_MOV_UI
447 #define MOVU_UCHAR SLJIT_MOVU_UI
448 #else
449 #error Unsupported compiling mode
450 #endif
451
/* Shorthand wrappers around the sljit emitter functions. All of them,
except DEFINE_COMPILER itself, expect a local variable named "compiler"
(which DEFINE_COMPILER declares from common->compiler). */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emit a jump and bind it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an earlier jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emit a compare-and-jump and bind it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
477
478 static pcre_uchar* bracketend(pcre_uchar* cc)
479 {
480 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
481 do cc += GET(cc, 1); while (*cc == OP_ALT);
482 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
483 cc += 1 + LINK_SIZE;
484 return cc;
485 }
486
487 /* Functions which might need modification for all new supported opcodes:
488 next_opcode
489 get_private_data_length
490 set_private_data_ptrs
491 get_framesize
492 init_frame
493 get_private_data_length_for_copy
494 copy_private_data
495 compile_matchingpath
496 compile_backtrackingpath
497 */
498
499 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
500 {
501 SLJIT_UNUSED_ARG(common);
502 switch(*cc)
503 {
504 case OP_SOD:
505 case OP_SOM:
506 case OP_SET_SOM:
507 case OP_NOT_WORD_BOUNDARY:
508 case OP_WORD_BOUNDARY:
509 case OP_NOT_DIGIT:
510 case OP_DIGIT:
511 case OP_NOT_WHITESPACE:
512 case OP_WHITESPACE:
513 case OP_NOT_WORDCHAR:
514 case OP_WORDCHAR:
515 case OP_ANY:
516 case OP_ALLANY:
517 case OP_ANYNL:
518 case OP_NOT_HSPACE:
519 case OP_HSPACE:
520 case OP_NOT_VSPACE:
521 case OP_VSPACE:
522 case OP_EXTUNI:
523 case OP_EODN:
524 case OP_EOD:
525 case OP_CIRC:
526 case OP_CIRCM:
527 case OP_DOLL:
528 case OP_DOLLM:
529 case OP_TYPESTAR:
530 case OP_TYPEMINSTAR:
531 case OP_TYPEPLUS:
532 case OP_TYPEMINPLUS:
533 case OP_TYPEQUERY:
534 case OP_TYPEMINQUERY:
535 case OP_TYPEPOSSTAR:
536 case OP_TYPEPOSPLUS:
537 case OP_TYPEPOSQUERY:
538 case OP_CRSTAR:
539 case OP_CRMINSTAR:
540 case OP_CRPLUS:
541 case OP_CRMINPLUS:
542 case OP_CRQUERY:
543 case OP_CRMINQUERY:
544 case OP_DEF:
545 case OP_BRAZERO:
546 case OP_BRAMINZERO:
547 case OP_BRAPOSZERO:
548 case OP_COMMIT:
549 case OP_FAIL:
550 case OP_ACCEPT:
551 case OP_ASSERT_ACCEPT:
552 case OP_SKIPZERO:
553 return cc + 1;
554
555 case OP_ANYBYTE:
556 #ifdef SUPPORT_UTF
557 if (common->utf) return NULL;
558 #endif
559 return cc + 1;
560
561 case OP_CHAR:
562 case OP_CHARI:
563 case OP_NOT:
564 case OP_NOTI:
565 case OP_STAR:
566 case OP_MINSTAR:
567 case OP_PLUS:
568 case OP_MINPLUS:
569 case OP_QUERY:
570 case OP_MINQUERY:
571 case OP_POSSTAR:
572 case OP_POSPLUS:
573 case OP_POSQUERY:
574 case OP_STARI:
575 case OP_MINSTARI:
576 case OP_PLUSI:
577 case OP_MINPLUSI:
578 case OP_QUERYI:
579 case OP_MINQUERYI:
580 case OP_POSSTARI:
581 case OP_POSPLUSI:
582 case OP_POSQUERYI:
583 case OP_NOTSTAR:
584 case OP_NOTMINSTAR:
585 case OP_NOTPLUS:
586 case OP_NOTMINPLUS:
587 case OP_NOTQUERY:
588 case OP_NOTMINQUERY:
589 case OP_NOTPOSSTAR:
590 case OP_NOTPOSPLUS:
591 case OP_NOTPOSQUERY:
592 case OP_NOTSTARI:
593 case OP_NOTMINSTARI:
594 case OP_NOTPLUSI:
595 case OP_NOTMINPLUSI:
596 case OP_NOTQUERYI:
597 case OP_NOTMINQUERYI:
598 case OP_NOTPOSSTARI:
599 case OP_NOTPOSPLUSI:
600 case OP_NOTPOSQUERYI:
601 cc += 2;
602 #ifdef SUPPORT_UTF
603 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
604 #endif
605 return cc;
606
607 case OP_UPTO:
608 case OP_MINUPTO:
609 case OP_EXACT:
610 case OP_POSUPTO:
611 case OP_UPTOI:
612 case OP_MINUPTOI:
613 case OP_EXACTI:
614 case OP_POSUPTOI:
615 case OP_NOTUPTO:
616 case OP_NOTMINUPTO:
617 case OP_NOTEXACT:
618 case OP_NOTPOSUPTO:
619 case OP_NOTUPTOI:
620 case OP_NOTMINUPTOI:
621 case OP_NOTEXACTI:
622 case OP_NOTPOSUPTOI:
623 cc += 2 + IMM2_SIZE;
624 #ifdef SUPPORT_UTF
625 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
626 #endif
627 return cc;
628
629 case OP_NOTPROP:
630 case OP_PROP:
631 return cc + 1 + 2;
632
633 case OP_TYPEUPTO:
634 case OP_TYPEMINUPTO:
635 case OP_TYPEEXACT:
636 case OP_TYPEPOSUPTO:
637 case OP_REF:
638 case OP_REFI:
639 case OP_CREF:
640 case OP_NCREF:
641 case OP_RREF:
642 case OP_NRREF:
643 case OP_CLOSE:
644 cc += 1 + IMM2_SIZE;
645 return cc;
646
647 case OP_CRRANGE:
648 case OP_CRMINRANGE:
649 return cc + 1 + 2 * IMM2_SIZE;
650
651 case OP_CLASS:
652 case OP_NCLASS:
653 return cc + 1 + 32 / sizeof(pcre_uchar);
654
655 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
656 case OP_XCLASS:
657 return cc + GET(cc, 1);
658 #endif
659
660 case OP_RECURSE:
661 case OP_ASSERT:
662 case OP_ASSERT_NOT:
663 case OP_ASSERTBACK:
664 case OP_ASSERTBACK_NOT:
665 case OP_REVERSE:
666 case OP_ONCE:
667 case OP_ONCE_NC:
668 case OP_BRA:
669 case OP_BRAPOS:
670 case OP_COND:
671 case OP_SBRA:
672 case OP_SBRAPOS:
673 case OP_SCOND:
674 case OP_ALT:
675 case OP_KET:
676 case OP_KETRMAX:
677 case OP_KETRMIN:
678 case OP_KETRPOS:
679 return cc + 1 + LINK_SIZE;
680
681 case OP_CBRA:
682 case OP_CBRAPOS:
683 case OP_SCBRA:
684 case OP_SCBRAPOS:
685 return cc + 1 + LINK_SIZE + IMM2_SIZE;
686
687 case OP_MARK:
688 return cc + 1 + 2 + cc[1];
689
690 case OP_CALLOUT:
691 return cc + 2 + 2 * LINK_SIZE;
692
693 default:
694 return NULL;
695 }
696 }
697
/* Case label groups shared by get_private_data_length and
set_private_data_ptrs. Those functions reserve one private data word
for the "_1" groups and two words for the "_2A" / "_2B" groups; the
"_2B" opcodes additionally carry an IMM2_SIZE repeat count. */

/* Character iterators: one word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Character iterators: two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Character iterators with a repeat count: two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Type iterators: one word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Type iterators: two words (unless the type is OP_ANYNL or OP_EXTUNI). */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Type iterators with a repeat count: two words (same exception). */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
749
750 static int get_class_iterator_size(pcre_uchar *cc)
751 {
752 switch(*cc)
753 {
754 case OP_CRSTAR:
755 case OP_CRPLUS:
756 return 2;
757
758 case OP_CRMINSTAR:
759 case OP_CRMINPLUS:
760 case OP_CRQUERY:
761 case OP_CRMINQUERY:
762 return 1;
763
764 case OP_CRRANGE:
765 case OP_CRMINRANGE:
766 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
767 return 0;
768 return 2;
769
770 default:
771 return 0;
772 }
773 }
774
775 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
776 {
777 int private_data_length = 0;
778 pcre_uchar *alternative;
779 pcre_uchar *name;
780 pcre_uchar *end = NULL;
781 int space, size, i;
782 pcre_uint32 bracketlen;
783
784 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
785 while (cc < ccend)
786 {
787 space = 0;
788 size = 0;
789 bracketlen = 0;
790 switch(*cc)
791 {
792 case OP_SET_SOM:
793 common->has_set_som = TRUE;
794 cc += 1;
795 break;
796
797 case OP_REF:
798 case OP_REFI:
799 common->optimized_cbracket[GET2(cc, 1)] = 0;
800 cc += 1 + IMM2_SIZE;
801 break;
802
803 case OP_ASSERT:
804 case OP_ASSERT_NOT:
805 case OP_ASSERTBACK:
806 case OP_ASSERTBACK_NOT:
807 case OP_ONCE:
808 case OP_ONCE_NC:
809 case OP_BRAPOS:
810 case OP_SBRA:
811 case OP_SBRAPOS:
812 private_data_length += sizeof(sljit_sw);
813 bracketlen = 1 + LINK_SIZE;
814 break;
815
816 case OP_CBRAPOS:
817 case OP_SCBRAPOS:
818 private_data_length += sizeof(sljit_sw);
819 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
820 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
821 break;
822
823 case OP_COND:
824 case OP_SCOND:
825 /* Only AUTO_CALLOUT can insert this opcode. We do
826 not intend to support this case. */
827 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
828 return -1;
829
830 if (*cc == OP_COND)
831 {
832 /* Might be a hidden SCOND. */
833 alternative = cc + GET(cc, 1);
834 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
835 private_data_length += sizeof(sljit_sw);
836 }
837 else
838 private_data_length += sizeof(sljit_sw);
839 bracketlen = 1 + LINK_SIZE;
840 break;
841
842 case OP_CREF:
843 i = GET2(cc, 1);
844 common->optimized_cbracket[i] = 0;
845 cc += 1 + IMM2_SIZE;
846 break;
847
848 case OP_NCREF:
849 bracketlen = GET2(cc, 1);
850 name = (pcre_uchar *)common->name_table;
851 alternative = name;
852 for (i = 0; i < common->name_count; i++)
853 {
854 if (GET2(name, 0) == bracketlen) break;
855 name += common->name_entry_size;
856 }
857 SLJIT_ASSERT(i != common->name_count);
858
859 for (i = 0; i < common->name_count; i++)
860 {
861 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
862 common->optimized_cbracket[GET2(alternative, 0)] = 0;
863 alternative += common->name_entry_size;
864 }
865 bracketlen = 0;
866 cc += 1 + IMM2_SIZE;
867 break;
868
869 case OP_BRA:
870 bracketlen = 1 + LINK_SIZE;
871 break;
872
873 case OP_CBRA:
874 case OP_SCBRA:
875 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
876 break;
877
878 CASE_ITERATOR_PRIVATE_DATA_1
879 space = 1;
880 size = -2;
881 break;
882
883 CASE_ITERATOR_PRIVATE_DATA_2A
884 space = 2;
885 size = -2;
886 break;
887
888 CASE_ITERATOR_PRIVATE_DATA_2B
889 space = 2;
890 size = -(2 + IMM2_SIZE);
891 break;
892
893 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
894 space = 1;
895 size = 1;
896 break;
897
898 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
899 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
900 space = 2;
901 size = 1;
902 break;
903
904 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
905 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
906 space = 2;
907 size = 1 + IMM2_SIZE;
908 break;
909
910 case OP_CLASS:
911 case OP_NCLASS:
912 size += 1 + 32 / sizeof(pcre_uchar);
913 space = get_class_iterator_size(cc + size);
914 break;
915
916 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
917 case OP_XCLASS:
918 size = GET(cc, 1);
919 space = get_class_iterator_size(cc + size);
920 break;
921 #endif
922
923 case OP_RECURSE:
924 /* Set its value only once. */
925 if (common->recursive_head_ptr == 0)
926 {
927 common->recursive_head_ptr = common->ovector_start;
928 common->ovector_start += sizeof(sljit_sw);
929 }
930 cc += 1 + LINK_SIZE;
931 break;
932
933 case OP_CALLOUT:
934 if (common->capture_last_ptr == 0)
935 {
936 common->capture_last_ptr = common->ovector_start;
937 common->ovector_start += sizeof(sljit_sw);
938 }
939 cc += 2 + 2 * LINK_SIZE;
940 break;
941
942 case OP_MARK:
943 if (common->mark_ptr == 0)
944 {
945 common->mark_ptr = common->ovector_start;
946 common->ovector_start += sizeof(sljit_sw);
947 }
948 cc += 1 + 2 + cc[1];
949 break;
950
951 default:
952 cc = next_opcode(common, cc);
953 if (cc == NULL)
954 return -1;
955 break;
956 }
957
958 if (space > 0 && cc >= end)
959 private_data_length += sizeof(sljit_sw) * space;
960
961 if (size != 0)
962 {
963 if (size < 0)
964 {
965 cc += -size;
966 #ifdef SUPPORT_UTF
967 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
968 #endif
969 }
970 else
971 cc += size;
972 }
973
974 if (bracketlen != 0)
975 {
976 if (cc >= end)
977 {
978 end = bracketend(cc);
979 if (end[-1 - LINK_SIZE] == OP_KET)
980 end = NULL;
981 }
982 cc += bracketlen;
983 }
984 }
985 return private_data_length;
986 }
987
988 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
989 {
990 pcre_uchar *cc = common->start;
991 pcre_uchar *alternative;
992 pcre_uchar *end = NULL;
993 int space, size, bracketlen;
994
995 while (cc < ccend)
996 {
997 space = 0;
998 size = 0;
999 bracketlen = 0;
1000 switch(*cc)
1001 {
1002 case OP_ASSERT:
1003 case OP_ASSERT_NOT:
1004 case OP_ASSERTBACK:
1005 case OP_ASSERTBACK_NOT:
1006 case OP_ONCE:
1007 case OP_ONCE_NC:
1008 case OP_BRAPOS:
1009 case OP_SBRA:
1010 case OP_SBRAPOS:
1011 case OP_SCOND:
1012 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1013 private_data_ptr += sizeof(sljit_sw);
1014 bracketlen = 1 + LINK_SIZE;
1015 break;
1016
1017 case OP_CBRAPOS:
1018 case OP_SCBRAPOS:
1019 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1020 private_data_ptr += sizeof(sljit_sw);
1021 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1022 break;
1023
1024 case OP_COND:
1025 /* Might be a hidden SCOND. */
1026 alternative = cc + GET(cc, 1);
1027 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1028 {
1029 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1030 private_data_ptr += sizeof(sljit_sw);
1031 }
1032 bracketlen = 1 + LINK_SIZE;
1033 break;
1034
1035 case OP_BRA:
1036 bracketlen = 1 + LINK_SIZE;
1037 break;
1038
1039 case OP_CBRA:
1040 case OP_SCBRA:
1041 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1042 break;
1043
1044 CASE_ITERATOR_PRIVATE_DATA_1
1045 space = 1;
1046 size = -2;
1047 break;
1048
1049 CASE_ITERATOR_PRIVATE_DATA_2A
1050 space = 2;
1051 size = -2;
1052 break;
1053
1054 CASE_ITERATOR_PRIVATE_DATA_2B
1055 space = 2;
1056 size = -(2 + IMM2_SIZE);
1057 break;
1058
1059 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1060 space = 1;
1061 size = 1;
1062 break;
1063
1064 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1065 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1066 space = 2;
1067 size = 1;
1068 break;
1069
1070 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1071 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1072 space = 2;
1073 size = 1 + IMM2_SIZE;
1074 break;
1075
1076 case OP_CLASS:
1077 case OP_NCLASS:
1078 size += 1 + 32 / sizeof(pcre_uchar);
1079 space = get_class_iterator_size(cc + size);
1080 break;
1081
1082 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1083 case OP_XCLASS:
1084 size = GET(cc, 1);
1085 space = get_class_iterator_size(cc + size);
1086 break;
1087 #endif
1088
1089 default:
1090 cc = next_opcode(common, cc);
1091 SLJIT_ASSERT(cc != NULL);
1092 break;
1093 }
1094
1095 if (space > 0 && cc >= end)
1096 {
1097 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1098 private_data_ptr += sizeof(sljit_sw) * space;
1099 }
1100
1101 if (size != 0)
1102 {
1103 if (size < 0)
1104 {
1105 cc += -size;
1106 #ifdef SUPPORT_UTF
1107 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1108 #endif
1109 }
1110 else
1111 cc += size;
1112 }
1113
1114 if (bracketlen > 0)
1115 {
1116 if (cc >= end)
1117 {
1118 end = bracketend(cc);
1119 if (end[-1 - LINK_SIZE] == OP_KET)
1120 end = NULL;
1121 }
1122 cc += bracketlen;
1123 }
1124 }
1125 }
1126
1127 /* Returns with -1 if no need for frame. */
1128 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1129 {
1130 pcre_uchar *ccend = bracketend(cc);
1131 int length = 0;
1132 int possessive = 0;
1133 BOOL setsom_found = recursive;
1134 BOOL setmark_found = recursive;
1135 /* The last capture is a local variable even for recursions. */
1136 BOOL capture_last_found = FALSE;
1137
1138 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1139 {
1140 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1141 /* This is correct regardless of common->capture_last_ptr. */
1142 capture_last_found = TRUE;
1143 }
1144
1145 cc = next_opcode(common, cc);
1146 SLJIT_ASSERT(cc != NULL);
1147 while (cc < ccend)
1148 switch(*cc)
1149 {
1150 case OP_SET_SOM:
1151 SLJIT_ASSERT(common->has_set_som);
1152 if (!setsom_found)
1153 {
1154 length += 2;
1155 setsom_found = TRUE;
1156 }
1157 cc += 1;
1158 break;
1159
1160 case OP_MARK:
1161 SLJIT_ASSERT(common->mark_ptr != 0);
1162 if (!setmark_found)
1163 {
1164 length += 2;
1165 setmark_found = TRUE;
1166 }
1167 cc += 1 + 2 + cc[1];
1168 break;
1169
1170 case OP_RECURSE:
1171 if (common->has_set_som && !setsom_found)
1172 {
1173 length += 2;
1174 setsom_found = TRUE;
1175 }
1176 if (common->mark_ptr != 0 && !setmark_found)
1177 {
1178 length += 2;
1179 setmark_found = TRUE;
1180 }
1181 if (common->capture_last_ptr != 0 && !capture_last_found)
1182 {
1183 length += 2;
1184 capture_last_found = TRUE;
1185 }
1186 cc += 1 + LINK_SIZE;
1187 break;
1188
1189 case OP_CBRA:
1190 case OP_CBRAPOS:
1191 case OP_SCBRA:
1192 case OP_SCBRAPOS:
1193 if (common->capture_last_ptr != 0 && !capture_last_found)
1194 {
1195 length += 2;
1196 capture_last_found = TRUE;
1197 }
1198 length += 3;
1199 cc += 1 + LINK_SIZE + IMM2_SIZE;
1200 break;
1201
1202 default:
1203 cc = next_opcode(common, cc);
1204 SLJIT_ASSERT(cc != NULL);
1205 break;
1206 }
1207
1208 /* Possessive quantifiers can use a special case. */
1209 if (SLJIT_UNLIKELY(possessive == length))
1210 return -1;
1211
1212 if (length > 0)
1213 return length + 1;
1214 return -1;
1215 }
1216
1217 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1218 {
1219 DEFINE_COMPILER;
1220 pcre_uchar *ccend = bracketend(cc);
1221 BOOL setsom_found = recursive;
1222 BOOL setmark_found = recursive;
1223 /* The last capture is a local variable even for recursions. */
1224 BOOL capture_last_found = FALSE;
1225 int offset;
1226
1227 /* >= 1 + shortest item size (2) */
1228 SLJIT_UNUSED_ARG(stacktop);
1229 SLJIT_ASSERT(stackpos >= stacktop + 2);
1230
1231 stackpos = STACK(stackpos);
1232 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1233 cc = next_opcode(common, cc);
1234 SLJIT_ASSERT(cc != NULL);
1235 while (cc < ccend)
1236 switch(*cc)
1237 {
1238 case OP_SET_SOM:
1239 SLJIT_ASSERT(common->has_set_som);
1240 if (!setsom_found)
1241 {
1242 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1243 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1244 stackpos += (int)sizeof(sljit_sw);
1245 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1246 stackpos += (int)sizeof(sljit_sw);
1247 setsom_found = TRUE;
1248 }
1249 cc += 1;
1250 break;
1251
1252 case OP_MARK:
1253 SLJIT_ASSERT(common->mark_ptr != 0);
1254 if (!setmark_found)
1255 {
1256 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1258 stackpos += (int)sizeof(sljit_sw);
1259 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1260 stackpos += (int)sizeof(sljit_sw);
1261 setmark_found = TRUE;
1262 }
1263 cc += 1 + 2 + cc[1];
1264 break;
1265
1266 case OP_RECURSE:
1267 if (common->has_set_som && !setsom_found)
1268 {
1269 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1270 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1271 stackpos += (int)sizeof(sljit_sw);
1272 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1273 stackpos += (int)sizeof(sljit_sw);
1274 setsom_found = TRUE;
1275 }
1276 if (common->mark_ptr != 0 && !setmark_found)
1277 {
1278 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1279 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1280 stackpos += (int)sizeof(sljit_sw);
1281 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1282 stackpos += (int)sizeof(sljit_sw);
1283 setmark_found = TRUE;
1284 }
1285 if (common->capture_last_ptr != 0 && !capture_last_found)
1286 {
1287 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1288 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1289 stackpos += (int)sizeof(sljit_sw);
1290 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1291 stackpos += (int)sizeof(sljit_sw);
1292 capture_last_found = TRUE;
1293 }
1294 cc += 1 + LINK_SIZE;
1295 break;
1296
1297 case OP_CBRA:
1298 case OP_CBRAPOS:
1299 case OP_SCBRA:
1300 case OP_SCBRAPOS:
1301 if (common->capture_last_ptr != 0 && !capture_last_found)
1302 {
1303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1304 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1305 stackpos += (int)sizeof(sljit_sw);
1306 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1307 stackpos += (int)sizeof(sljit_sw);
1308 capture_last_found = TRUE;
1309 }
1310 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1311 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1312 stackpos += (int)sizeof(sljit_sw);
1313 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1314 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1315 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1316 stackpos += (int)sizeof(sljit_sw);
1317 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1318 stackpos += (int)sizeof(sljit_sw);
1319
1320 cc += 1 + LINK_SIZE + IMM2_SIZE;
1321 break;
1322
1323 default:
1324 cc = next_opcode(common, cc);
1325 SLJIT_ASSERT(cc != NULL);
1326 break;
1327 }
1328
1329 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1330 SLJIT_ASSERT(stackpos == STACK(stacktop));
1331 }
1332
1333 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1334 {
1335 int private_data_length = 2;
1336 int size;
1337 pcre_uchar *alternative;
1338 /* Calculate the sum of the private machine words. */
1339 while (cc < ccend)
1340 {
1341 size = 0;
1342 switch(*cc)
1343 {
1344 case OP_ASSERT:
1345 case OP_ASSERT_NOT:
1346 case OP_ASSERTBACK:
1347 case OP_ASSERTBACK_NOT:
1348 case OP_ONCE:
1349 case OP_ONCE_NC:
1350 case OP_BRAPOS:
1351 case OP_SBRA:
1352 case OP_SBRAPOS:
1353 case OP_SCOND:
1354 private_data_length++;
1355 cc += 1 + LINK_SIZE;
1356 break;
1357
1358 case OP_CBRA:
1359 case OP_SCBRA:
1360 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1361 private_data_length++;
1362 cc += 1 + LINK_SIZE + IMM2_SIZE;
1363 break;
1364
1365 case OP_CBRAPOS:
1366 case OP_SCBRAPOS:
1367 private_data_length += 2;
1368 cc += 1 + LINK_SIZE + IMM2_SIZE;
1369 break;
1370
1371 case OP_COND:
1372 /* Might be a hidden SCOND. */
1373 alternative = cc + GET(cc, 1);
1374 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1375 private_data_length++;
1376 cc += 1 + LINK_SIZE;
1377 break;
1378
1379 CASE_ITERATOR_PRIVATE_DATA_1
1380 if (PRIVATE_DATA(cc))
1381 private_data_length++;
1382 cc += 2;
1383 #ifdef SUPPORT_UTF
1384 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1385 #endif
1386 break;
1387
1388 CASE_ITERATOR_PRIVATE_DATA_2A
1389 if (PRIVATE_DATA(cc))
1390 private_data_length += 2;
1391 cc += 2;
1392 #ifdef SUPPORT_UTF
1393 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1394 #endif
1395 break;
1396
1397 CASE_ITERATOR_PRIVATE_DATA_2B
1398 if (PRIVATE_DATA(cc))
1399 private_data_length += 2;
1400 cc += 2 + IMM2_SIZE;
1401 #ifdef SUPPORT_UTF
1402 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1403 #endif
1404 break;
1405
1406 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1407 if (PRIVATE_DATA(cc))
1408 private_data_length++;
1409 cc += 1;
1410 break;
1411
1412 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1413 if (PRIVATE_DATA(cc))
1414 private_data_length += 2;
1415 cc += 1;
1416 break;
1417
1418 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1419 if (PRIVATE_DATA(cc))
1420 private_data_length += 2;
1421 cc += 1 + IMM2_SIZE;
1422 break;
1423
1424 case OP_CLASS:
1425 case OP_NCLASS:
1426 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1427 case OP_XCLASS:
1428 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1429 #else
1430 size = 1 + 32 / (int)sizeof(pcre_uchar);
1431 #endif
1432 if (PRIVATE_DATA(cc))
1433 private_data_length += get_class_iterator_size(cc + size);
1434 cc += size;
1435 break;
1436
1437 default:
1438 cc = next_opcode(common, cc);
1439 SLJIT_ASSERT(cc != NULL);
1440 break;
1441 }
1442 }
1443 SLJIT_ASSERT(cc == ccend);
1444 return private_data_length;
1445 }
1446
1447 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1448 BOOL save, int stackptr, int stacktop)
1449 {
1450 DEFINE_COMPILER;
1451 int srcw[2];
1452 int count, size;
1453 BOOL tmp1next = TRUE;
1454 BOOL tmp1empty = TRUE;
1455 BOOL tmp2empty = TRUE;
1456 pcre_uchar *alternative;
1457 enum {
1458 start,
1459 loop,
1460 end
1461 } status;
1462
1463 status = save ? start : loop;
1464 stackptr = STACK(stackptr - 2);
1465 stacktop = STACK(stacktop - 1);
1466
1467 if (!save)
1468 {
1469 stackptr += sizeof(sljit_sw);
1470 if (stackptr < stacktop)
1471 {
1472 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1473 stackptr += sizeof(sljit_sw);
1474 tmp1empty = FALSE;
1475 }
1476 if (stackptr < stacktop)
1477 {
1478 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1479 stackptr += sizeof(sljit_sw);
1480 tmp2empty = FALSE;
1481 }
1482 /* The tmp1next must be TRUE in either way. */
1483 }
1484
1485 while (status != end)
1486 {
1487 count = 0;
1488 switch(status)
1489 {
1490 case start:
1491 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1492 count = 1;
1493 srcw[0] = common->recursive_head_ptr;
1494 status = loop;
1495 break;
1496
1497 case loop:
1498 if (cc >= ccend)
1499 {
1500 status = end;
1501 break;
1502 }
1503
1504 switch(*cc)
1505 {
1506 case OP_ASSERT:
1507 case OP_ASSERT_NOT:
1508 case OP_ASSERTBACK:
1509 case OP_ASSERTBACK_NOT:
1510 case OP_ONCE:
1511 case OP_ONCE_NC:
1512 case OP_BRAPOS:
1513 case OP_SBRA:
1514 case OP_SBRAPOS:
1515 case OP_SCOND:
1516 count = 1;
1517 srcw[0] = PRIVATE_DATA(cc);
1518 SLJIT_ASSERT(srcw[0] != 0);
1519 cc += 1 + LINK_SIZE;
1520 break;
1521
1522 case OP_CBRA:
1523 case OP_SCBRA:
1524 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1525 {
1526 count = 1;
1527 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1528 }
1529 cc += 1 + LINK_SIZE + IMM2_SIZE;
1530 break;
1531
1532 case OP_CBRAPOS:
1533 case OP_SCBRAPOS:
1534 count = 2;
1535 srcw[0] = PRIVATE_DATA(cc);
1536 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1537 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1538 cc += 1 + LINK_SIZE + IMM2_SIZE;
1539 break;
1540
1541 case OP_COND:
1542 /* Might be a hidden SCOND. */
1543 alternative = cc + GET(cc, 1);
1544 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1545 {
1546 count = 1;
1547 srcw[0] = PRIVATE_DATA(cc);
1548 SLJIT_ASSERT(srcw[0] != 0);
1549 }
1550 cc += 1 + LINK_SIZE;
1551 break;
1552
1553 CASE_ITERATOR_PRIVATE_DATA_1
1554 if (PRIVATE_DATA(cc))
1555 {
1556 count = 1;
1557 srcw[0] = PRIVATE_DATA(cc);
1558 }
1559 cc += 2;
1560 #ifdef SUPPORT_UTF
1561 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1562 #endif
1563 break;
1564
1565 CASE_ITERATOR_PRIVATE_DATA_2A
1566 if (PRIVATE_DATA(cc))
1567 {
1568 count = 2;
1569 srcw[0] = PRIVATE_DATA(cc);
1570 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1571 }
1572 cc += 2;
1573 #ifdef SUPPORT_UTF
1574 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1575 #endif
1576 break;
1577
1578 CASE_ITERATOR_PRIVATE_DATA_2B
1579 if (PRIVATE_DATA(cc))
1580 {
1581 count = 2;
1582 srcw[0] = PRIVATE_DATA(cc);
1583 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1584 }
1585 cc += 2 + IMM2_SIZE;
1586 #ifdef SUPPORT_UTF
1587 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1588 #endif
1589 break;
1590
1591 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1592 if (PRIVATE_DATA(cc))
1593 {
1594 count = 1;
1595 srcw[0] = PRIVATE_DATA(cc);
1596 }
1597 cc += 1;
1598 break;
1599
1600 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1601 if (PRIVATE_DATA(cc))
1602 {
1603 count = 2;
1604 srcw[0] = PRIVATE_DATA(cc);
1605 srcw[1] = srcw[0] + sizeof(sljit_sw);
1606 }
1607 cc += 1;
1608 break;
1609
1610 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1611 if (PRIVATE_DATA(cc))
1612 {
1613 count = 2;
1614 srcw[0] = PRIVATE_DATA(cc);
1615 srcw[1] = srcw[0] + sizeof(sljit_sw);
1616 }
1617 cc += 1 + IMM2_SIZE;
1618 break;
1619
1620 case OP_CLASS:
1621 case OP_NCLASS:
1622 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1623 case OP_XCLASS:
1624 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1625 #else
1626 size = 1 + 32 / (int)sizeof(pcre_uchar);
1627 #endif
1628 if (PRIVATE_DATA(cc))
1629 switch(get_class_iterator_size(cc + size))
1630 {
1631 case 1:
1632 count = 1;
1633 srcw[0] = PRIVATE_DATA(cc);
1634 break;
1635
1636 case 2:
1637 count = 2;
1638 srcw[0] = PRIVATE_DATA(cc);
1639 srcw[1] = srcw[0] + sizeof(sljit_sw);
1640 break;
1641
1642 default:
1643 SLJIT_ASSERT_STOP();
1644 break;
1645 }
1646 cc += size;
1647 break;
1648
1649 default:
1650 cc = next_opcode(common, cc);
1651 SLJIT_ASSERT(cc != NULL);
1652 break;
1653 }
1654 break;
1655
1656 case end:
1657 SLJIT_ASSERT_STOP();
1658 break;
1659 }
1660
1661 while (count > 0)
1662 {
1663 count--;
1664 if (save)
1665 {
1666 if (tmp1next)
1667 {
1668 if (!tmp1empty)
1669 {
1670 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1671 stackptr += sizeof(sljit_sw);
1672 }
1673 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1674 tmp1empty = FALSE;
1675 tmp1next = FALSE;
1676 }
1677 else
1678 {
1679 if (!tmp2empty)
1680 {
1681 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1682 stackptr += sizeof(sljit_sw);
1683 }
1684 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1685 tmp2empty = FALSE;
1686 tmp1next = TRUE;
1687 }
1688 }
1689 else
1690 {
1691 if (tmp1next)
1692 {
1693 SLJIT_ASSERT(!tmp1empty);
1694 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1695 tmp1empty = stackptr >= stacktop;
1696 if (!tmp1empty)
1697 {
1698 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1699 stackptr += sizeof(sljit_sw);
1700 }
1701 tmp1next = FALSE;
1702 }
1703 else
1704 {
1705 SLJIT_ASSERT(!tmp2empty);
1706 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1707 tmp2empty = stackptr >= stacktop;
1708 if (!tmp2empty)
1709 {
1710 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1711 stackptr += sizeof(sljit_sw);
1712 }
1713 tmp1next = TRUE;
1714 }
1715 }
1716 }
1717 }
1718
1719 if (save)
1720 {
1721 if (tmp1next)
1722 {
1723 if (!tmp1empty)
1724 {
1725 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1726 stackptr += sizeof(sljit_sw);
1727 }
1728 if (!tmp2empty)
1729 {
1730 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1731 stackptr += sizeof(sljit_sw);
1732 }
1733 }
1734 else
1735 {
1736 if (!tmp2empty)
1737 {
1738 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1739 stackptr += sizeof(sljit_sw);
1740 }
1741 if (!tmp1empty)
1742 {
1743 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1744 stackptr += sizeof(sljit_sw);
1745 }
1746 }
1747 }
1748 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1749 }
1750
1751 #undef CASE_ITERATOR_PRIVATE_DATA_1
1752 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1753 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1754 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1755 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1756 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1757
1758 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1759 {
1760 return (value & (value - 1)) == 0;
1761 }
1762
1763 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1764 {
1765 while (list)
1766 {
1767 /* sljit_set_label is clever enough to do nothing
1768 if either the jump or the label is NULL. */
1769 SET_LABEL(list->jump, label);
1770 list = list->next;
1771 }
1772 }
1773
1774 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1775 {
1776 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1777 if (list_item)
1778 {
1779 list_item->next = *list;
1780 list_item->jump = jump;
1781 *list = list_item;
1782 }
1783 }
1784
1785 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1786 {
1787 DEFINE_COMPILER;
1788 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1789
1790 if (list_item)
1791 {
1792 list_item->type = type;
1793 list_item->data = data;
1794 list_item->start = start;
1795 list_item->quit = LABEL();
1796 list_item->next = common->stubs;
1797 common->stubs = list_item;
1798 }
1799 }
1800
1801 static void flush_stubs(compiler_common *common)
1802 {
1803 DEFINE_COMPILER;
1804 stub_list* list_item = common->stubs;
1805
1806 while (list_item)
1807 {
1808 JUMPHERE(list_item->start);
1809 switch(list_item->type)
1810 {
1811 case stack_alloc:
1812 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1813 break;
1814 }
1815 JUMPTO(SLJIT_JUMP, list_item->quit);
1816 list_item = list_item->next;
1817 }
1818 common->stubs = NULL;
1819 }
1820
1821 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1822 {
1823 DEFINE_COMPILER;
1824
1825 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1826 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1827 }
1828
1829 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1830 {
1831 /* May destroy all locals and registers except TMP2. */
1832 DEFINE_COMPILER;
1833
1834 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1835 #ifdef DESTROY_REGISTERS
1836 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1837 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1838 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1839 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1840 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1841 #endif
1842 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1843 }
1844
1845 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1846 {
1847 DEFINE_COMPILER;
1848 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1849 }
1850
1851 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1852 {
1853 DEFINE_COMPILER;
1854 struct sljit_label *loop;
1855 int i;
1856 /* At this point we can freely use all temporary registers. */
1857 /* TMP1 returns with begin - 1. */
1858 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1859 if (length < 8)
1860 {
1861 for (i = 0; i < length; i++)
1862 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1863 }
1864 else
1865 {
1866 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START - sizeof(sljit_sw));
1867 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length);
1868 loop = LABEL();
1869 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1870 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1871 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1872 }
1873 }
1874
1875 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1876 {
1877 DEFINE_COMPILER;
1878 struct sljit_label *loop;
1879 struct sljit_jump *early_quit;
1880
1881 /* At this point we can freely use all registers. */
1882 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1883 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1884
1885 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
1886 if (common->mark_ptr != 0)
1887 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1888 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
1889 if (common->mark_ptr != 0)
1890 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
1891 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1892 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1893 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1894 /* Unlikely, but possible */
1895 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
1896 loop = LABEL();
1897 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
1898 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
1899 /* Copy the integer value to the output buffer */
1900 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1901 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1902 #endif
1903 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1904 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1905 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1906 JUMPHERE(early_quit);
1907
1908 /* Calculate the return value, which is the maximum ovector value. */
1909 if (topbracket > 1)
1910 {
1911 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
1912 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
1913
1914 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1915 loop = LABEL();
1916 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
1917 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1918 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1919 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
1920 }
1921 else
1922 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1923 }
1924
1925 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1926 {
1927 DEFINE_COMPILER;
1928
1929 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1930 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1931
1932 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
1933 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1934 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offset_count));
1935 CMPTO(SLJIT_C_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
1936
1937 /* Store match begin and end. */
1938 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1939 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1940 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1941 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1942 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1943 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1944 #endif
1945 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1946
1947 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
1948 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1949 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
1950 #endif
1951 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
1952
1953 JUMPTO(SLJIT_JUMP, quit);
1954 }
1955
1956 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1957 {
1958 /* May destroy TMP1. */
1959 DEFINE_COMPILER;
1960 struct sljit_jump *jump;
1961
1962 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1963 {
1964 /* The value of -1 must be kept for start_used_ptr! */
1965 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1966 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1967 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1968 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1969 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1970 JUMPHERE(jump);
1971 }
1972 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1973 {
1974 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1975 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1976 JUMPHERE(jump);
1977 }
1978 }
1979
1980 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1981 {
1982 /* Detects if the character has an othercase. */
1983 unsigned int c;
1984
1985 #ifdef SUPPORT_UTF
1986 if (common->utf)
1987 {
1988 GETCHAR(c, cc);
1989 if (c > 127)
1990 {
1991 #ifdef SUPPORT_UCP
1992 return c != UCD_OTHERCASE(c);
1993 #else
1994 return FALSE;
1995 #endif
1996 }
1997 #ifndef COMPILE_PCRE8
1998 return common->fcc[c] != c;
1999 #endif
2000 }
2001 else
2002 #endif
2003 c = *cc;
2004 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2005 }
2006
2007 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2008 {
2009 /* Returns with the othercase. */
2010 #ifdef SUPPORT_UTF
2011 if (common->utf && c > 127)
2012 {
2013 #ifdef SUPPORT_UCP
2014 return UCD_OTHERCASE(c);
2015 #else
2016 return c;
2017 #endif
2018 }
2019 #endif
2020 return TABLE_GET(c, common->fcc, c);
2021 }
2022
2023 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2024 {
2025 /* Detects if the character and its othercase has only 1 bit difference. */
2026 unsigned int c, oc, bit;
2027 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2028 int n;
2029 #endif
2030
2031 #ifdef SUPPORT_UTF
2032 if (common->utf)
2033 {
2034 GETCHAR(c, cc);
2035 if (c <= 127)
2036 oc = common->fcc[c];
2037 else
2038 {
2039 #ifdef SUPPORT_UCP
2040 oc = UCD_OTHERCASE(c);
2041 #else
2042 oc = c;
2043 #endif
2044 }
2045 }
2046 else
2047 {
2048 c = *cc;
2049 oc = TABLE_GET(c, common->fcc, c);
2050 }
2051 #else
2052 c = *cc;
2053 oc = TABLE_GET(c, common->fcc, c);
2054 #endif
2055
2056 SLJIT_ASSERT(c != oc);
2057
2058 bit = c ^ oc;
2059 /* Optimized for English alphabet. */
2060 if (c <= 127 && bit == 0x20)
2061 return (0 << 8) | 0x20;
2062
2063 /* Since c != oc, they must have at least 1 bit difference. */
2064 if (!is_powerof2(bit))
2065 return 0;
2066
2067 #if defined COMPILE_PCRE8
2068
2069 #ifdef SUPPORT_UTF
2070 if (common->utf && c > 127)
2071 {
2072 n = GET_EXTRALEN(*cc);
2073 while ((bit & 0x3f) == 0)
2074 {
2075 n--;
2076 bit >>= 6;
2077 }
2078 return (n << 8) | bit;
2079 }
2080 #endif /* SUPPORT_UTF */
2081 return (0 << 8) | bit;
2082
2083 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2084
2085 #ifdef SUPPORT_UTF
2086 if (common->utf && c > 65535)
2087 {
2088 if (bit >= (1 << 10))
2089 bit >>= 10;
2090 else
2091 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2092 }
2093 #endif /* SUPPORT_UTF */
2094 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2095
2096 #endif /* COMPILE_PCRE[8|16|32] */
2097 }
2098
2099 static void check_partial(compiler_common *common, BOOL force)
2100 {
2101 /* Checks whether a partial matching is occured. Does not modify registers. */
2102 DEFINE_COMPILER;
2103 struct sljit_jump *jump = NULL;
2104
2105 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2106
2107 if (common->mode == JIT_COMPILE)
2108 return;
2109
2110 if (!force)
2111 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2112 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2113 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2114
2115 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2117 else
2118 {
2119 if (common->partialmatchlabel != NULL)
2120 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2121 else
2122 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2123 }
2124
2125 if (jump != NULL)
2126 JUMPHERE(jump);
2127 }
2128
2129 static struct sljit_jump *check_str_end(compiler_common *common)
2130 {
2131 /* Does not affect registers. Usually used in a tight spot. */
2132 DEFINE_COMPILER;
2133 struct sljit_jump *jump;
2134 struct sljit_jump *nohit;
2135 struct sljit_jump *return_value;
2136
2137 if (common->mode == JIT_COMPILE)
2138 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2139
2140 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2141 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2142 {
2143 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2144 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2145 JUMPHERE(nohit);
2146 return_value = JUMP(SLJIT_JUMP);
2147 }
2148 else
2149 {
2150 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2151 if (common->partialmatchlabel != NULL)
2152 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2153 else
2154 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2155 }
2156 JUMPHERE(jump);
2157 return return_value;
2158 }
2159
2160 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2161 {
2162 DEFINE_COMPILER;
2163 struct sljit_jump *jump;
2164
2165 if (common->mode == JIT_COMPILE)
2166 {
2167 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2168 return;
2169 }
2170
2171 /* Partial matching mode. */
2172 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2173 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2174 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2175 {
2176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2177 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2178 }
2179 else
2180 {
2181 if (common->partialmatchlabel != NULL)
2182 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2183 else
2184 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2185 }
2186 JUMPHERE(jump);
2187 }
2188
2189 static void read_char(compiler_common *common)
2190 {
2191 /* Reads the character into TMP1, updates STR_PTR.
2192 Does not check STR_END. TMP2 Destroyed. */
2193 DEFINE_COMPILER;
2194 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2195 struct sljit_jump *jump;
2196 #endif
2197
2198 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2199 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2200 if (common->utf)
2201 {
2202 #if defined COMPILE_PCRE8
2203 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2204 #elif defined COMPILE_PCRE16
2205 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2206 #endif /* COMPILE_PCRE[8|16] */
2207 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2208 JUMPHERE(jump);
2209 }
2210 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2211 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2212 }
2213
2214 static void peek_char(compiler_common *common)
2215 {
2216 /* Reads the character into TMP1, keeps STR_PTR.
2217 Does not check STR_END. TMP2 Destroyed. */
2218 DEFINE_COMPILER;
2219 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2220 struct sljit_jump *jump;
2221 #endif
2222
2223 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2224 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2225 if (common->utf)
2226 {
2227 #if defined COMPILE_PCRE8
2228 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2229 #elif defined COMPILE_PCRE16
2230 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2231 #endif /* COMPILE_PCRE[8|16] */
2232 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2233 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2234 JUMPHERE(jump);
2235 }
2236 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2237 }
2238
2239 static void read_char8_type(compiler_common *common)
2240 {
2241 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2242 DEFINE_COMPILER;
2243 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2244 struct sljit_jump *jump;
2245 #endif
2246
2247 #ifdef SUPPORT_UTF
2248 if (common->utf)
2249 {
2250 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2251 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2252 #if defined COMPILE_PCRE8
2253 /* This can be an extra read in some situations, but hopefully
2254 it is needed in most cases. */
2255 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2256 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2257 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2258 JUMPHERE(jump);
2259 #elif defined COMPILE_PCRE16
2260 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2261 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2262 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2263 JUMPHERE(jump);
2264 /* Skip low surrogate if necessary. */
2265 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2266 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2267 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2268 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2269 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2270 #elif defined COMPILE_PCRE32
2271 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2272 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2273 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2274 JUMPHERE(jump);
2275 #endif /* COMPILE_PCRE[8|16|32] */
2276 return;
2277 }
2278 #endif /* SUPPORT_UTF */
2279 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2280 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2281 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2282 /* The ctypes array contains only 256 values. */
2283 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2284 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2285 #endif
2286 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2287 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2288 JUMPHERE(jump);
2289 #endif
2290 }
2291
2292 static void skip_char_back(compiler_common *common)
2293 {
2294 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2295 DEFINE_COMPILER;
2296 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2297 #if defined COMPILE_PCRE8
2298 struct sljit_label *label;
2299
2300 if (common->utf)
2301 {
2302 label = LABEL();
2303 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2304 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2305 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2306 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2307 return;
2308 }
2309 #elif defined COMPILE_PCRE16
2310 if (common->utf)
2311 {
2312 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2313 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2314 /* Skip low surrogate if necessary. */
2315 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2316 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2317 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2318 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2319 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2320 return;
2321 }
2322 #endif /* COMPILE_PCRE[8|16] */
2323 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2324 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2325 }
2326
2327 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2328 {
2329 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2330 DEFINE_COMPILER;
2331
2332 if (nltype == NLTYPE_ANY)
2333 {
2334 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2335 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2336 }
2337 else if (nltype == NLTYPE_ANYCRLF)
2338 {
2339 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2340 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2341 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2342 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2343 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2344 }
2345 else
2346 {
2347 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2348 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2349 }
2350 }
2351
2352 #ifdef SUPPORT_UTF
2353
2354 #if defined COMPILE_PCRE8
2355 static void do_utfreadchar(compiler_common *common)
2356 {
2357 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2358 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2359 DEFINE_COMPILER;
2360 struct sljit_jump *jump;
2361
2362 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2363 /* Searching for the first zero. */
2364 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2365 jump = JUMP(SLJIT_C_NOT_ZERO);
2366 /* Two byte sequence. */
2367 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2368 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2369 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2370 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2371 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2372 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2373 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2374 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2375 JUMPHERE(jump);
2376
2377 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2378 jump = JUMP(SLJIT_C_NOT_ZERO);
2379 /* Three byte sequence. */
2380 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2381 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2382 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2383 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2384 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2385 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2386 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2387 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2388 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2389 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2390 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2391 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2392 JUMPHERE(jump);
2393
2394 /* Four byte sequence. */
2395 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2396 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2397 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2398 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2399 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2400 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2401 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2402 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2403 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2404 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2405 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2406 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2407 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2408 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2409 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2410 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2411 }
2412
2413 static void do_utfreadtype8(compiler_common *common)
2414 {
2415 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2416 of the character (>= 0xc0). Return value in TMP1. */
2417 DEFINE_COMPILER;
2418 struct sljit_jump *jump;
2419 struct sljit_jump *compare;
2420
2421 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2422
2423 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2424 jump = JUMP(SLJIT_C_NOT_ZERO);
2425 /* Two byte sequence. */
2426 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2427 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2428 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2429 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2430 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2431 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2432 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2433 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2434 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2435
2436 JUMPHERE(compare);
2437 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2438 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2439 JUMPHERE(jump);
2440
2441 /* We only have types for characters less than 256. */
2442 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2443 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2444 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2445 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2446 }
2447
2448 #elif defined COMPILE_PCRE16
2449
2450 static void do_utfreadchar(compiler_common *common)
2451 {
2452 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2453 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2454 DEFINE_COMPILER;
2455 struct sljit_jump *jump;
2456
2457 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2458 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2459 /* Do nothing, only return. */
2460 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2461
2462 JUMPHERE(jump);
2463 /* Combine two 16 bit characters. */
2464 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2465 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2466 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2467 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2468 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2469 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2470 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2471 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2472 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2473 }
2474
2475 #endif /* COMPILE_PCRE[8|16] */
2476
2477 #endif /* SUPPORT_UTF */
2478
2479 #ifdef SUPPORT_UCP
2480
2481 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2482 #define UCD_BLOCK_MASK 127
2483 #define UCD_BLOCK_SHIFT 7
2484
2485 static void do_getucd(compiler_common *common)
2486 {
2487 /* Search the UCD record for the character comes in TMP1.
2488 Returns chartype in TMP1 and UCD offset in TMP2. */
2489 DEFINE_COMPILER;
2490
2491 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2492
2493 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2494 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2495 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2496 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2497 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2498 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2499 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2500 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2501 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2502 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2503 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2504 }
2505 #endif
2506
2507 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2508 {
2509 DEFINE_COMPILER;
2510 struct sljit_label *mainloop;
2511 struct sljit_label *newlinelabel = NULL;
2512 struct sljit_jump *start;
2513 struct sljit_jump *end = NULL;
2514 struct sljit_jump *nl = NULL;
2515 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2516 struct sljit_jump *singlechar;
2517 #endif
2518 jump_list *newline = NULL;
2519 BOOL newlinecheck = FALSE;
2520 BOOL readuchar = FALSE;
2521
2522 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2523 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2524 newlinecheck = TRUE;
2525
2526 if (firstline)
2527 {
2528 /* Search for the end of the first line. */
2529 SLJIT_ASSERT(common->first_line_end != 0);
2530 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2531
2532 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2533 {
2534 mainloop = LABEL();
2535 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2536 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2537 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2538 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2539 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2540 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2541 JUMPHERE(end);
2542 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2543 }
2544 else
2545 {
2546 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2547 mainloop = LABEL();
2548 /* Continual stores does not cause data dependency. */
2549 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2550 read_char(common);
2551 check_newlinechar(common, common->nltype, &newline, TRUE);
2552 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2553 JUMPHERE(end);
2554 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2555 set_jumps(newline, LABEL());
2556 }
2557
2558 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2559 }
2560
2561 start = JUMP(SLJIT_JUMP);
2562
2563 if (newlinecheck)
2564 {
2565 newlinelabel = LABEL();
2566 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2567 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2568 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2569 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2570 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2571 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2572 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2573 #endif
2574 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2575 nl = JUMP(SLJIT_JUMP);
2576 }
2577
2578 mainloop = LABEL();
2579
2580 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2581 #ifdef SUPPORT_UTF
2582 if (common->utf) readuchar = TRUE;
2583 #endif
2584 if (newlinecheck) readuchar = TRUE;
2585
2586 if (readuchar)
2587 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2588
2589 if (newlinecheck)
2590 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2591
2592 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2593 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2594 #if defined COMPILE_PCRE8
2595 if (common->utf)
2596 {
2597 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2598 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2599 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2600 JUMPHERE(singlechar);
2601 }
2602 #elif defined COMPILE_PCRE16
2603 if (common->utf)
2604 {
2605 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2606 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2607 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2608 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2609 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2610 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2611 JUMPHERE(singlechar);
2612 }
2613 #endif /* COMPILE_PCRE[8|16] */
2614 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2615 JUMPHERE(start);
2616
2617 if (newlinecheck)
2618 {
2619 JUMPHERE(end);
2620 JUMPHERE(nl);
2621 }
2622
2623 return mainloop;
2624 }
2625
2626 #define MAX_N_CHARS 3
2627
2628 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2629 {
2630 DEFINE_COMPILER;
2631 struct sljit_label *start;
2632 struct sljit_jump *quit;
2633 pcre_uint32 chars[MAX_N_CHARS * 2];
2634 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2635 int location = 0;
2636 pcre_int32 len, c, bit, caseless;
2637 int must_stop;
2638
2639 /* We do not support alternatives now. */
2640 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2641 return FALSE;
2642
2643 while (TRUE)
2644 {
2645 caseless = 0;
2646 must_stop = 1;
2647 switch(*cc)
2648 {
2649 case OP_CHAR:
2650 must_stop = 0;
2651 cc++;
2652 break;
2653
2654 case OP_CHARI:
2655 caseless = 1;
2656 must_stop = 0;
2657 cc++;
2658 break;
2659
2660 case OP_SOD:
2661 case OP_SOM:
2662 case OP_SET_SOM:
2663 case OP_NOT_WORD_BOUNDARY:
2664 case OP_WORD_BOUNDARY:
2665 case OP_EODN:
2666 case OP_EOD:
2667 case OP_CIRC:
2668 case OP_CIRCM:
2669 case OP_DOLL:
2670 case OP_DOLLM:
2671 /* Zero width assertions. */
2672 cc++;
2673 continue;
2674
2675 case OP_PLUS:
2676 case OP_MINPLUS:
2677 case OP_POSPLUS:
2678 cc++;
2679 break;
2680
2681 case OP_EXACT:
2682 cc += 1 + IMM2_SIZE;
2683 break;
2684
2685 case OP_PLUSI:
2686 case OP_MINPLUSI:
2687 case OP_POSPLUSI:
2688 caseless = 1;
2689 cc++;
2690 break;
2691
2692 case OP_EXACTI:
2693 caseless = 1;
2694 cc += 1 + IMM2_SIZE;
2695 break;
2696
2697 default:
2698 must_stop = 2;
2699 break;
2700 }
2701
2702 if (must_stop == 2)
2703 break;
2704
2705 len = 1;
2706 #ifdef SUPPORT_UTF
2707 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2708 #endif
2709
2710 if (caseless && char_has_othercase(common, cc))
2711 {
2712 caseless = char_get_othercase_bit(common, cc);
2713 if (caseless == 0)
2714 return FALSE;
2715 #ifdef COMPILE_PCRE8
2716 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2717 #else
2718 if ((caseless & 0x100) != 0)
2719 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2720 else
2721 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2722 #endif
2723 }
2724 else
2725 caseless = 0;
2726
2727 while (len > 0 && location < MAX_N_CHARS * 2)
2728 {
2729 c = *cc;
2730 bit = 0;
2731 if (len == (caseless & 0xff))
2732 {
2733 bit = caseless >> 8;
2734 c |= bit;
2735 }
2736
2737 chars[location] = c;
2738 chars[location + 1] = bit;
2739
2740 len--;
2741 location += 2;
2742 cc++;
2743 }
2744
2745 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2746 break;
2747 }
2748
2749 /* At least two characters are required. */
2750 if (location < 2 * 2)
2751 return FALSE;
2752
2753 if (firstline)
2754 {
2755 SLJIT_ASSERT(common->first_line_end != 0);
2756 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2757 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2758 }
2759 else
2760 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2761
2762 start = LABEL();
2763 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2764
2765 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2766 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2767 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2768 if (chars[1] != 0)
2769 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2770 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2771 if (location > 2 * 2)
2772 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2773 if (chars[3] != 0)
2774 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2775 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2776 if (location > 2 * 2)
2777 {
2778 if (chars[5] != 0)
2779 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2780 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2781 }
2782 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2783
2784 JUMPHERE(quit);
2785
2786 if (firstline)
2787 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2788 else
2789 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2790 return TRUE;
2791 }
2792
2793 #undef MAX_N_CHARS
2794
2795 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2796 {
2797 DEFINE_COMPILER;
2798 struct sljit_label *start;
2799 struct sljit_jump *quit;
2800 struct sljit_jump *found;
2801 pcre_uchar oc, bit;
2802
2803 if (firstline)
2804 {
2805 SLJIT_ASSERT(common->first_line_end != 0);
2806 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2807 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2808 }
2809
2810 start = LABEL();
2811 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2812 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2813
2814 oc = first_char;
2815 if (caseless)
2816 {
2817 oc = TABLE_GET(first_char, common->fcc, first_char);
2818 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2819 if (first_char > 127 && common->utf)
2820 oc = UCD_OTHERCASE(first_char);
2821 #endif
2822 }
2823 if (first_char == oc)
2824 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2825 else
2826 {
2827 bit = first_char ^ oc;
2828 if (is_powerof2(bit))
2829 {
2830 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2831 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2832 }
2833 else
2834 {
2835 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2836 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2837 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2838 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2839 found = JUMP(SLJIT_C_NOT_ZERO);
2840 }
2841 }
2842
2843 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2844 JUMPTO(SLJIT_JUMP, start);
2845 JUMPHERE(found);
2846 JUMPHERE(quit);
2847
2848 if (firstline)
2849 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2850 }
2851
2852 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2853 {
2854 DEFINE_COMPILER;
2855 struct sljit_label *loop;
2856 struct sljit_jump *lastchar;
2857 struct sljit_jump *firstchar;
2858 struct sljit_jump *quit;
2859 struct sljit_jump *foundcr = NULL;
2860 struct sljit_jump *notfoundnl;
2861 jump_list *newline = NULL;
2862
2863 if (firstline)
2864 {
2865 SLJIT_ASSERT(common->first_line_end != 0);
2866 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2867 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2868 }
2869
2870 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2871 {
2872 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2873 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2874 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2875 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2876 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2877
2878 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2879 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2880 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
2881 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2882 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
2883 #endif
2884 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2885
2886 loop = LABEL();
2887 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2888 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2889 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2890 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2891 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2892 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2893
2894 JUMPHERE(quit);
2895 JUMPHERE(firstchar);
2896 JUMPHERE(lastchar);
2897
2898 if (firstline)
2899 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2900 return;
2901 }
2902
2903 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2904 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2905 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2906 skip_char_back(common);
2907
2908 loop = LABEL();
2909 read_char(common);
2910 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2911 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2912 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2913 check_newlinechar(common, common->nltype, &newline, FALSE);
2914 set_jumps(newline, loop);
2915
2916 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2917 {
2918 quit = JUMP(SLJIT_JUMP);
2919 JUMPHERE(foundcr);
2920 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2921 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2922 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2923 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2924 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2925 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2926 #endif
2927 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2928 JUMPHERE(notfoundnl);
2929 JUMPHERE(quit);
2930 }
2931 JUMPHERE(lastchar);
2932 JUMPHERE(firstchar);
2933
2934 if (firstline)
2935 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2936 }
2937
2938 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
2939
2940 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2941 {
2942 DEFINE_COMPILER;
2943 struct sljit_label *start;
2944 struct sljit_jump *quit;
2945 struct sljit_jump *found = NULL;
2946 jump_list *matches = NULL;
2947 pcre_uint8 inverted_start_bits[32];
2948 int i;
2949 #ifndef COMPILE_PCRE8
2950 struct sljit_jump *jump;
2951 #endif
2952
2953 for (i = 0; i < 32; ++i)
2954 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
2955
2956 if (firstline)
2957 {
2958 SLJIT_ASSERT(common->first_line_end != 0);
2959 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
2960 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2961 }
2962
2963 start = LABEL();
2964 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2965 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2966 #ifdef SUPPORT_UTF
2967 if (common->utf)
2968 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2969 #endif
2970
2971 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
2972 {
2973 #ifndef COMPILE_PCRE8
2974 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2975 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2976 JUMPHERE(jump);
2977 #endif
2978 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2979 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2980 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2981 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2982 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2983 found = JUMP(SLJIT_C_NOT_ZERO);
2984 }
2985
2986 #ifdef SUPPORT_UTF
2987 if (common->utf)
2988 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2989 #endif
2990 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2991 #ifdef SUPPORT_UTF
2992 #if defined COMPILE_PCRE8
2993 if (common->utf)
2994 {
2995 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2996 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2997 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2998 }
2999 #elif defined COMPILE_PCRE16
3000 if (common->utf)
3001 {
3002 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3003 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3004 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3005 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3006 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3007 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3008 }
3009 #endif /* COMPILE_PCRE[8|16] */
3010 #endif /* SUPPORT_UTF */
3011 JUMPTO(SLJIT_JUMP, start);
3012 if (found != NULL)
3013 JUMPHERE(found);
3014 if (matches != NULL)
3015 set_jumps(matches, LABEL());
3016 JUMPHERE(quit);
3017
3018 if (firstline)
3019 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3020 }
3021
3022 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3023 {
3024 DEFINE_COMPILER;
3025 struct sljit_label *loop;
3026 struct sljit_jump *toolong;
3027 struct sljit_jump *alreadyfound;
3028 struct sljit_jump *found;
3029 struct sljit_jump *foundoc = NULL;
3030 struct sljit_jump *notfound;
3031 pcre_uint32 oc, bit;
3032
3033 SLJIT_ASSERT(common->req_char_ptr != 0);
3034 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3035 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3036 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3037 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3038
3039 if (has_firstchar)
3040 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3041 else
3042 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3043
3044 loop = LABEL();
3045 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3046
3047 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3048 oc = req_char;
3049 if (caseless)
3050 {
3051 oc = TABLE_GET(req_char, common->fcc, req_char);
3052 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3053 if (req_char > 127 && common->utf)
3054 oc = UCD_OTHERCASE(req_char);
3055 #endif
3056 }
3057 if (req_char == oc)
3058 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3059 else
3060 {
3061 bit = req_char ^ oc;
3062 if (is_powerof2(bit))
3063 {
3064 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3065 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3066 }
3067 else
3068 {
3069 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3070 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3071 }
3072 }
3073 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3074 JUMPTO(SLJIT_JUMP, loop);
3075
3076 JUMPHERE(found);
3077 if (foundoc)
3078 JUMPHERE(foundoc);
3079 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3080 JUMPHERE(alreadyfound);
3081 JUMPHERE(toolong);
3082 return notfound;
3083 }
3084
3085 static void do_revertframes(compiler_common *common)
3086 {
3087 DEFINE_COMPILER;
3088 struct sljit_jump *jump;
3089 struct sljit_label *mainloop;
3090
3091 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3092 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3093 GET_LOCAL_BASE(TMP3, 0, 0);
3094
3095 /* Drop frames until we reach STACK_TOP. */
3096 mainloop = LABEL();
3097 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3098 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3099 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3100
3101 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3102 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3103 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3104 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3105 JUMPTO(SLJIT_JUMP, mainloop);
3106
3107 JUMPHERE(jump);
3108 jump = JUMP(SLJIT_C_SIG_LESS);
3109 /* End of dropping frames. */
3110 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3111
3112 JUMPHERE(jump);
3113 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3114 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3115 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3116 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3117 JUMPTO(SLJIT_JUMP, mainloop);
3118 }
3119
3120 static void check_wordboundary(compiler_common *common)
3121 {
3122 DEFINE_COMPILER;
3123 struct sljit_jump *skipread;
3124 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3125 struct sljit_jump *jump;
3126 #endif
3127
3128 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3129
3130 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3131 /* Get type of the previous char, and put it to LOCALS1. */
3132 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3133 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3134 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3135 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3136 skip_char_back(common);
3137 check_start_used_ptr(common);
3138 read_char(common);
3139
3140 /* Testing char type. */
3141 #ifdef SUPPORT_UCP
3142 if (common->use_ucp)
3143 {
3144 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3145 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3146 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3147 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3148 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3149 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3150 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3151 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3152 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3153 JUMPHERE(jump);
3154 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3155 }
3156 else
3157 #endif
3158 {
3159 #ifndef COMPILE_PCRE8
3160 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3161 #elif defined SUPPORT_UTF
3162 /* Here LOCALS1 has already been zeroed. */
3163 jump = NULL;
3164 if (common->utf)
3165 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3166 #endif /* COMPILE_PCRE8 */
3167 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3168 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3169 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3170 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3171 #ifndef COMPILE_PCRE8
3172 JUMPHERE(jump);
3173 #elif defined SUPPORT_UTF
3174 if (jump != NULL)
3175 JUMPHERE(jump);
3176 #endif /* COMPILE_PCRE8 */
3177 }
3178 JUMPHERE(skipread);
3179
3180 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3181 skipread = check_str_end(common);
3182 peek_char(common);
3183
3184 /* Testing char type. This is a code duplication. */
3185 #ifdef SUPPORT_UCP
3186 if (common->use_ucp)
3187 {
3188 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3189 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3190 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3191 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3192 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3193 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3194 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3195 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3196 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3197 JUMPHERE(jump);
3198 }
3199 else
3200 #endif
3201 {
3202 #ifndef COMPILE_PCRE8
3203 /* TMP2 may be destroyed by peek_char. */
3204 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3205 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3206 #elif defined SUPPORT_UTF
3207 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3208 jump = NULL;
3209 if (common->utf)
3210 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3211 #endif
3212 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3213 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3214 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3215 #ifndef COMPILE_PCRE8
3216 JUMPHERE(jump);
3217 #elif defined SUPPORT_UTF
3218 if (jump != NULL)
3219 JUMPHERE(jump);
3220 #endif /* COMPILE_PCRE8 */
3221 }
3222 JUMPHERE(skipread);
3223
3224 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3225 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3226 }
3227
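/*
range format:

ranges[0] = number of bit change positions stored from ranges[2] onwards
            (max MAX_RANGE_SIZE, -1 means invalid range).
ranges[1] = first bit (0 or 1), i.e. the bit that belongs to character 0.
ranges[2] ... ranges[1 + ranges[0]] = positions of the bit changes, in
            ascending order (a position is a character whose bit is not equal
            to the bit of the previous character; 256 may be used as the
            final position).
*/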
3235
3236 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3237 {
3238 DEFINE_COMPILER;
3239 struct sljit_jump *jump;
3240
3241 if (ranges[0] < 0)
3242 return FALSE;
3243
3244 switch(ranges[0])
3245 {
3246 case 1:
3247 if (readch)
3248 read_char(common);
3249 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3250 return TRUE;
3251
3252 case 2:
3253 if (readch)
3254 read_char(common);
3255 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3256 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3257 return TRUE;
3258
3259 case 4:
3260 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3261 {
3262 if (readch)
3263 read_char(common);
3264 if (ranges[1] != 0)
3265 {
3266 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3267 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3268 }
3269 else
3270 {
3271 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3272 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3273 JUMPHERE(jump);
3274 }
3275 return TRUE;
3276 }
3277 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3278 {
3279 if (readch)
3280 read_char(common);
3281 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3282 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3283 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3284 return TRUE;
3285 }
3286 return FALSE;
3287
3288 default:
3289 return FALSE;
3290 }
3291 }
3292
3293 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3294 {
3295 int i, bit, length;
3296 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3297
3298 bit = ctypes[0] & flag;
3299 ranges[0] = -1;
3300 ranges[1] = bit != 0 ? 1 : 0;
3301 length = 0;
3302
3303 for (i = 1; i < 256; i++)
3304 if ((ctypes[i] & flag) != bit)
3305 {
3306 if (length >= MAX_RANGE_SIZE)
3307 return;
3308 ranges[2 + length] = i;
3309 length++;
3310 bit ^= flag;
3311 }
3312
3313 if (bit != 0)
3314 {
3315 if (length >= MAX_RANGE_SIZE)
3316 return;
3317 ranges[2 + length] = 256;
3318 length++;
3319 }
3320 ranges[0] = length;
3321 }
3322
3323 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3324 {
3325 int ranges[2 + MAX_RANGE_SIZE];
3326 pcre_uint8 bit, cbit, all;
3327 int i, byte, length = 0;
3328
3329 bit = bits[0] & 0x1;
3330 ranges[1] = bit;
3331 /* Can be 0 or 255. */
3332 all = -bit;
3333
3334 for (i = 0; i < 256; )
3335 {
3336 byte = i >> 3;
3337 if ((i & 0x7) == 0 && bits[byte] == all)
3338 i += 8;
3339 else
3340 {
3341 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3342 if (cbit != bit)
3343 {
3344 if (length >= MAX_RANGE_SIZE)
3345 return FALSE;
3346 ranges[2 + length] = i;
3347 length++;
3348 bit = cbit;
3349 all = -cbit;
3350 }
3351 i++;
3352 }
3353 }
3354
3355 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3356 {
3357 if (length >= MAX_RANGE_SIZE)
3358 return FALSE;
3359 ranges[2 + length] = 256;
3360 length++;
3361 }
3362 ranges[0] = length;
3363
3364 return check_ranges(common, ranges, backtracks, FALSE);
3365 }
3366
3367 static void check_anynewline(compiler_common *common)
3368 {
3369 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3370 DEFINE_COMPILER;
3371
3372 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3373
3374 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3375 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3376 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3377 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3378 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3379 #ifdef COMPILE_PCRE8
3380 if (common->utf)
3381 {
3382 #endif
3383 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3384 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3385 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3386 #ifdef COMPILE_PCRE8
3387 }
3388 #endif
3389 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3390 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3391 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3392 }
3393
3394 static void check_hspace(compiler_common *common)
3395 {
3396 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3397 DEFINE_COMPILER;
3398
3399 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3400
3401 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3402 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3403 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3404 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3405 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3406 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3407 #ifdef COMPILE_PCRE8
3408 if (common->utf)
3409 {
3410 #endif
3411 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3412 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3413 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3414 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3415 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3416 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3417 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3418 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3419 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3420 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3421 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3422 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3423 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3424 #ifdef COMPILE_PCRE8
3425 }
3426 #endif
3427 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3428 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3429
3430 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3431 }
3432
3433 static void check_vspace(compiler_common *common)
3434 {
3435 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3436 DEFINE_COMPILER;
3437
3438 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3439
3440 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3441 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3442 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3443 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3444 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3445 #ifdef COMPILE_PCRE8
3446 if (common->utf)
3447 {
3448 #endif
3449 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3450 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3451 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3452 #ifdef COMPILE_PCRE8
3453 }
3454 #endif
3455 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3456 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3457
3458 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3459 }
3460
3461 #define CHAR1 STR_END
3462 #define CHAR2 STACK_TOP
3463
3464 static void do_casefulcmp(compiler_common *common)
3465 {
3466 DEFINE_COMPILER;
3467 struct sljit_jump *jump;
3468 struct sljit_label *label;
3469
3470 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3471 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3472 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3473 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3474 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3475 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3476
3477 label = LABEL();
3478 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3479 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3480 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3481 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3482 JUMPTO(SLJIT_C_NOT_ZERO, label);
3483
3484 JUMPHERE(jump);
3485 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3486 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3487 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3488 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3489 }
3490
3491 #define LCC_TABLE STACK_LIMIT
3492
3493 static void do_caselesscmp(compiler_common *common)
3494 {
3495 DEFINE_COMPILER;
3496 struct sljit_jump *jump;
3497 struct sljit_label *label;
3498
3499 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3500 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3501
3502 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3503 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3504 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3505 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3506 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3507 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3508
3509 label = LABEL();
3510 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3511 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3512 #ifndef COMPILE_PCRE8
3513 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3514 #endif
3515 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3516 #ifndef COMPILE_PCRE8
3517 JUMPHERE(jump);
3518 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3519 #endif
3520 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3521 #ifndef COMPILE_PCRE8
3522 JUMPHERE(jump);
3523 #endif
3524 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3525 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3526 JUMPTO(SLJIT_C_NOT_ZERO, label);
3527
3528 JUMPHERE(jump);
3529 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3530 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3531 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3532 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3533 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3534 }
3535
3536 #undef LCC_TABLE
3537 #undef CHAR1
3538 #undef CHAR2
3539
3540 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3541
3542 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3543 {
3544 /* This function would be ineffective to do in JIT level. */
3545 pcre_uint32 c1, c2;
3546 const pcre_uchar *src2 = args->uchar_ptr;
3547 const pcre_uchar *end2 = args->end;
3548 const ucd_record *ur;
3549 const pcre_uint32 *pp;
3550
3551 while (src1 < end1)
3552 {
3553 if (src2 >= end2)
3554 return (pcre_uchar*)1;
3555 GETCHARINC(c1, src1);
3556 GETCHARINC(c2, src2);
3557 ur = GET_UCD(c2);
3558 if (c1 != c2 && c1 != c2 + ur->other_case)
3559 {
3560 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3561 for (;;)
3562 {
3563 if (c1 < *pp) return NULL;
3564 if (c1 == *pp++) break;
3565 }
3566 }
3567 }
3568 return src2;
3569 }
3570
3571 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3572
3573 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3574 compare_context* context, jump_list **backtracks)
3575 {
3576 DEFINE_COMPILER;
3577 unsigned int othercasebit = 0;
3578 pcre_uchar *othercasechar = NULL;
3579 #ifdef SUPPORT_UTF
3580 int utflength;
3581 #endif
3582
3583 if (caseless && char_has_othercase(common, cc))
3584 {
3585 othercasebit = char_get_othercase_bit(common, cc);
3586 SLJIT_ASSERT(othercasebit);
3587 /* Extracting bit difference info. */
3588 #if defined COMPILE_PCRE8
3589 othercasechar = cc + (othercasebit >> 8);
3590 othercasebit &= 0xff;
3591 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3592 /* Note that this code only handles characters in the BMP. If there
3593 ever are characters outside the BMP whose othercase differs in only one
3594 bit from itself (there currently are none), this code will need to be
3595 revised for COMPILE_PCRE32. */
3596 othercasechar = cc + (othercasebit >> 9);
3597 if ((othercasebit & 0x100) != 0)
3598 othercasebit = (othercasebit & 0xff) << 8;
3599 else
3600 othercasebit &= 0xff;
3601 #endif /* COMPILE_PCRE[8|16|32] */
3602 }
3603
3604 if (context->sourcereg == -1)
3605 {
3606 #if defined COMPILE_PCRE8
3607 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3608 if (context->length >= 4)
3609 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3610 else if (context->length >= 2)
3611 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3612 else
3613 #endif
3614 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3615 #elif defined COMPILE_PCRE16
3616 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3617 if (context->length >= 4)
3618 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3619 else
3620 #endif
3621 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3622 #elif defined COMPILE_PCRE32
3623 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3624 #endif /* COMPILE_PCRE[8|16|32] */
3625 context->sourcereg = TMP2;
3626 }
3627
3628 #ifdef SUPPORT_UTF
3629 utflength = 1;
3630 if (common->utf && HAS_EXTRALEN(*cc))
3631 utflength += GET_EXTRALEN(*cc);
3632
3633 do
3634 {
3635 #endif
3636
3637 context->length -= IN_UCHARS(1);
3638 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3639
3640 /* Unaligned read is supported. */
3641 if (othercasebit != 0 && othercasechar == cc)
3642 {
3643 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3644 context->oc.asuchars[context->ucharptr] = othercasebit;
3645 }
3646 else
3647 {
3648 context->c.asuchars[context->ucharptr] = *cc;
3649 context->oc.asuchars[context->ucharptr] = 0;
3650 }
3651 context->ucharptr++;
3652
3653 #if defined COMPILE_PCRE8
3654 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3655 #else
3656 if (context->ucharptr >= 2 || context->length == 0)
3657 #endif
3658 {
3659 if (context->length >= 4)
3660 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3661 else if (context->length >= 2)
3662 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3663 #if defined COMPILE_PCRE8
3664 else if (context->length >= 1)
3665 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3666 #endif /* COMPILE_PCRE8 */
3667 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3668
3669 switch(context->ucharptr)
3670 {
3671 case 4 / sizeof(pcre_uchar):
3672 if (context->oc.asint != 0)
3673 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3674 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3675 break;
3676
3677 case 2 / sizeof(pcre_uchar):
3678 if (context->oc.asushort != 0)
3679 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3680 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3681 break;
3682
3683 #ifdef COMPILE_PCRE8
3684 case 1:
3685 if (context->oc.asbyte != 0)
3686 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3687 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3688 break;
3689 #endif
3690
3691 default:
3692 SLJIT_ASSERT_STOP();
3693 break;
3694 }
3695 context->ucharptr = 0;
3696 }
3697
3698 #else
3699
3700 /* Unaligned read is unsupported or in 32 bit mode. */
3701 if (context->length >= 1)
3702 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3703
3704 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3705
3706 if (othercasebit != 0 && othercasechar == cc)
3707 {
3708 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3709 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3710 }
3711 else
3712 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3713
3714 #endif
3715
3716 cc++;
3717 #ifdef SUPPORT_UTF
3718 utflength--;
3719 }
3720 while (utflength > 0);
3721 #endif
3722
3723 return cc;
3724 }
3725
3726 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3727
3728 #define SET_TYPE_OFFSET(value) \
3729 if ((value) != typeoffset) \
3730 { \
3731 if ((value) > typeoffset) \
3732 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3733 else \
3734 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3735 } \
3736 typeoffset = (value);
3737
3738 #define SET_CHAR_OFFSET(value) \
3739 if ((value) != charoffset) \
3740 { \
3741 if ((value) > charoffset) \
3742 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
3743 else \
3744 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
3745 } \
3746 charoffset = (value);
3747
3748 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3749 {
3750 DEFINE_COMPILER;
3751 jump_list *found = NULL;
3752 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3753 pcre_int32 c, charoffset;
3754 const pcre_uint32 *other_cases;
3755 struct sljit_jump *jump = NULL;
3756 pcre_uchar *ccbegin;
3757 int compares, invertcmp, numberofcmps;
3758 #ifdef SUPPORT_UCP
3759 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3760 BOOL charsaved = FALSE;
3761 int typereg = TMP1, scriptreg = TMP1;
3762 pcre_int32 typeoffset;
3763 #endif
3764
3765 /* Although SUPPORT_UTF must be defined, we are
3766 not necessary in utf mode even in 8 bit mode. */
3767 detect_partial_match(common, backtracks);
3768 read_char(common);
3769
3770 if ((*cc++ & XCL_MAP) != 0)
3771 {
3772 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3773 #ifndef COMPILE_PCRE8
3774 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3775 #elif defined SUPPORT_UTF
3776 if (common->utf)
3777 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3778 #endif
3779
3780 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3781 {
3782 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3783 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3784 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3785 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3786 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3787 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3788 }
3789
3790 #ifndef COMPILE_PCRE8
3791 JUMPHERE(jump);
3792 #elif defined SUPPORT_UTF
3793 if (common->utf)
3794 JUMPHERE(jump);
3795 #endif
3796 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3797 #ifdef SUPPORT_UCP
3798 charsaved = TRUE;
3799 #endif
3800 cc += 32 / sizeof(pcre_uchar);
3801 }
3802
3803 /* Scanning the necessary info. */
3804 ccbegin = cc;
3805 compares = 0;
3806 while (*cc != XCL_END)
3807 {
3808 compares++;
3809 if (*cc == XCL_SINGLE)
3810 {
3811 cc += 2;
3812 #ifdef SUPPORT_UTF
3813 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3814 #endif
3815 #ifdef SUPPORT_UCP
3816 needschar = TRUE;
3817 #endif
3818 }
3819 else if (*cc == XCL_RANGE)
3820 {
3821 cc += 2;
3822 #ifdef SUPPORT_UTF
3823 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3824 #endif
3825 cc++;
3826 #ifdef SUPPORT_UTF
3827 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3828 #endif
3829 #ifdef SUPPORT_UCP
3830 needschar = TRUE;
3831 #endif
3832 }
3833 #ifdef SUPPORT_UCP
3834 else
3835 {
3836 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3837 cc++;
3838 switch(*cc)
3839 {
3840 case PT_ANY:
3841 break;
3842
3843 case PT_LAMP:
3844 case PT_GC:
3845 case PT_PC:
3846 case PT_ALNUM:
3847 needstype = TRUE;
3848 break;
3849
3850 case PT_SC:
3851 needsscript = TRUE;
3852 break;
3853
3854 case PT_SPACE:
3855 case PT_PXSPACE:
3856 case PT_WORD:
3857 needstype = TRUE;
3858 needschar = TRUE;
3859 break;
3860
3861 case PT_CLIST:
3862 needschar = TRUE;
3863 break;
3864
3865 default:
3866 SLJIT_ASSERT_STOP();
3867 break;
3868 }
3869 cc += 2;
3870 }
3871 #endif
3872 }
3873
3874 #ifdef SUPPORT_UCP
3875 /* Simple register allocation. TMP1 is preferred if possible. */
3876 if (needstype || needsscript)
3877 {
3878 if (needschar && !charsaved)
3879 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3880 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3881 if (needschar)
3882 {
3883 if (needstype)
3884 {
3885 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3886 typereg = RETURN_ADDR;
3887 }
3888
3889 if (needsscript)
3890 scriptreg = TMP3;
3891 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3892 }
3893 else if (needstype && needsscript)
3894 scriptreg = TMP3;
3895 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3896
3897 if (needsscript)
3898 {
3899 if (scriptreg == TMP1)
3900 {
3901 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3902 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3903 }
3904 else
3905 {
3906 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3907 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3908 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3909 }
3910 }
3911 }
3912 #endif
3913
3914 /* Generating code. */
3915 cc = ccbegin;
3916 charoffset = 0;
3917 numberofcmps = 0;
3918 #ifdef SUPPORT_UCP
3919 typeoffset = 0;
3920 #endif
3921
3922 while (*cc != XCL_END)
3923 {
3924 compares--;
3925 invertcmp = (compares == 0 && list != backtracks);
3926 jump = NULL;
3927
3928 if (*cc == XCL_SINGLE)
3929 {
3930 cc ++;
3931 #ifdef SUPPORT_UTF
3932 if (common->utf)
3933 {
3934 GETCHARINC(c, cc);
3935 }
3936 else
3937 #endif
3938 c = *cc++;
3939
3940 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3941 {
3942 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3943 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
3944 numberofcmps++;
3945 }
3946 else if (numberofcmps > 0)
3947 {
3948 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3949 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3950 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3951 numberofcmps = 0;
3952 }
3953 else
3954 {
3955 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3956 numberofcmps = 0;
3957 }
3958 }
3959 else if (*cc == XCL_RANGE)
3960 {
3961 cc ++;
3962 #ifdef SUPPORT_UTF
3963 if (common->utf)
3964 {
3965 GETCHARINC(c, cc);
3966 }
3967 else
3968 #endif
3969 c = *cc++;
3970 SET_CHAR_OFFSET(c);
3971 #ifdef SUPPORT_UTF
3972 if (common->utf)
3973 {
3974 GETCHARINC(c, cc);
3975 }
3976 else
3977 #endif
3978 c = *cc++;
3979 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3980 {
3981 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3982 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
3983 numberofcmps++;
3984 }
3985 else if (numberofcmps > 0)
3986 {
3987 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3988 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3989 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3990 numberofcmps = 0;
3991 }
3992 else
3993 {
3994 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3995 numberofcmps = 0;
3996 }
3997 }
3998 #ifdef SUPPORT_UCP
3999 else
4000 {
4001 if (*cc == XCL_NOTPROP)
4002 invertcmp ^= 0x1;
4003 cc++;
4004 switch(*cc)
4005 {
4006 case PT_ANY:
4007 if (list != backtracks)
4008 {
4009 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4010 continue;
4011 }
4012 else if (cc[-1] == XCL_NOTPROP)
4013 continue;
4014 jump = JUMP(SLJIT_JUMP);
4015 break;
4016
4017 case PT_LAMP:
4018 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4019 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4020 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4021 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4022 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4023 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4024 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4025 break;
4026
4027 case PT_GC:
4028 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4029 SET_TYPE_OFFSET(c);
4030 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4031 break;
4032
4033 case PT_PC:
4034 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4035 break;
4036
4037 case PT_SC:
4038 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4039 break;
4040
4041 case PT_SPACE:
4042 case PT_PXSPACE:
4043 if (*cc == PT_SPACE)
4044 {
4045 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4046 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4047 }
4048 SET_CHAR_OFFSET(9);
4049 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4050 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4051 if (*cc == PT_SPACE)
4052 JUMPHERE(jump);
4053
4054 SET_TYPE_OFFSET(ucp_Zl);
4055 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4056 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4057 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4058 break;
4059
4060 case PT_WORD:
4061 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4062 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4063 /* ... fall through */
4064
4065 case PT_ALNUM:
4066 SET_TYPE_OFFSET(ucp_Ll);
4067 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4068 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4069 SET_TYPE_OFFSET(ucp_Nd);
4070 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4071 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4072 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4073 break;
4074
4075 case PT_CLIST:
4076 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4077
4078 /* At least three characters are required.
4079 Otherwise this case would be handled by the normal code path. */
4080 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4081 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4082
4083 /* Optimizing character pairs, if their difference is power of 2. */
4084 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4085 {
4086 if (charoffset == 0)
4087 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4088 else
4089 {
4090 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4091 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4092 }
4093 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4094 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4095 other_cases += 2;
4096 }
4097 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4098 {
4099 if (charoffset == 0)
4100 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4101 else
4102 {
4103 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4104 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4105 }
4106 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4107 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4108
4109 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4110 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4111
4112 other_cases += 3;
4113 }
4114 else
4115 {
4116 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4117 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4118 }
4119
4120 while (*other_cases != NOTACHAR)
4121 {
4122 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4123 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4124 }
4125 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4126 break;
4127 }
4128 cc += 2;
4129 }
4130 #endif
4131
4132 if (jump != NULL)
4133 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4134 }
4135
4136 if (found != NULL)
4137 set_jumps(found, LABEL());
4138 }
4139
4140 #undef SET_TYPE_OFFSET
4141 #undef SET_CHAR_OFFSET
4142
4143 #endif
4144
/* Emits the matching-path code for one single-item opcode.
   common     - shared compiler state (DEFINE_COMPILER takes the sljit compiler from it).
   type       - the opcode to compile (one of the OP_* cases handled below).
   cc         - pattern pointer; callers in this file pass the position just after the opcode.
   backtracks - jump list that receives every jump taken when the item fails to match.
   Returns the pattern pointer advanced past the opcode arguments consumed by the case. */
4145 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4146 {
4147 DEFINE_COMPILER;
4148 int length;
4149 unsigned int c, oc, bit;
4150 compare_context context;
4151 struct sljit_jump *jump[4];
4152 #ifdef SUPPORT_UTF
4153 struct sljit_label *label;
4154 #ifdef SUPPORT_UCP
4155 pcre_uchar propdata[5];
4156 #endif
4157 #endif
4158
4159 switch(type)
4160 {
4161 case OP_SOD:
/* Backtrack unless STR_PTR equals jit_arguments.begin. */
4162 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4163 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4164 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4165 return cc;
4166
4167 case OP_SOM:
/* Backtrack unless STR_PTR equals jit_arguments.str. */
4168 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4169 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4170 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4171 return cc;
4172
4173 case OP_NOT_WORD_BOUNDARY:
4174 case OP_WORD_BOUNDARY:
/* Fast-calls the shared common->wordboundary routine, then branches on the flags.
   NOTE(review): the routine is expected to leave its result in the zero flag - confirm. */
4175 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4176 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4177 return cc;
4178
4179 case OP_NOT_DIGIT:
4180 case OP_DIGIT:
4181 /* Digits are usually 0-9, so it is worth to optimize them. */
4182 if (common->digits[0] == -2)
4183 get_ctype_ranges(common, ctype_digit, common->digits);
4184 detect_partial_match(common, backtracks);
4185 /* Flip the starting bit in the negative case. */
4186 if (type == OP_NOT_DIGIT)
4187 common->digits[1] ^= 1;
/* Fallback when check_ranges() returns FALSE: test the ctype_digit bit instead. */
4188 if (!check_ranges(common, common->digits, backtracks, TRUE))
4189 {
4190 read_char8_type(common);
4191 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4192 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4193 }
/* Undo the flip above so that common->digits is left unchanged. */
4194 if (type == OP_NOT_DIGIT)
4195 common->digits[1] ^= 1;
4196 return cc;
4197
4198 case OP_NOT_WHITESPACE:
4199 case OP_WHITESPACE:
/* Tests the ctype_space bit of TMP1 (presumably the type bits loaded by read_char8_type). */
4200 detect_partial_match(common, backtracks);
4201 read_char8_type(common);
4202 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4203 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4204 return cc;
4205
4206 case OP_NOT_WORDCHAR:
4207 case OP_WORDCHAR:
/* Same scheme as the whitespace case, using the ctype_word bit. */
4208 detect_partial_match(common, backtracks);
4209 read_char8_type(common);
4210 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4211 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4212 return cc;
4213
4214 case OP_ANY:
4215 detect_partial_match(common, backtracks);
4216 read_char(common);
4217 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4218 {
/* Two-unit fixed newline: fail only when the character just read is the first
   newline unit and the following unit (peeked, not consumed) is the second one. */
4219 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4220 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4221 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4222 else
4223 jump[1] = check_str_end(common);
4224
4225 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4226 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
/* jump[1] is only patched when it is non-NULL. */
4227 if (jump[1] != NULL)
4228 JUMPHERE(jump[1]);
4229 JUMPHERE(jump[0]);
4230 }
4231 else
4232 check_newlinechar(common, common->nltype, backtracks, TRUE);
4233 return cc;
4234
4235 case OP_ALLANY:
4236 detect_partial_match(common, backtracks);
4237 #ifdef SUPPORT_UTF
4238 if (common->utf)
4239 {
/* UTF mode: step over the first code unit, then over any trailing units of the same character. */
4240 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4241 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4242 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4243 #if defined COMPILE_PCRE8
/* 8 bit: for a first unit >= 0xc0 the extra advance is looked up in utf8_table4. */
4244 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4245 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4246 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4247 #elif defined COMPILE_PCRE16
/* 16 bit: TMP1 becomes 1 when (unit & 0xfc00) == 0xd800 and 0 otherwise; it is then
   shifted left by one and added to STR_PTR. */
4248 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4249 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4250 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4251 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4252 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4253 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4254 #endif
4255 JUMPHERE(jump[0]);
4256 #endif /* COMPILE_PCRE[8|16] */
4257 return cc;
4258 }
4259 #endif
/* Non-UTF mode: a character is exactly one code unit. */
4260 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4261 return cc;
4262
4263 case OP_ANYBYTE:
4264 detect_partial_match(common, backtracks);
4265 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4266 return cc;
4267
4268 #ifdef SUPPORT_UTF
4269 #ifdef SUPPORT_UCP
4270 case OP_NOTPROP:
4271 case OP_PROP:
/* Builds a small synthetic descriptor in propdata (leading 0, XCL_PROP or XCL_NOTPROP,
   the two opcode arguments, XCL_END) and hands it to compile_xclass_matchingpath. */
4272 propdata[0] = 0;
4273 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4274 propdata[2] = cc[0];
4275 propdata[3] = cc[1];
4276 propdata[4] = XCL_END;
4277 compile_xclass_matchingpath(common, propdata, backtracks);
4278 return cc + 2;
4279 #endif
4280 #endif
4281
4282 case OP_ANYNL:
4283 detect_partial_match(common, backtracks);
4284 read_char(common);
/* CR was read: additionally consume a directly following NL if there is one.
   Any other character is checked by check_newlinechar() with common->bsr_nltype. */
4285 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4286 /* We don't need to handle soft partial matching case. */
4287 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4288 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4289 else
4290 jump[1] = check_str_end(common);
4291 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4292 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4293 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4294 jump[3] = JUMP(SLJIT_JUMP);
4295 JUMPHERE(jump[0]);
4296 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4297 JUMPHERE(jump[1]);
4298 JUMPHERE(jump[2]);
4299 JUMPHERE(jump[3]);
4300 return cc;
4301
4302 case OP_NOT_HSPACE:
4303 case OP_HSPACE:
/* Reads a character and fast-calls the shared common->hspace routine; the branch
   below uses the flags left by that call (NOTE(review): confirm against the routine). */
4304 detect_partial_match(common, backtracks);
4305 read_char(common);
4306 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4307 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4308 return cc;
4309
4310 case OP_NOT_VSPACE:
4311 case OP_VSPACE:
/* Same scheme as the hspace case, using the shared common->vspace routine. */
4312 detect_partial_match(common, backtracks);
4313 read_char(common);
4314 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4315 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4316 return cc;
4317
4318 #ifdef SUPPORT_UCP
4319 case OP_EXTUNI:
/* Reads one character and loads its gbprop value from ucd_records, then keeps reading
   characters while the ucp_gbtable entry selected by the previous gbprop value has the
   bit (1 << current gbprop value) set.
   NOTE(review): TMP2 is presumably the record offset produced by the getucd routine. */
4320 detect_partial_match(common, backtracks);
4321 read_char(common);
4322 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4323 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4324 /* Optimize register allocation: use a real register. */
/* STACK_TOP is saved in LOCALS0 and reused to hold the previous gbprop value until it is restored below. */
4325 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4326 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4327
4328 label = LABEL();
4329 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* TMP3 remembers the position before the next character so it can be un-read when the loop stops. */
4330 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4331 read_char(common);
4332 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4333 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4334 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4335
/* TMP1 = ucp_gbtable entry for the previous value (index scaled by 4); the current value
   becomes the new "previous" value in STACK_TOP and TMP2 is turned into the bit 1 << value. */
4336 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4337 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4338 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4339 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4340 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4341 JUMPTO(SLJIT_C_NOT_ZERO, label);
4342
/* The last character read did not continue the sequence: restore the position saved in TMP3. */
4343 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4344 JUMPHERE(jump[0]);
4345 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4346
4347 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4348 {
4349 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4350 /* Since we successfully read a char above, partial matching must occur. */
4351 check_partial(common, TRUE);
4352 JUMPHERE(jump[0]);
4353 }
4354 return cc;
4355 #endif
4356
4357 case OP_EODN:
4358 /* Requires rather complex checks. */
/* jump[0] is taken when STR_PTR is already at (or past) STR_END; it lands just before the final check_partial(). */
4359 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4360 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4361 {
/* Two-unit fixed newline: exactly two units must remain and they must equal the newline. */
4362 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4363 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4364 if (common->mode == JIT_COMPILE)
4365 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4366 else
4367 {
/* Partial modes: if more than one unit remains, or the single remaining unit is not the
   first newline unit, backtrack; otherwise call check_partial() and then backtrack. */
4368 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4369 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4370 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4371 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4372 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4373 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4374 check_partial(common, TRUE);
4375 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4376 JUMPHERE(jump[1]);
4377 }
4378 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4379 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4380 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4381 }
4382 else if (common->nltype == NLTYPE_FIXED)
4383 {
/* One-unit fixed newline: exactly one unit must remain and it must equal the newline. */
4384 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4385 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4386 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4387 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4388 }
4389 else
4390 {
/* Remaining newline types. First the CR case: with exactly two units left the second must
   be NL; with fewer than two units left (jump[2]) the check succeeds; with more it fails. */
4391 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4392 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4393 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4394 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4395 jump[2] = JUMP(SLJIT_C_GREATER);
4396 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4397 /* Equal. */
4398 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4399 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4400 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4401
/* First unit is not CR. */
4402 JUMPHERE(jump[1]);
4403 if (common->nltype == NLTYPE_ANYCRLF)
4404 {
4405 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4406 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4407 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4408 }
4409 else
4410 {
/* STR_PTR is saved in LOCALS1 around read_char() and restored once the character has
   been checked by the shared common->anynewline routine. */
4411 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4412 read_char(common);
4413 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4414 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4415 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4416 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4417 }
4418 JUMPHERE(jump[2]);
4419 JUMPHERE(jump[3]);
4420 }
4421 JUMPHERE(jump[0]);
4422 check_partial(common, FALSE);
4423 return cc;
4424
4425 case OP_EOD:
/* Backtrack while STR_PTR is still before STR_END. */
4426 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4427 check_partial(common, FALSE);
4428 return cc;
4429
4430 case OP_CIRC:
/* Backtrack if STR_PTR is past jit_arguments.begin or if the notbol flag is non-zero. */
4431 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4432 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4433 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4434 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4435 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4436 return cc;
4437
4438 case OP_CIRCM:
/* At jit_arguments.begin only the notbol flag decides (jump[0] then skips the rest).
   Past it (jump[1]) STR_PTR must be before STR_END and directly preceded by a newline. */
4439 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4440 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4441 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4442 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4443 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4444 jump[0] = JUMP(SLJIT_JUMP);
4445 JUMPHERE(jump[1]);
4446
4447 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4448 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4449 {
/* Two-unit newline: both preceding units must exist (not before begin, still in TMP1) and match. */
4450 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4451 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4452 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4453 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4454 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4455 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4456 }
4457 else
4458 {
/* Step back one character, read it again and check that it is a newline. */
4459 skip_char_back(common);
4460 read_char(common);
4461 check_newlinechar(common, common->nltype, backtracks, FALSE);
4462 }
4463 JUMPHERE(jump[0]);
4464 return cc;
4465
4466 case OP_DOLL:
/* Backtrack if the noteol flag is non-zero; otherwise behave like OP_EODN, or like a
   plain end-of-subject test when common->endonly is set. */
4467 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4468 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4469 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4470
4471 if (!common->endonly)
4472 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4473 else
4474 {
4475 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4476 check_partial(common, FALSE);
4477 }
4478 return cc;
4479
4480 case OP_DOLLM:
/* At the end of the subject only the noteol flag decides (jump[0] then skips the rest).
   Before the end (jump[1]) the text at STR_PTR must be a newline. */
4481 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4482 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4483 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4484 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4485 check_partial(common, FALSE);
4486 jump[0] = JUMP(SLJIT_JUMP);
4487 JUMPHERE(jump[1]);
4488
4489 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4490 {
4491 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4492 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4493 if (common->mode == JIT_COMPILE)
4494 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4495 else
4496 {
/* Partial modes: only one unit is left; if it is the first newline unit call check_partial(), then backtrack. */
4497 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4498 /* STR_PTR = STR_END - IN_UCHARS(1) */
4499 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4500 check_partial(common, TRUE);
4501 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4502 JUMPHERE(jump[1]);
4503 }
4504
4505 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4506 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4507 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4508 }
4509 else
4510 {
4511 peek_char(common);
4512 check_newlinechar(common, common->nltype, backtracks, FALSE);
4513 }
4514 JUMPHERE(jump[0]);
4515 return cc;
4516
4517 case OP_CHAR:
4518 case OP_CHARI:
/* length = number of code units of the pattern character at cc. */
4519 length = 1;
4520 #ifdef SUPPORT_UTF
4521 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4522 #endif
/* Non-partial mode, and either caseful, no other case, or a non-zero othercase bit:
   advance STR_PTR first, check the end once, then compare via byte_sequence_compare(). */
4523 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4524 {
4525 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4526 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4527
4528 context.length = IN_UCHARS(length);
4529 context.sourcereg = -1;
4530 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4531 context.ucharptr = 0;
4532 #endif
4533 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4534 }
/* General path: read one subject character into TMP1 and compare it with the pattern character c. */
4535 detect_partial_match(common, backtracks);
4536 read_char(common);
4537 #ifdef SUPPORT_UTF
4538 if (common->utf)
4539 {
4540 GETCHAR(c, cc);
4541 }
4542 else
4543 #endif
4544 c = *cc;
4545 if (type == OP_CHAR || !char_has_othercase(common, cc))
4546 {
4547 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4548 return cc + length;
4549 }
4550 oc = char_othercase(common, c);
4551 bit = c ^ oc;
/* The two cases differ in a single bit: force that bit on and compare once. */
4552 if (is_powerof2(bit))
4553 {
4554 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4555 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4556 return cc + length;
4557 }
/* Otherwise TMP2 = (TMP1 == c) | (TMP1 == oc); backtrack when the result is zero. */
4558 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4559 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4560 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4561 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4562 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4563 return cc + length;
4564
4565 case OP_NOT:
4566 case OP_NOTI:
4567 detect_partial_match(common, backtracks);
4568 length = 1;
4569 #ifdef SUPPORT_UTF
4570 if (common->utf)
4571 {
4572 #ifdef COMPILE_PCRE8
4573 c = *cc;
/* Pattern character below 128: only the first subject byte needs to be compared. */
4574 if (c < 128)
4575 {
4576 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4577 if (type == OP_NOT || !char_has_othercase(common, cc))
4578 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4579 else
4580 {
4581 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4582 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4583 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4584 }
4585 /* Skip the variable-length character. */
4586 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4587 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4588 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4589 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4590 JUMPHERE(jump[0]);
4591 return cc + 1;
4592 }
4593 else
4594 #endif /* COMPILE_PCRE8 */
4595 {
4596 GETCHARLEN(c, cc, length);
4597 read_char(common);
4598 }
4599 }
4600 else
4601 #endif /* SUPPORT_UTF */
4602 {
4603 read_char(common);
4604 c = *cc;
4605 }
4606
/* Negated comparison: backtrack when the subject character equals c (or its other case for OP_NOTI). */
4607 if (type == OP_NOT || !char_has_othercase(common, cc))
4608 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4609 else
4610 {
4611 oc = char_othercase(common, c);
4612 bit = c ^ oc;
4613 if (is_powerof2(bit))
4614 {
4615 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4616 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4617 }
4618 else
4619 {
4620 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4621 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4622 }
4623 }
4624 return cc + length;
4625
4626 case OP_CLASS:
4627 case OP_NCLASS:
4628 detect_partial_match(common, backtracks);
4629 read_char(common);
/* When check_class_ranges() returns TRUE nothing more is emitted here; either way the
   32-byte bitmap at cc is skipped in the returned pointer. */
4630 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4631 return cc + 32 / sizeof(pcre_uchar);
4632
4633 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4634 jump[0] = NULL;
4635 #ifdef COMPILE_PCRE8
4636 /* This check only affects 8 bit mode. In other modes, we
4637 always need to compare the value with 255. */
4638 if (common->utf)
4639 #endif /* COMPILE_PCRE8 */
4640 {
/* Characters above 255 are outside the bitmap: OP_CLASS backtracks on them, while for
   OP_NCLASS jump[0] skips the bitmap test below. */
4641 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4642 if (type == OP_CLASS)
4643 {
4644 add_jump(compiler, backtracks, jump[0]);
4645 jump[0] = NULL;
4646 }
4647 }
4648 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Bitmap lookup: byte index is TMP1 >> 3, bit mask is 1 << (TMP1 & 7); backtrack when the bit is clear. */
4649 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4650 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4651 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4652 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4653 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4654 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4655 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4656 if (jump[0] != NULL)
4657 JUMPHERE(jump[0]);
4658 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4659 return cc + 32 / sizeof(pcre_uchar);
4660
4661 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4662 case OP_XCLASS:
/* Delegates to compile_xclass_matchingpath(); the value read by GET(cc, 0) is used to skip the item. */
4663 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4664 return cc + GET(cc, 0) - 1;
4665 #endif
4666
4667 case OP_REVERSE:
/* Moves STR_PTR back by `length` characters; backtracks if that would pass jit_arguments.begin. */
4668 length = GET(cc, 0);
4669 if (length == 0)
4670 return cc + LINK_SIZE;
4671 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4672 #ifdef SUPPORT_UTF
4673 if (common->utf)
4674 {
/* UTF mode: step back one character at a time, counting down in TMP2. */
4675 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4676 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4677 label = LABEL();
4678 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4679 skip_char_back(common);
4680 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4681 JUMPTO(SLJIT_C_NOT_ZERO, label);
4682 }
4683 else
4684 #endif
4685 {
/* Fixed-width mode: a single subtraction followed by one bounds check. */
4686 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4687 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4688 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4689 }
4690 check_start_used_ptr(common);
4691 return cc + LINK_SIZE;
4692 }
/* Reached only for an opcode that has no case above. */
4693 SLJIT_ASSERT_STOP();
4694 return cc;
4695 }
4696
4697 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4698 {
4699 /* This function consumes at least one input character. */
4700 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4701 DEFINE_COMPILER;
4702 pcre_uchar *ccbegin = cc;
4703 compare_context context;
4704 int size;
4705
4706 context.length = 0;
4707 do
4708 {
4709 if (cc >= ccend)
4710 break;
4711
4712 if (*cc == OP_CHAR)
4713 {
4714 size = 1;
4715 #ifdef SUPPORT_UTF
4716 if (common->utf && HAS_EXTRALEN(cc[1]))
4717 size += GET_EXTRALEN(cc[1]);
4718 #endif
4719 }
4720 else if (*cc == OP_CHARI)
4721 {
4722 size = 1;
4723 #ifdef SUPPORT_UTF
4724 if (common->utf)
4725 {
4726 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4727 size = 0;
4728 else if (HAS_EXTRALEN(cc[1]))
4729 size += GET_EXTRALEN(cc[1]);
4730 }
4731 else
4732 #endif
4733 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4734 size = 0;
4735 }
4736 else
4737 size = 0;
4738
4739 cc += 1 + size;
4740 context.length += IN_UCHARS(size);
4741 }
4742 while (size > 0 && context.length <= 128);
4743
4744 cc = ccbegin;
4745 if (context.length > 0)
4746 {
4747 /* We have a fixed-length byte sequence. */
4748 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4749 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4750
4751 context.sourcereg = -1;
4752 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4753 context.ucharptr = 0;
4754 #endif
4755 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4756 return cc;
4757 }
4758
4759 /* A non-fixed length character will be checked if length == 0. */
4760 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4761 }
4762
4763 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4764 {
4765 DEFINE_COMPILER;
4766 int offset = GET2(cc, 1) << 1;
4767
4768 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4769 if (!common->jscript_compat)
4770 {
4771 if (backtracks == NULL)
4772 {
4773 /* OVECTOR(1) contains the "string begin - 1" constant. */
4774 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4775 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4776 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4777 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4778 return JUMP(SLJIT_C_NOT_ZERO);
4779 }
4780 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4781 }
4782 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4783 }
4784
4785 /* Forward definitions. */
4786 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4787 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4788
/* PUSH_BACKTRACK(size, ccstart, error)
   Allocates `size` bytes from the sljit compiler for a new backtrack record,
   zero-fills it, stores `ccstart` in its cc field and links it in as the new
   parent->top (the old top becomes its prev). If the compiler reports an error
   after the allocation, the enclosing function returns `error`.
   The macro relies on locals named `compiler`, `backtrack` and `parent` being
   in scope where it is used. */
4789 #define PUSH_BACKTRACK(size, ccstart, error) \
4790 do \
4791 { \
4792 backtrack = sljit_alloc_memory(compiler, (size)); \
4793 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4794 return error; \
4795 memset(backtrack, 0, size); \
4796 backtrack->prev = parent->top; \
4797 backtrack->cc = (ccstart); \
4798 parent->top = backtrack; \
4799 } \
4800 while (0)
4801
/* PUSH_BACKTRACK_NOVALUE(size, ccstart)
   Same as PUSH_BACKTRACK, but uses a plain `return;` on a compiler error, so it
   is the variant for functions returning void. */
4802 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
4803 do \
4804 { \
4805 backtrack = sljit_alloc_memory(compiler, (size)); \
4806 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4807 return; \
4808 memset(backtrack, 0, size); \
4809 backtrack->prev = parent->top; \
4810 backtrack->cc = (ccstart); \
4811 parent->top = backtrack; \
4812 } \
4813 while (0)
4814
/* Casts the local `backtrack` pointer to a pointer to the given backtrack type. */
4815 #define BACKTRACK_AS(type) ((type *)backtrack)
4816
/* Emits the code that compares the subject at STR_PTR with the text delimited by
   the two ovector slots of the group referenced at cc (*cc is tested against
   OP_REF and OP_REFI below).
   common     - shared compiler state.
   cc         - pattern pointer at the reference opcode.
   backtracks - jump list that receives the jumps taken on a mismatch.
   withchecks - when TRUE, also emit the OVECTOR(1) comparison (unless
                jscript_compat is set) and the "both slots equal" (empty) test.
   emptyfail  - when the empty test fires: TRUE adds that jump to backtracks,
                FALSE lets it continue after the comparison code.
   Returns cc + 1 + IMM2_SIZE. */
4817 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4818 {
4819 DEFINE_COMPILER;
4820 int offset = GET2(cc, 1) << 1;
4821 struct sljit_jump *jump = NULL;
4822 struct sljit_jump *partial;
4823 struct sljit_jump *nopartial;
4824
4825 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4826 /* OVECTOR(1) contains the "string begin - 1" constant. */
4827 if (withchecks && !common->jscript_compat)
4828 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4829
4830 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4831 if (common->utf && *cc == OP_REFI)
4832 {
/* UTF caseless reference: the comparison is done by calling do_utf_caselesscmp().
   The assert pins the register assignment that this call sequence depends on. */
4833 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
4834 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
/* jump is taken when both slots are equal; it is resolved at the end of the function. */
4835 if (withchecks)
4836 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4837
4838 /* Needed to save important temporary registers. */
4839 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
/* The second scratch register carries ARGUMENTS; the current STR_PTR is passed through jit_arguments.uchar_ptr. */
4840 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
4841 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4842 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4843 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* A return value of 0 or 1 means failure; any other value becomes the new STR_PTR.
   NOTE(review): 1 presumably signals that the subject ended first, since only that
   value triggers check_partial() in the partial modes - confirm in do_utf_caselesscmp. */
4844 if (common->mode == JIT_COMPILE)
4845 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4846 else
4847 {
4848 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4849 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4850 check_partial(common, FALSE);
4851 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4852 JUMPHERE(nopartial);
4853 }
4854 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4855 }
4856 else
4857 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4858 {
/* TMP2 = second slot - first slot; a zero result (both slots equal) sets the flag used for jump. */
4859 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4860 if (withchecks)
4861 jump = JUMP(SLJIT_C_ZERO);
4862
/* Advance STR_PTR by that amount first; `partial` is taken when this passes STR_END. */
4863 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4864 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4865 if (common->mode == JIT_COMPILE)
4866 add_jump(compiler, backtracks, partial);
4867
/* Fast-call the shared caseful or caseless compare routine; a non-zero TMP2 afterwards means backtrack. */
4868 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4869 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4870
4871 if (common->mode != JIT_COMPILE)
4872 {
/* Partial modes: when the advance passed STR_END, shrink TMP2 to what is left in the
   subject, compare only that part (skipped when nothing is left), then call
   check_partial() and backtrack. */
4873 nopartial = JUMP(SLJIT_JUMP);
4874 JUMPHERE(partial);
4875 /* TMP2 -= STR_END - STR_PTR */
4876 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4877 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
4878 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4879 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4880 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4881 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4882 JUMPHERE(partial);
4883 check_partial(common, FALSE);
4884 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4885 JUMPHERE(nopartial);
4886 }
4887 }
4888
/* Resolve the "both slots equal" jump: failure when emptyfail is set, otherwise continue here. */
4889 if (jump != NULL)
4890 {
4891 if (emptyfail)
4892 add_jump(compiler, backtracks, jump);
4893 else
4894 JUMPHERE(jump);
4895 }
4896 return cc + 1 + IMM2_SIZE;
4897 }
4898
/* Emits the matching path for the reference item at cc when it is followed by
a repeat opcode. The repeat opcode is read from cc[1 + IMM2_SIZE] and decides
the repeat limits (min, max) and whether the repeat is minimizing. Inside this
function max == 0 stands for "no upper limit": the OP_CRSTAR and OP_CRPLUS
cases set it, and the loops emitted for it contain no upper bound check.

common - compiler state
cc     - position of the reference opcode
parent - not referenced directly in this body; presumably consumed by the
         PUSH_BACKTRACK macro (confirm against the macro definition)

Returns the position following the repeat opcode and its operands. */
4899 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4900 {
4901 DEFINE_COMPILER;
4902 backtrack_common *backtrack;
4903 pcre_uchar type;
4904 struct sljit_label *label;
4905 struct sljit_jump *zerolength;
4906 struct sljit_jump *jump = NULL;
4907 pcre_uchar *ccbegin = cc;
4908 int min = 0, max = 0;
4909 BOOL minimize;
4910
/* `backtrack` is not assigned explicitly below, so the macro presumably
creates the iterator_backtrack record and stores its address there. */
4911 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4912
/* The repeat opcode follows the reference opcode and its IMM2_SIZE operand. */
4913 type = cc[1 + IMM2_SIZE];
/* The lowest bit of the repeat opcode selects the minimizing form
(presumably the OP_CRMIN* opcodes are the odd ones - confirm against the
opcode table). */
4914 minimize = (type & 0x1) != 0;
/* Decode the repeat limits and step cc over the repeat opcode (and, for the
range forms, over its two IMM2_SIZE operands). */
4915 switch(type)
4916 {
4917 case OP_CRSTAR:
4918 case OP_CRMINSTAR:
4919 min = 0;
4920 max = 0;
4921 cc += 1 + IMM2_SIZE + 1;
4922 break;
4923 case OP_CRPLUS:
4924 case OP_CRMINPLUS:
4925 min = 1;
4926 max = 0;
4927 cc += 1 + IMM2_SIZE + 1;
4928 break;
4929 case OP_CRQUERY:
4930 case OP_CRMINQUERY:
4931 min = 0;
4932 max = 1;
4933 cc += 1 + IMM2_SIZE + 1;
4934 break;
4935 case OP_CRRANGE:
4936 case OP_CRMINRANGE:
4937 min = GET2(cc, 1 + IMM2_SIZE + 1);
4938 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4939 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4940 break;
4941 default:
4942 SLJIT_ASSERT_STOP();
4943 break;
4944 }
4945
/* Non-minimizing (greedy) repeat. */
4946 if (!minimize)
4947 {
4948 if (min == 0)
4949 {
/* Zero repetitions are acceptable: push the current STR_PTR and a 0, and
run the reference checks without a backtrack list (NULL). */
4950 allocate_stack(common, 2);
4951 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4952 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4953 /* Temporary release of STR_PTR. */
4954 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
4955 zerolength = compile_ref_checks(common, ccbegin, NULL);
4956 /* Restore if not zero length. */
4957 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
4958 }
4959 else
4960 {
/* At least one repetition is required: push a single 0 and let a failing
check go to this item's backtrack list. */
4961 allocate_stack(common, 1);
4962 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4963 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4964 }
4965
/* A repetition counter is only needed when it has to be compared against
min or max; it lives in the POSSESSIVE0 local and starts at 0. */
4966 if (min > 1 || max > 1)
4967 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4968
/* Start of one repetition. */
4969 label = LABEL();
4970 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4971
4972 if (min > 1 || max > 1)
4973 {
/* counter++ */
4974 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4975 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4976 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: repeat without pushing STR_PTR. */
4977 if (min > 1)
4978 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4979 if (max > 1)
4980 {
/* Stop once the maximum is reached; otherwise push STR_PTR and repeat. */
4981 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4982 allocate_stack(common, 1);
4983 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4984 JUMPTO(SLJIT_JUMP, label);
4985 JUMPHERE(jump);
4986 }
4987 }
4988
/* No upper limit: push STR_PTR after every repetition and loop
unconditionally; the loop is left through the jumps emitted by
compile_ref_matchingpath. */
4989 if (max == 0)
4990 {
4991 /* Includes min > 1 case as well. */
4992 allocate_stack(common, 1);
4993 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4994 JUMPTO(SLJIT_JUMP, label);
4995 }
4996
/* The jump returned by compile_ref_checks lands here, after the loop. */
4997 JUMPHERE(zerolength);
4998 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4999
5000 decrease_call_count(common);
5001 return cc;
5002 }
5003
/* Minimizing repeat. Two stack slots are used: STACK(0) is written with
STR_PTR after each repetition, STACK(1) is the repetition counter. The
counter is not initialized for OP_CRMINSTAR, which never compares it. */
5004 allocate_stack(common, 2);
5005 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5006 if (type != OP_CRMINSTAR)
5007 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5008
5009 if (min == 0)
5010 {
/* Zero repetitions are tried first: record STR_PTR and jump over the code
that matches one repetition. */
5011 zerolength = compile_ref_checks(common, ccbegin, NULL);
5012 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5013 jump = JUMP(SLJIT_JUMP);
5014 }
5015 else
5016 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5017
/* Label stored for this item's backtrack record; the min > 1 loop below
also jumps back to it. */
5018 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Give up when the counter has already reached the maximum. */
5019 if (max > 0)
5020 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5021
5022 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5023 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5024
5025 if (min > 1)
5026 {
/* counter++, and keep matching until the minimum is reached. */
5027 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5028 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5029 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5030 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5031 }
5032 else if (max > 0)
/* Only the maximum is checked: increment the counter in place. */
5033 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5034
/* Landing point of the "zero repetitions" jump (min == 0 only) and of the
jump returned by compile_ref_checks. */
5035 if (jump != NULL)
5036 JUMPHERE(jump);
5037 JUMPHERE(zerolength);
5038
5039 decrease_call_count(common);
5040 return cc;
5041 }
5042
/* Emits the matching path for a recursion item at cc.

The target of the recursion is identified by start = GET(cc, 1). The list in
common->entries is searched for a recurse_entry with that start value; when
none exists a new one is allocated and appended to the list. The emitted code
optionally saves OVECTOR(0) and/or the mark local on the stack, performs a
fast call to the entry, and goes to this item's backtrack list when TMP1 is 0
after the call.

parent is not referenced directly in this body; presumably it is consumed by
the PUSH_BACKTRACK macro (confirm against the macro definition).

Returns cc + 1 + LINK_SIZE, or NULL when the compiler reports an error after
the allocation attempt. */
5043 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5044 {
5045 DEFINE_COMPILER;
5046 backtrack_common *backtrack;
5047 recurse_entry *entry = common->entries;
5048 recurse_entry *prev = NULL;
5049 int start = GET(cc, 1);
5050
5051 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
/* Linear search for an existing entry; prev ends up as the last list element
when nothing is found. */
5052 while (entry != NULL)
5053 {
5054 if (entry->start == start)
5055 break;
5056 prev = entry;
5057 entry = entry->next;
5058 }
5059
5060 if (entry == NULL)
5061 {
/* First reference to this target: create the entry. Its label (entry->entry)
is not known yet, and its list of pending calls starts empty. */
5062 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5063 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5064 return NULL;
5065 entry->next = NULL;
5066 entry->entry = NULL;
5067 entry->calls = NULL;
5068 entry->start = start;
5069
/* Append to the tail, or start the list. */
5070 if (prev != NULL)
5071 prev->next = entry;
5072 else
5073 common->entries = entry;
5074 }
5075
/* Save the values that may need restoring on backtrack: OVECTOR(0) when
has_set_som is set, the mark local when mark_ptr is non-zero. Both are saved
(two slots) in the first branch, exactly one of them in the second. */
5076 if (common->has_set_som && common->mark_ptr != 0)
5077 {
5078 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5079 allocate_stack(common, 2);
5080 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5081 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5082 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5083 }
5084 else if (common->has_set_som || common->mark_ptr != 0)
5085 {
5086 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5087 allocate_stack(common, 1);
5088 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5089 }
5090
/* Call the recursion body. When its label does not exist yet the call is
queued in entry->calls (presumably to be bound once the body is generated). */
5091 if (entry->entry == NULL)
5092 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5093 else
5094 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5095 /* Leave if the match is failed. */
5096 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5097 return cc + 1 + LINK_SIZE;
5098 }
5099
5100 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5101 {
5102 const pcre_uchar *begin = arguments->begin;
5103 int *offset_vector = arguments->offsets;
5104 int offset_count = arguments->offset_count;
5105 int i;
5106
5107 if (PUBL(callout) == NULL)
5108 return 0;
5109
5110 callout_block->version = 2;
5111 callout_block->callout_data = arguments->callout_data;
5112
5113 /* Offsets in subject. */
5114 callout_block->subject_length = arguments->end - arguments->begin;
5115 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5116 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5117 #if defined COMPILE_PCRE8
5118 callout_block->subject = (PCRE_SPTR)begin;
5119 #elif defined COMPILE_PCRE16
5120 callout_block->subject = (PCRE_SPTR16)begin;
5121 #elif defined COMPILE_PCRE32
5122 callout_block->subject = (PCRE_SPTR32)begin;
5123 #endif
5124
5125 /* Convert and copy the JIT offset vector to the offset_vector array. */
5126 callout_block->capture_top = 0;
5127 callout_block->offset_vector = offset_vector;
5128 for (i = 2; i < offset_count; i += 2)
5129 {
5130 offset_vector[i] = jit_ovector[i] - begin;
5131 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5132 if (jit_ovector[i] >= begin)
5133 callout_block->capture_top = i;
5134 }
5135
5136 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5137 if (offset_count > 0)
5138 offset_vector[0] = -1;
5139 if (offset_count > 1)
5140 offset_vector[1] = -1;
5141 return (*PUBL(callout))(callout_block);
5142 }
5143
/* Size of the callout block rounded up to a multiple of 8 bytes. */
5144 /* Aligning to 8 byte. */
5145 #define CALLOUT_ARG_SIZE \
5146 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5147
/* Offset of a callout block field relative to STACK_TOP: the block starts
CALLOUT_ARG_SIZE bytes below STACK_TOP in the code emitted below. */
5148 #define CALLOUT_ARG_OFFSET(arg) \
5149 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5150
/* Emits the matching path for a callout item at cc.

Stack space for a callout block is allocated, the fields known at this point
are filled in by the emitted code, and do_callout() is called with three
arguments. Afterwards the stack space is released and the return value is
tested: a positive value goes to this item's backtrack list, a negative value
goes to the forced quit path, and zero falls through to the following code.

parent is not referenced directly in this body; presumably it is consumed by
the PUSH_BACKTRACK macro (confirm against the macro definition).

Returns cc + 2 + 2 * LINK_SIZE. */
5151 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5152 {
5153 DEFINE_COMPILER;
5154 backtrack_common *backtrack;
5155
5156 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5157
/* Room for the callout block, counted in machine words. */
5158 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5159
5160 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5161 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5162 SLJIT_ASSERT(common->capture_last_ptr != 0);
/* cc[1] is the callout number. */
5163 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5164 OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5165
5166 /* These pointer sized fields temporarily store internal variables. */
/* offset_vector carries STR_PTR and subject carries OVECTOR(0); do_callout()
reads both before overwriting the fields with their real values. */
5167 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5168 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5169 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5170
/* The mark field is taken from the jit_arguments structure (TMP1 holds
ARGUMENTS) when marks are in use, otherwise it is set to 0. */
5171 if (common->mark_ptr != 0)
5172 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5173 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5174 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5175 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5176
5177 /* Needed to save important temporary registers. */
/* STACK_TOP is kept in the LOCALS0 local across the call. The second call
argument is the address of the callout block and the third the address of
the JIT offset vector; the first argument is presumably the ARGUMENTS value
loaded into TMP1 above (confirm that TMP1 is the first scratch register). */
5178 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5179 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5180 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5181 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
/* do_callout() returns an int; SLJIT_MOV_SI presumably sign-extends it to the
full register width. */
5182 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5183 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5184 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5185
5186 /* Check return value. */
/* > 0: backtrack; < 0: forced quit (jump directly when the label already
exists, otherwise queue the jump); == 0: continue. */
5187 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5188 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5189 if (common->forced_quit_label == NULL)
5190 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5191 else
5192 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5193 return cc + 2 + 2 * LINK_SIZE;
5194 }
5195
5196 #undef CALLOUT_ARG_SIZE
5197 #undef CALLOUT_ARG_OFFSET
5198
/* Emits the code of an assertion (opcode in the range OP_ASSERT ..
OP_ASSERTBACK_NOT, see the SLJIT_ASSERT below), optionally preceded by
OP_BRAZERO or OP_BRAMINZERO.

common      - compiler state; its quit / accept labels and jump lists are
              saved on entry, replaced while the alternatives are compiled,
              and restored on every exit path
cc          - position of the assertion (or of the preceding OP_BRAZERO /
              OP_BRAMINZERO)
backtrack   - receives framesize, private_data_ptr and, for the BRAZERO
              forms, the matchingpath label
conditional - when TRUE the failure jumps are collected in
              backtrack->condfailed instead of backtrack->common.topbacktracks;
              must be FALSE for the BRAZERO / BRAMINZERO forms

Every alternative is generated with compile_matchingpath() followed by
compile_backtrackingpath(). Returns the position after the closing item
(cc + 1 + LINK_SIZE), or NULL if the compiler reports an error. */
5199 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5200 {
5201 DEFINE_COMPILER;
5202 int framesize;
5203 int private_data_ptr;
5204 backtrack_common altbacktrack;
5205 pcre_uchar *ccbegin;
5206 pcre_uchar opcode;
5207 pcre_uchar bra = OP_BRA;
5208 jump_list *tmp = NULL;
5209 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5210 jump_list **found;
5211 /* Saving previous accept variables. */
5212 struct sljit_label *save_quit_label = common->quit_label;
5213 struct sljit_label *save_accept_label = common->accept_label;
5214 jump_list *save_quit = common->quit;
5215 jump_list *save_accept = common->accept;
5216 struct sljit_jump *jump;
5217 struct sljit_jump *brajump = NULL;
5218
/* Remember and skip an optional OP_BRAZERO / OP_BRAMINZERO prefix. */
5219 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5220 {
5221 SLJIT_ASSERT(!conditional);
5222 bra = *cc;
5223 cc++;
5224 }
5225 private_data_ptr = PRIVATE_DATA(cc);
5226 SLJIT_ASSERT(private_data_ptr != 0);
5227 framesize = get_framesize(common, cc, FALSE);
5228 backtrack->framesize = framesize;
5229 backtrack->private_data_ptr = private_data_ptr;
5230 opcode = *cc;
5231 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* `found` collects the jumps taken when an alternative matches. For the
positive assertions they go to the local list `tmp` (bound at "Assert is
successful" below); for the negative ones a match means failure, so they go
straight to `target`. */
5232 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5233 ccbegin = cc;
/* cc now points to the end of the first alternative. */
5234 cc += GET(cc, 1);
5235
5236 if (bra == OP_BRAMINZERO)
5237 {
5238 /* This is a braminzero backtrack path. */
/* Pop the saved STR_PTR; a 0 value skips the assertion through brajump. */
5239 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5240 free_stack(common, 1);
5241 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5242 }
5243
5244 if (framesize < 0)
5245 {
/* No frame is needed: remember STACK_TOP in the private data slot and push
STR_PTR. */
5246 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5247 allocate_stack(common, 1);
5248 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5249 }
5250 else
5251 {
/* A frame is needed: STACK(0) holds STR_PTR, STACK(1) the previous value of
the private data slot, and init_frame() fills the remaining framesize
slots. The private data slot is pointed into the new area. */
5252 allocate_stack(common, framesize + 2);
5253 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5254 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5255 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5256 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5257 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5258 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5259 }
5260
/* The assertion gets its own quit list; the accept list is reset for every
alternative inside the loop. */
5261 memset(&altbacktrack, 0, sizeof(backtrack_common));
5262 common->quit_label = NULL;
5263 common->quit = NULL;
/* One iteration per alternative. */
5264 while (1)
5265 {
5266 common->accept_label = NULL;
5267 common->accept = NULL;
5268 altbacktrack.top = NULL;
5269 altbacktrack.topbacktracks = NULL;
5270
/* Second and later alternatives restart from the saved STR_PTR. */
5271 if (*ccbegin == OP_ALT)
5272 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5273
5274 altbacktrack.cc = ccbegin;
5275 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5276 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5277 {
5278 common->quit_label = save_quit_label;
5279 common->accept_label = save_accept_label;
5280 common->quit = save_quit;
5281 common->accept = save_accept;
5282 return NULL;
5283 }
/* The alternative matched: accept jumps collected while compiling it land
here. */
5284 common->accept_label = LABEL();
5285 if (common->accept != NULL)
5286 set_jumps(common->accept, common->accept_label);
5287
5288 /* Reset stack. */
5289 if (framesize < 0)
5290 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5291 else {
5292 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5293 {
5294 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5295 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5296 }
5297 else
5298 {
/* Non-conditional negative assertion: the frame is undone through the
shared revertframes routine. */
5299 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5300 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5301 }
5302 }
5303
5304 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5305 {
5306 /* We know that STR_PTR was stored on the top of the stack. */
5307 if (conditional)
5308 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5309 else if (bra == OP_BRAZERO)
5310 {
5311 if (framesize < 0)
5312 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5313 else
5314 {
5315 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5316 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5317 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5318 }
/* Leave a 0 in the top stack slot. */
5319 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5320 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5321 }
5322 else if (framesize >= 0)
5323 {
5324 /* For OP_BRA and OP_BRAMINZERO. */
5325 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5326 }
5327 }
/* Leave through the "alternative matched" list chosen above. */
5328 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5329
/* Backtracking code of this alternative; its failure jumps continue right
after it, i.e. at the next alternative or at the code following the loop. */
5330 compile_backtrackingpath(common, altbacktrack.top);
5331 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5332 {
5333 common->quit_label = save_quit_label;
5334 common->accept_label = save_accept_label;
5335 common->quit = save_quit;
5336 common->accept = save_accept;
5337 return NULL;
5338 }
5339 set_jumps(altbacktrack.topbacktracks, LABEL());
5340
5341 if (*cc != OP_ALT)
5342 break;
5343
/* Advance to the next alternative. */
5344 ccbegin = cc;
5345 cc += GET(cc, 1);
5346 }
5347 /* None of them matched. */
5348 if (common->quit != NULL)
5349 set_jumps(common->quit, LABEL());
5350
5351 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5352 {
5353 /* Assert is failed. */
5354 if (conditional || bra == OP_BRAZERO)
5355 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5356
5357 if (framesize < 0)
5358 {
5359 /* The topmost item should be 0. */
5360 if (bra == OP_BRAZERO)
5361 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5362 else
5363 free_stack(common, 1);
5364 }
5365 else
5366 {
/* TMP1 = previous value of the private data slot, restored below. */
5367 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5368 /* The topmost item should be 0. */
5369 if (bra == OP_BRAZERO)
5370 {
5371 free_stack(common, framesize + 1);
5372 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5373 }
5374 else
5375 free_stack(common, framesize + 2);
5376 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5377 }
/* A failed assertion goes to `target`, except for OP_BRAZERO where the jump
is bound to the matching path label further below. */
5378 jump = JUMP(SLJIT_JUMP);
5379 if (bra != OP_BRAZERO)
5380 add_jump(compiler, target, jump);
5381
5382 /* Assert is successful. */
5383 set_jumps(tmp, LABEL());
5384 if (framesize < 0)
5385 {
5386 /* We know that STR_PTR was stored on the top of the stack. */
5387 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5388 /* Keep the STR_PTR on the top of the stack. */
5389 if (bra == OP_BRAZERO)
5390 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5391 else if (bra == OP_BRAMINZERO)
5392 {
5393 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5394 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5395 }
5396 }
5397 else
5398 {
5399 if (bra == OP_BRA)
5400 {
5401 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5402 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5403 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5404 }
5405 else
5406 {
5407 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5408 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5409 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* Top slot: STR_PTR for OP_BRAZERO, 0 for OP_BRAMINZERO. */
5410 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5411 }
5412 }
5413
5414 if (bra == OP_BRAZERO)
5415 {
/* Success and failure both continue at the matching path. */
5416 backtrack->matchingpath = LABEL();
5417 SET_LABEL(jump, backtrack->matchingpath);
5418 }
5419 else if (bra == OP_BRAMINZERO)
5420 {
/* backtrack->matchingpath is not assigned in this function for
OP_BRAMINZERO; presumably the caller sets it beforehand. */
5421 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5422 JUMPHERE(brajump);
5423 if (framesize >= 0)
5424 {
5425 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5426 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5427 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5428 }
5429 set_jumps(backtrack->common.topbacktracks, LABEL());
5430 }
5431 }
5432 else
5433 {
5434 /* AssertNot is successful. */
5435 if (framesize < 0)
5436 {
5437 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5438 if (bra != OP_BRA)
5439 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5440 else
5441 free_stack(common, 1);
5442 }
5443 else
5444 {
5445 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5446 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5447 /* The topmost item should be 0. */
5448 if (bra != OP_BRA)
5449 {
5450 free_stack(common, framesize + 1);
5451 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5452 }
5453 else
5454 free_stack(common, framesize + 2);
5455 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5456 }
5457
5458 if (bra == OP_BRAZERO)
5459 backtrack->matchingpath = LABEL();
5460 else if (bra == OP_BRAMINZERO)
5461 {
5462 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5463 JUMPHERE(brajump);
5464 }
5465
5466 if (bra != OP_BRA)
5467 {
/* For the BRAZERO / BRAMINZERO forms the collected failure jumps are bound
right here and the list is emptied. */
5468 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5469 set_jumps(backtrack->common.topbacktracks, LABEL());
5470 backtrack->common.topbacktracks = NULL;
5471 }
5472 }
5473
/* Restore the quit / accept state saved on entry. */
5474 common->quit_label = save_quit_label;
5475 common->accept_label = save_accept_label;
5476 common->quit = save_quit;
5477 common->accept = save_accept;
5478 return cc + 1 + LINK_SIZE;
5479 }
5480
5481 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5482 {
5483 int condition = FALSE;
5484 pcre_uchar *slotA = name_table;
5485 pcre_uchar *slotB;
5486 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5487 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5488 sljit_sw no_capture;
5489 int i;
5490
5491 locals += refno & 0xff;
5492 refno >>= 8;
5493 no_capture = locals[1];
5494
5495 for (i = 0; i < name_count; i++)
5496 {
5497 if (GET2(slotA, 0) == refno) break;
5498 slotA += name_entry_size;
5499 }
5500
5501 if (i < name_count)
5502 {
5503 /* Found a name for the number - there can be only one; duplicate names
5504 for different numbers are allowed, but not vice versa. First scan down
5505 for duplicates. */
5506
5507 slotB = slotA;
5508 while (slotB > name_table)
5509 {
5510 slotB -= name_entry_size;
5511 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5512 {
5513 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5514 if (condition) break;
5515 }
5516 else break;
5517 }
5518
5519 /* Scan up for duplicates */
5520 if (!condition)
5521 {
5522 slotB = slotA;
5523 for (i++; i < name_count; i++)
5524 {
5525 slotB += name_entry_size;
5526 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5527 {
5528 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5529 if (condition) break;
5530 }
5531 else break;
5532 }
5533 }
5534 }
5535 return condition;
5536 }
5537
5538 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5539 {
5540 int condition = FALSE;
5541 pcre_uchar *slotA = name_table;
5542 pcre_uchar *slotB;
5543 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5544 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5545 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5546 sljit_uw i;
5547
5548 for (i = 0; i < name_count; i++)
5549 {
5550 if (GET2(slotA, 0) == recno) break;
5551 slotA += name_entry_size;
5552 }
5553
5554 if (i < name_count)
5555 {
5556 /* Found a name for the number - there can be only one; duplicate
5557 names for different numbers are allowed, but not vice versa. First
5558 scan down for duplicates. */
5559
5560 slotB = slotA;
5561 while (slotB > name_table)
5562 {
5563 slotB -= name_entry_size;
5564 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5565 {
5566 condition = GET2(slotB, 0) == group_num;
5567 if (condition) break;
5568 }
5569 else break;
5570 }
5571
5572 /* Scan up for duplicates */
5573 if (!condition)
5574 {
5575 slotB = slotA;
5576 for (i++; i < name_count; i++)
5577 {
5578 slotB += name_entry_size;
5579 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5580 {
5581 condition = GET2(slotB, 0) == group_num;
5582 if (condition) break;
5583 }
5584 else break;
5585 }
5586 }
5587 }
5588 return condition;
5589 }
5590
5591 /*
5592 Handling bracketed expressions is probably the most complex part.
5593
5594 Stack layout naming characters:
5595 S - Push the current STR_PTR
5596 0 - Push a 0 (NULL)
5597 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5598 before the next alternative. Not pushed if there are no alternatives.
5599 M - Any values pushed by the current alternative. Can be empty, or anything.
5600 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5601 L - Push the previous local (pointed by localptr) to the stack
5602 () - optional values stored on the stack
5603 ()* - optional, can be stored multiple times
5604
5605 The following list shows the regular expression templates, their PCRE byte codes
5606 and stack layout supported by pcre-sljit.
5607
5608 (?:) OP_BRA | OP_KET A M
5609 () OP_CBRA | OP_KET C M
5610 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5611 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5612 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5613 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5614 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5615 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5616 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5617 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5618 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5619 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5620 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5621 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5622 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5623 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5624 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5625 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5626 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5627 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5628 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5629 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5630
5631
5632 Stack layout naming characters:
5633 A - Push the alternative index (starting from 0) on the stack.
5634 Not pushed if there are no alternatives.
5635 M - Any values pushed by the current alternative. Can be empty, or anything.
5636
5637 The next list shows the possible content of a bracket:
5638 (|) OP_*BRA | OP_ALT ... M A
5639 (?()|) OP_*COND | OP_ALT M A
5640 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5641 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5642 Or nothing, if trace is unnecessary
5643 */
5644
5645 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5646 {
5647 DEFINE_COMPILER;
5648 backtrack_common *backtrack;
5649 pcre_uchar opcode;
5650 int private_data_ptr = 0;
5651 int offset = 0;
5652 int stacksize;
5653 pcre_uchar *ccbegin;
5654 pcre_uchar *matchingpath;
5655 pcre_uchar bra = OP_BRA;
5656 pcre_uchar ket;
5657 assert_backtrack *assert;
5658 BOOL has_alternatives;
5659 struct sljit_jump *jump;
5660 struct sljit_jump *skip;
5661 struct sljit_label *rmaxlabel = NULL;
5662 struct sljit_jump *braminzerojump = NULL;
5663
5664 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5665
5666 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5667 {
5668 bra = *cc;
5669 cc++;
5670 opcode = *cc;
5671 }
5672
5673 opcode = *cc;
5674 ccbegin = cc;
5675 matchingpath = ccbegin + 1 + LINK_SIZE;
5676
5677 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5678 {
5679 /* Drop this bracket_backtrack. */
5680 parent->top = backtrack->prev;
5681 return bracketend(cc);
5682 }
5683
5684 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5685 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5686 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5687 cc += GET(cc, 1);
5688
5689 has_alternatives = *cc == OP_ALT;
5690 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5691 {
5692 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5693 if (*matchingpath == OP_NRREF)
5694 {
5695 stacksize = GET2(matchingpath, 1);
5696 if (common->currententry == NULL || stacksize == RREF_ANY)
5697 has_alternatives = FALSE;
5698 else if (common->currententry->start == 0)
5699 has_alternatives = stacksize != 0;
5700 else
5701 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5702 }
5703 }
5704
5705 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5706 opcode = OP_SCOND;
5707 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5708 opcode = OP_ONCE;
5709
5710 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5711 {
5712 /* Capturing brackets has a pre-allocated space. */
5713 offset = GET2(ccbegin, 1 + LINK_SIZE);
5714 if (common->optimized_cbracket[offset] == 0)
5715 {
5716 private_data_ptr = OVECTOR_PRIV(offset);
5717 offset <<= 1;
5718 }
5719 else
5720 {
5721 offset <<= 1;
5722 private_data_ptr = OVECTOR(offset);
5723 }
5724 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5725 matchingpath += IMM2_SIZE;
5726 }
5727 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5728 {
5729 /* Other brackets simply allocate the next entry. */
5730 private_data_ptr = PRIVATE_DATA(ccbegin);
5731 SLJIT_ASSERT(private_data_ptr != 0);
5732 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5733 if (opcode == OP_ONCE)
5734 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5735 }
5736
5737 /* Instructions before the first alternative. */
5738 stacksize = 0;
5739 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5740 stacksize++;
5741 if (bra == OP_BRAZERO)
5742 stacksize++;
5743
5744 if (stacksize > 0)
5745 allocate_stack(common, stacksize);
5746
5747 stacksize = 0;
5748 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5749 {
5750 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5751 stacksize++;
5752 }
5753
5754 if (bra == OP_BRAZERO)
5755 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5756
5757 if (bra == OP_BRAMINZERO)
5758 {
5759 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5760 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5761 if (ket != OP_KETRMIN)
5762 {
5763 free_stack(common, 1);
5764 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5765 }
5766 else
5767 {
5768 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5769 {
5770 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5771 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5772 /* Nothing stored during the first run. */
5773 skip = JUMP(SLJIT_JUMP);
5774 JUMPHERE(jump);
5775 /* Checking zero-length iteration. */
5776 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5777 {
5778 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5779 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5780 }
5781 else
5782 {
5783 /* Except when the whole stack frame must be saved. */
5784 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5785 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
5786 }
5787 JUMPHERE(skip);
5788 }
5789 else
5790 {
5791 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5792 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5793 JUMPHERE(jump);
5794 }
5795 }
5796 }
5797
5798 if (ket == OP_KETRMIN)
5799 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5800
5801 if (ket == OP_KETRMAX)
5802 {
5803 rmaxlabel = LABEL();
5804 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5805 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5806 }
5807
5808 /* Handling capturing brackets and alternatives. */
5809 if (opcode == OP_ONCE)
5810 {
5811 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5812 {
5813 /* Neither capturing brackets nor recursions are not found in the block. */
5814 if (ket == OP_KETRMIN)
5815 {
5816 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5817 allocate_stack(common, 2);
5818 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5819 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5820 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5821 }
5822 else if (ket == OP_KETRMAX || has_alternatives)
5823 {
5824 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5825 allocate_stack(common, 1);
5826 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5827 }
5828 else
5829 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5830 }
5831 else
5832 {
5833 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5834 {
5835 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5836 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5837 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5838 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5839 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5840 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5841 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5842 }
5843 else
5844 {
5845 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5846 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5847 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5848 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5849 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5850 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5851 }
5852 }
5853 }
5854 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5855 {
5856 /* Saving the previous values. */
5857 if (common->optimized_cbracket[offset >> 1] != 0)
5858 {
5859 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5860 allocate_stack(common, 2);
5861 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5862 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
5863 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5864 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5865 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5866 }
5867 else
5868 {
5869 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5870 allocate_stack(common, 1);
5871 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5872 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5873 }
5874 }
5875 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5876 {
5877 /* Saving the previous value. */
5878 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5879 allocate_stack(common, 1);
5880 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5881 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5882 }
5883 else if (has_alternatives)
5884 {
5885 /* Pushing the starting string pointer. */
5886 allocate_stack(common, 1);
5887 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5888 }
5889
5890 /* Generating code for the first alternative. */
5891 if (opcode == OP_COND || opcode == OP_SCOND)
5892 {
5893 if (*matchingpath == OP_CREF)
5894 {
5895 SLJIT_ASSERT(has_alternatives);
5896 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5897 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5898 matchingpath += 1 + IMM2_SIZE;
5899 }
5900 else if (*matchingpath == OP_NCREF)
5901 {
5902 SLJIT_ASSERT(has_alternatives);
5903 stacksize = GET2(matchingpath, 1);
5904 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5905
5906 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5907 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5908 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5909 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
5910 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5911 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
5912 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5913 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5914 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
5915
5916 JUMPHERE(jump);
5917 matchingpath += 1 + IMM2_SIZE;
5918 }
5919 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
5920 {
5921 /* Never has other case. */
5922 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5923
5924 stacksize = GET2(matchingpath, 1);
5925 if (common->currententry == NULL)
5926 stacksize = 0;
5927 else if (stacksize == RREF_ANY)
5928 stacksize = 1;
5929 else if (common->currententry->start == 0)
5930 stacksize = stacksize == 0;
5931 else
5932 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5933
5934 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
5935 {
5936 SLJIT_ASSERT(!has_alternatives);
5937 if (stacksize != 0)
5938 matchingpath += 1 + IMM2_SIZE;
5939 else
5940 {
5941 if (*cc == OP_ALT)
5942 {
5943 matchingpath = cc + 1 + LINK_SIZE;
5944 cc += GET(cc, 1);
5945 }
5946 else
5947 matchingpath = cc;
5948 }
5949 }
5950 else
5951 {
5952 SLJIT_ASSERT(has_alternatives);
5953
5954 stacksize = GET2(matchingpath, 1);
5955 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5956 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5957 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5958 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5959 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
5960 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5961 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
5962 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5963 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5964 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
5965 matchingpath += 1 + IMM2_SIZE;
5966 }
5967 }
5968 else
5969 {
5970 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
5971 /* Similar code as PUSH_BACKTRACK macro. */
5972 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5973 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5974 return NULL;
5975 memset(assert, 0, sizeof(assert_backtrack));
5976 assert->common.cc = matchingpath;
5977 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5978 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
5979 }
5980 }
5981
5982 compile_matchingpath(common, matchingpath, cc, backtrack);
5983 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5984 return NULL;
5985
5986 if (opcode == OP_ONCE)
5987 {
5988 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5989 {
5990 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5991 /* TMP2, which is set here, is used by OP_KETRMAX below. */
5992 if (ket == OP_KETRMAX)
5993 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5994 else if (ket == OP_KETRMIN)
5995 {
5996 /* Move the STR_PTR to the private_data_ptr. */
5997 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5998 }
5999 }
6000 else
6001 {
6002 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
6003 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw));
6004 if (ket == OP_KETRMAX)
6005 {
6006 /* TMP2, which is set here, is used by OP_KETRMAX below. */
6007 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6008 }
6009 }
6010 }
6011
6012 stacksize = 0;
6013 if (ket != OP_KET || bra != OP_BRA)
6014 stacksize++;
6015 if (offset != 0)
6016 {
6017 if (common->capture_last_ptr != 0)
6018 stacksize++;
6019 if (common->optimized_cbracket[offset >> 1] == 0)
6020 stacksize += 2;
6021 }
6022 if (has_alternatives && opcode != OP_ONCE)
6023 stacksize++;
6024
6025 if (stacksize > 0)
6026 allocate_stack(common, stacksize);
6027
6028 stacksize = 0;
6029 if (ket != OP_KET || bra != OP_BRA)
6030 {
6031 if (ket != OP_KET)
6032 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6033 else
6034 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6035 stacksize++;
6036 }
6037
6038 if (offset != 0)
6039 {
6040 if (common->capture_last_ptr != 0)
6041 {
6042 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6043 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6044 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0);
6045 stacksize++;
6046 }
6047 if (common->optimized_cbracket[offset >> 1] == 0)
6048 {
6049 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6050 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6051 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6052 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6053 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6054 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6055 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6056 stacksize += 2;
6057 }
6058 }
6059
6060 if (has_alternatives)
6061 {
6062 if (opcode != OP_ONCE)
6063 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6064 if (ket != OP_KETRMAX)
6065 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6066 }
6067
6068 /* Must be after the matchingpath label. */
6069 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6070 {
6071 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6072 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6073 }
6074
6075 if (ket == OP_KETRMAX)
6076 {
6077 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6078 {
6079 if (has_alternatives)
6080 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6081 /* Checking zero-length iteration. */
6082 if (opcode != OP_ONCE)
6083 {
6084 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6085 /* Drop STR_PTR for greedy plus quantifier. */
6086 if (bra != OP_BRAZERO)
6087 free_stack(common, 1);
6088 }
6089 else
6090 /* TMP2 must contain the starting STR_PTR. */
6091 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6092 }
6093 else
6094 JUMPTO(SLJIT_JUMP, rmaxlabel);
6095 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6096 }
6097
6098 if (bra == OP_BRAZERO)
6099 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6100
6101 if (bra == OP_BRAMINZERO)
6102 {
6103 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6104 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6105 if (braminzerojump != NULL)
6106 {
6107 JUMPHERE(braminzerojump);
6108 /* We need to release the end pointer to perform the
6109 backtrack for the zero-length iteration. When
6110 framesize is < 0, OP_ONCE will do the release itself. */
6111 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6112 {
6113 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6114 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6115 }
6116 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6117 free_stack(common, 1);
6118 }
6119 /* Continue to the normal backtrack. */
6120 }
6121
6122 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6123 decrease_call_count(common);
6124
6125 /* Skip the other alternatives. */
6126 while (*cc == OP_ALT)
6127 cc += GET(cc, 1);
6128 cc += 1 + LINK_SIZE;
6129 return cc;
6130 }
6131
6132 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6133 {
6134 DEFINE_COMPILER;
6135 backtrack_common *backtrack;
6136 pcre_uchar opcode;
6137 int private_data_ptr;
6138 int cbraprivptr = 0;
6139 int framesize;
6140 int stacksize;
6141 int offset = 0;
6142 BOOL zero = FALSE;
6143 pcre_uchar *ccbegin = NULL;
6144 int stack;
6145 struct sljit_label *loop = NULL;
6146 struct jump_list *emptymatch = NULL;
6147
6148 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6149 if (*cc == OP_BRAPOSZERO)
6150 {
6151 zero = TRUE;
6152 cc++;
6153 }
6154
6155 opcode = *cc;
6156 private_data_ptr = PRIVATE_DATA(cc);
6157 SLJIT_ASSERT(private_data_ptr != 0);
6158 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6159 switch(opcode)
6160 {
6161 case OP_BRAPOS:
6162 case OP_SBRAPOS:
6163 ccbegin = cc + 1 + LINK_SIZE;
6164 break;
6165
6166 case OP_CBRAPOS:
6167 case OP_SCBRAPOS:
6168 offset = GET2(cc, 1 + LINK_SIZE);
6169 /* This case cannot be optimized in the same was as
6170 normal capturing brackets. */
6171 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6172 cbraprivptr = OVECTOR_PRIV(offset);
6173 offset <<= 1;
6174 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6175 break;
6176
6177 default:
6178 SLJIT_ASSERT_STOP();
6179 break;
6180 }
6181
6182 framesize = get_framesize(common, cc, FALSE);
6183 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6184 if (framesize < 0)
6185 {
6186 if (offset != 0)
6187 {
6188 stacksize = 2;
6189 if (common->capture_last_ptr != 0)
6190 stacksize++;
6191 }
6192 else
6193 stacksize = 1;
6194
6195 if (!zero)
6196 stacksize++;
6197
6198 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6199 allocate_stack(common, stacksize);
6200 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6201
6202 if (offset != 0)
6203 {
6204 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6205 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6206 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6207 if (common->capture_last_ptr != 0)
6208 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6209 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6210 if (common->capture_last_ptr != 0)
6211 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6212 }
6213 else
6214 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6215
6216 if (!zero)
6217 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6218 }
6219 else
6220 {
6221 stacksize = framesize + 1;
6222 if (!zero)
6223 stacksize++;
6224 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6225 stacksize++;
6226 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6227
6228 allocate_stack(common, stacksize);
6229 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6230 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6231 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6232
6233 stack = 0;
6234 if (!zero)
6235 {
6236 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6237 stack++;
6238 }
6239 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6240 {
6241 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6242 stack++;
6243 }
6244 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6245 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6246 }
6247
6248 if (offset != 0)
6249 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6250
6251 loop = LABEL();
6252 while (*cc != OP_KETRPOS)
6253 {
6254 backtrack->top = NULL;
6255 backtrack->topbacktracks = NULL;
6256 cc += GET(cc, 1);
6257
6258 compile_matchingpath(common, ccbegin, cc, backtrack);
6259 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6260 return NULL;
6261
6262 if (framesize < 0)
6263 {
6264 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6265
6266 if (offset != 0)
6267 {
6268 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6269 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6270 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6271 if (common->capture_last_ptr != 0)
6272 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6273 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6274 }
6275 else
6276 {
6277 if (opcode == OP_SBRAPOS)
6278 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6279 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6280 }
6281
6282 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6283 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6284
6285 if (!zero)
6286 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6287 }
6288 else
6289 {
6290 if (offset != 0)
6291 {
6292 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6293 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6294 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6295 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6296 if (common->capture_last_ptr != 0)
6297 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6298 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6299 }
6300 else
6301 {
6302 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6303 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6304 if (opcode == OP_SBRAPOS)
6305 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6306 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6307 }
6308
6309 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6310 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6311
6312 if (!zero)
6313 {
6314 if (framesize < 0)
6315 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6316 else
6317 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6318 }
6319 }
6320 JUMPTO(SLJIT_JUMP, loop);
6321 flush_stubs(common);
6322
6323 compile_backtrackingpath(common, backtrack->top);
6324 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6325 return NULL;
6326 set_jumps(backtrack->topbacktracks, LABEL());
6327
6328 if (framesize < 0)
6329 {
6330 if (offset != 0)
6331 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6332 else
6333 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6334 }
6335 else
6336 {
6337 if (offset != 0)
6338 {
6339 /* Last alternative. */
6340 if (*cc == OP_KETRPOS)
6341 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6342 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6343 }
6344 else
6345 {
6346 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6347 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6348 }
6349 }
6350
6351 if (*cc == OP_KETRPOS)
6352 break;
6353 ccbegin = cc + 1 + LINK_SIZE;
6354 }
6355
6356 backtrack->topbacktracks = NULL;
6357 if (!zero)
6358 {
6359 if (framesize < 0)
6360 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6361 else /* TMP2 is set to [private_data_ptr] above. */
6362 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
6363 }
6364
6365 /* None of them matched. */
6366 set_jumps(emptymatch, LABEL());
6367 decrease_call_count(common);
6368 return cc + 1 + LINK_SIZE;
6369 }
6370
6371 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6372 {
6373 int class_len;
6374
6375 *opcode = *cc;
6376 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6377 {
6378 cc++;
6379 *type = OP_CHAR;
6380 }
6381 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6382 {
6383 cc++;
6384 *type = OP_CHARI;
6385 *opcode -= OP_STARI - OP_STAR;
6386 }
6387 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6388 {
6389 cc++;
6390 *type = OP_NOT;
6391 *opcode -= OP_NOTSTAR - OP_STAR;
6392 }
6393 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6394 {
6395 cc++;
6396 *type = OP_NOTI;
6397 *opcode -= OP_NOTSTARI - OP_STAR;
6398 }
6399 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6400 {
6401 cc++;
6402 *opcode -= OP_TYPESTAR - OP_STAR;
6403 *type = 0;
6404 }
6405 else
6406 {
6407 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6408 *type = *opcode;
6409 cc++;
6410 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6411 *opcode = cc[class_len - 1];
6412 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6413 {
6414 *opcode -= OP_CRSTAR - OP_STAR;
6415 if (end != NULL)
6416 *end = cc + class_len;
6417 }
6418 else
6419 {
6420 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6421 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6422 *arg2 = GET2(cc, class_len);
6423
6424 if (*arg2 == 0)
6425 {
6426 SLJIT_ASSERT(*arg1 != 0);
6427 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6428 }
6429 if (*arg1 == *arg2)
6430 *opcode = OP_EXACT;
6431
6432 if (end != NULL)
6433 *end = cc + class_len + 2 * IMM2_SIZE;
6434 }
6435 return cc;
6436 }
6437
6438 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6439 {
6440 *arg1 = GET2(cc, 0);
6441 cc += IMM2_SIZE;
6442 }
6443
6444 if (*type == 0)
6445 {
6446 *type = *cc;
6447 if (end != NULL)
6448 *end = next_opcode(common, cc);
6449 cc++;
6450 return cc;
6451 }
6452
6453 if (end != NULL)
6454 {
6455 *end = cc + 1;
6456 #ifdef SUPPORT_UTF
6457 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6458 #endif
6459 }
6460 return cc;
6461 }
6462
6463 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6464 {
6465 DEFINE_COMPILER;
6466 backtrack_common *backtrack;
6467 pcre_uchar opcode;
6468 pcre_uchar type;
6469 int arg1 = -1, arg2 = -1;
6470 pcre_uchar* end;
6471 jump_list *nomatch = NULL;
6472 struct sljit_jump *jump = NULL;
6473 struct sljit_label *label;
6474 int private_data_ptr = PRIVATE_DATA(cc);
6475 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6476 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6477 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
6478 int tmp_base, tmp_offset;
6479
6480 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6481
6482 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6483
6484 switch(type)
6485 {
6486 case OP_NOT_DIGIT:
6487 case OP_DIGIT:
6488 case OP_NOT_WHITESPACE:
6489 case OP_WHITESPACE:
6490 case OP_NOT_WORDCHAR:
6491 case OP_WORDCHAR:
6492 case OP_ANY:
6493 case OP_ALLANY:
6494 case OP_ANYBYTE:
6495 case OP_ANYNL:
6496 case OP_NOT_HSPACE:
6497 case OP_HSPACE:
6498 case OP_NOT_VSPACE:
6499 case OP_VSPACE:
6500 case OP_CHAR:
6501 case OP_CHARI:
6502 case OP_NOT:
6503 case OP_NOTI:
6504 case OP_CLASS:
6505 case OP_NCLASS:
6506 tmp_base = TMP3;
6507 tmp_offset = 0;
6508 break;
6509
6510 default:
6511 SLJIT_ASSERT_STOP();
6512 /* Fall through. */
6513
6514 case OP_EXTUNI:
6515 case OP_XCLASS:
6516 case OP_NOTPROP:
6517 case OP_PROP:
6518 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6519 tmp_offset = POSSESSIVE0;
6520 break;
6521 }
6522
6523 switch(opcode)
6524 {
6525 case OP_STAR:
6526 case OP_PLUS:
6527 case OP_UPTO:
6528 case OP_CRRANGE:
6529 if (type == OP_ANYNL || type == OP_EXTUNI)
6530 {
6531 SLJIT_ASSERT(private_data_ptr == 0);
6532 if (opcode == OP_STAR || opcode == OP_UPTO)
6533 {
6534 allocate_stack(common, 2);
6535 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6536 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6537 }
6538 else
6539 {
6540 allocate_stack(common, 1);
6541 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6542 }
6543
6544 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6545 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6546
6547 label = LABEL();
6548 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6549 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6550 {
6551 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6552 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6553 if (opcode == OP_CRRANGE && arg2 > 0)
6554 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6555 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6556 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6557 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6558 }
6559
6560 /* We cannot use TMP3 because of this allocate_stack. */
6561 allocate_stack(common, 1);
6562 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6563 JUMPTO(SLJIT_JUMP, label);
6564 if (jump != NULL)
6565 JUMPHERE(jump);
6566 }
6567 else
6568 {
6569 if (opcode == OP_PLUS)
6570 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6571 if (private_data_ptr == 0)
6572 allocate_stack(common, 2);
6573 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6574 if (opcode <= OP_PLUS)
6575 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6576 else
6577 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6578 label = LABEL();
6579 compile_char1_matchingpath(common, type, cc, &nomatch);
6580 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6581 if (opcode <= OP_PLUS)
6582 JUMPTO(SLJIT_JUMP, label);
6583 else if (opcode == OP_CRRANGE && arg1 == 0)
6584 {
6585 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6586 JUMPTO(SLJIT_JUMP, label);
6587 }
6588 else
6589 {
6590 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6591 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6592 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6593 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6594 }
6595 set_jumps(nomatch, LABEL());
6596 if (opcode == OP_CRRANGE)
6597 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6598 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6599 }
6600 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6601 break;
6602
6603 case OP_MINSTAR:
6604 case OP_MINPLUS:
6605 if (opcode == OP_MINPLUS)
6606 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6607 if (private_data_ptr == 0)
6608 allocate_stack(common, 1);
6609 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6610 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6611 break;
6612
6613 case OP_MINUPTO:
6614 case OP_CRMINRANGE:
6615 if (private_data_ptr == 0)
6616 allocate_stack(common, 2);
6617 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6618 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6619 if (opcode == OP_CRMINRANGE)
6620 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6621 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6622 break;
6623
6624 case OP_QUERY:
6625 case OP_MINQUERY:
6626 if (private_data_ptr == 0)
6627 allocate_stack(common, 1);
6628 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6629 if (opcode == OP_QUERY)
6630 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6631 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6632 break;
6633
6634 case OP_EXACT:
6635 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6636 label = LABEL();
6637 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6638 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6639 JUMPTO(SLJIT_C_NOT_ZERO, label);
6640 break;
6641
6642 case OP_POSSTAR:
6643 case OP_POSPLUS:
6644 case OP_POSUPTO:
6645 if (opcode == OP_POSPLUS)
6646 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6647 if (opcode == OP_POSUPTO)
6648 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6649 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6650 label = LABEL();
6651 compile_char1_matchingpath(common, type, cc, &nomatch);
6652 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6653 if (opcode != OP_POSUPTO)
6654 JUMPTO(SLJIT_JUMP, label);
6655 else
6656 {
6657 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6658 JUMPTO(SLJIT_C_NOT_ZERO, label);
6659 }
6660 set_jumps(nomatch, LABEL());
6661 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6662 break;
6663
6664 case OP_POSQUERY:
6665 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6666 compile_char1_matchingpath(common, type, cc, &nomatch);
6667 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6668 set_jumps(nomatch, LABEL());
6669 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6670 break;
6671
6672 default:
6673 SLJIT_ASSERT_STOP();
6674 break;
6675 }
6676
6677 decrease_call_count(common);
6678 return end;
6679 }
6680
6681 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6682 {
6683 DEFINE_COMPILER;
6684 backtrack_common *backtrack;
6685
6686 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6687
6688 if (*cc == OP_FAIL)
6689 {
6690 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6691 return cc + 1;
6692 }
6693
6694 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6695 {
6696 /* No need to check notempty conditions. */
6697 if (common->accept_label == NULL)
6698 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6699 else
6700 JUMPTO(SLJIT_JUMP, common->accept_label);
6701 return cc + 1;
6702 }
6703
6704 if (common->accept_label == NULL)
6705 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6706 else
6707 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->accept_label);
6708 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6709 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6710 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6711 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6712 if (common->accept_label == NULL)
6713 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6714 else
6715 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
6716 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6717 if (common->accept_label == NULL)
6718 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6719 else
6720 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
6721 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6722 return cc + 1;
6723 }
6724
6725 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
6726 {
6727 DEFINE_COMPILER;
6728 int offset = GET2(cc, 1);
6729 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
6730
6731 /* Data will be discarded anyway... */
6732 if (common->currententry != NULL)
6733 return cc + 1 + IMM2_SIZE;
6734
6735 if (!optimized_cbracket)
6736 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6737 offset <<= 1;
6738 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6739 if (!optimized_cbracket)
6740 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6741 return cc + 1 + IMM2_SIZE;
6742 }
6743
6744 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6745 {
6746