/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1245 - (show annotations)
Sat Feb 9 11:30:51 2013 UTC (6 years, 7 months ago) by zherczeg
File MIME type: text/plain
File size: 273728 byte(s)
Adding experimental support for callouts in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
DESTROY_REGISTERS is only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
80
81 /*
82 Short summary about the backtracking mechanism employed by the jit code generator:
83
84 The code generator follows the recursive nature of the PERL compatible regular
85 expressions. The basic blocks of regular expressions are condition checkers
86 which execute different commands depending on the result of the condition check.
87 The relationship between the operators can be horizontal (concatenation) and
88 vertical (sub-expression) (See struct backtrack_common for more details).
89
90 'ab' - 'a' and 'b' regexps are concatenated
91 'a+' - 'a' is the sub-expression of the '+' operator
92
93 The condition checkers are boolean (true/false) checkers. Machine code is generated
94 for the checker itself and for the actions depending on the result of the checker.
95 The 'true' case is called the matching path (expected path), and the other is called
96 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
97 branches on the matching path.
98
99 Greedy star operator (*) :
100 Matching path: match happens.
101 Backtrack path: match failed.
102 Non-greedy star operator (*?) :
103 Matching path: no need to perform a match.
104 Backtrack path: match is required.
105
106 The following example shows the code generated for a capturing bracket
107 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
108 we have the following regular expression:
109
110 A(B|C)D
111
112 The generated code will be the following:
113
114 A matching path
115 '(' matching path (pushing arguments to the stack)
116 B matching path
117 ')' matching path (pushing arguments to the stack)
118 D matching path
119 return with successful match
120
121 D backtrack path
122 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
123 B backtrack path
124 C expected path
125 jump to D matching path
126 C backtrack path
127 A backtrack path
128
129 Notice that the order of the backtrack code paths is the opposite of the fast
130 code paths. In this way the topmost value on the stack always belongs
131 to the current backtrack code path. The backtrack path must check
132 whether there is a next alternative. If so, it needs to jump back to
133 the matching path eventually. Otherwise it needs to clear out its own stack
134 frame and continue the execution on the backtrack code paths.
135 */
136
137 /*
138 Saved stack frames:
139
140 Atomic blocks and asserts require reloading the values of private data
141 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
142 are not necessarily known at compile time, thus we need a dynamic restore
143 mechanism.
144
145 The stack frames are stored in a chain list, and have the following format:
146 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
147
148 Thus we can restore the private data to a particular point in the stack.
149 */
150
/* Argument block passed to the generated code: jit_function (declared
below) takes a pointer to this structure as its only parameter. */
typedef struct jit_arguments {
  /* Pointers first. */
  struct sljit_stack *stack;
  /* NOTE(review): str / begin / end presumably describe the subject string
  (start position and bounds) - confirm against the code that fills this in. */
  const pcre_uchar *str;
  const pcre_uchar *begin;
  const pcre_uchar *end;
  int *offsets;
  pcre_uchar *uchar_ptr;
  pcre_uchar *mark_ptr;
  void *callout_data;
  /* Everything else after. */
  int offset_count;
  int call_limit;
  /* One byte per option flag. */
  pcre_uint8 notbol;
  pcre_uint8 noteol;
  pcre_uint8 notempty;
  pcre_uint8 notempty_atstart;
} jit_arguments;
169
/* Holds the generated code. Both arrays have one slot per JIT compile mode
(JIT_NUMBER_OF_COMPILE_MODES entries each). */
typedef struct executable_functions {
  /* Entry point of the generated code for each compile mode. */
  void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
  /* NOTE(review): callback / userdata are presumably the user supplied JIT
  callback and its argument - confirm where they are assigned. */
  PUBL(jit_callback) callback;
  void *userdata;
  pcre_uint32 top_bracket;
  /* Size of the generated code for each compile mode. */
  sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
} executable_functions;
177
/* Singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  /* Next list element. */
  struct jump_list *next;
} jump_list;
182
/* Kinds of stubs; stack_alloc is the only one defined. */
enum stub_types { stack_alloc };

/* Singly linked list of stubs. Each entry records its type, an integer
payload, the jump that starts the stub and the label to continue at. */
typedef struct stub_list {
  enum stub_types type;
  int data;
  struct sljit_jump *start;
  struct sljit_label *quit;
  /* Next list element. */
  struct stub_list *next;
} stub_list;
192
/* Function pointer type taking a jit_arguments block and returning an int;
it uses the SLJIT_CALL calling convention. */
typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
194
/* The following structure is the key data type for the recursive
code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants (the *_backtrack structures below all start with it). */
typedef struct backtrack_common {
  /* Concatenation stack. */
  struct backtrack_common *prev;
  jump_list *nextbacktracks;
  /* Internal stack (for component operators). */
  struct backtrack_common *top;
  jump_list *topbacktracks;
  /* Opcode pointer. */
  pcre_uchar *cc;
} backtrack_common;
209
/* Backtrack data for assertions. */
typedef struct assert_backtrack {
  /* Must stay the first member. */
  backtrack_common common;
  jump_list *condfailed;
  /* Less than 0 (-1) if a frame is not needed. */
  int framesize;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* For iterators. */
  struct sljit_label *matchingpath;
} assert_backtrack;
220
/* Backtrack data for bracket groups. */
typedef struct bracket_backtrack {
  /* Must stay the first member. */
  backtrack_common common;
  /* Where to continue if an alternative is successfully matched. */
  struct sljit_label *alternative_matchingpath;
  /* For rmin and rmax iterators. */
  struct sljit_label *recursive_matchingpath;
  /* For greedy ? operator. */
  struct sljit_label *zero_matchingpath;
  /* Contains the branches of a failed condition. Only one member of the
  union is meaningful at a time. */
  union {
    /* Both for OP_COND, OP_SCOND. */
    jump_list *condfailed;
    assert_backtrack *assert;
    /* For OP_ONCE. -1 if not needed. */
    int framesize;
  } u;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
} bracket_backtrack;
240
/* Backtrack data for possessive bracket groups. */
typedef struct bracketpos_backtrack {
  /* Must stay the first member. */
  backtrack_common common;
  /* Points to our private memory word on the stack. */
  int private_data_ptr;
  /* Reverting stack is needed. */
  int framesize;
  /* Allocated stack size. */
  int stacksize;
} bracketpos_backtrack;
250
/* Backtrack data that adds a matching path label to the common part.
NOTE(review): by its name this is used for OP_BRAMINZERO - confirm. */
typedef struct braminzero_backtrack {
  /* Must stay the first member. */
  backtrack_common common;
  struct sljit_label *matchingpath;
} braminzero_backtrack;
255
/* Backtrack data for iterators. */
typedef struct iterator_backtrack {
  /* Must stay the first member. */
  backtrack_common common;
  /* Next iteration. */
  struct sljit_label *matchingpath;
} iterator_backtrack;
261
/* Element of a singly linked list describing one recursion target. */
typedef struct recurse_entry {
  /* Next list element. */
  struct recurse_entry *next;
  /* Contains the function entry. */
  struct sljit_label *entry;
  /* Collects the calls until the function is created. */
  jump_list *calls;
  /* Points to the starting opcode. */
  int start;
} recurse_entry;
271
/* Backtrack data for recursion; it carries nothing beyond the common part. */
typedef struct recurse_backtrack {
  backtrack_common common;
} recurse_backtrack;
275
/* Used below to size compiler_common.digits (2 + MAX_RANGE_SIZE entries). */
#define MAX_RANGE_SIZE 6

/* State shared by all code generator functions while one pattern is being
compiled. The int members documented as "points to" / "pointer" below hold
offsets: get_private_data_length assigns them from ovector_start. */
typedef struct compiler_common {
  struct sljit_compiler *compiler;
  /* First opcode of the pattern; private_data_ptrs is indexed by the
  distance from this pointer (see PRIVATE_DATA). */
  pcre_uchar *start;

  /* Maps private data offset to each opcode. */
  int *private_data_ptrs;
  /* Tells whether the capturing bracket is optimized. */
  pcre_uint8 *optimized_cbracket;
  /* Starting offset of private data for capturing brackets. */
  int cbraptr;
  /* OVector starting point. Must be divisible by 2. */
  int ovector_start;
  /* Last known position of the requested byte. */
  int req_char_ptr;
  /* Head of the last recursion. */
  int recursive_head_ptr;
  /* First inspected character for partial matching. */
  int start_used_ptr;
  /* Starting pointer for partial soft matches. */
  int hit_start;
  /* End pointer of the first line. */
  int first_line_end;
  /* Points to the marked string. */
  int mark_ptr;
  /* Points to the last matched capture block index. */
  int capture_last_ptr;

  /* Flipped and lower case tables. */
  const pcre_uint8 *fcc;
  sljit_sw lcc;
  /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
  int mode;
  /* Newline control. */
  int nltype;
  int newline;
  int bsr_nltype;
  /* Dollar endonly. */
  int endonly;
  /* Set to TRUE by get_private_data_length when OP_SET_SOM is present. */
  BOOL has_set_som;
  /* Tables. */
  sljit_sw ctypes;
  int digits[2 + MAX_RANGE_SIZE];
  /* Named capturing brackets. */
  sljit_uw name_table;
  sljit_sw name_count;
  sljit_sw name_entry_size;

  /* Labels and jump lists. */
  struct sljit_label *partialmatchlabel;
  struct sljit_label *quit_label;
  struct sljit_label *forced_quit_label;
  struct sljit_label *accept_label;
  stub_list *stubs;
  recurse_entry *entries;
  recurse_entry *currententry;
  jump_list *partialmatch;
  jump_list *quit;
  jump_list *forced_quit;
  jump_list *accept;
  jump_list *calllimit;
  jump_list *stackalloc;
  jump_list *revertframes;
  jump_list *wordboundary;
  jump_list *anynewline;
  jump_list *hspace;
  jump_list *vspace;
  jump_list *casefulcmp;
  jump_list *caselesscmp;
  BOOL jscript_compat;
#ifdef SUPPORT_UTF
  BOOL utf;
#ifdef SUPPORT_UCP
  BOOL use_ucp;
#endif
#ifndef COMPILE_PCRE32
  jump_list *utfreadchar;
#endif
#ifdef COMPILE_PCRE8
  jump_list *utfreadtype8;
#endif
#endif /* SUPPORT_UTF */
#ifdef SUPPORT_UCP
  jump_list *getucd;
#endif
} compiler_common;
363
/* For byte_sequence_compare. */

typedef struct compare_context {
  int length;
  int sourcereg;
  /* The remaining members exist only when SLJIT_UNALIGNED is defined and
  non-zero. */
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  /* c and oc have the same layout: the same storage viewed as an int, as a
  16 bit value or as an array of code units whose element type depends on
  the PCRE width being compiled.
  NOTE(review): presumably c holds the characters and oc their other case
  form - confirm in byte_sequence_compare. */
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
397
/* Tag values for saved stack frames. init_frame writes frame_setstrbegin
to the stack immediately before the saved OVECTOR(0) value. */
enum {
  frame_end = 0,
  frame_setstrbegin = -1,
  frame_setmark = -2
};
403
/* Undefine sljit macros. CMP is redefined further down as a shortcut for
sljit_emit_cmp. */
#undef CMP

/* Used for accessing the elements of the stack. Yields a negative byte
offset: STACK(0) is -sizeof(sljit_sw), STACK(1) is -2 * sizeof(sljit_sw),
and so on. */
#define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Symbolic names for the sljit registers used by this file. */
#define TMP1 SLJIT_SCRATCH_REG1
#define TMP2 SLJIT_SCRATCH_REG3
#define TMP3 SLJIT_TEMPORARY_EREG2
#define STR_PTR SLJIT_SAVED_REG1
#define STR_END SLJIT_SAVED_REG2
#define STACK_TOP SLJIT_SCRATCH_REG2
#define STACK_LIMIT SLJIT_SAVED_REG3
#define ARGUMENTS SLJIT_SAVED_EREG1
#define CALL_COUNT SLJIT_SAVED_EREG2
#define RETURN_ADDR SLJIT_TEMPORARY_EREG1
420
/* Local space layout. Each slot is sizeof(sljit_sw) bytes wide; the macros
below are byte offsets. */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define CALL_LIMIT (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. */
/* All of the following macros require a variable named common in scope. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_sw))
/* Private data offset recorded for the opcode at cc (indexed by the
distance of cc from common->start). */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
438
/* Select the sljit move opcodes that match the code unit width of the
library being built (8, 16 or 32 bit); any other configuration is a
compile time error. */
#if defined COMPILE_PCRE8
#define MOV_UCHAR SLJIT_MOV_UB
#define MOVU_UCHAR SLJIT_MOVU_UB
#elif defined COMPILE_PCRE16
#define MOV_UCHAR SLJIT_MOV_UH
#define MOVU_UCHAR SLJIT_MOVU_UH
#elif defined COMPILE_PCRE32
#define MOV_UCHAR SLJIT_MOV_UI
#define MOVU_UCHAR SLJIT_MOVU_UI
#else
#error Unsupported compiling mode
#endif
451
/* Shortcuts. Every macro except DEFINE_COMPILER expands to an sljit call
whose first argument is a variable named compiler; DEFINE_COMPILER declares
that variable from common->compiler, so it requires common in scope. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an already existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Emits a new label at the current position and binds jump to it. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare and binds it to an already existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
475
476 static pcre_uchar* bracketend(pcre_uchar* cc)
477 {
478 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
479 do cc += GET(cc, 1); while (*cc == OP_ALT);
480 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
481 cc += 1 + LINK_SIZE;
482 return cc;
483 }
484
485 /* Functions which might need modification for all new supported opcodes:
486 next_opcode
487 get_private_data_length
488 set_private_data_ptrs
489 get_framesize
490 init_frame
491 get_private_data_length_for_copy
492 copy_private_data
493 compile_matchingpath
494 compile_backtrackingpath
495 */
496
497 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
498 {
499 SLJIT_UNUSED_ARG(common);
500 switch(*cc)
501 {
502 case OP_SOD:
503 case OP_SOM:
504 case OP_SET_SOM:
505 case OP_NOT_WORD_BOUNDARY:
506 case OP_WORD_BOUNDARY:
507 case OP_NOT_DIGIT:
508 case OP_DIGIT:
509 case OP_NOT_WHITESPACE:
510 case OP_WHITESPACE:
511 case OP_NOT_WORDCHAR:
512 case OP_WORDCHAR:
513 case OP_ANY:
514 case OP_ALLANY:
515 case OP_ANYNL:
516 case OP_NOT_HSPACE:
517 case OP_HSPACE:
518 case OP_NOT_VSPACE:
519 case OP_VSPACE:
520 case OP_EXTUNI:
521 case OP_EODN:
522 case OP_EOD:
523 case OP_CIRC:
524 case OP_CIRCM:
525 case OP_DOLL:
526 case OP_DOLLM:
527 case OP_TYPESTAR:
528 case OP_TYPEMINSTAR:
529 case OP_TYPEPLUS:
530 case OP_TYPEMINPLUS:
531 case OP_TYPEQUERY:
532 case OP_TYPEMINQUERY:
533 case OP_TYPEPOSSTAR:
534 case OP_TYPEPOSPLUS:
535 case OP_TYPEPOSQUERY:
536 case OP_CRSTAR:
537 case OP_CRMINSTAR:
538 case OP_CRPLUS:
539 case OP_CRMINPLUS:
540 case OP_CRQUERY:
541 case OP_CRMINQUERY:
542 case OP_DEF:
543 case OP_BRAZERO:
544 case OP_BRAMINZERO:
545 case OP_BRAPOSZERO:
546 case OP_COMMIT:
547 case OP_FAIL:
548 case OP_ACCEPT:
549 case OP_ASSERT_ACCEPT:
550 case OP_SKIPZERO:
551 return cc + 1;
552
553 case OP_ANYBYTE:
554 #ifdef SUPPORT_UTF
555 if (common->utf) return NULL;
556 #endif
557 return cc + 1;
558
559 case OP_CHAR:
560 case OP_CHARI:
561 case OP_NOT:
562 case OP_NOTI:
563 case OP_STAR:
564 case OP_MINSTAR:
565 case OP_PLUS:
566 case OP_MINPLUS:
567 case OP_QUERY:
568 case OP_MINQUERY:
569 case OP_POSSTAR:
570 case OP_POSPLUS:
571 case OP_POSQUERY:
572 case OP_STARI:
573 case OP_MINSTARI:
574 case OP_PLUSI:
575 case OP_MINPLUSI:
576 case OP_QUERYI:
577 case OP_MINQUERYI:
578 case OP_POSSTARI:
579 case OP_POSPLUSI:
580 case OP_POSQUERYI:
581 case OP_NOTSTAR:
582 case OP_NOTMINSTAR:
583 case OP_NOTPLUS:
584 case OP_NOTMINPLUS:
585 case OP_NOTQUERY:
586 case OP_NOTMINQUERY:
587 case OP_NOTPOSSTAR:
588 case OP_NOTPOSPLUS:
589 case OP_NOTPOSQUERY:
590 case OP_NOTSTARI:
591 case OP_NOTMINSTARI:
592 case OP_NOTPLUSI:
593 case OP_NOTMINPLUSI:
594 case OP_NOTQUERYI:
595 case OP_NOTMINQUERYI:
596 case OP_NOTPOSSTARI:
597 case OP_NOTPOSPLUSI:
598 case OP_NOTPOSQUERYI:
599 cc += 2;
600 #ifdef SUPPORT_UTF
601 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
602 #endif
603 return cc;
604
605 case OP_UPTO:
606 case OP_MINUPTO:
607 case OP_EXACT:
608 case OP_POSUPTO:
609 case OP_UPTOI:
610 case OP_MINUPTOI:
611 case OP_EXACTI:
612 case OP_POSUPTOI:
613 case OP_NOTUPTO:
614 case OP_NOTMINUPTO:
615 case OP_NOTEXACT:
616 case OP_NOTPOSUPTO:
617 case OP_NOTUPTOI:
618 case OP_NOTMINUPTOI:
619 case OP_NOTEXACTI:
620 case OP_NOTPOSUPTOI:
621 cc += 2 + IMM2_SIZE;
622 #ifdef SUPPORT_UTF
623 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
624 #endif
625 return cc;
626
627 case OP_NOTPROP:
628 case OP_PROP:
629 return cc + 1 + 2;
630
631 case OP_TYPEUPTO:
632 case OP_TYPEMINUPTO:
633 case OP_TYPEEXACT:
634 case OP_TYPEPOSUPTO:
635 case OP_REF:
636 case OP_REFI:
637 case OP_CREF:
638 case OP_NCREF:
639 case OP_RREF:
640 case OP_NRREF:
641 case OP_CLOSE:
642 cc += 1 + IMM2_SIZE;
643 return cc;
644
645 case OP_CRRANGE:
646 case OP_CRMINRANGE:
647 return cc + 1 + 2 * IMM2_SIZE;
648
649 case OP_CLASS:
650 case OP_NCLASS:
651 return cc + 1 + 32 / sizeof(pcre_uchar);
652
653 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
654 case OP_XCLASS:
655 return cc + GET(cc, 1);
656 #endif
657
658 case OP_RECURSE:
659 case OP_ASSERT:
660 case OP_ASSERT_NOT:
661 case OP_ASSERTBACK:
662 case OP_ASSERTBACK_NOT:
663 case OP_REVERSE:
664 case OP_ONCE:
665 case OP_ONCE_NC:
666 case OP_BRA:
667 case OP_BRAPOS:
668 case OP_COND:
669 case OP_SBRA:
670 case OP_SBRAPOS:
671 case OP_SCOND:
672 case OP_ALT:
673 case OP_KET:
674 case OP_KETRMAX:
675 case OP_KETRMIN:
676 case OP_KETRPOS:
677 return cc + 1 + LINK_SIZE;
678
679 case OP_CBRA:
680 case OP_CBRAPOS:
681 case OP_SCBRA:
682 case OP_SCBRAPOS:
683 return cc + 1 + LINK_SIZE + IMM2_SIZE;
684
685 case OP_MARK:
686 return cc + 1 + 2 + cc[1];
687
688 case OP_CALLOUT:
689 return cc + 2 + 2 * LINK_SIZE;
690
691 default:
692 return NULL;
693 }
694 }
695
/* Groups of case labels shared by get_private_data_length and
set_private_data_ptrs. In both functions the "_1" groups request one word
of private data (space = 1) and the "_2A" / "_2B" groups request two words
(space = 2). The "_2B" groups are the opcodes that carry an additional
2-byte immediate. */

/* Character iterators, one word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Character iterators, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Character iterators with a count, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Type iterators, one word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Type iterators, two words unless the repeated type is OP_ANYNL or
OP_EXTUNI (the users of this macro check that). */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Type iterators with a count, two words unless the repeated type is
OP_ANYNL or OP_EXTUNI (the users of this macro check that). */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
747
748 static int get_class_iterator_size(pcre_uchar *cc)
749 {
750 switch(*cc)
751 {
752 case OP_CRSTAR:
753 case OP_CRPLUS:
754 return 2;
755
756 case OP_CRMINSTAR:
757 case OP_CRMINPLUS:
758 case OP_CRQUERY:
759 case OP_CRMINQUERY:
760 return 1;
761
762 case OP_CRRANGE:
763 case OP_CRMINRANGE:
764 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
765 return 0;
766 return 2;
767
768 default:
769 return 0;
770 }
771 }
772
773 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
774 {
775 int private_data_length = 0;
776 pcre_uchar *alternative;
777 pcre_uchar *name;
778 pcre_uchar *end = NULL;
779 int space, size, i;
780 pcre_uint32 bracketlen;
781
782 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
783 while (cc < ccend)
784 {
785 space = 0;
786 size = 0;
787 bracketlen = 0;
788 switch(*cc)
789 {
790 case OP_SET_SOM:
791 common->has_set_som = TRUE;
792 cc += 1;
793 break;
794
795 case OP_REF:
796 case OP_REFI:
797 common->optimized_cbracket[GET2(cc, 1)] = 0;
798 cc += 1 + IMM2_SIZE;
799 break;
800
801 case OP_ASSERT:
802 case OP_ASSERT_NOT:
803 case OP_ASSERTBACK:
804 case OP_ASSERTBACK_NOT:
805 case OP_ONCE:
806 case OP_ONCE_NC:
807 case OP_BRAPOS:
808 case OP_SBRA:
809 case OP_SBRAPOS:
810 private_data_length += sizeof(sljit_sw);
811 bracketlen = 1 + LINK_SIZE;
812 break;
813
814 case OP_CBRAPOS:
815 case OP_SCBRAPOS:
816 private_data_length += sizeof(sljit_sw);
817 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
818 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
819 break;
820
821 case OP_COND:
822 case OP_SCOND:
823 /* Only AUTO_CALLOUT can insert this opcode. We do
824 not intend to support this case. */
825 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
826 return -1;
827
828 if (*cc == OP_COND)
829 {
830 /* Might be a hidden SCOND. */
831 alternative = cc + GET(cc, 1);
832 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
833 private_data_length += sizeof(sljit_sw);
834 }
835 else
836 private_data_length += sizeof(sljit_sw);
837 bracketlen = 1 + LINK_SIZE;
838 break;
839
840 case OP_CREF:
841 i = GET2(cc, 1);
842 common->optimized_cbracket[i] = 0;
843 cc += 1 + IMM2_SIZE;
844 break;
845
846 case OP_NCREF:
847 bracketlen = GET2(cc, 1);
848 name = (pcre_uchar *)common->name_table;
849 alternative = name;
850 for (i = 0; i < common->name_count; i++)
851 {
852 if (GET2(name, 0) == bracketlen) break;
853 name += common->name_entry_size;
854 }
855 SLJIT_ASSERT(i != common->name_count);
856
857 for (i = 0; i < common->name_count; i++)
858 {
859 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
860 common->optimized_cbracket[GET2(alternative, 0)] = 0;
861 alternative += common->name_entry_size;
862 }
863 bracketlen = 0;
864 cc += 1 + IMM2_SIZE;
865 break;
866
867 case OP_BRA:
868 bracketlen = 1 + LINK_SIZE;
869 break;
870
871 case OP_CBRA:
872 case OP_SCBRA:
873 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
874 break;
875
876 CASE_ITERATOR_PRIVATE_DATA_1
877 space = 1;
878 size = -2;
879 break;
880
881 CASE_ITERATOR_PRIVATE_DATA_2A
882 space = 2;
883 size = -2;
884 break;
885
886 CASE_ITERATOR_PRIVATE_DATA_2B
887 space = 2;
888 size = -(2 + IMM2_SIZE);
889 break;
890
891 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
892 space = 1;
893 size = 1;
894 break;
895
896 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
897 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
898 space = 2;
899 size = 1;
900 break;
901
902 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
903 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
904 space = 2;
905 size = 1 + IMM2_SIZE;
906 break;
907
908 case OP_CLASS:
909 case OP_NCLASS:
910 size += 1 + 32 / sizeof(pcre_uchar);
911 space = get_class_iterator_size(cc + size);
912 break;
913
914 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
915 case OP_XCLASS:
916 size = GET(cc, 1);
917 space = get_class_iterator_size(cc + size);
918 break;
919 #endif
920
921 case OP_RECURSE:
922 /* Set its value only once. */
923 if (common->recursive_head_ptr == 0)
924 {
925 common->recursive_head_ptr = common->ovector_start;
926 common->ovector_start += sizeof(sljit_sw);
927 }
928 cc += 1 + LINK_SIZE;
929 break;
930
931 case OP_CALLOUT:
932 if (common->capture_last_ptr == 0)
933 {
934 common->capture_last_ptr = common->ovector_start;
935 common->ovector_start += sizeof(sljit_sw);
936 }
937 cc += 2 + 2 * LINK_SIZE;
938 break;
939
940 case OP_MARK:
941 if (common->mark_ptr == 0)
942 {
943 common->mark_ptr = common->ovector_start;
944 common->ovector_start += sizeof(sljit_sw);
945 }
946 cc += 1 + 2 + cc[1];
947 break;
948
949 default:
950 cc = next_opcode(common, cc);
951 if (cc == NULL)
952 return -1;
953 break;
954 }
955
956 if (space > 0 && cc >= end)
957 private_data_length += sizeof(sljit_sw) * space;
958
959 if (size != 0)
960 {
961 if (size < 0)
962 {
963 cc += -size;
964 #ifdef SUPPORT_UTF
965 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
966 #endif
967 }
968 else
969 cc += size;
970 }
971
972 if (bracketlen != 0)
973 {
974 if (cc >= end)
975 {
976 end = bracketend(cc);
977 if (end[-1 - LINK_SIZE] == OP_KET)
978 end = NULL;
979 }
980 cc += bracketlen;
981 }
982 }
983 return private_data_length;
984 }
985
986 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
987 {
988 pcre_uchar *cc = common->start;
989 pcre_uchar *alternative;
990 pcre_uchar *end = NULL;
991 int space, size, bracketlen;
992
993 while (cc < ccend)
994 {
995 space = 0;
996 size = 0;
997 bracketlen = 0;
998 switch(*cc)
999 {
1000 case OP_ASSERT:
1001 case OP_ASSERT_NOT:
1002 case OP_ASSERTBACK:
1003 case OP_ASSERTBACK_NOT:
1004 case OP_ONCE:
1005 case OP_ONCE_NC:
1006 case OP_BRAPOS:
1007 case OP_SBRA:
1008 case OP_SBRAPOS:
1009 case OP_SCOND:
1010 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1011 private_data_ptr += sizeof(sljit_sw);
1012 bracketlen = 1 + LINK_SIZE;
1013 break;
1014
1015 case OP_CBRAPOS:
1016 case OP_SCBRAPOS:
1017 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1018 private_data_ptr += sizeof(sljit_sw);
1019 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1020 break;
1021
1022 case OP_COND:
1023 /* Might be a hidden SCOND. */
1024 alternative = cc + GET(cc, 1);
1025 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1026 {
1027 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1028 private_data_ptr += sizeof(sljit_sw);
1029 }
1030 bracketlen = 1 + LINK_SIZE;
1031 break;
1032
1033 case OP_BRA:
1034 bracketlen = 1 + LINK_SIZE;
1035 break;
1036
1037 case OP_CBRA:
1038 case OP_SCBRA:
1039 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1040 break;
1041
1042 CASE_ITERATOR_PRIVATE_DATA_1
1043 space = 1;
1044 size = -2;
1045 break;
1046
1047 CASE_ITERATOR_PRIVATE_DATA_2A
1048 space = 2;
1049 size = -2;
1050 break;
1051
1052 CASE_ITERATOR_PRIVATE_DATA_2B
1053 space = 2;
1054 size = -(2 + IMM2_SIZE);
1055 break;
1056
1057 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1058 space = 1;
1059 size = 1;
1060 break;
1061
1062 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1063 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1064 space = 2;
1065 size = 1;
1066 break;
1067
1068 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1069 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1070 space = 2;
1071 size = 1 + IMM2_SIZE;
1072 break;
1073
1074 case OP_CLASS:
1075 case OP_NCLASS:
1076 size += 1 + 32 / sizeof(pcre_uchar);
1077 space = get_class_iterator_size(cc + size);
1078 break;
1079
1080 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1081 case OP_XCLASS:
1082 size = GET(cc, 1);
1083 space = get_class_iterator_size(cc + size);
1084 break;
1085 #endif
1086
1087 default:
1088 cc = next_opcode(common, cc);
1089 SLJIT_ASSERT(cc != NULL);
1090 break;
1091 }
1092
1093 if (space > 0 && cc >= end)
1094 {
1095 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1096 private_data_ptr += sizeof(sljit_sw) * space;
1097 }
1098
1099 if (size != 0)
1100 {
1101 if (size < 0)
1102 {
1103 cc += -size;
1104 #ifdef SUPPORT_UTF
1105 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1106 #endif
1107 }
1108 else
1109 cc += size;
1110 }
1111
1112 if (bracketlen > 0)
1113 {
1114 if (cc >= end)
1115 {
1116 end = bracketend(cc);
1117 if (end[-1 - LINK_SIZE] == OP_KET)
1118 end = NULL;
1119 }
1120 cc += bracketlen;
1121 }
1122 }
1123 }
1124
1125 /* Returns with -1 if no need for frame. */
1126 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1127 {
1128 pcre_uchar *ccend = bracketend(cc);
1129 int length = 0;
1130 BOOL possessive = FALSE;
1131 BOOL setsom_found = recursive;
1132 BOOL setmark_found = recursive;
1133
1134 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1135 {
1136 length = 3;
1137 possessive = TRUE;
1138 }
1139
1140 cc = next_opcode(common, cc);
1141 SLJIT_ASSERT(cc != NULL);
1142 while (cc < ccend)
1143 switch(*cc)
1144 {
1145 case OP_SET_SOM:
1146 SLJIT_ASSERT(common->has_set_som);
1147 if (!setsom_found)
1148 {
1149 length += 2;
1150 setsom_found = TRUE;
1151 }
1152 cc += 1;
1153 break;
1154
1155 case OP_MARK:
1156 SLJIT_ASSERT(common->mark_ptr != 0);
1157 if (!setmark_found)
1158 {
1159 length += 2;
1160 setmark_found = TRUE;
1161 }
1162 cc += 1 + 2 + cc[1];
1163 break;
1164
1165 case OP_RECURSE:
1166 if (common->has_set_som && !setsom_found)
1167 {
1168 length += 2;
1169 setsom_found = TRUE;
1170 }
1171 if (common->mark_ptr != 0 && !setmark_found)
1172 {
1173 length += 2;
1174 setmark_found = TRUE;
1175 }
1176 cc += 1 + LINK_SIZE;
1177 break;
1178
1179 case OP_CBRA:
1180 case OP_CBRAPOS:
1181 case OP_SCBRA:
1182 case OP_SCBRAPOS:
1183 length += 3;
1184 cc += 1 + LINK_SIZE + IMM2_SIZE;
1185 break;
1186
1187 default:
1188 cc = next_opcode(common, cc);
1189 SLJIT_ASSERT(cc != NULL);
1190 break;
1191 }
1192
1193 /* Possessive quantifiers can use a special case. */
1194 if (SLJIT_UNLIKELY(possessive) && length == 3)
1195 return -1;
1196
1197 if (length > 0)
1198 return length + 1;
1199 return -1;
1200 }
1201
1202 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1203 {
1204 DEFINE_COMPILER;
1205 pcre_uchar *ccend = bracketend(cc);
1206 BOOL setsom_found = recursive;
1207 BOOL setmark_found = recursive;
1208 int offset;
1209
1210 /* >= 1 + shortest item size (2) */
1211 SLJIT_UNUSED_ARG(stacktop);
1212 SLJIT_ASSERT(stackpos >= stacktop + 2);
1213
1214 stackpos = STACK(stackpos);
1215 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1216 cc = next_opcode(common, cc);
1217 SLJIT_ASSERT(cc != NULL);
1218 while (cc < ccend)
1219 switch(*cc)
1220 {
1221 case OP_SET_SOM:
1222 SLJIT_ASSERT(common->has_set_som);
1223 if (!setsom_found)
1224 {
1225 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1226 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1227 stackpos += (int)sizeof(sljit_sw);
1228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1229 stackpos += (int)sizeof(sljit_sw);
1230 setsom_found = TRUE;
1231 }
1232 cc += 1;
1233 break;
1234
1235 case OP_MARK:
1236 SLJIT_ASSERT(common->mark_ptr != 0);
1237 if (!setmark_found)
1238 {
1239 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1240 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1241 stackpos += (int)sizeof(sljit_sw);
1242 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1243 stackpos += (int)sizeof(sljit_sw);
1244 setmark_found = TRUE;
1245 }
1246 cc += 1 + 2 + cc[1];
1247 break;
1248
1249 case OP_RECURSE:
1250 if (common->has_set_som && !setsom_found)
1251 {
1252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1253 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setstrbegin);
1254 stackpos += (int)sizeof(sljit_sw);
1255 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1256 stackpos += (int)sizeof(sljit_sw);
1257 setsom_found = TRUE;
1258 }
1259 if (common->mark_ptr != 0 && !setmark_found)
1260 {
1261 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1262 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_setmark);
1263 stackpos += (int)sizeof(sljit_sw);
1264 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1265 stackpos += (int)sizeof(sljit_sw);
1266 setmark_found = TRUE;
1267 }
1268 cc += 1 + LINK_SIZE;
1269 break;
1270
1271 case OP_CBRA:
1272 case OP_CBRAPOS:
1273 case OP_SCBRA:
1274 case OP_SCBRAPOS:
1275 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1276 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1277 stackpos += (int)sizeof(sljit_sw);
1278 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1279 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1280 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1281 stackpos += (int)sizeof(sljit_sw);
1282 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1283 stackpos += (int)sizeof(sljit_sw);
1284
1285 cc += 1 + LINK_SIZE + IMM2_SIZE;
1286 break;
1287
1288 default:
1289 cc = next_opcode(common, cc);
1290 SLJIT_ASSERT(cc != NULL);
1291 break;
1292 }
1293
1294 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, frame_end);
1295 SLJIT_ASSERT(stackpos == STACK(stacktop));
1296 }
1297
1298 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1299 {
1300 int private_data_length = 2;
1301 int size;
1302 pcre_uchar *alternative;
1303 /* Calculate the sum of the private machine words. */
1304 while (cc < ccend)
1305 {
1306 size = 0;
1307 switch(*cc)
1308 {
1309 case OP_ASSERT:
1310 case OP_ASSERT_NOT:
1311 case OP_ASSERTBACK:
1312 case OP_ASSERTBACK_NOT:
1313 case OP_ONCE:
1314 case OP_ONCE_NC:
1315 case OP_BRAPOS:
1316 case OP_SBRA:
1317 case OP_SBRAPOS:
1318 case OP_SCOND:
1319 private_data_length++;
1320 cc += 1 + LINK_SIZE;
1321 break;
1322
1323 case OP_CBRA:
1324 case OP_SCBRA:
1325 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1326 private_data_length++;
1327 cc += 1 + LINK_SIZE + IMM2_SIZE;
1328 break;
1329
1330 case OP_CBRAPOS:
1331 case OP_SCBRAPOS:
1332 private_data_length += 2;
1333 cc += 1 + LINK_SIZE + IMM2_SIZE;
1334 break;
1335
1336 case OP_COND:
1337 /* Might be a hidden SCOND. */
1338 alternative = cc + GET(cc, 1);
1339 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1340 private_data_length++;
1341 cc += 1 + LINK_SIZE;
1342 break;
1343
1344 CASE_ITERATOR_PRIVATE_DATA_1
1345 if (PRIVATE_DATA(cc))
1346 private_data_length++;
1347 cc += 2;
1348 #ifdef SUPPORT_UTF
1349 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1350 #endif
1351 break;
1352
1353 CASE_ITERATOR_PRIVATE_DATA_2A
1354 if (PRIVATE_DATA(cc))
1355 private_data_length += 2;
1356 cc += 2;
1357 #ifdef SUPPORT_UTF
1358 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1359 #endif
1360 break;
1361
1362 CASE_ITERATOR_PRIVATE_DATA_2B
1363 if (PRIVATE_DATA(cc))
1364 private_data_length += 2;
1365 cc += 2 + IMM2_SIZE;
1366 #ifdef SUPPORT_UTF
1367 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1368 #endif
1369 break;
1370
1371 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1372 if (PRIVATE_DATA(cc))
1373 private_data_length++;
1374 cc += 1;
1375 break;
1376
1377 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1378 if (PRIVATE_DATA(cc))
1379 private_data_length += 2;
1380 cc += 1;
1381 break;
1382
1383 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1384 if (PRIVATE_DATA(cc))
1385 private_data_length += 2;
1386 cc += 1 + IMM2_SIZE;
1387 break;
1388
1389 case OP_CLASS:
1390 case OP_NCLASS:
1391 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1392 case OP_XCLASS:
1393 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1394 #else
1395 size = 1 + 32 / (int)sizeof(pcre_uchar);
1396 #endif
1397 if (PRIVATE_DATA(cc))
1398 private_data_length += get_class_iterator_size(cc + size);
1399 cc += size;
1400 break;
1401
1402 default:
1403 cc = next_opcode(common, cc);
1404 SLJIT_ASSERT(cc != NULL);
1405 break;
1406 }
1407 }
1408 SLJIT_ASSERT(cc == ccend);
1409 return private_data_length;
1410 }
1411
/* Emits code that copies the private data words of the opcodes in [cc, ccend)
between the local variable area (SLJIT_LOCALS_REG) and the machine stack
(STACK_TOP).

  save == TRUE:  the locals are read and written to the stack; the first word
                 saved is common->recursive_head_ptr.
  save == FALSE: the words are read back from the stack and stored into the
                 same locals; the first stack word is skipped, not restored.

stackptr and stacktop are stack indexes which are converted to byte offsets
with STACK(). The copy is pipelined through TMP1 and TMP2, which are used
alternately: tmp1next selects the register of the next word, and
tmp1empty / tmp2empty tell whether a register currently holds a word that has
not been written out yet. */
1412 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1413 BOOL save, int stackptr, int stacktop)
1414 {
1415 DEFINE_COMPILER;
1416 int srcw[2];
1417 int count, size;
1418 BOOL tmp1next = TRUE;
1419 BOOL tmp1empty = TRUE;
1420 BOOL tmp2empty = TRUE;
1421 pcre_uchar *alternative;
1422 enum {
1423 start,
1424 loop,
1425 end
1426 } status;
1427
/* The 'start' state (which handles recursive_head_ptr) is only entered when saving. */
1428 status = save ? start : loop;
1429 stackptr = STACK(stackptr - 2);
1430 stacktop = STACK(stacktop - 1);
1431
1432 if (!save)
1433 {
/* Restoring: skip the first word, then preload up to two words into TMP1 and TMP2. */
1434 stackptr += sizeof(sljit_sw);
1435 if (stackptr < stacktop)
1436 {
1437 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1438 stackptr += sizeof(sljit_sw);
1439 tmp1empty = FALSE;
1440 }
1441 if (stackptr < stacktop)
1442 {
1443 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1444 stackptr += sizeof(sljit_sw);
1445 tmp2empty = FALSE;
1446 }
1447 /* The tmp1next must be TRUE in either way. */
1448 }
1449
/* Each iteration selects at most two local offsets (srcw[0..count-1]) and then transfers them. */
1450 while (status != end)
1451 {
1452 count = 0;
1453 switch(status)
1454 {
1455 case start:
1456 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1457 count = 1;
1458 srcw[0] = common->recursive_head_ptr;
1459 status = loop;
1460 break;
1461
1462 case loop:
1463 if (cc >= ccend)
1464 {
1465 status = end;
1466 break;
1467 }
1468
/* The cases below mirror get_private_data_length_for_copy. */
1469 switch(*cc)
1470 {
1471 case OP_ASSERT:
1472 case OP_ASSERT_NOT:
1473 case OP_ASSERTBACK:
1474 case OP_ASSERTBACK_NOT:
1475 case OP_ONCE:
1476 case OP_ONCE_NC:
1477 case OP_BRAPOS:
1478 case OP_SBRA:
1479 case OP_SBRAPOS:
1480 case OP_SCOND:
1481 count = 1;
1482 srcw[0] = PRIVATE_DATA(cc);
1483 SLJIT_ASSERT(srcw[0] != 0);
1484 cc += 1 + LINK_SIZE;
1485 break;
1486
1487 case OP_CBRA:
1488 case OP_SCBRA:
1489 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1490 {
1491 count = 1;
1492 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1493 }
1494 cc += 1 + LINK_SIZE + IMM2_SIZE;
1495 break;
1496
1497 case OP_CBRAPOS:
1498 case OP_SCBRAPOS:
1499 count = 2;
1500 srcw[0] = PRIVATE_DATA(cc);
1501 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1502 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1503 cc += 1 + LINK_SIZE + IMM2_SIZE;
1504 break;
1505
1506 case OP_COND:
1507 /* Might be a hidden SCOND. */
1508 alternative = cc + GET(cc, 1);
1509 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1510 {
1511 count = 1;
1512 srcw[0] = PRIVATE_DATA(cc);
1513 SLJIT_ASSERT(srcw[0] != 0);
1514 }
1515 cc += 1 + LINK_SIZE;
1516 break;
1517
1518 CASE_ITERATOR_PRIVATE_DATA_1
1519 if (PRIVATE_DATA(cc))
1520 {
1521 count = 1;
1522 srcw[0] = PRIVATE_DATA(cc);
1523 }
1524 cc += 2;
1525 #ifdef SUPPORT_UTF
1526 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1527 #endif
1528 break;
1529
1530 CASE_ITERATOR_PRIVATE_DATA_2A
1531 if (PRIVATE_DATA(cc))
1532 {
1533 count = 2;
1534 srcw[0] = PRIVATE_DATA(cc);
1535 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1536 }
1537 cc += 2;
1538 #ifdef SUPPORT_UTF
1539 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1540 #endif
1541 break;
1542
1543 CASE_ITERATOR_PRIVATE_DATA_2B
1544 if (PRIVATE_DATA(cc))
1545 {
1546 count = 2;
1547 srcw[0] = PRIVATE_DATA(cc);
1548 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1549 }
1550 cc += 2 + IMM2_SIZE;
1551 #ifdef SUPPORT_UTF
1552 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1553 #endif
1554 break;
1555
1556 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1557 if (PRIVATE_DATA(cc))
1558 {
1559 count = 1;
1560 srcw[0] = PRIVATE_DATA(cc);
1561 }
1562 cc += 1;
1563 break;
1564
1565 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1566 if (PRIVATE_DATA(cc))
1567 {
1568 count = 2;
1569 srcw[0] = PRIVATE_DATA(cc);
1570 srcw[1] = srcw[0] + sizeof(sljit_sw);
1571 }
1572 cc += 1;
1573 break;
1574
1575 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1576 if (PRIVATE_DATA(cc))
1577 {
1578 count = 2;
1579 srcw[0] = PRIVATE_DATA(cc);
1580 srcw[1] = srcw[0] + sizeof(sljit_sw);
1581 }
1582 cc += 1 + IMM2_SIZE;
1583 break;
1584
1585 case OP_CLASS:
1586 case OP_NCLASS:
1587 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1588 case OP_XCLASS:
1589 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1590 #else
1591 size = 1 + 32 / (int)sizeof(pcre_uchar);
1592 #endif
1593 if (PRIVATE_DATA(cc))
1594 switch(get_class_iterator_size(cc + size))
1595 {
1596 case 1:
1597 count = 1;
1598 srcw[0] = PRIVATE_DATA(cc);
1599 break;
1600
1601 case 2:
1602 count = 2;
1603 srcw[0] = PRIVATE_DATA(cc);
1604 srcw[1] = srcw[0] + sizeof(sljit_sw);
1605 break;
1606
1607 default:
1608 SLJIT_ASSERT_STOP();
1609 break;
1610 }
1611 cc += size;
1612 break;
1613
1614 default:
1615 cc = next_opcode(common, cc);
1616 SLJIT_ASSERT(cc != NULL);
1617 break;
1618 }
1619 break;
1620
1621 case end:
1622 SLJIT_ASSERT_STOP();
1623 break;
1624 }
1625
/* Transfer the selected words; srcw[] is consumed from its last entry to its first. */
1626 while (count > 0)
1627 {
1628 count--;
1629 if (save)
1630 {
/* Saving: write out the word the register still holds (if any), then load the next local into it. */
1631 if (tmp1next)
1632 {
1633 if (!tmp1empty)
1634 {
1635 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1636 stackptr += sizeof(sljit_sw);
1637 }
1638 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1639 tmp1empty = FALSE;
1640 tmp1next = FALSE;
1641 }
1642 else
1643 {
1644 if (!tmp2empty)
1645 {
1646 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1647 stackptr += sizeof(sljit_sw);
1648 }
1649 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1650 tmp2empty = FALSE;
1651 tmp1next = TRUE;
1652 }
1653 }
1654 else
1655 {
/* Restoring: store the preloaded register into the local, then refill it from the stack if words remain. */
1656 if (tmp1next)
1657 {
1658 SLJIT_ASSERT(!tmp1empty);
1659 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1660 tmp1empty = stackptr >= stacktop;
1661 if (!tmp1empty)
1662 {
1663 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1664 stackptr += sizeof(sljit_sw);
1665 }
1666 tmp1next = FALSE;
1667 }
1668 else
1669 {
1670 SLJIT_ASSERT(!tmp2empty);
1671 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1672 tmp2empty = stackptr >= stacktop;
1673 if (!tmp2empty)
1674 {
1675 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1676 stackptr += sizeof(sljit_sw);
1677 }
1678 tmp1next = TRUE;
1679 }
1680 }
1681 }
1682 }
1683
/* Saving: flush the words still held in TMP1 / TMP2; the register which is next in turn goes first. */
1684 if (save)
1685 {
1686 if (tmp1next)
1687 {
1688 if (!tmp1empty)
1689 {
1690 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1691 stackptr += sizeof(sljit_sw);
1692 }
1693 if (!tmp2empty)
1694 {
1695 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1696 stackptr += sizeof(sljit_sw);
1697 }
1698 }
1699 else
1700 {
1701 if (!tmp2empty)
1702 {
1703 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1704 stackptr += sizeof(sljit_sw);
1705 }
1706 if (!tmp1empty)
1707 {
1708 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1709 stackptr += sizeof(sljit_sw);
1710 }
1711 }
1712 }
/* All opcodes and the whole stack area must have been consumed; after a restore both registers are empty. */
1713 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1714 }
1715
/* These case-label macros are used by the private data walkers above
(get_private_data_length_for_copy and copy_private_data). They are undefined
here so they are not visible in the rest of the file. */
1716 #undef CASE_ITERATOR_PRIVATE_DATA_1
1717 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1718 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1719 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1720 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1721 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1722
1723 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1724 {
1725 return (value & (value - 1)) == 0;
1726 }
1727
1728 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1729 {
1730 while (list)
1731 {
1732 /* sljit_set_label is clever enough to do nothing
1733 if either the jump or the label is NULL. */
1734 sljit_set_label(list->jump, label);
1735 list = list->next;
1736 }
1737 }
1738
1739 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1740 {
1741 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1742 if (list_item)
1743 {
1744 list_item->next = *list;
1745 list_item->jump = jump;
1746 *list = list_item;
1747 }
1748 }
1749
1750 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1751 {
1752 DEFINE_COMPILER;
1753 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1754
1755 if (list_item)
1756 {
1757 list_item->type = type;
1758 list_item->data = data;
1759 list_item->start = start;
1760 list_item->quit = LABEL();
1761 list_item->next = common->stubs;
1762 common->stubs = list_item;
1763 }
1764 }
1765
1766 static void flush_stubs(compiler_common *common)
1767 {
1768 DEFINE_COMPILER;
1769 stub_list* list_item = common->stubs;
1770
1771 while (list_item)
1772 {
1773 JUMPHERE(list_item->start);
1774 switch(list_item->type)
1775 {
1776 case stack_alloc:
1777 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1778 break;
1779 }
1780 JUMPTO(SLJIT_JUMP, list_item->quit);
1781 list_item = list_item->next;
1782 }
1783 common->stubs = NULL;
1784 }
1785
1786 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1787 {
1788 DEFINE_COMPILER;
1789
1790 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1791 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1792 }
1793
1794 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1795 {
1796 /* May destroy all locals and registers except TMP2. */
1797 DEFINE_COMPILER;
1798
1799 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1800 #ifdef DESTROY_REGISTERS
1801 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1802 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1803 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1804 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1805 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1806 #endif
1807 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1808 }
1809
1810 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1811 {
1812 DEFINE_COMPILER;
1813 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1814 }
1815
1816 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1817 {
1818 DEFINE_COMPILER;
1819 struct sljit_label *loop;
1820 int i;
1821 /* At this point we can freely use all temporary registers. */
1822 /* TMP1 returns with begin - 1. */
1823 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1824 if (length < 8)
1825 {
1826 for (i = 0; i < length; i++)
1827 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1828 }
1829 else
1830 {
1831 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START - sizeof(sljit_sw));
1832 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length);
1833 loop = LABEL();
1834 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1835 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1836 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1837 }
1838 }
1839
1840 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1841 {
1842 DEFINE_COMPILER;
1843 struct sljit_label *loop;
1844 struct sljit_jump *early_quit;
1845
1846 /* At this point we can freely use all registers. */
1847 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1848 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1849
1850 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
1851 if (common->mark_ptr != 0)
1852 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1853 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
1854 if (common->mark_ptr != 0)
1855 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
1856 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1857 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1858 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1859 /* Unlikely, but possible */
1860 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
1861 loop = LABEL();
1862 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
1863 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
1864 /* Copy the integer value to the output buffer */
1865 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1866 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1867 #endif
1868 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1869 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1870 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1871 JUMPHERE(early_quit);
1872
1873 /* Calculate the return value, which is the maximum ovector value. */
1874 if (topbracket > 1)
1875 {
1876 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
1877 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
1878
1879 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1880 loop = LABEL();
1881 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
1882 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1883 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1884 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
1885 }
1886 else
1887 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1888 }
1889
1890 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1891 {
1892 DEFINE_COMPILER;
1893
1894 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1895 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1896
1897 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
1898 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1899 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offset_count));
1900 CMPTO(SLJIT_C_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
1901
1902 /* Store match begin and end. */
1903 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1904 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1905 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1906 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1907 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1908 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1909 #endif
1910 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1911
1912 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
1913 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1914 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
1915 #endif
1916 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
1917
1918 JUMPTO(SLJIT_JUMP, quit);
1919 }
1920
1921 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1922 {
1923 /* May destroy TMP1. */
1924 DEFINE_COMPILER;
1925 struct sljit_jump *jump;
1926
1927 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1928 {
1929 /* The value of -1 must be kept for start_used_ptr! */
1930 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1931 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1932 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1933 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1934 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1935 JUMPHERE(jump);
1936 }
1937 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1938 {
1939 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1940 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1941 JUMPHERE(jump);
1942 }
1943 }
1944
1945 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1946 {
1947 /* Detects if the character has an othercase. */
1948 unsigned int c;
1949
1950 #ifdef SUPPORT_UTF
1951 if (common->utf)
1952 {
1953 GETCHAR(c, cc);
1954 if (c > 127)
1955 {
1956 #ifdef SUPPORT_UCP
1957 return c != UCD_OTHERCASE(c);
1958 #else
1959 return FALSE;
1960 #endif
1961 }
1962 #ifndef COMPILE_PCRE8
1963 return common->fcc[c] != c;
1964 #endif
1965 }
1966 else
1967 #endif
1968 c = *cc;
1969 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1970 }
1971
1972 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
1973 {
1974 /* Returns with the othercase. */
1975 #ifdef SUPPORT_UTF
1976 if (common->utf && c > 127)
1977 {
1978 #ifdef SUPPORT_UCP
1979 return UCD_OTHERCASE(c);
1980 #else
1981 return c;
1982 #endif
1983 }
1984 #endif
1985 return TABLE_GET(c, common->fcc, c);
1986 }
1987
1988 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
1989 {
1990 /* Detects if the character and its othercase has only 1 bit difference. */
1991 unsigned int c, oc, bit;
1992 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1993 int n;
1994 #endif
1995
1996 #ifdef SUPPORT_UTF
1997 if (common->utf)
1998 {
1999 GETCHAR(c, cc);
2000 if (c <= 127)
2001 oc = common->fcc[c];
2002 else
2003 {
2004 #ifdef SUPPORT_UCP
2005 oc = UCD_OTHERCASE(c);
2006 #else
2007 oc = c;
2008 #endif
2009 }
2010 }
2011 else
2012 {
2013 c = *cc;
2014 oc = TABLE_GET(c, common->fcc, c);
2015 }
2016 #else
2017 c = *cc;
2018 oc = TABLE_GET(c, common->fcc, c);
2019 #endif
2020
2021 SLJIT_ASSERT(c != oc);
2022
2023 bit = c ^ oc;
2024 /* Optimized for English alphabet. */
2025 if (c <= 127 && bit == 0x20)
2026 return (0 << 8) | 0x20;
2027
2028 /* Since c != oc, they must have at least 1 bit difference. */
2029 if (!is_powerof2(bit))
2030 return 0;
2031
2032 #if defined COMPILE_PCRE8
2033
2034 #ifdef SUPPORT_UTF
2035 if (common->utf && c > 127)
2036 {
2037 n = GET_EXTRALEN(*cc);
2038 while ((bit & 0x3f) == 0)
2039 {
2040 n--;
2041 bit >>= 6;
2042 }
2043 return (n << 8) | bit;
2044 }
2045 #endif /* SUPPORT_UTF */
2046 return (0 << 8) | bit;
2047
2048 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2049
2050 #ifdef SUPPORT_UTF
2051 if (common->utf && c > 65535)
2052 {
2053 if (bit >= (1 << 10))
2054 bit >>= 10;
2055 else
2056 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2057 }
2058 #endif /* SUPPORT_UTF */
2059 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2060
2061 #endif /* COMPILE_PCRE[8|16|32] */
2062 }
2063
2064 static void check_partial(compiler_common *common, BOOL force)
2065 {
2066 /* Checks whether a partial matching is occured. Does not modify registers. */
2067 DEFINE_COMPILER;
2068 struct sljit_jump *jump = NULL;
2069
2070 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2071
2072 if (common->mode == JIT_COMPILE)
2073 return;
2074
2075 if (!force)
2076 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2077 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2078 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2079
2080 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2081 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2082 else
2083 {
2084 if (common->partialmatchlabel != NULL)
2085 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2086 else
2087 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2088 }
2089
2090 if (jump != NULL)
2091 JUMPHERE(jump);
2092 }
2093
2094 static struct sljit_jump *check_str_end(compiler_common *common)
2095 {
2096 /* Does not affect registers. Usually used in a tight spot. */
2097 DEFINE_COMPILER;
2098 struct sljit_jump *jump;
2099 struct sljit_jump *nohit;
2100 struct sljit_jump *return_value;
2101
2102 if (common->mode == JIT_COMPILE)
2103 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2104
2105 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2106 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2107 {
2108 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2110 JUMPHERE(nohit);
2111 return_value = JUMP(SLJIT_JUMP);
2112 }
2113 else
2114 {
2115 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2116 if (common->partialmatchlabel != NULL)
2117 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2118 else
2119 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2120 }
2121 JUMPHERE(jump);
2122 return return_value;
2123 }
2124
2125 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2126 {
2127 DEFINE_COMPILER;
2128 struct sljit_jump *jump;
2129
2130 if (common->mode == JIT_COMPILE)
2131 {
2132 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2133 return;
2134 }
2135
2136 /* Partial matching mode. */
2137 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2138 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2139 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2140 {
2141 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2142 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2143 }
2144 else
2145 {
2146 if (common->partialmatchlabel != NULL)
2147 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2148 else
2149 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2150 }
2151 JUMPHERE(jump);
2152 }
2153
2154 static void read_char(compiler_common *common)
2155 {
2156 /* Reads the character into TMP1, updates STR_PTR.
2157 Does not check STR_END. TMP2 Destroyed. */
2158 DEFINE_COMPILER;
2159 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2160 struct sljit_jump *jump;
2161 #endif
2162
2163 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2164 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2165 if (common->utf)
2166 {
2167 #if defined COMPILE_PCRE8
2168 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2169 #elif defined COMPILE_PCRE16
2170 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2171 #endif /* COMPILE_PCRE[8|16] */
2172 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2173 JUMPHERE(jump);
2174 }
2175 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2176 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2177 }
2178
2179 static void peek_char(compiler_common *common)
2180 {
2181 /* Reads the character into TMP1, keeps STR_PTR.
2182 Does not check STR_END. TMP2 Destroyed. */
2183 DEFINE_COMPILER;
2184 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2185 struct sljit_jump *jump;
2186 #endif
2187
2188 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2189 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2190 if (common->utf)
2191 {
2192 #if defined COMPILE_PCRE8
2193 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2194 #elif defined COMPILE_PCRE16
2195 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2196 #endif /* COMPILE_PCRE[8|16] */
2197 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2198 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2199 JUMPHERE(jump);
2200 }
2201 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2202 }
2203
2204 static void read_char8_type(compiler_common *common)
2205 {
2206 /* Emits code that reads the ctypes entry of the next character into TMP1 and moves STR_PTR past that character. Characters above 255 get type 0. Does not check STR_END. TMP2 is used as scratch. */
2207 DEFINE_COMPILER;
2208 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2209 struct sljit_jump *jump;
2210 #endif
2211
2212 #ifdef SUPPORT_UTF
2213 if (common->utf)
2214 {
2215 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); /* First code unit goes to TMP2. */
2216 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Step over it. */
2217 #if defined COMPILE_PCRE8
2218 /* This can be an extra read in some situations, but hopefully
2219 it is needed in most cases. */
2220 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); /* Type lookup done before the lead-byte test. */
2221 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0); /* Below 0xc0 the lookup above is the result. */
2222 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL)); /* Otherwise call the utfreadtype8 routine. */
2223 JUMPHERE(jump);
2224 #elif defined COMPILE_PCRE16
2225 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); /* Default type 0. */
2226 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); /* Skip the table read for units above 255. */
2227 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2228 JUMPHERE(jump);
2229 /* Skip low surrogate if necessary. */
2230 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2231 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800); /* Equal when the unit just read is in 0xd800..0xdbff. */
2232 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); /* TMP2 = result of the equality test. */
2233 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1); /* Scale the flag by two. */
2234 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); /* Conditionally step over the second unit. */
2235 #elif defined COMPILE_PCRE32
2236 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); /* Default type 0. */
2237 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); /* Skip the table read for units above 255. */
2238 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2239 JUMPHERE(jump);
2240 #endif /* COMPILE_PCRE[8|16|32] */
2241 return;
2242 }
2243 #endif /* SUPPORT_UTF */
2244 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0); /* Non-UTF path: one code unit is one character. */
2245 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2246 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2247 /* The ctypes array contains only 256 values. */
2248 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2249 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2250 #endif
2251 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2252 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2253 JUMPHERE(jump);
2254 #endif
2255 }
2256
2257 static void skip_char_back(compiler_common *common)
2258 {
2259 /* Emits code that moves STR_PTR one character back. Affects STR_PTR, and TMP1 in the UTF paths. Does not check the beginning of the subject. */
2260 DEFINE_COMPILER;
2261 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2262 #if defined COMPILE_PCRE8
2263 struct sljit_label *label;
2264
2265 if (common->utf)
2266 {
2267 label = LABEL(); /* Loop head: step back one byte per iteration. */
2268 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2269 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2270 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2271 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label); /* Bytes of the form 10xxxxxx: keep going back. */
2272 return;
2273 }
2274 #elif defined COMPILE_PCRE16
2275 if (common->utf)
2276 {
2277 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2278 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2279 /* Skip low surrogate if necessary. */
2280 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2281 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00); /* Equal when the unit stepped over is in 0xdc00..0xdfff. */
2282 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); /* TMP1 = result of the equality test. */
2283 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); /* Scale the flag by two. */
2284 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); /* Conditionally step back over the preceding unit as well. */
2285 return;
2286 }
2287 #endif /* COMPILE_PCRE[8|16] */
2288 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2289 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Non-UTF (and 32-bit) case: one unit back. */
2290 }
2291
2292 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2293 {
2294 /* Character comes in TMP1. Emits a test of whether it is a newline for the given nltype and appends one conditional jump to *backtracks: taken when it is a newline if jumpiftrue is set, otherwise taken when it is not. TMP2 may be destroyed. */
2295 DEFINE_COMPILER;
2296
2297 if (nltype == NLTYPE_ANY)
2298 {
2299 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL)); /* Delegate to the shared anynewline routine. */
2300 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO)); /* NOTE(review): relies on anynewline leaving its verdict in the zero flag - confirm there. */
2301 }
2302 else if (nltype == NLTYPE_ANYCRLF)
2303 {
2304 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2305 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); /* TMP2 = (TMP1 == CR). */
2306 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2307 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); /* TMP2 |= (TMP1 == NL), flags updated. */
2308 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2309 }
2310 else
2311 {
2312 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256); /* Only a single-unit fixed newline is handled here. */
2313 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2314 }
2315 }
2316
2317 #ifdef SUPPORT_UTF
2318
2319 #if defined COMPILE_PCRE8
2320 static void do_utfreadchar(compiler_common *common)
2321 {
2322 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2323 of the character (>= 0xc0) and STR_PTR still points at that byte. Return char value in TMP1, length - 1 in TMP2; STR_PTR is advanced by the same length - 1. No validity or STR_END checks are emitted. */
2324 DEFINE_COMPILER;
2325 struct sljit_jump *jump;
2326
2327 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2328 /* Searching for the first zero. */
2329 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2330 jump = JUMP(SLJIT_C_NOT_ZERO); /* Bit 0x20 set: not a two byte sequence. */
2331 /* Two byte sequence. */
2332 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); /* Second byte. */
2333 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2334 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f); /* Payload bits of the lead byte. */
2335 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2336 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); /* Payload bits of the second byte. */
2337 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2338 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); /* length - 1 */
2339 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2340 JUMPHERE(jump);
2341
2342 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2343 jump = JUMP(SLJIT_C_NOT_ZERO); /* Bit 0x10 set: not a three byte sequence. */
2344 /* Three byte sequence. */
2345 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); /* Second byte. */
2346 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2347 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2348 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2349 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2350 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2351 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); /* Third byte. */
2352 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2353 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2354 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2355 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2)); /* length - 1 */
2356 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2357 JUMPHERE(jump);
2358
2359 /* Four byte sequence. */
2360 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); /* Second byte. */
2361 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2362 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2363 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2364 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2365 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2366 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2)); /* Third byte. */
2367 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2368 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2369 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2370 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3)); /* Fourth byte. */
2371 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2372 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2373 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2374 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); /* length - 1 */
2375 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2376 }
2377
2378 static void do_utfreadtype8(compiler_common *common)
2379 {
2380 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2381 of the character (>= 0xc0); the second byte is read at STR_PTR itself (offset 0). Return value in TMP1: the ctypes entry for values up to 255, otherwise 0. STR_PTR is moved past the remaining bytes. TMP2 is destroyed. */
2382 DEFINE_COMPILER;
2383 struct sljit_jump *jump;
2384 struct sljit_jump *compare;
2385
2386 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2387
2388 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2389 jump = JUMP(SLJIT_C_NOT_ZERO); /* Bit 0x20 set: longer than two bytes. */
2390 /* Two byte sequence. */
2391 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); /* Second byte. */
2392 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2393 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2394 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2395 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2396 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); /* TMP2 = decoded character value. */
2397 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255); /* Above 255: no table entry. */
2398 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2399 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2400
2401 JUMPHERE(compare);
2402 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); /* Type 0 for two byte characters above 255. */
2403 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2404 JUMPHERE(jump);
2405
2406 /* We only have types for characters less than 256. */
2407 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); /* TMP1 = utf8_table4[lead byte - 0xc0]. */
2408 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); /* Step over that many further bytes without decoding them. */
2409 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2410 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2411 }
2412
2413 #elif defined COMPILE_PCRE16
2414
2415 static void do_utfreadchar(compiler_common *common)
2416 {
2417 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2418 of the character (>= 0xd800) and STR_PTR still points at it. Return char value in TMP1, length - 1 in TMP2; STR_PTR is advanced by the same length - 1. TMP2 is now written on every path, because peek_char subtracts it from STR_PTR unconditionally after the call. */
2419 DEFINE_COMPILER;
2420 struct sljit_jump *jump;
2421
2422 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2423 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00); /* 0xd800..0xdbff: a second unit follows. */
2424 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); /* Single unit character: TMP1 is already the value, so length - 1 is 0. */
2425 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2426
2427 JUMPHERE(jump);
2428 /* Combine two 16 bit characters. */
2429 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); /* Second unit. */
2430 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2431 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff); /* Low ten bits of the first unit. */
2432 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2433 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff); /* Low ten bits of the second unit. */
2434 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2435 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); /* length - 1 */
2436 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2437 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2438 }
2439
2440 #endif /* COMPILE_PCRE[8|16] */
2441
2442 #endif /* SUPPORT_UTF */
2443
2444 #ifdef SUPPORT_UCP
2445
2446 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2447 #define UCD_BLOCK_MASK 127
2448 #define UCD_BLOCK_SHIFT 7
2449
2450 static void do_getucd(compiler_common *common)
2451 {
2452 /* Search the UCD record for the character that comes in TMP1, using the
2453 ucd_stage1 / ucd_stage2 / ucd_records tables. Returns chartype in TMP1 and UCD offset (the stage 2 value, i.e. the record index) in TMP2. */
2454 DEFINE_COMPILER;
2455
2456 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8); /* The shift amounts 7 and 3 used below depend on these sizes. */
2457
2458 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2459 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); /* Block number of the character. */
2460 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1)); /* Stage 1 byte for that block. */
2461 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK); /* Offset inside the block. */
2462 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2463 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); /* Index into stage 2. */
2464 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2465 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); /* 16 bit load at ucd_stage2 + (index << 1). */
2466 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2467 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); /* chartype byte of record TMP2 (index << 3). */
2468 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2469 }
2470 #endif
2471
2472 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2473 { /* Emits the code that moves STR_PTR on by one character, and returns its label (mainloop). Execution falling into this code the first time jumps over the advance. With firstline set, code that finds the end of the first line and stores it in the first_line_end local is emitted first. */
2474 DEFINE_COMPILER;
2475 struct sljit_label *mainloop;
2476 struct sljit_label *newlinelabel = NULL;
2477 struct sljit_jump *start;
2478 struct sljit_jump *end = NULL;
2479 struct sljit_jump *nl = NULL;
2480 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2481 struct sljit_jump *singlechar;
2482 #endif
2483 jump_list *newline = NULL;
2484 BOOL newlinecheck = FALSE;
2485 BOOL readuchar = FALSE;
2486
2487 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2488 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2489 newlinecheck = TRUE; /* The advance step must treat a two unit newline as one step. */
2490
2491 if (firstline)
2492 {
2493 /* Search for the end of the first line. */
2494 SLJIT_ASSERT(common->first_line_end != 0);
2495 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); /* Save STR_PTR; it is restored after the search. */
2496
2497 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2498 {
2499 mainloop = LABEL(); /* Temporary use of mainloop as the search loop head. */
2500 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2501 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2502 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2503 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2504 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop); /* First newline unit must be just before STR_PTR. */
2505 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop); /* Second newline unit must be at STR_PTR. */
2506 JUMPHERE(end);
2507 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Store STR_PTR minus one unit. */
2508 }
2509 else
2510 {
2511 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2512 mainloop = LABEL(); /* Temporary use of mainloop as the search loop head. */
2513 /* Continual stores does not cause data dependency. */
2514 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2515 read_char(common);
2516 check_newlinechar(common, common->nltype, &newline, TRUE); /* Jumps in 'newline' are taken on a newline character. */
2517 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2518 JUMPHERE(end);
2519 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0); /* No newline found: the line ends where the scan stopped. */
2520 set_jumps(newline, LABEL()); /* Newline exits land after the store above, keeping the position saved before the newline was read. */
2521 }
2522
2523 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2524 }
2525
2526 start = JUMP(SLJIT_JUMP); /* Initial entry skips the advance code below. */
2527
2528 if (newlinecheck)
2529 {
2530 newlinelabel = LABEL(); /* Reached from mainloop when the current unit equals the first newline unit. */
2531 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2532 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2533 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2534 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2535 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); /* TMP1 = (next unit == second newline unit). */
2536 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2537 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); /* Scale the flag to one code unit. */
2538 #endif
2539 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); /* Step over the second newline unit when present. */
2540 nl = JUMP(SLJIT_JUMP);
2541 }
2542
2543 mainloop = LABEL(); /* The label returned to the caller. */
2544
2545 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2546 #ifdef SUPPORT_UTF
2547 if (common->utf) readuchar = TRUE;
2548 #endif
2549 if (newlinecheck) readuchar = TRUE;
2550
2551 if (readuchar)
2552 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); /* The current unit is only needed for the UTF or newline handling. */
2553
2554 if (newlinecheck)
2555 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2556
2557 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2558 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2559 #if defined COMPILE_PCRE8
2560 if (common->utf)
2561 {
2562 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0); /* Below 0xc0: nothing more to skip. */
2563 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); /* TMP1 = utf8_table4[lead byte - 0xc0]. */
2564 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2565 JUMPHERE(singlechar);
2566 }
2567 #elif defined COMPILE_PCRE16
2568 if (common->utf)
2569 {
2570 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800); /* Below 0xd800: nothing more to skip. */
2571 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2572 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2573 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); /* TMP1 = (unit is in 0xd800..0xdbff). */
2574 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2575 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); /* Conditionally step over the second unit. */
2576 JUMPHERE(singlechar);
2577 }
2578 #endif /* COMPILE_PCRE[8|16] */
2579 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2580 JUMPHERE(start);
2581
2582 if (newlinecheck)
2583 {
2584 JUMPHERE(end);
2585 JUMPHERE(nl);
2586 }
2587
2588 return mainloop;
2589 }
2590
2591 #define MAX_N_CHARS 3
2592
2593 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2594 { /* Collects up to MAX_N_CHARS leading literal code units from the compiled pattern and emits a loop that advances STR_PTR until they match the subject. Returns TRUE when code was emitted, FALSE when the pattern start is unsuitable (nothing is emitted then). With firstline set the scan is bounded by the first_line_end local. */
2595 DEFINE_COMPILER;
2596 struct sljit_label *start;
2597 struct sljit_jump *quit;
2598 pcre_uint32 chars[MAX_N_CHARS * 2]; /* Pairs: [2i] = unit value with the case bit set, [2i + 1] = case bit mask (0 if exact). */
2599 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2600 int location = 0; /* Number of chars[] slots filled (two per code unit). */
2601 pcre_int32 len, c, bit, caseless;
2602 int must_stop; /* 0: keep collecting, 1: take this character then stop, 2: stop without taking it. */
2603
2604 /* We do not support alternatives now. */
2605 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2606 return FALSE;
2607
2608 while (TRUE)
2609 {
2610 caseless = 0;
2611 must_stop = 1;
2612 switch(*cc)
2613 {
2614 case OP_CHAR:
2615 must_stop = 0;
2616 cc++;
2617 break;
2618
2619 case OP_CHARI:
2620 caseless = 1;
2621 must_stop = 0;
2622 cc++;
2623 break;
2624
2625 case OP_SOD:
2626 case OP_SOM:
2627 case OP_SET_SOM:
2628 case OP_NOT_WORD_BOUNDARY:
2629 case OP_WORD_BOUNDARY:
2630 case OP_EODN:
2631 case OP_EOD:
2632 case OP_CIRC:
2633 case OP_CIRCM:
2634 case OP_DOLL:
2635 case OP_DOLLM:
2636 /* Zero width assertions. */
2637 cc++;
2638 continue;
2639
2640 case OP_PLUS:
2641 case OP_MINPLUS:
2642 case OP_POSPLUS:
2643 cc++; /* must_stop stays 1: the repeated character is taken once. */
2644 break;
2645
2646 case OP_EXACT:
2647 cc += 1 + IMM2_SIZE; /* Skip the opcode and its IMM2_SIZE operand; must_stop stays 1. */
2648 break;
2649
2650 case OP_PLUSI:
2651 case OP_MINPLUSI:
2652 case OP_POSPLUSI:
2653 caseless = 1;
2654 cc++;
2655 break;
2656
2657 case OP_EXACTI:
2658 caseless = 1;
2659 cc += 1 + IMM2_SIZE;
2660 break;
2661
2662 default:
2663 must_stop = 2;
2664 break;
2665 }
2666
2667 if (must_stop == 2)
2668 break;
2669
2670 len = 1; /* Code units in the character at cc. */
2671 #ifdef SUPPORT_UTF
2672 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2673 #endif
2674
2675 if (caseless && char_has_othercase(common, cc))
2676 {
2677 caseless = char_get_othercase_bit(common, cc);
2678 if (caseless == 0)
2679 return FALSE; /* The two cases cannot be expressed as a single bit. */
2680 #ifdef COMPILE_PCRE8
2681 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8)); /* Repack: bit mask above bit 8, countdown value of the unit carrying it in the low byte. */
2682 #else
2683 if ((caseless & 0x100) != 0)
2684 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2685 else
2686 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2687 #endif
2688 }
2689 else
2690 caseless = 0;
2691
2692 while (len > 0 && location < MAX_N_CHARS * 2)
2693 {
2694 c = *cc;
2695 bit = 0;
2696 if (len == (caseless & 0xff)) /* This is the unit that carries the case bit. */
2697 {
2698 bit = caseless >> 8;
2699 c |= bit;
2700 }
2701
2702 chars[location] = c;
2703 chars[location + 1] = bit;
2704
2705 len--;
2706 location += 2;
2707 cc++;
2708 }
2709
2710 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2711 break;
2712 }
2713
2714 /* At least two characters are required. */
2715 if (location < 2 * 2)
2716 return FALSE;
2717
2718 if (firstline)
2719 {
2720 SLJIT_ASSERT(common->first_line_end != 0);
2721 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); /* Save the real STR_END. */
2722 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); /* Limit: first line end minus the look-ahead. */
2723 }
2724 else
2725 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); /* Pull STR_END in by the look-ahead so the loads below stay inside the subject. */
2726
2727 start = LABEL();
2728 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2729
2730 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); /* Unit 0 of the candidate. */
2731 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); /* Unit 1 of the candidate. */
2732 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* Advance now; undone after a full match. */
2733 if (chars[1] != 0)
2734 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2735 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2736 if (location > 2 * 2)
2737 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); /* Unit 2 (STR_PTR has already moved by one). */
2738 if (chars[3] != 0)
2739 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2740 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2741 if (location > 2 * 2)
2742 {
2743 if (chars[5] != 0)
2744 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2745 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2746 }
2747 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* All units matched: point back at the first one. */
2748
2749 JUMPHERE(quit);
2750
2751 if (firstline)
2752 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2753 else
2754 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1)); /* Restore STR_END. */
2755 return TRUE;
2756 }
2757
2758 #undef MAX_N_CHARS
2759
2760 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2761 { /* Emits a loop that advances STR_PTR one code unit at a time until the unit at STR_PTR equals first_char (or its other case when caseless is set) or STR_PTR reaches STR_END. With firstline set, STR_END is temporarily replaced by the first_line_end local. Uses TMP1, TMP2 and, for firstline, TMP3. */
2762 DEFINE_COMPILER;
2763 struct sljit_label *start;
2764 struct sljit_jump *quit;
2765 struct sljit_jump *found;
2766 pcre_uchar oc, bit;
2767
2768 if (firstline)
2769 {
2770 SLJIT_ASSERT(common->first_line_end != 0);
2771 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); /* Save the real STR_END. */
2772 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2773 }
2774
2775 start = LABEL();
2776 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2777 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2778
2779 oc = first_char; /* oc stays equal to first_char when no case folding applies. */
2780 if (caseless)
2781 {
2782 oc = TABLE_GET(first_char, common->fcc, first_char);
2783 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2784 if (first_char > 127 && common->utf)
2785 oc = UCD_OTHERCASE(first_char);
2786 #endif
2787 }
2788 if (first_char == oc)
2789 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2790 else
2791 {
2792 bit = first_char ^ oc;
2793 if (is_powerof2(bit))
2794 {
2795 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit); /* The cases differ in one bit: force it on and compare once. */
2796 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2797 }
2798 else
2799 {
2800 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2801 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); /* TMP2 = (TMP1 == first_char). */
2802 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2803 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL); /* TMP2 |= (TMP1 == oc), flags updated. */
2804 found = JUMP(SLJIT_C_NOT_ZERO);
2805 }
2806 }
2807
2808 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); /* No match here: try the next unit. */
2809 JUMPTO(SLJIT_JUMP, start);
2810 JUMPHERE(found);
2811 JUMPHERE(quit);
2812
2813 if (firstline)
2814 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); /* Restore the real STR_END. */
2815 }
2816
2817 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2818 { /* Emits code that advances STR_PTR to just after the next newline, unless STR_PTR is at or before the jit_arguments 'str' position. With firstline set, STR_END is saved in TMP3, replaced by the first_line_end local for the scan, and restored from TMP3 on both exits. */
2819 DEFINE_COMPILER;
2820 struct sljit_label *loop;
2821 struct sljit_jump *lastchar;
2822 struct sljit_jump *firstchar;
2823 struct sljit_jump *quit;
2824 struct sljit_jump *foundcr = NULL;
2825 struct sljit_jump *notfoundnl;
2826 jump_list *newline = NULL;
2827
2828 if (firstline)
2829 {
2830 SLJIT_ASSERT(common->first_line_end != 0);
2831 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); /* Save the real STR_END. */
2832 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2833 }
2834
2835 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2836 {
2837 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2838 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2839 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2840 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2841 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0); /* At or before 'str': nothing to skip. */
2842
2843 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2844 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2845 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL); /* TMP2 = (STR_PTR >= begin + 2 units). */
2846 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2847 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
2848 #endif
2849 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); /* Step back one unit when there is room, so the loop's first [-2] load stays at or after 'begin'. */
2850
2851 loop = LABEL();
2852 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2853 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2854 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2855 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2856 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); /* First newline unit two back. */
2857 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop); /* Second newline unit one back. */
2858
2859 JUMPHERE(quit);
2860 JUMPHERE(firstchar);
2861 JUMPHERE(lastchar);
2862
2863 if (firstline)
2864 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); /* Restore from TMP3, where STR_END was saved above; nothing on this path writes TMP3. */
2865 return;
2866 }
2867
2868 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2869 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2870 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0); /* At or before 'str': nothing to skip. */
2871 skip_char_back(common); /* Start the scan with the character before STR_PTR. */
2872
2873 loop = LABEL();
2874 read_char(common);
2875 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2876 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2877 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); /* CR may be followed by NL; handled below. */
2878 check_newlinechar(common, common->nltype, &newline, FALSE); /* Jumps in 'newline' are taken when TMP1 is not a newline. */
2879 set_jumps(newline, loop);
2880
2881 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2882 {
2883 quit = JUMP(SLJIT_JUMP);
2884 JUMPHERE(foundcr);
2885 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2886 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2887 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2888 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); /* TMP1 = (unit after CR is NL). */
2889 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2890 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2891 #endif
2892 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); /* Step over the NL of a CR NL pair. */
2893 JUMPHERE(notfoundnl);
2894 JUMPHERE(quit);
2895 }
2896 JUMPHERE(lastchar);
2897 JUMPHERE(firstchar);
2898
2899 if (firstline)
2900 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); /* Restore the real STR_END. */
2901 }
2902
2903 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
2904
2905 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2906 { /* Emits a loop that advances STR_PTR character by character until the unit at STR_PTR has its bit set in the 32 byte bitmap whose address is passed in start_bits, or STR_PTR reaches STR_END. With firstline set, STR_END is saved in RETURN_ADDR and replaced by the first_line_end local for the scan. */
2907 DEFINE_COMPILER;
2908 struct sljit_label *start;
2909 struct sljit_jump *quit;
2910 struct sljit_jump *found = NULL;
2911 jump_list *matches = NULL;
2912 pcre_uint8 inverted_start_bits[32];
2913 int i;
2914 #ifndef COMPILE_PCRE8
2915 struct sljit_jump *jump;
2916 #endif
2917
2918 for (i = 0; i < 32; ++i)
2919 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]); /* Complemented copy, passed to check_class_ranges below. */
2920
2921 if (firstline)
2922 {
2923 SLJIT_ASSERT(common->first_line_end != 0);
2924 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0); /* TMP3 is used for the character copy in UTF mode, so STR_END is kept in RETURN_ADDR. */
2925 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2926 }
2927
2928 start = LABEL();
2929 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2930 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2931 #ifdef SUPPORT_UTF
2932 if (common->utf)
2933 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); /* Keep the unit: TMP1 is overwritten by the bitmap test. */
2934 #endif
2935
2936 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches)) /* NOTE(review): presumably the jumps added to 'matches' are taken for characters outside the inverted set, i.e. possible start characters - confirm in check_class_ranges. */
2937 {
2938 #ifndef COMPILE_PCRE8
2939 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2940 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255); /* Values of 255 and above all use bit 255. */
2941 JUMPHERE(jump);
2942 #endif
2943 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); /* Bit number inside the byte. */
2944 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); /* Byte index into the bitmap. */
2945 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2946 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); /* TMP2 = 1 << bit number. */
2947 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2948 found = JUMP(SLJIT_C_NOT_ZERO); /* Bit set: stop here. */
2949 }
2950
2951 #ifdef SUPPORT_UTF
2952 if (common->utf)
2953 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0); /* Reload the unit to decide how far to advance. */
2954 #endif
2955 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2956 #ifdef SUPPORT_UTF
2957 #if defined COMPILE_PCRE8
2958 if (common->utf)
2959 {
2960 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start); /* Below 0xc0: nothing more to skip. */
2961 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); /* TMP1 = utf8_table4[lead byte - 0xc0]. */
2962 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2963 }
2964 #elif defined COMPILE_PCRE16
2965 if (common->utf)
2966 {
2967 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start); /* Below 0xd800: nothing more to skip. */
2968 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2969 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2970 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL); /* TMP1 = (unit is in 0xd800..0xdbff). */
2971 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2972 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); /* Conditionally step over the second unit. */
2973 }
2974 #endif /* COMPILE_PCRE[8|16] */
2975 #endif /* SUPPORT_UTF */
2976 JUMPTO(SLJIT_JUMP, start);
2977 if (found != NULL)
2978 JUMPHERE(found);
2979 if (matches != NULL)
2980 set_jumps(matches, LABEL());
2981 JUMPHERE(quit);
2982
2983 if (firstline)
2984 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0); /* Restore the real STR_END. */
2985 }
2986
2987 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
2988 { /* Emits a forward scan for req_char (or its other case when caseless is set) from STR_PTR (from STR_PTR + 1 unit when has_firstchar is set) up to STR_END, storing the position found in the req_char_ptr local. Returns the jump taken when the scan reaches STR_END without a hit; the caller must set its target. Uses TMP1 and TMP2. */
2989 DEFINE_COMPILER;
2990 struct sljit_label *loop;
2991 struct sljit_jump *toolong;
2992 struct sljit_jump *alreadyfound;
2993 struct sljit_jump *found;
2994 struct sljit_jump *foundoc = NULL;
2995 struct sljit_jump *notfound;
2996 pcre_uint32 oc, bit;
2997
2998 SLJIT_ASSERT(common->req_char_ptr != 0);
2999 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr); /* Position stored by an earlier search. */
3000 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3001 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0); /* STR_END is more than REQ_BYTE_MAX ahead: skip the search. */
3002 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0); /* The stored position is still ahead of STR_PTR: skip the search. */
3003
3004 if (has_firstchar)
3005 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3006 else
3007 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3008
3009 loop = LABEL(); /* TMP1 is the scan pointer. */
3010 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3011
3012 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3013 oc = req_char; /* oc stays equal to req_char when no case folding applies. */
3014 if (caseless)
3015 {
3016 oc = TABLE_GET(req_char, common->fcc, req_char);
3017 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3018 if (req_char > 127 && common->utf)
3019 oc = UCD_OTHERCASE(req_char);
3020 #endif
3021 }
3022 if (req_char == oc)
3023 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3024 else
3025 {
3026 bit = req_char ^ oc;
3027 if (is_powerof2(bit))
3028 {
3029 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit); /* The cases differ in one bit: force it on and compare once. */
3030 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3031 }
3032 else
3033 {
3034 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3035 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3036 }
3037 }
3038 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3039 JUMPTO(SLJIT_JUMP, loop);
3040
3041 JUMPHERE(found);
3042 if (foundoc)
3043 JUMPHERE(foundoc);
3044 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0); /* Remember where it was found. */
3045 JUMPHERE(alreadyfound);
3046 JUMPHERE(toolong);
3047 return notfound;
3048 }
3049
3050 static void do_revertframes(compiler_common *common)
3051 { /* Emits a fast-call routine that walks frame entries starting at STACK_TOP, using TMP1 as the cursor, and writes the saved values back until a frame_end entry is read. STACK_TOP itself is not modified here. TMP1, TMP2 and TMP3 are used. */
3052 DEFINE_COMPILER;
3053 struct sljit_jump *jump;
3054 struct sljit_label *mainloop;
3055
3056 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3057 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3058 GET_LOCAL_BASE(TMP3, 0, 0); /* TMP3 is the base that entry offsets are added to. */
3059
3060 /* Drop frames until we reach STACK_TOP. */
3061 mainloop = LABEL();
3062 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0); /* First word of the entry: an offset or a command code. */
3063 jump = CMP(SLJIT_C_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, frame_end); /* Signed values up to frame_end are commands. */
3064 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0); /* Otherwise it is an offset: form the destination address. */
3065 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw)); /* Copy back two saved words. */
3066 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3067 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); /* Such an entry is three words long. */
3068 JUMPTO(SLJIT_JUMP, mainloop);
3069
3070 JUMPHERE(jump);
3071 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_end);
3072 /* End of dropping frames. */
3073 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3074
3075 JUMPHERE(jump);
3076 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setstrbegin);
3077 /* Set string begin. */
3078 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3079 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); /* Command entries are two words long. */
3080 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), TMP2, 0);
3081 JUMPTO(SLJIT_JUMP, mainloop);
3082
3083 JUMPHERE(jump);
3084 if (common->mark_ptr != 0)
3085 {
3086 jump = CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, frame_setmark);
3087 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3088 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3089 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr, TMP2, 0); /* Write the saved value back to the mark_ptr local. */
3090 JUMPTO(SLJIT_JUMP, mainloop);
3091
3092 JUMPHERE(jump);
3093 }
3094
3095 /* Unknown command. */
3096 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); /* Skipped as a two word entry; its payload is ignored. */
3097 JUMPTO(SLJIT_JUMP, mainloop);
3098 }
3099
/* Emits the shared word-boundary helper routine.

The generated routine is bracketed by sljit_emit_fast_enter / sljit_emit_fast_return,
both of which use the LOCALS0 slot. It computes a 0/1 "is a word character"
value for the character before STR_PTR (stored in LOCALS1; 0 when STR_PTR is
not beyond args->begin) and for the character at STR_PTR (kept in TMP2; 0 when
check_str_end() takes its jump), and finally XORs the two with SLJIT_SET_E so
the flags tell whether the two values differ. TMP1 and TMP2 are overwritten.
NOTE(review): how callers consume the resulting flags is not visible here. */
3100 static void check_wordboundary(compiler_common *common)
3101 {
3102 DEFINE_COMPILER;
3103 struct sljit_jump *skipread;
3104 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3105 struct sljit_jump *jump;
3106 #endif
3107
/* The table lookups below extract ctype_word with a right shift by 4. */
3108 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3109
3110 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3111 /* Get type of the previous char, and put it to LOCALS1. */
3112 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3113 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
/* Default result for the previous character is 0; it stays 0 when the read is skipped. */
3114 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
/* No previous character when STR_PTR <= args->begin. */
3115 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3116 skip_char_back(common);
3117 check_start_used_ptr(common);
3118 read_char(common);
3119
3120 /* Testing char type. */
3121 #ifdef SUPPORT_UCP
3122 if (common->use_ucp)
3123 {
/* TMP2 = 1 is the answer for an underscore, which jumps over the property lookup. */
3124 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3125 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
/* Presumably getucd leaves the character type in TMP1 - confirm against getucd. */
3126 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
/* Two unsigned range tests on the rebased value: [ucp_Ll, ucp_Lu] and [ucp_Nd, ucp_No]. */
3127 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3128 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3129 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3130 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3131 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3132 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3133 JUMPHERE(jump);
3134 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3135 }
3136 else
3137 #endif
3138 {
/* Characters above 255 bypass the ctypes table, so LOCALS1 keeps its 0. */
3139 #ifndef COMPILE_PCRE8
3140 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3141 #elif defined SUPPORT_UTF
3142 /* Here LOCALS1 has already been zeroed. */
3143 jump = NULL;
3144 if (common->utf)
3145 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3146 #endif /* COMPILE_PCRE8 */
/* Load the ctypes byte for the character and reduce it to the ctype_word bit (0 or 1). */
3147 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3148 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3149 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3150 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3151 #ifndef COMPILE_PCRE8
3152 JUMPHERE(jump);
3153 #elif defined SUPPORT_UTF
3154 if (jump != NULL)
3155 JUMPHERE(jump);
3156 #endif /* COMPILE_PCRE8 */
3157 }
3158 JUMPHERE(skipread);
3159
/* Second half: the character at STR_PTR. TMP2 defaults to 0 for the skipped case. */
3160 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3161 skipread = check_str_end(common);
3162 peek_char(common);
3163
3164 /* Testing char type. This is a code duplication. */
3165 #ifdef SUPPORT_UCP
3166 if (common->use_ucp)
3167 {
3168 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3169 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3170 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3171 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3172 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3173 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3174 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3175 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3176 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3177 JUMPHERE(jump);
3178 }
3179 else
3180 #endif
3181 {
3182 #ifndef COMPILE_PCRE8
3183 /* TMP2 may be destroyed by peek_char. */
3184 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3185 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3186 #elif defined SUPPORT_UTF
3187 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3188 jump = NULL;
3189 if (common->utf)
3190 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3191 #endif
/* Same ctypes lookup as above, but the 0/1 result stays in TMP2. */
3192 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3193 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3194 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3195 #ifndef COMPILE_PCRE8
3196 JUMPHERE(jump);
3197 #elif defined SUPPORT_UTF
3198 if (jump != NULL)
3199 JUMPHERE(jump);
3200 #endif /* COMPILE_PCRE8 */
3201 }
3202 JUMPHERE(skipread);
3203
/* XOR of the two 0/1 values: the result is discarded, only the flags are set. */
3204 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3205 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3206 }
3207
3208 /*
3209 range format:
3210
3211 ranges[0] = number of bit-change positions stored (max MAX_RANGE_SIZE, -1 means invalid range).
3212 ranges[1] = first bit (0 or 1), i.e. the bit of character 0
3213 ranges[2 .. length+1] = positions of the bit changes (where the current bit is not equal to the previous one)
3214 */
3215
/* Tries to emit a compact comparison sequence for a character set given in
the "range format" (ranges[0] = number of change positions, ranges[1] = bit
of character 0, ranges[2..] = change positions).

common      compiler state
ranges      range description; ranges[0] < 0 means invalid
backtracks  jump list that receives the jumps emitted by the comparisons
readch      when TRUE, read_char() is emitted first; otherwise the character
            is expected to be in TMP1 already

Returns TRUE when code was emitted, FALSE when the shape of the ranges is not
supported (nothing is emitted in that case). TMP1 may be modified by the
emitted code. */
3216 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3217 {
3218 DEFINE_COMPILER;
3219 struct sljit_jump *jump;
3220
3221 if (ranges[0] < 0)
3222 return FALSE;
3223
3224 switch(ranges[0])
3225 {
/* One change position: a single threshold comparison. */
3226 case 1:
3227 if (readch)
3228 read_char(common);
3229 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3230 return TRUE;
3231
/* Two change positions: rebase to ranges[2] and compare against the width. */
3232 case 2:
3233 if (readch)
3234 read_char(common);
3235 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3236 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3237 return TRUE;
3238
3239 case 4:
/* Two isolated single characters: two equality tests. */
3240 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3241 {
3242 if (readch)
3243 read_char(common);
3244 if (ranges[1] != 0)
3245 {
3246 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3247 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3248 }
3249 else
3250 {
3251 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3252 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3253 JUMPHERE(jump);
3254 }
3255 return TRUE;
3256 }
/* Two equally wide ranges whose start positions differ by a single bit:
OR-ing that bit into the character folds the first range onto the second,
so one rebased comparison covers both. This is only valid when the bit is
clear in every character of the first range; because the first range is
shorter than the bit value, checking its first and last character suffices.
Without these checks, e.g. 0x30-0x39 / 0x50-0x59 (bit 0x20 already set in
0x30) would reject characters of the first range. */
3257 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2])
&& (ranges[2] & (ranges[4] - ranges[2])) == 0
&& ((ranges[3] - 1) & (ranges[4] - ranges[2])) == 0)
3258 {
3259 if (readch)
3260 read_char(common);
3261 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3262 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3263 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3264 return TRUE;
3265 }
3266 return FALSE;
3267
3268 default:
3269 return FALSE;
3270 }
3271 }
3272
/* Describes, in the "range format", the characters 0..255 whose ctypes entry
has `flag` set. ranges[1] receives the state of character 0, ranges[2..] the
positions where the state flips, and ranges[0] the number of positions. When
more than MAX_RANGE_SIZE positions would be needed, the function returns
early and ranges[0] stays at -1. */
3273 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3274 {
3275 int ch, state, count;
3276 const pcre_uint8 *table = (const pcre_uint8*)common->ctypes;
3277
3278 state = table[0] & flag;
/* Marked invalid until the scan has finished. */
3279 ranges[0] = -1;
3280 ranges[1] = (state == 0) ? 0 : 1;
3281 count = 0;
3282
3283 for (ch = 1; ch < 256; ch++)
3284 if (state != (table[ch] & flag))
3285 {
3286 if (MAX_RANGE_SIZE <= count)
3287 return;
3288 ranges[count + 2] = ch;
3289 count += 1;
3290 state ^= flag;
3291 }
3292
/* A run that is still "set" at the end is closed by a change at 256. */
3293 if (state != 0)
3294 {
3295 if (MAX_RANGE_SIZE <= count)
3296 return;
3297 ranges[count + 2] = 256;
3298 count += 1;
3299 }
3300 ranges[0] = count;
3301 }
3302
/* Converts a 256-bit class bitmap into the "range format" and passes it to
check_ranges() (with readch == FALSE).

bits        32-byte bitmap, bit (c & 7) of byte (c >> 3) belongs to character c
nclass      selects, together with the state of the last bitmap bit, whether
            a closing change position at 256 is appended
backtracks  forwarded to check_ranges()

Returns FALSE when more than MAX_RANGE_SIZE change positions are needed;
otherwise returns the result of check_ranges(). */
3303 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3304 {
3305 int ranges[2 + MAX_RANGE_SIZE];
3306 pcre_uint8 current, sample, uniform;
3307 int pos, index, count = 0;
3308
3309 current = bits[0] & 0x1;
3310 ranges[1] = current;
3311 /* A bitmap byte with no change in it: 0 or 255. */
3312 uniform = (current != 0) ? 0xff : 0x00;
3313
3314 for (pos = 0; pos < 256; )
3315 {
3316 index = pos >> 3;
/* Whole bytes that match the current state are skipped in one step. */
3317 if ((pos & 0x7) == 0 && bits[index] == uniform)
3318 pos += 8;
3319 else
3320 {
3321 sample = (bits[index] >> (pos & 0x7)) & 0x1;
3322 if (sample != current)
3323 {
3324 if (MAX_RANGE_SIZE <= count)
3325 return FALSE;
3326 ranges[count + 2] = pos;
3327 count += 1;
3328 current = sample;
3329 uniform = (sample != 0) ? 0xff : 0x00;
3330 }
3331 pos++;
3332 }
3333 }
3334
/* Same condition as (current == 0 && nclass) || (current == 1 && !nclass). */
3335 if ((current != 0) == !nclass)
3336 {
3337 if (MAX_RANGE_SIZE <= count)
3338 return FALSE;
3339 ranges[count + 2] = 256;
3340 count += 1;
3341 }
3342 ranges[0] = count;
3343
3344 return check_ranges(common, ranges, backtracks, FALSE);
3345 }
3346
/* Emits the shared "any newline" test routine (fast enter/return through
RETURN_ADDR). The result is accumulated as a 0/1 value in TMP2, and the final
OP_FLAGS is issued with SLJIT_SET_E. */
3347 static void check_anynewline(compiler_common *common)
3348 {
3349 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* Note that TMP1 itself is also rebased (and OR-ed in the wide/UTF path) below. */
3350 DEFINE_COMPILER;
3351
3352 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3353
/* Rebase to 0x0a; one unsigned comparison then covers 0x0a..0x0d. */
3354 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3355 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3356 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
/* Equality test for 0x85; its result is merged by the next OP_FLAGS. */
3357 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3358 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3359 #ifdef COMPILE_PCRE8
3360 if (common->utf)
3361 {
3362 #endif
3363 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit folds 0x2028 onto 0x2029, so one equality test covers both. */
3364 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3365 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3366 #ifdef COMPILE_PCRE8
3367 }
3368 #endif
3369 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last equality test and leave the flags describing the combined result. */
3370 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3371 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3372 }
3373
/* Emits the shared horizontal white space test routine (fast enter/return
through RETURN_ADDR). Each comparison result is OR-ed into TMP2 as a 0/1
value, and the final OP_FLAGS is issued with SLJIT_SET_E. */
3374 static void check_hspace(compiler_common *common)
3375 {
3376 /* Check whether TMP1 contains a horizontal white space character. TMP2 destroyed. */
3377 DEFINE_COMPILER;
3378
3379 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3380
/* Characters tested in every build: 0x09, 0x20 and 0xa0. */
3381 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3382 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3383 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3384 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3385 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3386 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3387 #ifdef COMPILE_PCRE8
3388 if (common->utf)
3389 {
3390 #endif
/* Wide/UTF additions: 0x1680, 0x180e, 0x2000..0x200A, 0x202f, 0x205f, 0x3000. */
3391 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3392 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3393 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3394 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3395 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is rebased to 0x2000, so the remaining constants are relative to it. */
3396 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3397 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3398 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3399 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3400 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3401 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3402 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3403 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3404 #ifdef COMPILE_PCRE8
3405 }
3406 #endif
3407 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last pending equality test (0xa0 or 0x3000) and set the final flags. */
3408 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3409
3410 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3411 }
3412
/* Emits the shared vertical white space test routine (fast enter/return
through RETURN_ADDR). The characters tested are 0x0a..0x0d, 0x85 and, in the
wide/UTF path, 0x2028 and 0x2029. The 0/1 result is accumulated in TMP2 and
the final OP_FLAGS is issued with SLJIT_SET_E. */
3413 static void check_vspace(compiler_common *common)
3414 {
3415 /* Check whether TMP1 contains a vertical white space character. TMP2 destroyed. */
/* Note that TMP1 itself is also rebased (and OR-ed in the wide/UTF path) below. */
3416 DEFINE_COMPILER;
3417
3418 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3419
/* Rebase to 0x0a; one unsigned comparison then covers 0x0a..0x0d. */
3420 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3421 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3422 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
/* Equality test for 0x85; its result is merged by the next OP_FLAGS. */
3423 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3424 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3425 #ifdef COMPILE_PCRE8
3426 if (common->utf)
3427 {
3428 #endif
3429 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit folds 0x2028 onto 0x2029, so one equality test covers both. */
3430 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3431 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3432 #ifdef COMPILE_PCRE8
3433 }
3434 #endif
3435 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merge the last equality test and leave the flags describing the combined result. */
3436 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3437
3438 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3439 }
3440
/* The compare helpers borrow STR_END and STACK_TOP as character registers;
their original values are saved on entry and restored before returning. */
3441 #define CHAR1 STR_END
3442 #define CHAR2 STACK_TOP
3443
/* Emits the shared case-sensitive sequence compare routine (fast enter/return
through RETURN_ADDR).

As emitted: STR_PTR is first moved back by TMP2, then code units are loaded
pairwise through TMP1 and STR_PTR and compared; TMP2 is decreased by
IN_UCHARS(1) per unit and the loop ends when it reaches zero or on the first
mismatch. NOTE(review): this implies TMP2 holds the length in bytes and TMP1
the start of the other sequence on entry - confirm against the callers, which
presumably also test the flags left by the loop. */
3444 static void do_casefulcmp(compiler_common *common)
3445 {
3446 DEFINE_COMPILER;
3447 struct sljit_jump *jump;
3448 struct sljit_label *label;
3449
3450 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3451 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
/* Save the registers aliased by CHAR1 / CHAR2. */
3452 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3453 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
/* Step both pointers one unit back: the MOVU_UCHAR loads below address IN_UCHARS(1) ahead. */
3454 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3455 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3456
3457 label = LABEL();
3458 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3459 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3460 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3461 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3462 JUMPTO(SLJIT_C_NOT_ZERO, label);
3463
/* Common exit for mismatch and completion: undo the one-unit bias and restore the saved registers. */
3464 JUMPHERE(jump);
3465 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3466 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3467 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3468 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3469 }
3470
/* STACK_LIMIT is borrowed to hold the address of the lower-casing table. */
3471 #define LCC_TABLE STACK_LIMIT
3472
/* Emits the shared caseless sequence compare routine (fast enter/return
through RETURN_ADDR).

Same loop structure as do_casefulcmp(), except that each loaded code unit is
first mapped through the common->lcc table; when COMPILE_PCRE8 is not defined,
units above 255 skip the table and are compared unchanged. LCC_TABLE, CHAR1
and CHAR2 are saved in TMP3, LOCALS0 and LOCALS1 and restored before
returning. NOTE(review): entry conditions (TMP1, TMP2, STR_PTR) are inferred
from the emitted code - confirm against the callers. */
3473 static void do_caselesscmp(compiler_common *common)
3474 {
3475 DEFINE_COMPILER;
3476 struct sljit_jump *jump;
3477 struct sljit_label *label;
3478
3479 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3480 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3481
/* Save the borrowed registers, then load the table address. */
3482 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3483 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3484 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3485 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
/* Step both pointers one unit back: the MOVU_UCHAR loads below address IN_UCHARS(1) ahead. */
3486 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3487 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3488
3489 label = LABEL();
3490 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3491 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* Map each unit through the table; units above 255 are left as they are. */
3492 #ifndef COMPILE_PCRE8
3493 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3494 #endif
3495 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3496 #ifndef COMPILE_PCRE8
3497 JUMPHERE(jump);
3498 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3499 #endif
3500 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3501 #ifndef COMPILE_PCRE8
3502 JUMPHERE(jump);
3503 #endif
3504 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3505 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3506 JUMPTO(SLJIT_C_NOT_ZERO, label);
3507
/* Common exit for mismatch and completion: undo the one-unit bias and restore the saved registers. */
3508 JUMPHERE(jump);
3509 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3510 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3511 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3512 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3513 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3514 }
3515
3516 #undef LCC_TABLE
3517 #undef CHAR1
3518 #undef CHAR2
3519
3520 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3521
/* Caseless comparison of two UTF sequences using the Unicode tables.

src1 / end1  first sequence
args         supplies the second sequence: it starts at args->uchar_ptr and
             must not be read at or beyond args->end

Returns the position reached in the second sequence when every character of
the first one matched, NULL on a mismatch, and (pcre_uchar*)1 when the second
sequence ran out before the first one was consumed. */
3522 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3523 {
3524 /* Implemented in C because emitting this logic as JIT code would be inefficient. */
3525 pcre_uint32 ch1, ch2;
3526 const pcre_uchar *subject = args->uchar_ptr;
3527 const pcre_uchar *subject_end = args->end;
3528 const ucd_record *record;
3529 const pcre_uint32 *set;
3530
3531 while (src1 < end1)
3532 {
3533 if (subject >= subject_end)
3534 return (pcre_uchar*)1;
3535 GETCHARINC(ch1, src1);
3536 GETCHARINC(ch2, subject);
3537 record = GET_UCD(ch2);
3538 if (ch1 != ch2 && ch1 != ch2 + record->other_case)
3539 {
/* Neither identical nor the simple other case: search the caseless set of ch2.
The search stops with a mismatch at the first entry greater than ch1. */
3540 set = PRIV(ucd_caseless_sets) + record->caseset;
3541 while (ch1 != *set)
3542 {
3543 if (ch1 < *set) return NULL;
3544 set++;
3545 }
3546 }
3547 }
3548 return subject;
3549 }
3550
3551 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3552
/* Emits the comparison code for one pattern character at cc (several code
units when common->utf is set and the character has extra units).

common      compiler state
caseless    when TRUE and the character has an other case, the differing bit
            is OR-ed into the subject data before comparing
cc          pattern position of the character
context     running state shared between consecutive calls: remaining length
            in bytes, the register that receives the next load (sourcereg,
            -1 before the first load) and, when unaligned reads are used, the
            code units collected so far
backtracks  jump list that receives the mismatch jumps

Subject data is read at negative offsets (-context->length) from STR_PTR.
NOTE(review): this suggests STR_PTR already points past the compared text -
confirm against the caller. Returns cc advanced past the character. */
3553 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3554 compare_context* context, jump_list **backtracks)
3555 {
3556 DEFINE_COMPILER;
3557 unsigned int othercasebit = 0;
3558 pcre_uchar *othercasechar = NULL;
3559 #ifdef SUPPORT_UTF
3560 int utflength;
3561 #endif
3562
3563 if (caseless && char_has_othercase(common, cc))
3564 {
3565 othercasebit = char_get_othercase_bit(common, cc);
3566 SLJIT_ASSERT(othercasebit);
3567 /* Extracting bit difference info. */
/* The upper part of the value selects which code unit carries the differing
bit (othercasechar), the lower part is the bit itself. */
3568 #if defined COMPILE_PCRE8
3569 othercasechar = cc + (othercasebit >> 8);
3570 othercasebit &= 0xff;
3571 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3572 /* Note that this code only handles characters in the BMP. If there
3573 ever are characters outside the BMP whose othercase differs in only one
3574 bit from itself (there currently are none), this code will need to be
3575 revised for COMPILE_PCRE32. */
3576 othercasechar = cc + (othercasebit >> 9);
3577 if ((othercasebit & 0x100) != 0)
3578 othercasebit = (othercasebit & 0xff) << 8;
3579 else
3580 othercasebit &= 0xff;
3581 #endif /* COMPILE_PCRE[8|16|32] */
3582 }
3583
/* First call for this sequence: load the first chunk into TMP1 and make TMP2
the destination of the next load, so loads and compares alternate registers. */
3584 if (context->sourcereg == -1)
3585 {
3586 #if defined COMPILE_PCRE8
3587 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3588 if (context->length >= 4)
3589 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3590 else if (context->length >= 2)
3591 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3592 else
3593 #endif
3594 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3595 #elif defined COMPILE_PCRE16
3596 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3597 if (context->length >= 4)
3598 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3599 else
3600 #endif
3601 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3602 #elif defined COMPILE_PCRE32
3603 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3604 #endif /* COMPILE_PCRE[8|16|32] */
3605 context->sourcereg = TMP2;
3606 }
3607
/* One iteration per code unit of the character (a single pass without SUPPORT_UTF). */
3608 #ifdef SUPPORT_UTF
3609 utflength = 1;
3610 if (common->utf && HAS_EXTRALEN(*cc))
3611 utflength += GET_EXTRALEN(*cc);
3612
3613 do
3614 {
3615 #endif
3616
3617 context->length -= IN_UCHARS(1);
3618 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3619
3620 /* Unaligned read is supported. */
/* Collect the expected unit in context->c and the case bit in context->oc;
a comparison is emitted only once a full chunk has been gathered. */
3621 if (othercasebit != 0 && othercasechar == cc)
3622 {
3623 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3624 context->oc.asuchars[context->ucharptr] = othercasebit;
3625 }
3626 else
3627 {
3628 context->c.asuchars[context->ucharptr] = *cc;
3629 context->oc.asuchars[context->ucharptr] = 0;
3630 }
3631 context->ucharptr++;
3632
3633 #if defined COMPILE_PCRE8
3634 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3635 #else
3636 if (context->ucharptr >= 2 || context->length == 0)
3637 #endif
3638 {
/* Issue the load of the next chunk first, then switch to the register that
holds the chunk to be compared now. */
3639 if (context->length >= 4)
3640 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3641 else if (context->length >= 2)
3642 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3643 #if defined COMPILE_PCRE8
3644 else if (context->length >= 1)
3645 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3646 #endif /* COMPILE_PCRE8 */
3647 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3648
/* Compare at the width matching the number of collected units; the case bits
are OR-ed into the subject data beforehand when any are present. */
3649 switch(context->ucharptr)
3650 {
3651 case 4 / sizeof(pcre_uchar):
3652 if (context->oc.asint != 0)
3653 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3654 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3655 break;
3656
3657 case 2 / sizeof(pcre_uchar):
3658 if (context->oc.asushort != 0)
3659 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3660 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3661 break;
3662
3663 #ifdef COMPILE_PCRE8
3664 case 1:
3665 if (context->oc.asbyte != 0)
3666 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3667 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3668 break;
3669 #endif
3670
3671 default:
3672 SLJIT_ASSERT_STOP();
3673 break;
3674 }
3675 context->ucharptr = 0;
3676 }
3677
3678 #else
3679
3680 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit at a time: load the next unit, then compare the one loaded previously. */
3681 if (context->length >= 1)
3682 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3683
3684 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3685
3686 if (othercasebit != 0 && othercasechar == cc)
3687 {
3688 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3689 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3690 }
3691 else
3692 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3693
3694 #endif
3695
3696 cc++;
3697 #ifdef SUPPORT_UTF
3698 utflength--;
3699 }
3700 while (utflength > 0);
3701 #endif
3702
3703 return cc;
3704 }
3705
3706 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3707
/* Rebase helpers used only by compile_xclass_matchingpath(). They keep the
type / character register biased by a compile-time offset (typeoffset /
charoffset) so range tests can be done with one unsigned comparison. When the
requested offset differs, a SUB or ADD of the difference is emitted and the
compile-time offset is updated. */
3708 #define SET_TYPE_OFFSET(value) \
3709 if ((value) != typeoffset) \
3710 { \
3711 if ((value) > typeoffset) \
3712 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3713 else \
3714 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3715 } \
3716 typeoffset = (value);
3717
3718 #define SET_CHAR_OFFSET(value) \
3719 if ((value) != charoffset) \
3720 { \
3721 if ((value) > charoffset) \
3722 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
3723 else \
3724 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
3725 } \
3726 charoffset = (value);
3727
/* Emits the matching code for an extended class (XCLASS) item.

common      compiler state
cc          points at the XCLASS flag unit (XCL_NOT / XCL_MAP), followed by
            the optional 32-byte bitmap and the XCL_* item list ending with
            XCL_END
backtracks  jump list taken when the character is not matched

The item list is walked twice: first to count the items and to find out
whether the character value, its Unicode type and/or its script are needed,
then to emit one comparison per item. For a non-negated class successful
comparisons jump to a local "found" list that is bound at the end; for a
negated class (XCL_NOT) they are added to backtracks directly. */
3728 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3729 {
3730 DEFINE_COMPILER;
3731 jump_list *found = NULL;
3732 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3733 pcre_int32 c, charoffset;
3734 const pcre_uint32 *other_cases;
3735 struct sljit_jump *jump = NULL;
3736 pcre_uchar *ccbegin;
3737 int compares, invertcmp, numberofcmps;
3738 #ifdef SUPPORT_UCP
3739 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3740 BOOL charsaved = FALSE;
3741 int typereg = TMP1, scriptreg = TMP1;
3742 pcre_int32 typeoffset;
3743 #endif
3744
3745 /* Although SUPPORT_UTF must be defined, we are
3746 not necessary in utf mode even in 8 bit mode. */
3747 detect_partial_match(common, backtracks);
3748 read_char(common);
3749
/* Optional bitmap for characters up to 255. The character is kept in TMP3
because the bitmap test modifies TMP1. */
3750 if ((*cc++ & XCL_MAP) != 0)
3751 {
3752 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3753 #ifndef COMPILE_PCRE8
3754 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3755 #elif defined SUPPORT_UTF
3756 if (common->utf)
3757 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3758 #endif
3759
/* Prefer the compact range comparisons; otherwise test the bit
(c & 7) of bitmap byte (c >> 3). */
3760 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3761 {
3762 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3763 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3764 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3765 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3766 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3767 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3768 }
3769
3770 #ifndef COMPILE_PCRE8
3771 JUMPHERE(jump);
3772 #elif defined SUPPORT_UTF
3773 if (common->utf)
3774 JUMPHERE(jump);
3775 #endif
3776 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3777 #ifdef SUPPORT_UCP
3778 charsaved = TRUE;
3779 #endif
3780 cc += 32 / sizeof(pcre_uchar);
3781 }
3782
3783 /* Scanning the necessary info. */
3784 ccbegin = cc;
3785 compares = 0;
3786 while (*cc != XCL_END)
3787 {
3788 compares++;
3789 if (*cc == XCL_SINGLE)
3790 {
3791 cc += 2;
3792 #ifdef SUPPORT_UTF
3793 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3794 #endif
3795 #ifdef SUPPORT_UCP
3796 needschar = TRUE;
3797 #endif
3798 }
3799 else if (*cc == XCL_RANGE)
3800 {
3801 cc += 2;
3802 #ifdef SUPPORT_UTF
3803 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3804 #endif
3805 cc++;
3806 #ifdef SUPPORT_UTF
3807 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3808 #endif
3809 #ifdef SUPPORT_UCP
3810 needschar = TRUE;
3811 #endif
3812 }
3813 #ifdef SUPPORT_UCP
3814 else
3815 {
3816 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3817 cc++;
3818 switch(*cc)
3819 {
3820 case PT_ANY:
3821 break;
3822
3823 case PT_LAMP:
3824 case PT_GC:
3825 case PT_PC:
3826 case PT_ALNUM:
3827 needstype = TRUE;
3828 break;
3829
3830 case PT_SC:
3831 needsscript = TRUE;
3832 break;
3833
3834 case PT_SPACE:
3835 case PT_PXSPACE:
3836 case PT_WORD:
3837 needstype = TRUE;
3838 needschar = TRUE;
3839 break;
3840
3841 case PT_CLIST:
3842 needschar = TRUE;
3843 break;
3844
3845 default:
3846 SLJIT_ASSERT_STOP();
3847 break;
3848 }
3849 cc += 2;
3850 }
3851 #endif
3852 }
3853
3854 #ifdef SUPPORT_UCP
3855 /* Simple register allocation. TMP1 is preferred if possible. */
3856 if (needstype || needsscript)
3857 {
/* The getucd call overwrites TMP1, so the character is saved in TMP3 first
unless the bitmap code above already did so. */
3858 if (needschar && !charsaved)
3859 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3860 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3861 if (needschar)
3862 {
3863 if (needstype)
3864 {
3865 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3866 typereg = RETURN_ADDR;
3867 }
3868
3869 if (needsscript)
3870 scriptreg = TMP3;
3871 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3872 }
3873 else if (needstype && needsscript)
3874 scriptreg = TMP3;
3875 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3876
/* Load the script byte of the ucd record. NOTE(review): this relies on TMP2
still holding the record index left by getucd and on 8-byte records
(shift by 3) - confirm against getucd. */
3877 if (needsscript)
3878 {
3879 if (scriptreg == TMP1)
3880 {
3881 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3882 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3883 }
3884 else
3885 {
3886 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3887 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3888 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3889 }
3890 }
3891 }
3892 #endif
3893
3894 /* Generating code. */
3895 cc = ccbegin;
3896 charoffset = 0;
3897 numberofcmps = 0;
3898 #ifdef SUPPORT_UCP
3899 typeoffset = 0;
3900 #endif
3901
3902 while (*cc != XCL_END)
3903 {
3904 compares--;
/* For a non-negated class the last item is inverted: instead of jumping to
"found" on success it jumps to backtracks on failure (see add_jump below). */
3905 invertcmp = (compares == 0 && list != backtracks);
3906 jump = NULL;
3907
3908 if (*cc == XCL_SINGLE)
3909 {
3910 cc ++;
3911 #ifdef SUPPORT_UTF
3912 if (common->utf)
3913 {
3914 GETCHARINC(c, cc);
3915 }
3916 else
3917 #endif
3918 c = *cc++;
3919
/* Up to four consecutive single/range tests are accumulated in TMP2 and
resolved with one conditional jump. */
3920 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3921 {
3922 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3923 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
3924 numberofcmps++;
3925 }
3926 else if (numberofcmps > 0)
3927 {
3928 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3929 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3930 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3931 numberofcmps = 0;
3932 }
3933 else
3934 {
3935 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3936 numberofcmps = 0;
3937 }
3938 }
3939 else if (*cc == XCL_RANGE)
3940 {
3941 cc ++;
3942 #ifdef SUPPORT_UTF
3943 if (common->utf)
3944 {
3945 GETCHARINC(c, cc);
3946 }
3947 else
3948 #endif
3949 c = *cc++;
/* Rebase TMP1 to the start of the range; the end is then tested with one
unsigned "less or equal" comparison. */
3950 SET_CHAR_OFFSET(c);
3951 #ifdef SUPPORT_UTF
3952 if (common->utf)
3953 {
3954 GETCHARINC(c, cc);
3955 }
3956 else
3957 #endif
3958 c = *cc++;
3959 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3960 {
3961 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3962 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
3963 numberofcmps++;
3964 }
3965 else if (numberofcmps > 0)
3966 {
3967 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3968 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3969 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3970 numberofcmps = 0;
3971 }
3972 else
3973 {
3974 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3975 numberofcmps = 0;
3976 }
3977 }
3978 #ifdef SUPPORT_UCP
3979 else
3980 {
3981 if (*cc == XCL_NOTPROP)
3982 invertcmp ^= 0x1;
3983 cc++;
3984 switch(*cc)
3985 {
/* PT_ANY is decided at compile time: either nothing is emitted (continue,
which also leaves cc on the property type unit as in the original code)
or an unconditional jump is generated. */
3986 case PT_ANY:
3987 if (list != backtracks)
3988 {
3989 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
3990 continue;
3991 }
3992 else if (cc[-1] == XCL_NOTPROP)
3993 continue;
3994 jump = JUMP(SLJIT_JUMP);
3995 break;
3996
3997 case PT_LAMP:
3998 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
3999 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4000 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4001 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4002 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4003 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4004 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4005 break;
4006
4007 case PT_GC:
4008 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4009 SET_TYPE_OFFSET(c);
4010 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4011 break;
4012
4013 case PT_PC:
4014 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4015 break;
4016
4017 case PT_SC:
4018 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4019 break;
4020
4021 case PT_SPACE:
4022 case PT_PXSPACE:
/* PT_SPACE excludes character 11 from the 9..13 test by jumping over it with TMP2 = 0.
NOTE(review): when that jump is taken, the run-time adjustment emitted by
SET_CHAR_OFFSET(9) is skipped although charoffset is updated at compile
time - confirm that later comparisons on TMP1 cannot be affected. */
4023 if (*cc == PT_SPACE)
4024 {
4025 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4026 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4027 }
4028 SET_CHAR_OFFSET(9);
4029 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4030 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4031 if (*cc == PT_SPACE)
4032 JUMPHERE(jump);
4033
4034 SET_TYPE_OFFSET(ucp_Zl);
4035 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4036 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4037 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4038 break;
4039
4040 case PT_WORD:
4041 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4042 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4043 /* ... fall through */
4044
4045 case PT_ALNUM:
4046 SET_TYPE_OFFSET(ucp_Ll);
4047 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4048 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4049 SET_TYPE_OFFSET(ucp_Nd);
4050 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4051 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4052 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4053 break;
4054
4055 case PT_CLIST:
4056 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4057
4058 /* At least three characters are required.
4059 Otherwise this case would be handled by the normal code path. */
4060 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4061 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4062
4063 /* Optimizing character pairs, if their difference is power of 2. */
/* OR-ing the single differing bit into the (unbiased) character folds the
lower member of the pair onto the higher one, so one equality test
covers both. */
4064 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4065 {
4066 if (charoffset == 0)
4067 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4068 else
4069 {
4070 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4071 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4072 }
4073 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4074 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4075 other_cases += 2;
4076 }
4077 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4078 {
4079 if (charoffset == 0)
4080 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4081 else
4082 {
4083 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
/* Fixed: the pair folded here is [1]/[2] (the result is compared with
other_cases[2] below), so the mask must be other_cases[2] ^ other_cases[1],
exactly as in the charoffset == 0 path. */
4084 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4085 }
4086 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4087 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4088
4089 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4090 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4091
4092 other_cases += 3;
4093 }
4094 else
4095 {
4096 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4097 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4098 }
4099
/* Remaining members are tested one by one; only the last OP_FLAGS sets the flags. */
4100 while (*other_cases != NOTACHAR)
4101 {
4102 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4103 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4104 }
4105 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4106 break;
4107 }
4108 cc += 2;
4109 }
4110 #endif
4111
/* Every item but the last jumps to `list` on success; the last one goes to backtracks. */
4112 if (jump != NULL)
4113 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4114 }
4115
4116 if (found != NULL)
4117 set_jumps(found, LABEL());
4118 }
4119
4120 #undef SET_TYPE_OFFSET
4121 #undef SET_CHAR_OFFSET
4122
4123 #endif
4124
4125 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4126 {
4127 DEFINE_COMPILER;
4128 int length;
4129 unsigned int c, oc, bit;
4130 compare_context context;
4131 struct sljit_jump *jump[4];
4132 #ifdef SUPPORT_UTF
4133 struct sljit_label *label;
4134 #ifdef SUPPORT_UCP
4135 pcre_uchar propdata[5];
4136 #endif
4137 #endif
4138
4139 switch(type)
4140 {
4141 case OP_SOD:
4142 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4143 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4144 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4145 return cc;
4146
4147 case OP_SOM:
4148 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4149 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4150 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4151 return cc;
4152
4153 case OP_NOT_WORD_BOUNDARY:
4154 case OP_WORD_BOUNDARY:
4155 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4156 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4157 return cc;
4158
4159 case OP_NOT_DIGIT:
4160 case OP_DIGIT:
4161 /* Digits are usually 0-9, so it is worth to optimize them. */
4162 if (common->digits[0] == -2)
4163 get_ctype_ranges(common, ctype_digit, common->digits);
4164 detect_partial_match(common, backtracks);
4165 /* Flip the starting bit in the negative case. */
4166 if (type == OP_NOT_DIGIT)
4167 common->digits[1] ^= 1;
4168 if (!check_ranges(common, common->digits, backtracks, TRUE))
4169 {
4170 read_char8_type(common);
4171 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4172 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4173 }
4174 if (type == OP_NOT_DIGIT)
4175 common->digits[1] ^= 1;
4176 return cc;
4177
4178 case OP_NOT_WHITESPACE:
4179 case OP_WHITESPACE:
4180 detect_partial_match(common, backtracks);
4181 read_char8_type(common);
4182 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4183 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4184 return cc;
4185
4186 case OP_NOT_WORDCHAR:
4187 case OP_WORDCHAR:
4188 detect_partial_match(common, backtracks);
4189 read_char8_type(common);
4190 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4191 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4192 return cc;
4193
4194 case OP_ANY:
4195 detect_partial_match(common, backtracks);
4196 read_char(common);
4197 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4198 {
4199 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4200 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4201 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4202 else
4203 jump[1] = check_str_end(common);
4204
4205 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4206 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4207 if (jump[1] != NULL)
4208 JUMPHERE(jump[1]);
4209 JUMPHERE(jump[0]);
4210 }
4211 else
4212 check_newlinechar(common, common->nltype, backtracks, TRUE);
4213 return cc;
4214
4215 case OP_ALLANY:
4216 detect_partial_match(common, backtracks);
4217 #ifdef SUPPORT_UTF
4218 if (common->utf)
4219 {
4220 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4221 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4222 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4223 #if defined COMPILE_PCRE8
4224 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4225 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4226 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4227 #elif defined COMPILE_PCRE16
4228 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4229 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4230 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4231 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4232 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4233 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4234 #endif
4235 JUMPHERE(jump[0]);
4236 #endif /* COMPILE_PCRE[8|16] */
4237 return cc;
4238 }
4239 #endif
4240 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4241 return cc;
4242
4243 case OP_ANYBYTE:
4244 detect_partial_match(common, backtracks);
4245 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4246 return cc;
4247
4248 #ifdef SUPPORT_UTF
4249 #ifdef SUPPORT_UCP
4250 case OP_NOTPROP:
4251 case OP_PROP:
4252 propdata[0] = 0;
4253 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4254 propdata[2] = cc[0];
4255 propdata[3] = cc[1];
4256 propdata[4] = XCL_END;
4257 compile_xclass_matchingpath(common, propdata, backtracks);
4258 return cc + 2;
4259 #endif
4260 #endif
4261
4262 case OP_ANYNL:
4263 detect_partial_match(common, backtracks);
4264 read_char(common);
4265 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4266 /* We don't need to handle soft partial matching case. */
4267 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4268 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4269 else
4270 jump[1] = check_str_end(common);
4271 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4272 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4273 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4274 jump[3] = JUMP(SLJIT_JUMP);
4275 JUMPHERE(jump[0]);
4276 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4277 JUMPHERE(jump[1]);
4278 JUMPHERE(jump[2]);
4279 JUMPHERE(jump[3]);
4280 return cc;
4281
4282 case OP_NOT_HSPACE:
4283 case OP_HSPACE:
4284 detect_partial_match(common, backtracks);
4285 read_char(common);
4286 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4287 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4288 return cc;
4289
4290 case OP_NOT_VSPACE:
4291 case OP_VSPACE:
4292 detect_partial_match(common, backtracks);
4293 read_char(common);
4294 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4295 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4296 return cc;
4297
4298 #ifdef SUPPORT_UCP
4299 case OP_EXTUNI:
4300 detect_partial_match(common, backtracks);
4301 read_char(common);
4302 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4304 /* Optimize register allocation: use a real register. */
4305 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4306 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4307
4308 label = LABEL();
4309 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4310 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4311 read_char(common);
4312 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4313 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4314 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4315
4316 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4317 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4318 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4319 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4320 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4321 JUMPTO(SLJIT_C_NOT_ZERO, label);
4322
4323 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4324 JUMPHERE(jump[0]);
4325 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4326
4327 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4328 {
4329 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4330 /* Since we successfully read a char above, partial matching must occure. */
4331 check_partial(common, TRUE);
4332 JUMPHERE(jump[0]);
4333 }
4334 return cc;
4335 #endif
4336
4337 case OP_EODN:
4338 /* Requires rather complex checks. */
4339 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4340 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4341 {
4342 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4343 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4344 if (common->mode == JIT_COMPILE)
4345 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4346 else
4347 {
4348 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4349 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4350 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4351 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4352 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4353 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4354 check_partial(common, TRUE);
4355 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4356 JUMPHERE(jump[1]);
4357 }
4358 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4359 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4360 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4361 }
4362 else if (common->nltype == NLTYPE_FIXED)
4363 {
4364 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4365 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4366 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4367 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4368 }
4369 else
4370 {
4371 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4372 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4373 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4374 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4375 jump[2] = JUMP(SLJIT_C_GREATER);
4376 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4377 /* Equal. */
4378 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4379 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4380 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4381
4382 JUMPHERE(jump[1]);
4383 if (common->nltype == NLTYPE_ANYCRLF)
4384 {
4385 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4386 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4387 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4388 }
4389 else
4390 {
4391 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4392 read_char(common);
4393 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4394 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4395 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4396 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4397 }
4398 JUMPHERE(jump[2]);
4399 JUMPHERE(jump[3]);
4400 }
4401 JUMPHERE(jump[0]);
4402 check_partial(common, FALSE);
4403 return cc;
4404
4405 case OP_EOD:
4406 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4407 check_partial(common, FALSE);
4408 return cc;
4409
4410 case OP_CIRC:
4411 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4412 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4413 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4414 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4415 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4416 return cc;
4417
4418 case OP_CIRCM:
4419 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4420 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4421 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4422 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4423 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4424 jump[0] = JUMP(SLJIT_JUMP);
4425 JUMPHERE(jump[1]);
4426
4427 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4428 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4429 {
4430 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4431 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4432 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4433 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4434 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4435 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4436 }
4437 else
4438 {
4439 skip_char_back(common);
4440 read_char(common);
4441 check_newlinechar(common, common->nltype, backtracks, FALSE);
4442 }
4443 JUMPHERE(jump[0]);
4444 return cc;
4445
4446 case OP_DOLL:
4447 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4448 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4449 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4450
4451 if (!common->endonly)
4452 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4453 else
4454 {
4455 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4456 check_partial(common, FALSE);
4457 }
4458 return cc;
4459
4460 case OP_DOLLM:
4461 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4462 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4463 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4464 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4465 check_partial(common, FALSE);
4466 jump[0] = JUMP(SLJIT_JUMP);
4467 JUMPHERE(jump[1]);
4468
4469 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4470 {
4471 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4472 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4473 if (common->mode == JIT_COMPILE)
4474 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4475 else
4476 {
4477 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4478 /* STR_PTR = STR_END - IN_UCHARS(1) */
4479 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4480 check_partial(common, TRUE);
4481 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4482 JUMPHERE(jump[1]);
4483 }
4484
4485 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4486 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4487 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4488 }
4489 else
4490 {
4491 peek_char(common);
4492 check_newlinechar(common, common->nltype, backtracks, FALSE);
4493 }
4494 JUMPHERE(jump[0]);
4495 return cc;
4496
4497 case OP_CHAR:
4498 case OP_CHARI:
4499 length = 1;
4500 #ifdef SUPPORT_UTF
4501 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4502 #endif
4503 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4504 {
4505 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4506 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4507
4508 context.length = IN_UCHARS(length);
4509 context.sourcereg = -1;
4510 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4511 context.ucharptr = 0;
4512 #endif
4513 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4514 }
4515 detect_partial_match(common, backtracks);
4516 read_char(common);
4517 #ifdef SUPPORT_UTF
4518 if (common->utf)
4519 {
4520 GETCHAR(c, cc);
4521 }
4522 else
4523 #endif
4524 c = *cc;
4525 if (type == OP_CHAR || !char_has_othercase(common, cc))
4526 {
4527 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4528 return cc + length;
4529 }
4530 oc = char_othercase(common, c);
4531 bit = c ^ oc;
4532 if (is_powerof2(bit))
4533 {
4534 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4535 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4536 return cc + length;
4537 }
4538 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4539 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4540 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4541 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4542 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4543 return cc + length;
4544
4545 case OP_NOT:
4546 case OP_NOTI:
4547 detect_partial_match(common, backtracks);
4548 length = 1;
4549 #ifdef SUPPORT_UTF
4550 if (common->utf)
4551 {
4552 #ifdef COMPILE_PCRE8
4553 c = *cc;
4554 if (c < 128)
4555 {
4556 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4557 if (type == OP_NOT || !char_has_othercase(common, cc))
4558 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4559 else
4560 {
4561 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4562 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4563 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4564 }
4565 /* Skip the variable-length character. */
4566 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4567 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4568 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4569 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4570 JUMPHERE(jump[0]);
4571 return cc + 1;
4572 }
4573 else
4574 #endif /* COMPILE_PCRE8 */
4575 {
4576 GETCHARLEN(c, cc, length);
4577 read_char(common);
4578 }
4579 }
4580 else
4581 #endif /* SUPPORT_UTF */
4582 {
4583 read_char(common);
4584 c = *cc;
4585 }
4586
4587 if (type == OP_NOT || !char_has_othercase(common, cc))
4588 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4589 else
4590 {
4591 oc = char_othercase(common, c);
4592 bit = c ^ oc;
4593 if (is_powerof2(bit))
4594 {
4595 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4596 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4597 }
4598 else
4599 {
4600 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4601 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4602 }
4603 }
4604 return cc + length;
4605
4606 case OP_CLASS:
4607 case OP_NCLASS:
4608 detect_partial_match(common, backtracks);
4609 read_char(common);
4610 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4611 return cc + 32 / sizeof(pcre_uchar);
4612
4613 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4614 jump[0] = NULL;
4615 #ifdef COMPILE_PCRE8
4616 /* This check only affects 8 bit mode. In other modes, we
4617 always need to compare the value with 255. */
4618 if (common->utf)
4619 #endif /* COMPILE_PCRE8 */
4620 {
4621 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4622 if (type == OP_CLASS)
4623 {
4624 add_jump(compiler, backtracks, jump[0]);
4625 jump[0] = NULL;
4626 }
4627 }
4628 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4629 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4630 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4631 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4632 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4633 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4634 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4635 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4636 if (jump[0] != NULL)
4637 JUMPHERE(jump[0]);
4638 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4639 return cc + 32 / sizeof(pcre_uchar);
4640
4641 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4642 case OP_XCLASS:
4643 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4644 return cc + GET(cc, 0) - 1;
4645 #endif
4646
4647 case OP_REVERSE:
4648 length = GET(cc, 0);
4649 if (length == 0)
4650 return cc + LINK_SIZE;
4651 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4652 #ifdef SUPPORT_UTF
4653 if (common->utf)
4654 {
4655 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4656 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4657 label = LABEL();
4658 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4659 skip_char_back(common);
4660 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4661 JUMPTO(SLJIT_C_NOT_ZERO, label);
4662 }
4663 else
4664 #endif
4665 {
4666 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4667 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4668 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4669 }
4670 check_start_used_ptr(common);
4671 return cc + LINK_SIZE;
4672 }
4673 SLJIT_ASSERT_STOP();
4674 return cc;
4675 }
4676
4677 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4678 {
4679 /* This function consumes at least one input character. */
4680 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4681 DEFINE_COMPILER;
4682 pcre_uchar *ccbegin = cc;
4683 compare_context context;
4684 int size;
4685
4686 context.length = 0;
4687 do
4688 {
4689 if (cc >= ccend)
4690 break;
4691
4692 if (*cc == OP_CHAR)
4693 {
4694 size = 1;
4695 #ifdef SUPPORT_UTF
4696 if (common->utf && HAS_EXTRALEN(cc[1]))
4697 size += GET_EXTRALEN(cc[1]);
4698 #endif
4699 }
4700 else if (*cc == OP_CHARI)
4701 {
4702 size = 1;
4703 #ifdef SUPPORT_UTF
4704 if (common->utf)
4705 {
4706 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4707 size = 0;
4708 else if (HAS_EXTRALEN(cc[1]))
4709 size += GET_EXTRALEN(cc[1]);
4710 }
4711 else
4712 #endif
4713 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4714 size = 0;
4715 }
4716 else
4717 size = 0;
4718
4719 cc += 1 + size;
4720 context.length += IN_UCHARS(size);
4721 }
4722 while (size > 0 && context.length <= 128);
4723
4724 cc = ccbegin;
4725 if (context.length > 0)
4726 {
4727 /* We have a fixed-length byte sequence. */
4728 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4729 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4730
4731 context.sourcereg = -1;
4732 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4733 context.ucharptr = 0;
4734 #endif
4735 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4736 return cc;
4737 }
4738
4739 /* A non-fixed length character will be checked if length == 0. */
4740 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4741 }
4742
4743 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4744 {
4745 DEFINE_COMPILER;
4746 int offset = GET2(cc, 1) << 1;
4747
4748 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4749 if (!common->jscript_compat)
4750 {
4751 if (backtracks == NULL)
4752 {
4753 /* OVECTOR(1) contains the "string begin - 1" constant. */
4754 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4755 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4756 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4757 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4758 return JUMP(SLJIT_C_NOT_ZERO);
4759 }
4760 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4761 }
4762 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4763 }
4764
/* Forward declarations of two static functions of this file, so that the
   code below can refer to them ahead of their definitions. */
static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4768
/* Allocates `size` bytes with sljit_alloc_memory(), zero fills them, records
   `ccstart` in the new backtrack record and links the record in as the new
   top of parent's list. If the compiler reports an error after the
   allocation, the enclosing function returns `error`.

   Relies on the local variables `compiler`, `backtrack` and `parent` of the
   function in which it is used. `size` is evaluated twice. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
4781
/* Variant of PUSH_BACKTRACK for functions returning void: allocates `size`
   bytes with sljit_alloc_memory(), zero fills them, records `ccstart` in the
   new backtrack record and links the record in as the new top of parent's
   list. If the compiler reports an error after the allocation, the
   enclosing function simply returns.

   Relies on the local variables `compiler`, `backtrack` and `parent` of the
   function in which it is used. `size` is evaluated twice. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->cc = (ccstart); \
    backtrack->prev = parent->top; \
    parent->top = backtrack; \
    } \
  while (0)
4794
/* Casts the local variable `backtrack` to a pointer to the given record
   type, giving access to the type specific fields of the record. */
#define BACKTRACK_AS(type) ((type *)backtrack)
4796
4797 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4798 {
4799 DEFINE_COMPILER;
4800 int offset = GET2(cc, 1) << 1;
4801 struct sljit_jump *jump = NULL;
4802 struct sljit_jump *partial;
4803 struct sljit_jump *nopartial;
4804
4805 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4806 /* OVECTOR(1) contains the "string begin - 1" constant. */
4807 if (withchecks && !common->jscript_compat)
4808 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4809
4810 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4811 if (common->utf && *cc == OP_REFI)
4812 {
4813 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
4814 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4815 if (withchecks)
4816 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4817
4818 /* Needed to save important temporary registers. */
4819 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4820 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
4821 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4822 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4823 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4824 if (common->mode == JIT_COMPILE)
4825 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4826 else
4827 {
4828 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4829 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4830 check_partial(common, FALSE);
4831 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4832 JUMPHERE(nopartial);
4833 }
4834 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4835 }
4836 else
4837 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4838 {
4839 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4840 if (withchecks)
4841 jump = JUMP(SLJIT_C_ZERO);
4842
4843 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4844 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4845 if (common->mode == JIT_COMPILE)
4846 add_jump(compiler, backtracks, partial);
4847
4848 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4849 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4850
4851 if (common->mode != JIT_COMPILE)
4852 {
4853 nopartial = JUMP(SLJIT_JUMP);
4854 JUMPHERE(partial);
4855 /* TMP2 -= STR_END - STR_PTR */
4856 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4857 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
4858 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4859 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4860 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4861 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4862 JUMPHERE(partial);
4863 check_partial(common, FALSE);
4864 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4865 JUMPHERE(nopartial);
4866 }
4867 }
4868
4869 if (jump != NULL)
4870 {
4871 if (emptyfail)
4872 add_jump(compiler, backtracks, jump);
4873 else
4874 JUMPHERE(jump);
4875 }
4876 return cc + 1 + IMM2_SIZE;
4877 }
4878
4879 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4880 {
4881 DEFINE_COMPILER;
4882 backtrack_common *backtrack;
4883 pcre_uchar type;
4884 struct sljit_label *label;
4885 struct sljit_jump *zerolength;
4886 struct sljit_jump *jump = NULL;
4887 pcre_uchar *ccbegin = cc;
4888 int min = 0, max = 0;
4889 BOOL minimize;
4890
4891 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4892
4893 type = cc[1 + IMM2_SIZE];
4894 minimize = (type & 0x1) != 0;
4895 switch(type)
4896 {
4897 case OP_CRSTAR:
4898 case OP_CRMINSTAR:
4899 min = 0;
4900 max = 0;
4901 cc += 1 + IMM2_SIZE + 1;
4902 break;
4903 case OP_CRPLUS:
4904 case OP_CRMINPLUS:
4905 min = 1;
4906 max = 0;
4907 cc += 1 + IMM2_SIZE + 1;
4908 break;
4909 case OP_CRQUERY:
4910 case OP_CRMINQUERY:
4911 min = 0;
4912 max = 1;
4913 cc += 1 + IMM2_SIZE + 1;
4914 break;
4915 case OP_CRRANGE:
4916 case OP_CRMINRANGE:
4917 min = GET2(cc, 1 + IMM2_SIZE + 1);
4918 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4919 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4920 break;
4921 default:
4922 SLJIT_ASSERT_STOP();
4923 break;
4924 }
4925
4926 if (!minimize)
4927 {
4928 if (min == 0)
4929 {
4930 allocate_stack(common, 2);
4931 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4932 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4933 /* Temporary release of STR_PTR. */
4934 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
4935 zerolength = compile_ref_checks(common, ccbegin, NULL);
4936 /* Restore if not zero length. */
4937 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
4938 }
4939 else
4940 {
4941 allocate_stack(common, 1);
4942 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4943 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4944 }
4945
4946 if (min > 1 || max > 1)
4947 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4948
4949 label = LABEL();
4950 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4951
4952 if (min > 1 || max > 1)
4953 {
4954 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4955 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4956 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
4957 if (min > 1)
4958 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4959 if (max > 1)
4960 {
4961 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4962 allocate_stack(common, 1);
4963 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4964 JUMPTO(SLJIT_JUMP, label);
4965 JUMPHERE(jump);
4966 }
4967 }
4968
4969 if (max == 0)
4970 {
4971 /* Includes min > 1 case as well. */
4972 allocate_stack(common, 1);
4973 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4974 JUMPTO(SLJIT_JUMP, label);
4975 }
4976
4977 JUMPHERE(zerolength);
4978 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4979
4980 decrease_call_count(common);
4981 return cc;
4982 }
4983
4984 allocate_stack(common, 2);
4985 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4986 if (type != OP_CRMINSTAR)
4987 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4988
4989 if (min == 0)
4990 {
4991 zerolength = compile_ref_checks(common, ccbegin, NULL);
4992 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4993 jump = JUMP(SLJIT_JUMP);
4994 }
4995 else
4996 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4997
4998 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4999 if (max > 0)
5000 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5001
5002 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5003 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5004
5005 if (min > 1)
5006 {
5007 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5008 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5009 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5010 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5011 }
5012 else if (max > 0)
5013 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5014
5015 if (jump != NULL)
5016 JUMPHERE(jump);
5017 JUMPHERE(zerolength);
5018
5019 decrease_call_count(common);
5020 return cc;
5021 }
5022
5023 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5024 {
5025 DEFINE_COMPILER;
5026 backtrack_common *backtrack;
5027 recurse_entry *entry = common->entries;
5028 recurse_entry *prev = NULL;
5029 int start = GET(cc, 1);
5030
5031 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5032 while (entry != NULL)
5033 {
5034 if (entry->start == start)
5035 break;
5036 prev = entry;
5037 entry = entry->next;
5038 }
5039
5040 if (entry == NULL)
5041 {
5042 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5043 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5044 return NULL;
5045 entry->next = NULL;
5046 entry->entry = NULL;
5047 entry->calls = NULL;
5048 entry->start = start;
5049
5050 if (prev != NULL)
5051 prev->next = entry;
5052 else
5053 common->entries = entry;
5054 }
5055
5056 if (common->has_set_som && common->mark_ptr != 0)
5057 {
5058 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5059 allocate_stack(common, 2);
5060 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5061 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5062 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5063 }
5064 else if (common->has_set_som || common->mark_ptr != 0)
5065 {
5066 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5067 allocate_stack(common, 1);
5068 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5069 }
5070
5071 if (entry->entry == NULL)
5072 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5073 else
5074 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5075 /* Leave if the match is failed. */
5076 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5077 return cc + 1 + LINK_SIZE;
5078 }
5079
5080 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5081 {
5082 const pcre_uchar *begin = arguments->begin;
5083 int *offset_vector = arguments->offsets;
5084 int offset_count = arguments->offset_count;
5085 int i;
5086
5087 if (PUBL(callout) == NULL)
5088 return 0;
5089
5090 callout_block->version = 2;
5091 callout_block->callout_data = arguments->callout_data;
5092
5093 /* Offsets in subject. */
5094 callout_block->subject_length = arguments->end - arguments->begin;
5095 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5096 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5097 #if defined COMPILE_PCRE8
5098 callout_block->subject = (PCRE_SPTR)begin;
5099 #elif defined COMPILE_PCRE16
5100 callout_block->subject = (PCRE_SPTR16)begin;
5101 #elif defined COMPILE_PCRE32
5102 callout_block->subject = (PCRE_SPTR32)begin;
5103 #endif
5104
5105 /* Convert and copy the JIT offset vector to the offset_vector array. */
5106 callout_block->capture_top = 0;
5107 callout_block->offset_vector = offset_vector;
5108 for (i = 2; i < offset_count; i += 2)
5109 {
5110 offset_vector[i] = jit_ovector[i] - begin;
5111 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5112 if (jit_ovector[i] >= begin)
5113 callout_block->capture_top = i;
5114 }
5115
5116 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5117 if (offset_count > 0)
5118 offset_vector[0] = -1;
5119 if (offset_count > 1)
5120 offset_vector[1] = -1;
5121 return (*PUBL(callout))(callout_block);
5122 }
5123
5124 /* Aligning to 8 byte. */
5125 #define CALLOUT_ARG_SIZE \
5126 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5127
5128 #define CALLOUT_ARG_OFFSET(arg) \
5129 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5130
5131 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5132 {
5133 DEFINE_COMPILER;
5134 backtrack_common *backtrack;
5135
5136 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5137
5138 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5139
5140 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5141 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5142 SLJIT_ASSERT(common->capture_last_ptr != 0);
5143 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5144 OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5145
5146 /* These pointer sized fields temporarly stores internal variables. */
5147 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5148 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5149 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5150
5151 if (common->mark_ptr != 0)
5152 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5153 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5154 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5155 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5156
5157 /* Needed to save important temporary registers. */
5158 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5159 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5160 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5161 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5162 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5163 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5164 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5165
5166 /* Check return value. */
5167 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5168 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5169 if (common->forced_quit_label == NULL)
5170 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5171 else
5172 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5173 return cc + 2 + 2 * LINK_SIZE;
5174 }
5175
5176 #undef CALLOUT_ARG_SIZE
5177 #undef CALLOUT_ARG_OFFSET
5178
5179 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5180 {
5181 DEFINE_COMPILER;
5182 int framesize;
5183 int private_data_ptr;
5184 backtrack_common altbacktrack;
5185 pcre_uchar *ccbegin;
5186 pcre_uchar opcode;
5187 pcre_uchar bra = OP_BRA;
5188 jump_list *tmp = NULL;
5189 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5190 jump_list **found;
5191 /* Saving previous accept variables. */
5192 struct sljit_label *save_quit_label = common->quit_label;
5193 struct sljit_label *save_accept_label = common->accept_label;
5194 jump_list *save_quit = common->quit;
5195 jump_list *save_accept = common->accept;
5196 struct sljit_jump *jump;
5197 struct sljit_jump *brajump = NULL;
5198
5199 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5200 {
5201 SLJIT_ASSERT(!conditional);
5202 bra = *cc;
5203 cc++;
5204 }
5205 private_data_ptr = PRIVATE_DATA(cc);
5206 SLJIT_ASSERT(private_data_ptr != 0);
5207 framesize = get_framesize(common, cc, FALSE);
5208 backtrack->framesize = framesize;
5209 backtrack->private_data_ptr = private_data_ptr;
5210 opcode = *cc;
5211 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
5212 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5213 ccbegin = cc;
5214 cc += GET(cc, 1);
5215
5216 if (bra == OP_BRAMINZERO)
5217 {
5218 /* This is a braminzero backtrack path. */
5219 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5220 free_stack(common, 1);
5221 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5222 }
5223
5224 if (framesize < 0)
5225 {
5226 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5227 allocate_stack(common, 1);
5228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5229 }
5230 else
5231 {
5232 allocate_stack(common, framesize + 2);
5233 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5234 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5235 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5236 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5237 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5238 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5239 }
5240
5241 memset(&altbacktrack, 0, sizeof(backtrack_common));
5242 common->quit_label = NULL;
5243 common->quit = NULL;
5244 while (1)
5245 {
5246 common->accept_label = NULL;
5247 common->accept = NULL;
5248 altbacktrack.top = NULL;
5249 altbacktrack.topbacktracks = NULL;
5250
5251 if (*ccbegin == OP_ALT)
5252 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5253
5254 altbacktrack.cc = ccbegin;
5255 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5256 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5257 {
5258 common->quit_label = save_quit_label;
5259 common->accept_label = save_accept_label;
5260 common->quit = save_quit;
5261 common->accept = save_accept;
5262 return NULL;
5263 }
5264 common->accept_label = LABEL();
5265 if (common->accept != NULL)
5266 set_jumps(common->accept, common->accept_label);
5267
5268 /* Reset stack. */
5269 if (framesize < 0)
5270 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5271 else {
5272 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5273 {
5274 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5275 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5276 }
5277 else
5278 {
5279 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5280 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5281 }
5282 }
5283
5284 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5285 {
5286 /* We know that STR_PTR was stored on the top of the stack. */
5287 if (conditional)
5288 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5289 else if (bra == OP_BRAZERO)
5290 {
5291 if (framesize < 0)
5292 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5293 else
5294 {
5295 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5296 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5297 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5298 }
5299 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5300 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5301 }
5302 else if (framesize >= 0)
5303 {
5304 /* For OP_BRA and OP_BRAMINZERO. */
5305 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5306 }
5307 }
5308 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5309
5310 compile_backtrackingpath(common, altbacktrack.top);
5311 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5312 {
5313 common->quit_label = save_quit_label;
5314 common->accept_label = save_accept_label;
5315 common->quit = save_quit;
5316 common->accept = save_accept;
5317 return NULL;
5318 }
5319 set_jumps(altbacktrack.topbacktracks, LABEL());
5320
5321 if (*cc != OP_ALT)
5322 break;
5323
5324 ccbegin = cc;
5325 cc += GET(cc, 1);
5326 }
5327 /* None of them matched. */
5328 if (common->quit != NULL)
5329 set_jumps(common->quit, LABEL());
5330
5331 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5332 {
5333 /* Assert is failed. */
5334 if (conditional || bra == OP_BRAZERO)
5335 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5336
5337 if (framesize < 0)
5338 {
5339 /* The topmost item should be 0. */
5340 if (bra == OP_BRAZERO)
5341 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5342 else
5343 free_stack(common, 1);
5344 }
5345 else
5346 {
5347 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5348 /* The topmost item should be 0. */
5349 if (bra == OP_BRAZERO)
5350 {
5351 free_stack(common, framesize + 1);
5352 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5353 }
5354 else
5355 free_stack(common, framesize + 2);
5356 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5357 }
5358 jump = JUMP(SLJIT_JUMP);
5359 if (bra != OP_BRAZERO)
5360 add_jump(compiler, target, jump);
5361
5362 /* Assert is successful. */
5363 set_jumps(tmp, LABEL());
5364 if (framesize < 0)
5365 {
5366 /* We know that STR_PTR was stored on the top of the stack. */
5367 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5368 /* Keep the STR_PTR on the top of the stack. */
5369 if (bra == OP_BRAZERO)
5370 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5371 else if (bra == OP_BRAMINZERO)
5372 {
5373 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5374 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5375 }
5376 }
5377 else
5378 {
5379 if (bra == OP_BRA)
5380 {
5381 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5382 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5383 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5384 }
5385 else
5386 {
5387 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5388 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5389 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5390 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5391 }
5392 }
5393
5394 if (bra == OP_BRAZERO)
5395 {
5396 backtrack->matchingpath = LABEL();
5397 sljit_set_label(jump, backtrack->matchingpath);
5398 }
5399 else if (bra == OP_BRAMINZERO)
5400 {
5401 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5402 JUMPHERE(brajump);
5403 if (framesize >= 0)
5404 {
5405 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5406 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5407 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5408 }
5409 set_jumps(backtrack->common.topbacktracks, LABEL());
5410 }
5411 }
5412 else
5413 {
5414 /* AssertNot is successful. */
5415 if (framesize < 0)
5416 {
5417 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5418 if (bra != OP_BRA)
5419 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5420 else
5421 free_stack(common, 1);
5422 }
5423 else
5424 {
5425 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5426 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5427 /* The topmost item should be 0. */
5428 if (bra != OP_BRA)
5429 {
5430 free_stack(common, framesize + 1);
5431 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5432 }
5433 else
5434 free_stack(common, framesize + 2);
5435 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5436 }
5437
5438 if (bra == OP_BRAZERO)
5439 backtrack->matchingpath = LABEL();
5440 else if (bra == OP_BRAMINZERO)
5441 {
5442 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5443 JUMPHERE(brajump);
5444 }
5445
5446 if (bra != OP_BRA)
5447 {
5448 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5449 set_jumps(backtrack->common.topbacktracks, LABEL());
5450 backtrack->common.topbacktracks = NULL;
5451 }
5452 }
5453
5454 common->quit_label = save_quit_label;
5455 common->accept_label = save_accept_label;
5456 common->quit = save_quit;
5457 common->accept = save_accept;
5458 return cc + 1 + LINK_SIZE;
5459 }
5460
5461 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5462 {
5463 int condition = FALSE;
5464 pcre_uchar *slotA = name_table;
5465 pcre_uchar *slotB;
5466 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5467 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5468 sljit_sw no_capture;
5469 int i;
5470
5471 locals += refno & 0xff;
5472 refno >>= 8;
5473 no_capture = locals[1];
5474
5475 for (i = 0; i < name_count; i++)
5476 {
5477 if (GET2(slotA, 0) == refno) break;
5478 slotA += name_entry_size;
5479 }
5480
5481 if (i < name_count)
5482 {
5483 /* Found a name for the number - there can be only one; duplicate names
5484 for different numbers are allowed, but not vice versa. First scan down
5485 for duplicates. */
5486
5487 slotB = slotA;
5488 while (slotB > name_table)
5489 {
5490 slotB -= name_entry_size;
5491 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5492 {
5493 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5494 if (condition) break;
5495 }
5496 else break;
5497 }
5498
5499 /* Scan up for duplicates */
5500 if (!condition)
5501 {
5502 slotB = slotA;
5503 for (i++; i < name_count; i++)
5504 {
5505 slotB += name_entry_size;
5506 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5507 {
5508 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5509 if (condition) break;
5510 }
5511 else break;
5512 }
5513 }
5514 }
5515 return condition;
5516 }
5517
5518 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5519 {
5520 int condition = FALSE;
5521 pcre_uchar *slotA = name_table;
5522 pcre_uchar *slotB;
5523 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5524 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5525 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5526 sljit_uw i;
5527
5528 for (i = 0; i < name_count; i++)
5529 {
5530 if (GET2(slotA, 0) == recno) break;
5531 slotA += name_entry_size;
5532 }
5533
5534 if (i < name_count)
5535 {
5536 /* Found a name for the number - there can be only one; duplicate
5537 names for different numbers are allowed, but not vice versa. First
5538 scan down for duplicates. */
5539
5540 slotB = slotA;
5541 while (slotB > name_table)
5542 {
5543 slotB -= name_entry_size;
5544 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5545 {
5546 condition = GET2(slotB, 0) == group_num;
5547 if (condition) break;
5548 }
5549 else break;
5550 }
5551
5552 /* Scan up for duplicates */
5553 if (!condition)
5554 {
5555 slotB = slotA;
5556 for (i++; i < name_count; i++)
5557 {
5558 slotB += name_entry_size;
5559 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5560 {
5561 condition = GET2(slotB, 0) == group_num;
5562 if (condition) break;
5563 }
5564 else break;
5565 }
5566 }
5567 }
5568 return condition;
5569 }
5570
5571 /*
5572 Handling bracketed expressions is probably the most complex part.
5573
5574 Stack layout naming characters:
5575 S - Push the current STR_PTR
5576 0 - Push a 0 (NULL)
5577 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5578 before the next alternative. Not pushed if there are no alternatives.
5579 M - Any values pushed by the current alternative. Can be empty, or anything.
5580 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5581 L - Push the previous local (pointed by localptr) to the stack
5582 () - optional values stored on the stack
5583 ()* - optional, can be stored multiple times
5584
5585 The following list shows the regular expression templates, their PCRE byte codes
5586 and stack layout supported by pcre-sljit.
5587
5588 (?:) OP_BRA | OP_KET A M
5589 () OP_CBRA | OP_KET C M
5590 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5591 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5592 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5593 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5594 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5595 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5596 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5597 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5598 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5599 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5600 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5601 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5602 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5603 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5604 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5605 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5606 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5607 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5608 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5609 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5610
5611
5612 Stack layout naming characters:
5613 A - Push the alternative index (starting from 0) on the stack.
5614 Not pushed if there are no alternatives.
5615 M - Any values pushed by the current alternative. Can be empty, or anything.
5616
5617 The next list shows the possible content of a bracket:
5618 (|) OP_*BRA | OP_ALT ... M A
5619 (?()|) OP_*COND | OP_ALT M A
5620 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5621 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5622 Or nothing, if trace is unnecessary
5623 */
5624
5625 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5626 {
5627 DEFINE_COMPILER;
5628 backtrack_common *backtrack;
5629 pcre_uchar opcode;
5630 int private_data_ptr = 0;
5631 int offset = 0;
5632 int stacksize;
5633 pcre_uchar *ccbegin;
5634 pcre_uchar *matchingpath;
5635 pcre_uchar bra = OP_BRA;
5636 pcre_uchar ket;
5637 assert_backtrack *assert;
5638 BOOL has_alternatives;
5639 struct sljit_jump *jump;
5640 struct sljit_jump *skip;
5641 struct sljit_label *rmaxlabel = NULL;
5642 struct sljit_jump *braminzerojump = NULL;
5643
5644 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5645
5646 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5647 {
5648 bra = *cc;
5649 cc++;
5650 opcode = *cc;
5651 }
5652
5653 opcode = *cc;
5654 ccbegin = cc;
5655 matchingpath = ccbegin + 1 + LINK_SIZE;
5656
5657 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5658 {
5659 /* Drop this bracket_backtrack. */
5660 parent->top = backtrack->prev;
5661 return bracketend(cc);
5662 }
5663
5664 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5665 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5666 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5667 cc += GET(cc, 1);
5668
5669 has_alternatives = *cc == OP_ALT;
5670 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5671 {
5672 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5673 if (*matchingpath == OP_NRREF)
5674 {
5675 stacksize = GET2(matchingpath, 1);
5676 if (common->currententry == NULL || stacksize == RREF_ANY)
5677 has_alternatives = FALSE;
5678 else if (common->currententry->start == 0)
5679 has_alternatives = stacksize != 0;
5680 else
5681 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5682 }
5683 }
5684
5685 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5686 opcode = OP_SCOND;
5687 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5688 opcode = OP_ONCE;
5689
5690 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5691 {
5692 /* Capturing brackets has a pre-allocated space. */
5693 offset = GET2(ccbegin, 1 + LINK_SIZE);
5694 if (common->optimized_cbracket[offset] == 0)
5695 {
5696 private_data_ptr = OVECTOR_PRIV(offset);
5697 offset <<= 1;
5698 }
5699 else
5700 {
5701 offset <<= 1;
5702 private_data_ptr = OVECTOR(offset);
5703 }
5704 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5705 matchingpath += IMM2_SIZE;
5706 }
5707 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5708 {
5709 /* Other brackets simply allocate the next entry. */
5710 private_data_ptr = PRIVATE_DATA(ccbegin);
5711 SLJIT_ASSERT(private_data_ptr != 0);
5712 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5713 if (opcode == OP_ONCE)
5714 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5715 }
5716
5717 /* Instructions before the first alternative. */
5718 stacksize = 0;
5719 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5720 stacksize++;
5721 if (bra == OP_BRAZERO)
5722 stacksize++;
5723
5724 if (stacksize > 0)
5725 allocate_stack(common, stacksize);
5726
5727 stacksize = 0;
5728 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5729 {
5730 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5731 stacksize++;
5732 }
5733
5734 if (bra == OP_BRAZERO)
5735 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5736
5737 if (bra == OP_BRAMINZERO)
5738 {
5739 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5740 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5741 if (ket != OP_KETRMIN)
5742 {
5743 free_stack(common, 1);
5744 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5745 }
5746 else
5747 {
5748 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5749 {
5750 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5751 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5752 /* Nothing stored during the first run. */
5753 skip = JUMP(SLJIT_JUMP);
5754 JUMPHERE(jump);
5755 /* Checking zero-length iteration. */
5756 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5757 {
5758 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5759 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5760 }
5761 else
5762 {
5763 /* Except when the whole stack frame must be saved. */
5764 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5765 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
5766 }
5767 JUMPHERE(skip);
5768 }
5769 else
5770 {
5771 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5772 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5773 JUMPHERE(jump);
5774 }
5775 }
5776 }
5777
5778 if (ket == OP_KETRMIN)
5779 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5780
5781 if (ket == OP_KETRMAX)
5782 {
5783 rmaxlabel = LABEL();
5784 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5785 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5786 }
5787
5788 /* Handling capturing brackets and alternatives. */
5789 if (opcode == OP_ONCE)
5790 {
5791 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5792 {
5793 /* Neither capturing brackets nor recursions are not found in the block. */
5794 if (ket == OP_KETRMIN)
5795 {
5796 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5797 allocate_stack(common, 2);
5798 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5799 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5800 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5801 }
5802 else if (ket == OP_KETRMAX || has_alternatives)
5803 {
5804 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5805 allocate_stack(common, 1);
5806 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5807 }
5808 else
5809 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5810 }
5811 else
5812 {
5813 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5814 {
5815 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5816 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5817 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5818 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5819 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5820 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5821 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5822 }
5823 else
5824 {
5825 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5826 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5827 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5828 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5829 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5830 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5831 }
5832 }
5833 }
5834 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5835 {
5836 /* Saving the previous values. */
5837 if (common->optimized_cbracket[offset >> 1] != 0)
5838 {
5839 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5840 allocate_stack(common, 2);
5841 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5842 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
5843 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5844 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5845 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5846 }
5847 else
5848 {
5849 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5850 allocate_stack(common, 1);
5851 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5852 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5853 }
5854 }
5855 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5856 {
5857 /* Saving the previous value. */
5858 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5859 allocate_stack(common, 1);
5860 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5861 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5862 }
5863 else if (has_alternatives)
5864 {
5865 /* Pushing the starting string pointer. */
5866 allocate_stack(common, 1);
5867 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5868 }
5869
5870 /* Generating code for the first alternative. */
5871 if (opcode == OP_COND || opcode == OP_SCOND)
5872 {
5873 if (*matchingpath == OP_CREF)
5874 {
5875 SLJIT_ASSERT(has_alternatives);
5876 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5877 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5878 matchingpath += 1 + IMM2_SIZE;
5879 }
5880 else if (*matchingpath == OP_NCREF)
5881 {
5882 SLJIT_ASSERT(has_alternatives);
5883 stacksize = GET2(matchingpath, 1);
5884 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5885
5886 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5887 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5888 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5889 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
5890 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5891 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
5892 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5893 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5894 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
5895
5896 JUMPHERE(jump);
5897 matchingpath += 1 + IMM2_SIZE;
5898 }
5899 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
5900 {
5901 /* Never has other case. */
5902 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5903
5904 stacksize = GET2(matchingpath, 1);
5905 if (common->currententry == NULL)
5906 stacksize = 0;
5907 else if (stacksize == RREF_ANY)
5908 stacksize = 1;
5909 else if (common->currententry->start == 0)
5910 stacksize = stacksize == 0;
5911 else
5912 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5913
5914 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
5915 {
5916 SLJIT_ASSERT(!has_alternatives);
5917 if (stacksize != 0)
5918 matchingpath += 1 + IMM2_SIZE;
5919 else
5920 {
5921 if (*cc == OP_ALT)
5922 {
5923 matchingpath = cc + 1 + LINK_SIZE;
5924 cc += GET(cc, 1);
5925 }
5926 else
5927 matchingpath = cc;
5928 }
5929 }
5930 else
5931 {
5932 SLJIT_ASSERT(has_alternatives);
5933
5934 stacksize = GET2(matchingpath, 1);
5935 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5936 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5937 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5938 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5939 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
5940 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5941 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
5942 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5943 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5944 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
5945 matchingpath += 1 + IMM2_SIZE;
5946 }
5947 }
5948 else
5949 {
5950 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
5951 /* Similar code as PUSH_BACKTRACK macro. */
5952 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5953 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5954 return NULL;
5955 memset(assert, 0, sizeof(assert_backtrack));
5956 assert->common.cc = matchingpath;
5957 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5958 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
5959 }
5960 }
5961
5962 compile_matchingpath(common, matchingpath, cc, backtrack);
5963 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5964 return NULL;
5965
5966 if (opcode == OP_ONCE)
5967 {
5968 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5969 {
5970 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5971 /* TMP2 which is set here used by OP_KETRMAX below. */
5972 if (ket == OP_KETRMAX)
5973 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5974 else if (ket == OP_KETRMIN)
5975 {
5976 /* Move the STR_PTR to the private_data_ptr. */
5977 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5978 }
5979 }
5980 else
5981 {
5982 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5983 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw));
5984 if (ket == OP_KETRMAX)
5985 {
5986 /* TMP2 which is set here used by OP_KETRMAX below. */
5987 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5988 }
5989 }
5990 }
5991
5992 stacksize = 0;
5993 if (ket != OP_KET || bra != OP_BRA)
5994 stacksize++;
5995 if (offset != 0)
5996 {
5997 if (common->capture_last_ptr != 0)
5998 stacksize++;
5999 if (common->optimized_cbracket[offset >> 1] == 0)
6000 stacksize += 2;
6001 }
6002 if (has_alternatives && opcode != OP_ONCE)
6003 stacksize++;
6004
6005 if (stacksize > 0)
6006 allocate_stack(common, stacksize);
6007
6008 stacksize = 0;
6009 if (ket != OP_KET || bra != OP_BRA)
6010 {
6011 if (ket != OP_KET)
6012 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6013 else
6014 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6015 stacksize++;
6016 }
6017
6018 if (offset != 0)
6019 {
6020 if (common->capture_last_ptr != 0)
6021 {
6022 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6023 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6024 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0);
6025 stacksize++;
6026 }
6027 if (common->optimized_cbracket[offset >> 1] == 0)
6028 {
6029 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6030 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6031 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6032 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6033 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6034 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6035 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6036 stacksize += 2;
6037 }
6038 }
6039
6040 if (has_alternatives)
6041 {
6042 if (opcode != OP_ONCE)
6043 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6044 if (ket != OP_KETRMAX)
6045 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6046 }
6047
6048 /* Must be after the matchingpath label. */
6049 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6050 {
6051 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6052 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6053 }
6054
6055 if (ket == OP_KETRMAX)
6056 {
6057 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6058 {
6059 if (has_alternatives)
6060 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6061 /* Checking zero-length iteration. */
6062 if (opcode != OP_ONCE)
6063 {
6064 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6065 /* Drop STR_PTR for greedy plus quantifier. */
6066 if (bra != OP_BRAZERO)
6067 free_stack(common, 1);
6068 }
6069 else
6070 /* TMP2 must contain the starting STR_PTR. */
6071 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6072 }
6073 else
6074 JUMPTO(SLJIT_JUMP, rmaxlabel);
6075 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6076 }
6077
6078 if (bra == OP_BRAZERO)
6079 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6080
6081 if (bra == OP_BRAMINZERO)
6082 {
6083 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6084 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6085 if (braminzerojump != NULL)
6086 {
6087 JUMPHERE(braminzerojump);
6088 /* We need to release the end pointer to perform the
6089 backtrack for the zero-length iteration. When
6090 framesize is < 0, OP_ONCE will do the release itself. */
6091 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6092 {
6093 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6094 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6095 }
6096 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6097 free_stack(common, 1);
6098 }
6099 /* Continue to the normal backtrack. */
6100 }
6101
6102 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6103 decrease_call_count(common);
6104
6105 /* Skip the other alternatives. */
6106 while (*cc == OP_ALT)
6107 cc += GET(cc, 1);
6108 cc += 1 + LINK_SIZE;
6109 return cc;
6110 }
6111
6112 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6113 {
6114 DEFINE_COMPILER;
6115 backtrack_common *backtrack;
6116 pcre_uchar opcode;
6117 int private_data_ptr;
6118 int cbraprivptr = 0;
6119 int framesize;
6120 int stacksize;
6121 int offset = 0;
6122 BOOL zero = FALSE;
6123 pcre_uchar *ccbegin = NULL;
6124 int stack;
6125 struct sljit_label *loop = NULL;
6126 struct jump_list *emptymatch = NULL;
6127
6128 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6129 if (*cc == OP_BRAPOSZERO)
6130 {
6131 zero = TRUE;
6132 cc++;
6133 }
6134
6135 opcode = *cc;
6136 private_data_ptr = PRIVATE_DATA(cc);
6137 SLJIT_ASSERT(private_data_ptr != 0);
6138 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6139 switch(opcode)
6140 {
6141 case OP_BRAPOS:
6142 case OP_SBRAPOS:
6143 ccbegin = cc + 1 + LINK_SIZE;
6144 break;
6145
6146 case OP_CBRAPOS:
6147 case OP_SCBRAPOS:
6148 offset = GET2(cc, 1 + LINK_SIZE);
6149 /* This case cannot be optimized in the same was as
6150 normal capturing brackets. */
6151 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6152 cbraprivptr = OVECTOR_PRIV(offset);
6153 offset <<= 1;
6154 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6155 break;
6156
6157 default:
6158 SLJIT_ASSERT_STOP();
6159 break;
6160 }
6161
6162 framesize = get_framesize(common, cc, FALSE);
6163 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6164 if (framesize < 0)
6165 {
6166 stacksize = (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS) ? 2 : 1;
6167 if (!zero)
6168 stacksize++;
6169 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6170 allocate_stack(common, stacksize);
6171 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6172
6173 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6174 {
6175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6176 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6177 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6178 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6179 }
6180 else
6181 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6182
6183 if (!zero)
6184 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6185 }
6186 else
6187 {
6188 stacksize = framesize + 1;
6189 if (!zero)
6190 stacksize++;
6191 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6192 stacksize++;
6193 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6194 allocate_stack(common, stacksize);
6195
6196 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6197 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6198 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6199 stack = 0;
6200 if (!zero)
6201 {
6202 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6203 stack++;
6204 }
6205 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6206 {
6207 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6208 stack++;
6209 }
6210 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6211 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6212 }
6213
6214 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6215 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6216
6217 loop = LABEL();
6218 while (*cc != OP_KETRPOS)
6219 {
6220 backtrack->top = NULL;
6221 backtrack->topbacktracks = NULL;
6222 cc += GET(cc, 1);
6223
6224 compile_matchingpath(common, ccbegin, cc, backtrack);
6225 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6226 return NULL;
6227
6228 if (framesize < 0)
6229 {
6230 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6231
6232 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6233 {
6234 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6235 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6236 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6237 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6238 }
6239 else
6240 {
6241 if (opcode == OP_SBRAPOS)
6242 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6243 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6244 }
6245
6246 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6247 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6248
6249 if (!zero)
6250 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6251 }
6252 else
6253 {
6254 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6255 {
6256 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6257 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6258 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6259 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6260 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6261 }
6262 else
6263 {
6264 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6265 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6266 if (opcode == OP_SBRAPOS)
6267 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6268 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6269 }
6270
6271 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6272 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6273
6274 if (!zero)
6275 {
6276 if (framesize < 0)
6277 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6278 else
6279 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6280 }
6281 }
6282 JUMPTO(SLJIT_JUMP, loop);
6283 flush_stubs(common);
6284
6285 compile_backtrackingpath(common, backtrack->top);
6286 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6287 return NULL;
6288 set_jumps(backtrack->topbacktracks, LABEL());
6289
6290 if (framesize < 0)
6291 {
6292 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6293 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6294 else
6295 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6296 }
6297 else
6298 {
6299 if (opcode == OP_CBRAPOS || opcode == OP_SCBRAPOS)
6300 {
6301 /* Last alternative. */
6302 if (*cc == OP_KETRPOS)
6303 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6304 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6305 }
6306 else
6307 {
6308 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6309 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6310 }
6311 }
6312
6313 if (*cc == OP_KETRPOS)
6314 break;
6315 ccbegin = cc + 1 + LINK_SIZE;
6316 }
6317
6318 backtrack->topbacktracks = NULL;
6319 if (!zero)
6320 {
6321 if (framesize < 0)
6322 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6323 else /* TMP2 is set to [private_data_ptr] above. */
6324 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
6325 }
6326
6327 /* None of them matched. */
6328 set_jumps(emptymatch, LABEL());
6329 decrease_call_count(common);
6330 return cc + 1 + LINK_SIZE;
6331 }
6332
6333 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6334 {
6335 int class_len;
6336
6337 *opcode = *cc;
6338 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6339 {
6340 cc++;
6341 *type = OP_CHAR;
6342 }
6343 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6344 {
6345 cc++;
6346 *type = OP_CHARI;
6347 *opcode -= OP_STARI - OP_STAR;
6348 }
6349 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6350 {
6351 cc++;
6352 *type = OP_NOT;
6353 *opcode -= OP_NOTSTAR - OP_STAR;
6354 }
6355 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6356 {
6357 cc++;
6358 *type = OP_NOTI;
6359 *opcode -= OP_NOTSTARI - OP_STAR;
6360 }
6361 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6362 {
6363 cc++;
6364 *opcode -= OP_TYPESTAR - OP_STAR;
6365 *type = 0;
6366 }
6367 else
6368 {
6369 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6370 *type = *opcode;
6371 cc++;
6372 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6373 *opcode = cc[class_len - 1];
6374 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6375 {
6376 *opcode -= OP_CRSTAR - OP_STAR;
6377 if (end != NULL)
6378 *end = cc + class_len;
6379 }
6380 else
6381 {
6382 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6383 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6384 *arg2 = GET2(cc, class_len);
6385
6386 if (*arg2 == 0)
6387 {
6388 SLJIT_ASSERT(*arg1 != 0);
6389 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6390 }
6391 if (*arg1 == *arg2)
6392 *opcode = OP_EXACT;
6393
6394 if (end != NULL)
6395 *end = cc + class_len + 2 * IMM2_SIZE;
6396 }
6397 return cc;
6398 }
6399
6400 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6401 {
6402 *arg1 = GET2(cc, 0);
6403 cc += IMM2_SIZE;
6404 }
6405
6406 if (*type == 0)
6407 {
6408 *type = *cc;
6409 if (end != NULL)
6410 *end = next_opcode(common, cc);
6411 cc++;
6412 return cc;
6413 }
6414
6415 if (end != NULL)
6416 {
6417 *end = cc + 1;
6418 #ifdef SUPPORT_UTF
6419 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6420 #endif
6421 }
6422 return cc;
6423 }
6424
6425 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6426 {
6427 DEFINE_COMPILER;
6428 backtrack_common *backtrack;
6429 pcre_uchar opcode;
6430 pcre_uchar type;
6431 int arg1 = -1, arg2 = -1;
6432 pcre_uchar* end;
6433 jump_list *nomatch = NULL;
6434 struct sljit_jump *jump = NULL;
6435 struct sljit_label *label;
6436 int private_data_ptr = PRIVATE_DATA(cc);
6437 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6438 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6439 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
6440 int tmp_base, tmp_offset;
6441
6442 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6443
6444 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6445
6446 switch (type)
6447 {
6448 case OP_NOT_DIGIT:
6449 case OP_DIGIT:
6450 case OP_NOT_WHITESPACE:
6451 case OP_WHITESPACE:
6452 case OP_NOT_WORDCHAR:
6453 case OP_WORDCHAR:
6454 case OP_ANY:
6455 case OP_ALLANY:
6456 case OP_ANYBYTE:
6457 case OP_ANYNL:
6458 case OP_NOT_HSPACE:
6459 case OP_HSPACE:
6460 case OP_NOT_VSPACE:
6461 case OP_VSPACE:
6462 case OP_CHAR:
6463 case OP_CHARI:
6464 case OP_NOT:
6465 case OP_NOTI:
6466 case OP_CLASS:
6467 case OP_NCLASS:
6468 tmp_base = TMP3;
6469 tmp_offset = 0;
6470 break;
6471
6472 default:
6473 SLJIT_ASSERT_STOP();
6474 /* Fall through. */
6475
6476 case OP_EXTUNI:
6477 case OP_XCLASS:
6478 case OP_NOTPROP:
6479 case OP_PROP:
6480 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6481 tmp_offset = POSSESSIVE0;
6482 break;
6483 }
6484
6485 switch(opcode)
6486 {
6487 case OP_STAR:
6488 case OP_PLUS:
6489 case OP_UPTO:
6490 case OP_CRRANGE:
6491 if (type == OP_ANYNL || type == OP_EXTUNI)
6492 {
6493 SLJIT_ASSERT(private_data_ptr == 0);
6494 if (opcode == OP_STAR || opcode == OP_UPTO)
6495 {
6496 allocate_stack(common, 2);
6497 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6498 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6499 }
6500 else
6501 {
6502 allocate_stack(common, 1);
6503 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6504 }
6505
6506 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6508
6509 label = LABEL();
6510 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6511 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6512 {
6513 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6514 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6515 if (opcode == OP_CRRANGE && arg2 > 0)
6516 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6517 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6518 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6519 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6520 }
6521
6522 /* We cannot use TMP3 because of this allocate_stack. */
6523 allocate_stack(common, 1);
6524 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6525 JUMPTO(SLJIT_JUMP, label);
6526 if (jump != NULL)
6527 JUMPHERE(jump);
6528 }
6529 else
6530 {
6531 if (opcode == OP_PLUS)
6532 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6533 if (private_data_ptr == 0)
6534 allocate_stack(common, 2);
6535 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6536 if (opcode <= OP_PLUS)
6537 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6538 else
6539 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6540 label = LABEL();
6541 compile_char1_matchingpath(common, type, cc, &nomatch);
6542 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6543 if (opcode <= OP_PLUS)
6544 JUMPTO(SLJIT_JUMP, label);
6545 else if (opcode == OP_CRRANGE && arg1 == 0)
6546 {
6547 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6548 JUMPTO(SLJIT_JUMP, label);
6549 }
6550 else
6551 {
6552 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6553 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6554 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6555 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6556 }
6557 set_jumps(nomatch, LABEL());
6558 if (opcode == OP_CRRANGE)
6559 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6560 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6561 }
6562 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6563 break;
6564
6565 case OP_MINSTAR:
6566 case OP_MINPLUS:
6567 if (opcode == OP_MINPLUS)
6568 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6569 if (private_data_ptr == 0)
6570 allocate_stack(common, 1);
6571 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6572 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6573 break;
6574
6575 case OP_MINUPTO:
6576 case OP_CRMINRANGE:
6577 if (private_data_ptr == 0)
6578 allocate_stack(common, 2);
6579 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6580 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6581 if (opcode == OP_CRMINRANGE)
6582 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6583 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6584 break;
6585
6586 case OP_QUERY:
6587 case OP_MINQUERY:
6588 if (private_data_ptr == 0)
6589 allocate_stack(common, 1);
6590 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6591 if (opcode == OP_QUERY)
6592 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6593 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6594 break;
6595
6596 case OP_EXACT:
6597 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6598 label = LABEL();
6599 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6600 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6601 JUMPTO(SLJIT_C_NOT_ZERO, label);
6602 break;
6603
6604 case OP_POSSTAR:
6605 case OP_POSPLUS:
6606 case OP_POSUPTO:
6607 if (opcode == OP_POSPLUS)
6608 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6609 if (opcode == OP_POSUPTO)
6610 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6611 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6612 label = LABEL();
6613 compile_char1_matchingpath(common, type, cc, &nomatch);
6614 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6615 if (opcode != OP_POSUPTO)
6616 JUMPTO(SLJIT_JUMP, label);
6617 else
6618 {
6619 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6620 JUMPTO(SLJIT_C_NOT_ZERO, label);
6621 }
6622 set_jumps(nomatch, LABEL());
6623 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6624 break;
6625
6626 case OP_POSQUERY:
6627 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6628 compile_char1_matchingpath(common, type, cc, &nomatch);
6629 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6630 set_jumps(nomatch, LABEL());
6631 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6632 break;
6633
6634 default:
6635 SLJIT_ASSERT_STOP();
6636 break;
6637 }
6638
6639 decrease_call_count(common);
6640 return end;
6641 }
6642
6643 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6644 {
6645 DEFINE_COMPILER;
6646 backtrack_common *backtrack;
6647
6648 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6649
6650 if (*cc == OP_FAIL)
6651 {
6652 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6653 return cc + 1;
6654 }
6655
6656 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6657 {
6658 /* No need to check notempty conditions. */
6659 if (common->accept_label == NULL)
6660 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6661 else
6662 JUMPTO(SLJIT_JUMP, common->accept_label);
6663 return cc + 1;
6664 }
6665
6666 if (common->accept_label == NULL)
6667 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6668 else
6669 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->accept_label);
6670 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6671 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6672 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6673 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6674 if (common->accept_label == NULL)
6675 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6676 else
6677 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
6678 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6679 if (common->accept_label == NULL)
6680 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6681 else
6682 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
6683 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6684 return cc + 1;
6685 }
6686
6687 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
6688 {
6689 DEFINE_COMPILER;
6690 int offset = GET2(cc, 1);
6691 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
6692
6693 /* Data will be discarded anyway... */
6694 if (common->currententry != NULL)
6695 return cc + 1 + IMM2_SIZE;
6696
6697 if (!optimized_cbracket)
6698 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6699 offset <<= 1;
6700 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6701 if (!optimized_cbracket)
6702 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6703 return cc + 1 + IMM2_SIZE;
6704 }
6705
6706 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6707 {
6708 DEFINE_COMPILER;
6709 backtrack_common *backtrack;
6710
6711 while (cc < ccend)
6712 {
6713 switch(*cc)
6714 {
6715 case OP_SOD:
6716 case OP_SOM:
6717 case OP_NOT_WORD_BOUNDARY:
6718 case OP_WORD_BOUNDARY:
6719 case OP_NOT_DIGIT:
6720 case OP_DIGIT:
6721 case OP_NOT_WHITESPACE:
6722 case OP_WHITESPACE:
6723 case OP_NOT_WORDCHAR:
6724 case OP_WORDCHAR:
6725 case OP_ANY:
6726 case OP_ALLANY:
6727 case OP_ANYBYTE:
6728 case OP_NOTPROP:
6729 case OP_PROP:
6730 case OP_ANYNL:
6731 case OP_NOT_HSPACE:
6732 case OP_HSPACE:
6733 case OP_NOT_VSPACE:
6734 case OP_VSPACE:
6735 case OP_EXTUNI:
6736 case OP_EODN:
6737 case OP_EOD:
6738 case OP_CIRC:
6739 case OP_CIRCM:
6740 case OP_DOLL:
6741 case OP_DOLLM:
6742 case OP_NOT:
6743 case OP_NOTI:
6744 case OP_REVERSE:
6745 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
6746 break;
6747
6748 case OP_SET_SOM:
6749 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
6750 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
6751 allocate_