/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1246 - (show annotations)
Sun Feb 10 08:44:53 2013 UTC (6 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 275409 byte(s)
Continue work on callouts and possessive quantifiers in JIT.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 The machine code generator part (this module) was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
/* Size of the regex stack that is allocated on the real machine stack.
   Fast, but limited in size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for the stack allocated by the OS. Should be a multiple
   of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
   Only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined(SLJIT_DEBUG) && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
80
81 /*
82 Short summary about the backtracking mechanism employed by the jit code generator:
83
84 The code generator follows the recursive nature of the PERL compatible regular
85 expressions. The basic blocks of regular expressions are condition checkers
86 which execute different commands depending on the result of the condition check.
87 The relationship between the operators can be horizontal (concatenation) and
88 vertical (sub-expression) (See struct backtrack_common for more details).
89
90 'ab' - 'a' and 'b' regexps are concatenated
91 'a+' - 'a' is the sub-expression of the '+' operator
92
93 The condition checkers are boolean (true/false) checkers. Machine code is generated
94 for the checker itself and for the actions depending on the result of the checker.
95 The 'true' case is called the matching path (expected path), and the other is called
96 the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
97 branches on the matching path.
98
99 Greedy star operator (*) :
100 Matching path: match happens.
101 Backtrack path: match failed.
102 Non-greedy star operator (*?) :
103 Matching path: no need to perform a match.
104 Backtrack path: match is required.
105
106 The following example shows the code generated for a capturing bracket
107 with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
108 we have the following regular expression:
109
110 A(B|C)D
111
112 The generated code will be the following:
113
114 A matching path
115 '(' matching path (pushing arguments to the stack)
116 B matching path
117 ')' matching path (pushing arguments to the stack)
118 D matching path
119 return with successful match
120
121 D backtrack path
122 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
123 B backtrack path
124 C expected path
125 jump to D matching path
126 C backtrack path
127 A backtrack path
128
129 Notice that the order of the backtrack code paths is the opposite of the fast
130 code paths. In this way the topmost value on the stack always belongs
131 to the current backtrack code path. The backtrack path must check
132 whether there is a next alternative. If so, it needs to jump back to
133 the matching path eventually. Otherwise it needs to clear out its own stack
134 frame and continue the execution on the backtrack code paths.
135 */
136
137 /*
138 Saved stack frames:
139
140 Atomic blocks and asserts require reloading the values of private data
141 when the backtrack mechanism is performed. Because of OP_RECURSE, the data
142 are not necessarily known at compile time, thus we need a dynamic restore
143 mechanism.
144
145 The stack frames are stored in a chain list, and have the following format:
146 ([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
147
148 Thus we can restore the private data to a particular point in the stack.
149 */
150
151 typedef struct jit_arguments {
152 /* Pointers first. */
153 struct sljit_stack *stack;
154 const pcre_uchar *str;
155 const pcre_uchar *begin;
156 const pcre_uchar *end;
157 int *offsets;
158 pcre_uchar *uchar_ptr;
159 pcre_uchar *mark_ptr;
160 void *callout_data;
161 /* Everything else after. */
162 int offset_count;
163 int call_limit;
164 pcre_uint8 notbol;
165 pcre_uint8 noteol;
166 pcre_uint8 notempty;
167 pcre_uint8 notempty_atstart;
168 } jit_arguments;
169
170 typedef struct executable_functions {
171 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
172 PUBL(jit_callback) callback;
173 void *userdata;
174 pcre_uint32 top_bracket;
175 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
176 } executable_functions;
177
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
182
/* Kinds of stubs that can be recorded in a stub_list entry. */
enum stub_types {
  stack_alloc
};
184
185 typedef struct stub_list {
186 enum stub_types type;
187 int data;
188 struct sljit_jump *start;
189 struct sljit_label *quit;
190 struct stub_list *next;
191 } stub_list;
192
193 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
194
195 /* The following structure is the key data type for the recursive
196 code generator. It is allocated by compile_matchingpath, and contains
197 the aguments for compile_backtrackingpath. Must be the first member
198 of its descendants. */
199 typedef struct backtrack_common {
200 /* Concatenation stack. */
201 struct backtrack_common *prev;
202 jump_list *nextbacktracks;
203 /* Internal stack (for component operators). */
204 struct backtrack_common *top;
205 jump_list *topbacktracks;
206 /* Opcode pointer. */
207 pcre_uchar *cc;
208 } backtrack_common;
209
210 typedef struct assert_backtrack {
211 backtrack_common common;
212 jump_list *condfailed;
213 /* Less than 0 (-1) if a frame is not needed. */
214 int framesize;
215 /* Points to our private memory word on the stack. */
216 int private_data_ptr;
217 /* For iterators. */
218 struct sljit_label *matchingpath;
219 } assert_backtrack;
220
221 typedef struct bracket_backtrack {
222 backtrack_common common;
223 /* Where to coninue if an alternative is successfully matched. */
224 struct sljit_label *alternative_matchingpath;
225 /* For rmin and rmax iterators. */
226 struct sljit_label *recursive_matchingpath;
227 /* For greedy ? operator. */
228 struct sljit_label *zero_matchingpath;
229 /* Contains the branches of a failed condition. */
230 union {
231 /* Both for OP_COND, OP_SCOND. */
232 jump_list *condfailed;
233 assert_backtrack *assert;
234 /* For OP_ONCE. -1 if not needed. */
235 int framesize;
236 } u;
237 /* Points to our private memory word on the stack. */
238 int private_data_ptr;
239 } bracket_backtrack;
240
241 typedef struct bracketpos_backtrack {
242 backtrack_common common;
243 /* Points to our private memory word on the stack. */
244 int private_data_ptr;
245 /* Reverting stack is needed. */
246 int framesize;
247 /* Allocated stack size. */
248 int stacksize;
249 } bracketpos_backtrack;
250
251 typedef struct braminzero_backtrack {
252 backtrack_common common;
253 struct sljit_label *matchingpath;
254 } braminzero_backtrack;
255
256 typedef struct iterator_backtrack {
257 backtrack_common common;
258 /* Next iteration. */
259 struct sljit_label *matchingpath;
260 } iterator_backtrack;
261
262 typedef struct recurse_entry {
263 struct recurse_entry *next;
264 /* Contains the function entry. */
265 struct sljit_label *entry;
266 /* Collects the calls until the function is not created. */
267 jump_list *calls;
268 /* Points to the starting opcode. */
269 int start;
270 } recurse_entry;
271
272 typedef struct recurse_backtrack {
273 backtrack_common common;
274 } recurse_backtrack;
275
276 #define MAX_RANGE_SIZE 6
277
278 typedef struct compiler_common {
279 struct sljit_compiler *compiler;
280 pcre_uchar *start;
281
282 /* Maps private data offset to each opcode. */
283 int *private_data_ptrs;
284 /* Tells whether the capturing bracket is optimized. */
285 pcre_uint8 *optimized_cbracket;
286 /* Starting offset of private data for capturing brackets. */
287 int cbraptr;
288 /* OVector starting point. Must be divisible by 2. */
289 int ovector_start;
290 /* Last known position of the requested byte. */
291 int req_char_ptr;
292 /* Head of the last recursion. */
293 int recursive_head_ptr;
294 /* First inspected character for partial matching. */
295 int start_used_ptr;
296 /* Starting pointer for partial soft matches. */
297 int hit_start;
298 /* End pointer of the first line. */
299 int first_line_end;
300 /* Points to the marked string. */
301 int mark_ptr;
302 /* Points to the last matched capture block index. */
303 int capture_last_ptr;
304
305 /* Flipped and lower case tables. */
306 const pcre_uint8 *fcc;
307 sljit_sw lcc;
308 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
309 int mode;
310 /* Newline control. */
311 int nltype;
312 int newline;
313 int bsr_nltype;
314 /* Dollar endonly. */
315 int endonly;
316 BOOL has_set_som;
317 /* Tables. */
318 sljit_sw ctypes;
319 int digits[2 + MAX_RANGE_SIZE];
320 /* Named capturing brackets. */
321 sljit_uw name_table;
322 sljit_sw name_count;
323 sljit_sw name_entry_size;
324
325 /* Labels and jump lists. */
326 struct sljit_label *partialmatchlabel;
327 struct sljit_label *quit_label;
328 struct sljit_label *forced_quit_label;
329 struct sljit_label *accept_label;
330 stub_list *stubs;
331 recurse_entry *entries;
332 recurse_entry *currententry;
333 jump_list *partialmatch;
334 jump_list *quit;
335 jump_list *forced_quit;
336 jump_list *accept;
337 jump_list *calllimit;
338 jump_list *stackalloc;
339 jump_list *revertframes;
340 jump_list *wordboundary;
341 jump_list *anynewline;
342 jump_list *hspace;
343 jump_list *vspace;
344 jump_list *casefulcmp;
345 jump_list *caselesscmp;
346 BOOL jscript_compat;
347 #ifdef SUPPORT_UTF
348 BOOL utf;
349 #ifdef SUPPORT_UCP
350 BOOL use_ucp;
351 #endif
352 #ifndef COMPILE_PCRE32
353 jump_list *utfreadchar;
354 #endif
355 #ifdef COMPILE_PCRE8
356 jump_list *utfreadtype8;
357 #endif
358 #endif /* SUPPORT_UTF */
359 #ifdef SUPPORT_UCP
360 jump_list *getucd;
361 #endif
362 } compiler_common;
363
/* Context used by byte_sequence_compare. The ucharptr member and the
   two unions (c and oc) exist only when SLJIT_UNALIGNED is defined and
   non-zero; the element type and count of their asuchars arrays depend
   on the compiled character width. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;

  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;

  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
397
398 /* Undefine sljit macros. */
399 #undef CMP
400
401 /* Used for accessing the elements of the stack. */
402 #define STACK(i) ((-(i) - 1) * (int)sizeof(sljit_sw))
403
404 #define TMP1 SLJIT_SCRATCH_REG1
405 #define TMP2 SLJIT_SCRATCH_REG3
406 #define TMP3 SLJIT_TEMPORARY_EREG2
407 #define STR_PTR SLJIT_SAVED_REG1
408 #define STR_END SLJIT_SAVED_REG2
409 #define STACK_TOP SLJIT_SCRATCH_REG2
410 #define STACK_LIMIT SLJIT_SAVED_REG3
411 #define ARGUMENTS SLJIT_SAVED_EREG1
412 #define CALL_COUNT SLJIT_SAVED_EREG2
413 #define RETURN_ADDR SLJIT_TEMPORARY_EREG1
414
415 /* Local space layout. */
416 /* These two locals can be used by the current opcode. */
417 #define LOCALS0 (0 * sizeof(sljit_sw))
418 #define LOCALS1 (1 * sizeof(sljit_sw))
419 /* Two local variables for possessive quantifiers (char1 cannot use them). */
420 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
421 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
422 /* Max limit of recursions. */
423 #define CALL_LIMIT (4 * sizeof(sljit_sw))
424 /* The output vector is stored on the stack, and contains pointers
425 to characters. The vector data is divided into two groups: the first
426 group contains the start / end character pointers, and the second is
427 the start pointers when the end of the capturing group has not yet reached. */
428 #define OVECTOR_START (common->ovector_start)
429 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
430 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_sw))
431 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
432
433 #if defined COMPILE_PCRE8
434 #define MOV_UCHAR SLJIT_MOV_UB
435 #define MOVU_UCHAR SLJIT_MOVU_UB
436 #elif defined COMPILE_PCRE16
437 #define MOV_UCHAR SLJIT_MOV_UH
438 #define MOVU_UCHAR SLJIT_MOVU_UH
439 #elif defined COMPILE_PCRE32
440 #define MOV_UCHAR SLJIT_MOV_UI
441 #define MOVU_UCHAR SLJIT_MOVU_UI
442 #else
443 #error Unsupported compiling mode
444 #endif
445
446 /* Shortcuts. */
447 #define DEFINE_COMPILER \
448 struct sljit_compiler *compiler = common->compiler
449 #define OP1(op, dst, dstw, src, srcw) \
450 sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
451 #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
452 sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
453 #define LABEL() \
454 sljit_emit_label(compiler)
455 #define JUMP(type) \
456 sljit_emit_jump(compiler, (type))
457 #define JUMPTO(type, label) \
458 sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
459 #define JUMPHERE(jump) \
460 sljit_set_label((jump), sljit_emit_label(compiler))
461 #define SET_LABEL(jump, label) \
462 sljit_set_label((jump), (label))
463 #define CMP(type, src1, src1w, src2, src2w) \
464 sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
465 #define CMPTO(type, src1, src1w, src2, src2w, label) \
466 sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
467 #define OP_FLAGS(op, dst, dstw, src, srcw, type) \
468 sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
469 #define GET_LOCAL_BASE(dst, dstw, offset) \
470 sljit_get_local_base(compiler, (dst), (dstw), (offset))
471
472 static pcre_uchar* bracketend(pcre_uchar* cc)
473 {
474 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
475 do cc += GET(cc, 1); while (*cc == OP_ALT);
476 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
477 cc += 1 + LINK_SIZE;
478 return cc;
479 }
480
481 /* Functions which might need modification for every newly supported opcode:
482 next_opcode
483 get_private_data_length
484 set_private_data_ptrs
485 get_framesize
486 init_frame
487 get_private_data_length_for_copy
488 copy_private_data
489 compile_matchingpath
490 compile_backtrackingpath
491 */
492
493 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
494 {
495 SLJIT_UNUSED_ARG(common);
496 switch(*cc)
497 {
498 case OP_SOD:
499 case OP_SOM:
500 case OP_SET_SOM:
501 case OP_NOT_WORD_BOUNDARY:
502 case OP_WORD_BOUNDARY:
503 case OP_NOT_DIGIT:
504 case OP_DIGIT:
505 case OP_NOT_WHITESPACE:
506 case OP_WHITESPACE:
507 case OP_NOT_WORDCHAR:
508 case OP_WORDCHAR:
509 case OP_ANY:
510 case OP_ALLANY:
511 case OP_ANYNL:
512 case OP_NOT_HSPACE:
513 case OP_HSPACE:
514 case OP_NOT_VSPACE:
515 case OP_VSPACE:
516 case OP_EXTUNI:
517 case OP_EODN:
518 case OP_EOD:
519 case OP_CIRC:
520 case OP_CIRCM:
521 case OP_DOLL:
522 case OP_DOLLM:
523 case OP_TYPESTAR:
524 case OP_TYPEMINSTAR:
525 case OP_TYPEPLUS:
526 case OP_TYPEMINPLUS:
527 case OP_TYPEQUERY:
528 case OP_TYPEMINQUERY:
529 case OP_TYPEPOSSTAR:
530 case OP_TYPEPOSPLUS:
531 case OP_TYPEPOSQUERY:
532 case OP_CRSTAR:
533 case OP_CRMINSTAR:
534 case OP_CRPLUS:
535 case OP_CRMINPLUS:
536 case OP_CRQUERY:
537 case OP_CRMINQUERY:
538 case OP_DEF:
539 case OP_BRAZERO:
540 case OP_BRAMINZERO:
541 case OP_BRAPOSZERO:
542 case OP_COMMIT:
543 case OP_FAIL:
544 case OP_ACCEPT:
545 case OP_ASSERT_ACCEPT:
546 case OP_SKIPZERO:
547 return cc + 1;
548
549 case OP_ANYBYTE:
550 #ifdef SUPPORT_UTF
551 if (common->utf) return NULL;
552 #endif
553 return cc + 1;
554
555 case OP_CHAR:
556 case OP_CHARI:
557 case OP_NOT:
558 case OP_NOTI:
559 case OP_STAR:
560 case OP_MINSTAR:
561 case OP_PLUS:
562 case OP_MINPLUS:
563 case OP_QUERY:
564 case OP_MINQUERY:
565 case OP_POSSTAR:
566 case OP_POSPLUS:
567 case OP_POSQUERY:
568 case OP_STARI:
569 case OP_MINSTARI:
570 case OP_PLUSI:
571 case OP_MINPLUSI:
572 case OP_QUERYI:
573 case OP_MINQUERYI:
574 case OP_POSSTARI:
575 case OP_POSPLUSI:
576 case OP_POSQUERYI:
577 case OP_NOTSTAR:
578 case OP_NOTMINSTAR:
579 case OP_NOTPLUS:
580 case OP_NOTMINPLUS:
581 case OP_NOTQUERY:
582 case OP_NOTMINQUERY:
583 case OP_NOTPOSSTAR:
584 case OP_NOTPOSPLUS:
585 case OP_NOTPOSQUERY:
586 case OP_NOTSTARI:
587 case OP_NOTMINSTARI:
588 case OP_NOTPLUSI:
589 case OP_NOTMINPLUSI:
590 case OP_NOTQUERYI:
591 case OP_NOTMINQUERYI:
592 case OP_NOTPOSSTARI:
593 case OP_NOTPOSPLUSI:
594 case OP_NOTPOSQUERYI:
595 cc += 2;
596 #ifdef SUPPORT_UTF
597 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
598 #endif
599 return cc;
600
601 case OP_UPTO:
602 case OP_MINUPTO:
603 case OP_EXACT:
604 case OP_POSUPTO:
605 case OP_UPTOI:
606 case OP_MINUPTOI:
607 case OP_EXACTI:
608 case OP_POSUPTOI:
609 case OP_NOTUPTO:
610 case OP_NOTMINUPTO:
611 case OP_NOTEXACT:
612 case OP_NOTPOSUPTO:
613 case OP_NOTUPTOI:
614 case OP_NOTMINUPTOI:
615 case OP_NOTEXACTI:
616 case OP_NOTPOSUPTOI:
617 cc += 2 + IMM2_SIZE;
618 #ifdef SUPPORT_UTF
619 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
620 #endif
621 return cc;
622
623 case OP_NOTPROP:
624 case OP_PROP:
625 return cc + 1 + 2;
626
627 case OP_TYPEUPTO:
628 case OP_TYPEMINUPTO:
629 case OP_TYPEEXACT:
630 case OP_TYPEPOSUPTO:
631 case OP_REF:
632 case OP_REFI:
633 case OP_CREF:
634 case OP_NCREF:
635 case OP_RREF:
636 case OP_NRREF:
637 case OP_CLOSE:
638 cc += 1 + IMM2_SIZE;
639 return cc;
640
641 case OP_CRRANGE:
642 case OP_CRMINRANGE:
643 return cc + 1 + 2 * IMM2_SIZE;
644
645 case OP_CLASS:
646 case OP_NCLASS:
647 return cc + 1 + 32 / sizeof(pcre_uchar);
648
649 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
650 case OP_XCLASS:
651 return cc + GET(cc, 1);
652 #endif
653
654 case OP_RECURSE:
655 case OP_ASSERT:
656 case OP_ASSERT_NOT:
657 case OP_ASSERTBACK:
658 case OP_ASSERTBACK_NOT:
659 case OP_REVERSE:
660 case OP_ONCE:
661 case OP_ONCE_NC:
662 case OP_BRA:
663 case OP_BRAPOS:
664 case OP_COND:
665 case OP_SBRA:
666 case OP_SBRAPOS:
667 case OP_SCOND:
668 case OP_ALT:
669 case OP_KET:
670 case OP_KETRMAX:
671 case OP_KETRMIN:
672 case OP_KETRPOS:
673 return cc + 1 + LINK_SIZE;
674
675 case OP_CBRA:
676 case OP_CBRAPOS:
677 case OP_SCBRA:
678 case OP_SCBRAPOS:
679 return cc + 1 + LINK_SIZE + IMM2_SIZE;
680
681 case OP_MARK:
682 return cc + 1 + 2 + cc[1];
683
684 case OP_CALLOUT:
685 return cc + 2 + 2 * LINK_SIZE;
686
687 default:
688 return NULL;
689 }
690 }
691
/* Groups of case labels shared by get_private_data_length and
   set_private_data_ptrs. Those functions reserve one private data word
   for the _1 groups and two words for the _2A / _2B groups (the TYPE
   _2A / _2B groups reserve nothing when the repeated type is OP_ANYNL
   or OP_EXTUNI). */

/* Single character iterators, one word. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINPLUS: \
    case OP_QUERY: \
    case OP_MINQUERY: \
    case OP_MINSTARI: \
    case OP_MINPLUSI: \
    case OP_QUERYI: \
    case OP_MINQUERYI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINPLUS: \
    case OP_NOTQUERY: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: \
    case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: \
    case OP_NOTMINQUERYI:

/* Single character iterators, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_PLUS: \
    case OP_STARI: \
    case OP_PLUSI: \
    case OP_NOTSTAR: \
    case OP_NOTPLUS: \
    case OP_NOTSTARI: \
    case OP_NOTPLUSI:

/* Single character iterators with a count, two words. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_MINUPTO: \
    case OP_UPTOI: \
    case OP_MINUPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: \
    case OP_NOTMINUPTOI:

/* Character type iterators, one word. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

/* Character type iterators, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

/* Character type iterators with a count, two words. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
743
744 static int get_class_iterator_size(pcre_uchar *cc)
745 {
746 switch(*cc)
747 {
748 case OP_CRSTAR:
749 case OP_CRPLUS:
750 return 2;
751
752 case OP_CRMINSTAR:
753 case OP_CRMINPLUS:
754 case OP_CRQUERY:
755 case OP_CRMINQUERY:
756 return 1;
757
758 case OP_CRRANGE:
759 case OP_CRMINRANGE:
760 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
761 return 0;
762 return 2;
763
764 default:
765 return 0;
766 }
767 }
768
769 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
770 {
771 int private_data_length = 0;
772 pcre_uchar *alternative;
773 pcre_uchar *name;
774 pcre_uchar *end = NULL;
775 int space, size, i;
776 pcre_uint32 bracketlen;
777
778 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
779 while (cc < ccend)
780 {
781 space = 0;
782 size = 0;
783 bracketlen = 0;
784 switch(*cc)
785 {
786 case OP_SET_SOM:
787 common->has_set_som = TRUE;
788 cc += 1;
789 break;
790
791 case OP_REF:
792 case OP_REFI:
793 common->optimized_cbracket[GET2(cc, 1)] = 0;
794 cc += 1 + IMM2_SIZE;
795 break;
796
797 case OP_ASSERT:
798 case OP_ASSERT_NOT:
799 case OP_ASSERTBACK:
800 case OP_ASSERTBACK_NOT:
801 case OP_ONCE:
802 case OP_ONCE_NC:
803 case OP_BRAPOS:
804 case OP_SBRA:
805 case OP_SBRAPOS:
806 private_data_length += sizeof(sljit_sw);
807 bracketlen = 1 + LINK_SIZE;
808 break;
809
810 case OP_CBRAPOS:
811 case OP_SCBRAPOS:
812 private_data_length += sizeof(sljit_sw);
813 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
814 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
815 break;
816
817 case OP_COND:
818 case OP_SCOND:
819 /* Only AUTO_CALLOUT can insert this opcode. We do
820 not intend to support this case. */
821 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
822 return -1;
823
824 if (*cc == OP_COND)
825 {
826 /* Might be a hidden SCOND. */
827 alternative = cc + GET(cc, 1);
828 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
829 private_data_length += sizeof(sljit_sw);
830 }
831 else
832 private_data_length += sizeof(sljit_sw);
833 bracketlen = 1 + LINK_SIZE;
834 break;
835
836 case OP_CREF:
837 i = GET2(cc, 1);
838 common->optimized_cbracket[i] = 0;
839 cc += 1 + IMM2_SIZE;
840 break;
841
842 case OP_NCREF:
843 bracketlen = GET2(cc, 1);
844 name = (pcre_uchar *)common->name_table;
845 alternative = name;
846 for (i = 0; i < common->name_count; i++)
847 {
848 if (GET2(name, 0) == bracketlen) break;
849 name += common->name_entry_size;
850 }
851 SLJIT_ASSERT(i != common->name_count);
852
853 for (i = 0; i < common->name_count; i++)
854 {
855 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
856 common->optimized_cbracket[GET2(alternative, 0)] = 0;
857 alternative += common->name_entry_size;
858 }
859 bracketlen = 0;
860 cc += 1 + IMM2_SIZE;
861 break;
862
863 case OP_BRA:
864 bracketlen = 1 + LINK_SIZE;
865 break;
866
867 case OP_CBRA:
868 case OP_SCBRA:
869 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
870 break;
871
872 CASE_ITERATOR_PRIVATE_DATA_1
873 space = 1;
874 size = -2;
875 break;
876
877 CASE_ITERATOR_PRIVATE_DATA_2A
878 space = 2;
879 size = -2;
880 break;
881
882 CASE_ITERATOR_PRIVATE_DATA_2B
883 space = 2;
884 size = -(2 + IMM2_SIZE);
885 break;
886
887 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
888 space = 1;
889 size = 1;
890 break;
891
892 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
893 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
894 space = 2;
895 size = 1;
896 break;
897
898 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
899 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
900 space = 2;
901 size = 1 + IMM2_SIZE;
902 break;
903
904 case OP_CLASS:
905 case OP_NCLASS:
906 size += 1 + 32 / sizeof(pcre_uchar);
907 space = get_class_iterator_size(cc + size);
908 break;
909
910 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
911 case OP_XCLASS:
912 size = GET(cc, 1);
913 space = get_class_iterator_size(cc + size);
914 break;
915 #endif
916
917 case OP_RECURSE:
918 /* Set its value only once. */
919 if (common->recursive_head_ptr == 0)
920 {
921 common->recursive_head_ptr = common->ovector_start;
922 common->ovector_start += sizeof(sljit_sw);
923 }
924 cc += 1 + LINK_SIZE;
925 break;
926
927 case OP_CALLOUT:
928 if (common->capture_last_ptr == 0)
929 {
930 common->capture_last_ptr = common->ovector_start;
931 common->ovector_start += sizeof(sljit_sw);
932 }
933 cc += 2 + 2 * LINK_SIZE;
934 break;
935
936 case OP_MARK:
937 if (common->mark_ptr == 0)
938 {
939 common->mark_ptr = common->ovector_start;
940 common->ovector_start += sizeof(sljit_sw);
941 }
942 cc += 1 + 2 + cc[1];
943 break;
944
945 default:
946 cc = next_opcode(common, cc);
947 if (cc == NULL)
948 return -1;
949 break;
950 }
951
952 if (space > 0 && cc >= end)
953 private_data_length += sizeof(sljit_sw) * space;
954
955 if (size != 0)
956 {
957 if (size < 0)
958 {
959 cc += -size;
960 #ifdef SUPPORT_UTF
961 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
962 #endif
963 }
964 else
965 cc += size;
966 }
967
968 if (bracketlen != 0)
969 {
970 if (cc >= end)
971 {
972 end = bracketend(cc);
973 if (end[-1 - LINK_SIZE] == OP_KET)
974 end = NULL;
975 }
976 cc += bracketlen;
977 }
978 }
979 return private_data_length;
980 }
981
982 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
983 {
984 pcre_uchar *cc = common->start;
985 pcre_uchar *alternative;
986 pcre_uchar *end = NULL;
987 int space, size, bracketlen;
988
989 while (cc < ccend)
990 {
991 space = 0;
992 size = 0;
993 bracketlen = 0;
994 switch(*cc)
995 {
996 case OP_ASSERT:
997 case OP_ASSERT_NOT:
998 case OP_ASSERTBACK:
999 case OP_ASSERTBACK_NOT:
1000 case OP_ONCE:
1001 case OP_ONCE_NC:
1002 case OP_BRAPOS:
1003 case OP_SBRA:
1004 case OP_SBRAPOS:
1005 case OP_SCOND:
1006 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1007 private_data_ptr += sizeof(sljit_sw);
1008 bracketlen = 1 + LINK_SIZE;
1009 break;
1010
1011 case OP_CBRAPOS:
1012 case OP_SCBRAPOS:
1013 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1014 private_data_ptr += sizeof(sljit_sw);
1015 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1016 break;
1017
1018 case OP_COND:
1019 /* Might be a hidden SCOND. */
1020 alternative = cc + GET(cc, 1);
1021 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1022 {
1023 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1024 private_data_ptr += sizeof(sljit_sw);
1025 }
1026 bracketlen = 1 + LINK_SIZE;
1027 break;
1028
1029 case OP_BRA:
1030 bracketlen = 1 + LINK_SIZE;
1031 break;
1032
1033 case OP_CBRA:
1034 case OP_SCBRA:
1035 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1036 break;
1037
1038 CASE_ITERATOR_PRIVATE_DATA_1
1039 space = 1;
1040 size = -2;
1041 break;
1042
1043 CASE_ITERATOR_PRIVATE_DATA_2A
1044 space = 2;
1045 size = -2;
1046 break;
1047
1048 CASE_ITERATOR_PRIVATE_DATA_2B
1049 space = 2;
1050 size = -(2 + IMM2_SIZE);
1051 break;
1052
1053 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1054 space = 1;
1055 size = 1;
1056 break;
1057
1058 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1059 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1060 space = 2;
1061 size = 1;
1062 break;
1063
1064 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1065 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1066 space = 2;
1067 size = 1 + IMM2_SIZE;
1068 break;
1069
1070 case OP_CLASS:
1071 case OP_NCLASS:
1072 size += 1 + 32 / sizeof(pcre_uchar);
1073 space = get_class_iterator_size(cc + size);
1074 break;
1075
1076 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1077 case OP_XCLASS:
1078 size = GET(cc, 1);
1079 space = get_class_iterator_size(cc + size);
1080 break;
1081 #endif
1082
1083 default:
1084 cc = next_opcode(common, cc);
1085 SLJIT_ASSERT(cc != NULL);
1086 break;
1087 }
1088
1089 if (space > 0 && cc >= end)
1090 {
1091 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1092 private_data_ptr += sizeof(sljit_sw) * space;
1093 }
1094
1095 if (size != 0)
1096 {
1097 if (size < 0)
1098 {
1099 cc += -size;
1100 #ifdef SUPPORT_UTF
1101 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1102 #endif
1103 }
1104 else
1105 cc += size;
1106 }
1107
1108 if (bracketlen > 0)
1109 {
1110 if (cc >= end)
1111 {
1112 end = bracketend(cc);
1113 if (end[-1 - LINK_SIZE] == OP_KET)
1114 end = NULL;
1115 }
1116 cc += bracketlen;
1117 }
1118 }
1119 }
1120
1121 /* Returns with -1 if no need for frame. */
1122 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1123 {
1124 pcre_uchar *ccend = bracketend(cc);
1125 int length = 0;
1126 int possessive = 0;
1127 BOOL setsom_found = recursive;
1128 BOOL setmark_found = recursive;
1129 BOOL capture_last_found = recursive;
1130
1131 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1132 {
1133 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1134 /* This is correct regardless of common->capture_last_ptr. */
1135 capture_last_found = TRUE;
1136 }
1137
1138 cc = next_opcode(common, cc);
1139 SLJIT_ASSERT(cc != NULL);
1140 while (cc < ccend)
1141 switch(*cc)
1142 {
1143 case OP_SET_SOM:
1144 SLJIT_ASSERT(common->has_set_som);
1145 if (!setsom_found)
1146 {
1147 length += 2;
1148 setsom_found = TRUE;
1149 }
1150 cc += 1;
1151 break;
1152
1153 case OP_MARK:
1154 SLJIT_ASSERT(common->mark_ptr != 0);
1155 if (!setmark_found)
1156 {
1157 length += 2;
1158 setmark_found = TRUE;
1159 }
1160 cc += 1 + 2 + cc[1];
1161 break;
1162
1163 case OP_RECURSE:
1164 if (common->has_set_som && !setsom_found)
1165 {
1166 length += 2;
1167 setsom_found = TRUE;
1168 }
1169 if (common->mark_ptr != 0 && !setmark_found)
1170 {
1171 length += 2;
1172 setmark_found = TRUE;
1173 }
1174 if (common->capture_last_ptr != 0 && !capture_last_found)
1175 {
1176 length += 2;
1177 capture_last_found = TRUE;
1178 }
1179 cc += 1 + LINK_SIZE;
1180 break;
1181
1182 case OP_CBRA:
1183 case OP_CBRAPOS:
1184 case OP_SCBRA:
1185 case OP_SCBRAPOS:
1186 if (common->capture_last_ptr != 0 && !capture_last_found)
1187 {
1188 length += 2;
1189 capture_last_found = TRUE;
1190 }
1191 length += 3;
1192 cc += 1 + LINK_SIZE + IMM2_SIZE;
1193 break;
1194
1195 default:
1196 cc = next_opcode(common, cc);
1197 SLJIT_ASSERT(cc != NULL);
1198 break;
1199 }
1200
1201 /* Possessive quantifiers can use a special case. */
1202 if (SLJIT_UNLIKELY(possessive == length))
1203 return -1;
1204
1205 if (length > 0)
1206 return length + 1;
1207 return -1;
1208 }
1209
1210 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1211 {
1212 DEFINE_COMPILER;
1213 pcre_uchar *ccend = bracketend(cc);
1214 BOOL setsom_found = recursive;
1215 BOOL setmark_found = recursive;
1216 BOOL capture_last_found = recursive;
1217 int offset;
1218
1219 /* >= 1 + shortest item size (2) */
1220 SLJIT_UNUSED_ARG(stacktop);
1221 SLJIT_ASSERT(stackpos >= stacktop + 2);
1222
1223 stackpos = STACK(stackpos);
1224 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1225 cc = next_opcode(common, cc);
1226 SLJIT_ASSERT(cc != NULL);
1227 while (cc < ccend)
1228 switch(*cc)
1229 {
1230 case OP_SET_SOM:
1231 SLJIT_ASSERT(common->has_set_som);
1232 if (!setsom_found)
1233 {
1234 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1235 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1236 stackpos += (int)sizeof(sljit_sw);
1237 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1238 stackpos += (int)sizeof(sljit_sw);
1239 setsom_found = TRUE;
1240 }
1241 cc += 1;
1242 break;
1243
1244 case OP_MARK:
1245 SLJIT_ASSERT(common->mark_ptr != 0);
1246 if (!setmark_found)
1247 {
1248 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1249 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1250 stackpos += (int)sizeof(sljit_sw);
1251 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1252 stackpos += (int)sizeof(sljit_sw);
1253 setmark_found = TRUE;
1254 }
1255 cc += 1 + 2 + cc[1];
1256 break;
1257
1258 case OP_RECURSE:
1259 if (common->has_set_som && !setsom_found)
1260 {
1261 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1262 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1263 stackpos += (int)sizeof(sljit_sw);
1264 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1265 stackpos += (int)sizeof(sljit_sw);
1266 setsom_found = TRUE;
1267 }
1268 if (common->mark_ptr != 0 && !setmark_found)
1269 {
1270 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1271 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1272 stackpos += (int)sizeof(sljit_sw);
1273 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1274 stackpos += (int)sizeof(sljit_sw);
1275 setmark_found = TRUE;
1276 }
1277 if (common->capture_last_ptr != 0 && !capture_last_found)
1278 {
1279 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1280 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1281 stackpos += (int)sizeof(sljit_sw);
1282 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1283 stackpos += (int)sizeof(sljit_sw);
1284 capture_last_found = TRUE;
1285 }
1286 cc += 1 + LINK_SIZE;
1287 break;
1288
1289 case OP_CBRA:
1290 case OP_CBRAPOS:
1291 case OP_SCBRA:
1292 case OP_SCBRAPOS:
1293 if (common->capture_last_ptr != 0 && !capture_last_found)
1294 {
1295 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1296 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1297 stackpos += (int)sizeof(sljit_sw);
1298 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1299 stackpos += (int)sizeof(sljit_sw);
1300 capture_last_found = TRUE;
1301 }
1302 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1303 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1304 stackpos += (int)sizeof(sljit_sw);
1305 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1306 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1307 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1308 stackpos += (int)sizeof(sljit_sw);
1309 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1310 stackpos += (int)sizeof(sljit_sw);
1311
1312 cc += 1 + LINK_SIZE + IMM2_SIZE;
1313 break;
1314
1315 default:
1316 cc = next_opcode(common, cc);
1317 SLJIT_ASSERT(cc != NULL);
1318 break;
1319 }
1320
1321 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1322 SLJIT_ASSERT(stackpos == STACK(stacktop));
1323 }
1324
1325 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1326 {
1327 int private_data_length = 2;
1328 int size;
1329 pcre_uchar *alternative;
1330 /* Calculate the sum of the private machine words. */
1331 while (cc < ccend)
1332 {
1333 size = 0;
1334 switch(*cc)
1335 {
1336 case OP_ASSERT:
1337 case OP_ASSERT_NOT:
1338 case OP_ASSERTBACK:
1339 case OP_ASSERTBACK_NOT:
1340 case OP_ONCE:
1341 case OP_ONCE_NC:
1342 case OP_BRAPOS:
1343 case OP_SBRA:
1344 case OP_SBRAPOS:
1345 case OP_SCOND:
1346 private_data_length++;
1347 cc += 1 + LINK_SIZE;
1348 break;
1349
1350 case OP_CBRA:
1351 case OP_SCBRA:
1352 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1353 private_data_length++;
1354 cc += 1 + LINK_SIZE + IMM2_SIZE;
1355 break;
1356
1357 case OP_CBRAPOS:
1358 case OP_SCBRAPOS:
1359 private_data_length += 2;
1360 cc += 1 + LINK_SIZE + IMM2_SIZE;
1361 break;
1362
1363 case OP_COND:
1364 /* Might be a hidden SCOND. */
1365 alternative = cc + GET(cc, 1);
1366 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1367 private_data_length++;
1368 cc += 1 + LINK_SIZE;
1369 break;
1370
1371 CASE_ITERATOR_PRIVATE_DATA_1
1372 if (PRIVATE_DATA(cc))
1373 private_data_length++;
1374 cc += 2;
1375 #ifdef SUPPORT_UTF
1376 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1377 #endif
1378 break;
1379
1380 CASE_ITERATOR_PRIVATE_DATA_2A
1381 if (PRIVATE_DATA(cc))
1382 private_data_length += 2;
1383 cc += 2;
1384 #ifdef SUPPORT_UTF
1385 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1386 #endif
1387 break;
1388
1389 CASE_ITERATOR_PRIVATE_DATA_2B
1390 if (PRIVATE_DATA(cc))
1391 private_data_length += 2;
1392 cc += 2 + IMM2_SIZE;
1393 #ifdef SUPPORT_UTF
1394 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1395 #endif
1396 break;
1397
1398 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1399 if (PRIVATE_DATA(cc))
1400 private_data_length++;
1401 cc += 1;
1402 break;
1403
1404 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1405 if (PRIVATE_DATA(cc))
1406 private_data_length += 2;
1407 cc += 1;
1408 break;
1409
1410 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1411 if (PRIVATE_DATA(cc))
1412 private_data_length += 2;
1413 cc += 1 + IMM2_SIZE;
1414 break;
1415
1416 case OP_CLASS:
1417 case OP_NCLASS:
1418 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1419 case OP_XCLASS:
1420 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1421 #else
1422 size = 1 + 32 / (int)sizeof(pcre_uchar);
1423 #endif
1424 if (PRIVATE_DATA(cc))
1425 private_data_length += get_class_iterator_size(cc + size);
1426 cc += size;
1427 break;
1428
1429 default:
1430 cc = next_opcode(common, cc);
1431 SLJIT_ASSERT(cc != NULL);
1432 break;
1433 }
1434 }
1435 SLJIT_ASSERT(cc == ccend);
1436 return private_data_length;
1437 }
1438
1439 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1440 BOOL save, int stackptr, int stacktop)
1441 {
1442 DEFINE_COMPILER;
1443 int srcw[2];
1444 int count, size;
1445 BOOL tmp1next = TRUE;
1446 BOOL tmp1empty = TRUE;
1447 BOOL tmp2empty = TRUE;
1448 pcre_uchar *alternative;
1449 enum {
1450 start,
1451 loop,
1452 end
1453 } status;
1454
1455 status = save ? start : loop;
1456 stackptr = STACK(stackptr - 2);
1457 stacktop = STACK(stacktop - 1);
1458
1459 if (!save)
1460 {
1461 stackptr += sizeof(sljit_sw);
1462 if (stackptr < stacktop)
1463 {
1464 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1465 stackptr += sizeof(sljit_sw);
1466 tmp1empty = FALSE;
1467 }
1468 if (stackptr < stacktop)
1469 {
1470 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1471 stackptr += sizeof(sljit_sw);
1472 tmp2empty = FALSE;
1473 }
1474 /* The tmp1next must be TRUE in either way. */
1475 }
1476
1477 while (status != end)
1478 {
1479 count = 0;
1480 switch(status)
1481 {
1482 case start:
1483 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1484 count = 1;
1485 srcw[0] = common->recursive_head_ptr;
1486 status = loop;
1487 break;
1488
1489 case loop:
1490 if (cc >= ccend)
1491 {
1492 status = end;
1493 break;
1494 }
1495
1496 switch(*cc)
1497 {
1498 case OP_ASSERT:
1499 case OP_ASSERT_NOT:
1500 case OP_ASSERTBACK:
1501 case OP_ASSERTBACK_NOT:
1502 case OP_ONCE:
1503 case OP_ONCE_NC:
1504 case OP_BRAPOS:
1505 case OP_SBRA:
1506 case OP_SBRAPOS:
1507 case OP_SCOND:
1508 count = 1;
1509 srcw[0] = PRIVATE_DATA(cc);
1510 SLJIT_ASSERT(srcw[0] != 0);
1511 cc += 1 + LINK_SIZE;
1512 break;
1513
1514 case OP_CBRA:
1515 case OP_SCBRA:
1516 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1517 {
1518 count = 1;
1519 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1520 }
1521 cc += 1 + LINK_SIZE + IMM2_SIZE;
1522 break;
1523
1524 case OP_CBRAPOS:
1525 case OP_SCBRAPOS:
1526 count = 2;
1527 srcw[0] = PRIVATE_DATA(cc);
1528 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1529 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1530 cc += 1 + LINK_SIZE + IMM2_SIZE;
1531 break;
1532
1533 case OP_COND:
1534 /* Might be a hidden SCOND. */
1535 alternative = cc + GET(cc, 1);
1536 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1537 {
1538 count = 1;
1539 srcw[0] = PRIVATE_DATA(cc);
1540 SLJIT_ASSERT(srcw[0] != 0);
1541 }
1542 cc += 1 + LINK_SIZE;
1543 break;
1544
1545 CASE_ITERATOR_PRIVATE_DATA_1
1546 if (PRIVATE_DATA(cc))
1547 {
1548 count = 1;
1549 srcw[0] = PRIVATE_DATA(cc);
1550 }
1551 cc += 2;
1552 #ifdef SUPPORT_UTF
1553 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1554 #endif
1555 break;
1556
1557 CASE_ITERATOR_PRIVATE_DATA_2A
1558 if (PRIVATE_DATA(cc))
1559 {
1560 count = 2;
1561 srcw[0] = PRIVATE_DATA(cc);
1562 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1563 }
1564 cc += 2;
1565 #ifdef SUPPORT_UTF
1566 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1567 #endif
1568 break;
1569
1570 CASE_ITERATOR_PRIVATE_DATA_2B
1571 if (PRIVATE_DATA(cc))
1572 {
1573 count = 2;
1574 srcw[0] = PRIVATE_DATA(cc);
1575 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1576 }
1577 cc += 2 + IMM2_SIZE;
1578 #ifdef SUPPORT_UTF
1579 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1580 #endif
1581 break;
1582
1583 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1584 if (PRIVATE_DATA(cc))
1585 {
1586 count = 1;
1587 srcw[0] = PRIVATE_DATA(cc);
1588 }
1589 cc += 1;
1590 break;
1591
1592 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1593 if (PRIVATE_DATA(cc))
1594 {
1595 count = 2;
1596 srcw[0] = PRIVATE_DATA(cc);
1597 srcw[1] = srcw[0] + sizeof(sljit_sw);
1598 }
1599 cc += 1;
1600 break;
1601
1602 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1603 if (PRIVATE_DATA(cc))
1604 {
1605 count = 2;
1606 srcw[0] = PRIVATE_DATA(cc);
1607 srcw[1] = srcw[0] + sizeof(sljit_sw);
1608 }
1609 cc += 1 + IMM2_SIZE;
1610 break;
1611
1612 case OP_CLASS:
1613 case OP_NCLASS:
1614 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1615 case OP_XCLASS:
1616 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1617 #else
1618 size = 1 + 32 / (int)sizeof(pcre_uchar);
1619 #endif
1620 if (PRIVATE_DATA(cc))
1621 switch(get_class_iterator_size(cc + size))
1622 {
1623 case 1:
1624 count = 1;
1625 srcw[0] = PRIVATE_DATA(cc);
1626 break;
1627
1628 case 2:
1629 count = 2;
1630 srcw[0] = PRIVATE_DATA(cc);
1631 srcw[1] = srcw[0] + sizeof(sljit_sw);
1632 break;
1633
1634 default:
1635 SLJIT_ASSERT_STOP();
1636 break;
1637 }
1638 cc += size;
1639 break;
1640
1641 default:
1642 cc = next_opcode(common, cc);
1643 SLJIT_ASSERT(cc != NULL);
1644 break;
1645 }
1646 break;
1647
1648 case end:
1649 SLJIT_ASSERT_STOP();
1650 break;
1651 }
1652
1653 while (count > 0)
1654 {
1655 count--;
1656 if (save)
1657 {
1658 if (tmp1next)
1659 {
1660 if (!tmp1empty)
1661 {
1662 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1663 stackptr += sizeof(sljit_sw);
1664 }
1665 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1666 tmp1empty = FALSE;
1667 tmp1next = FALSE;
1668 }
1669 else
1670 {
1671 if (!tmp2empty)
1672 {
1673 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1674 stackptr += sizeof(sljit_sw);
1675 }
1676 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1677 tmp2empty = FALSE;
1678 tmp1next = TRUE;
1679 }
1680 }
1681 else
1682 {
1683 if (tmp1next)
1684 {
1685 SLJIT_ASSERT(!tmp1empty);
1686 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1687 tmp1empty = stackptr >= stacktop;
1688 if (!tmp1empty)
1689 {
1690 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1691 stackptr += sizeof(sljit_sw);
1692 }
1693 tmp1next = FALSE;
1694 }
1695 else
1696 {
1697 SLJIT_ASSERT(!tmp2empty);
1698 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1699 tmp2empty = stackptr >= stacktop;
1700 if (!tmp2empty)
1701 {
1702 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1703 stackptr += sizeof(sljit_sw);
1704 }
1705 tmp1next = TRUE;
1706 }
1707 }
1708 }
1709 }
1710
1711 if (save)
1712 {
1713 if (tmp1next)
1714 {
1715 if (!tmp1empty)
1716 {
1717 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1718 stackptr += sizeof(sljit_sw);
1719 }
1720 if (!tmp2empty)
1721 {
1722 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1723 stackptr += sizeof(sljit_sw);
1724 }
1725 }
1726 else
1727 {
1728 if (!tmp2empty)
1729 {
1730 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1731 stackptr += sizeof(sljit_sw);
1732 }
1733 if (!tmp1empty)
1734 {
1735 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1736 stackptr += sizeof(sljit_sw);
1737 }
1738 }
1739 }
1740 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1741 }
1742
1743 #undef CASE_ITERATOR_PRIVATE_DATA_1
1744 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1745 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1746 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1747 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1748 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1749
1750 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1751 {
1752 return (value & (value - 1)) == 0;
1753 }
1754
1755 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1756 {
1757 while (list)
1758 {
1759 /* sljit_set_label is clever enough to do nothing
1760 if either the jump or the label is NULL. */
1761 SET_LABEL(list->jump, label);
1762 list = list->next;
1763 }
1764 }
1765
1766 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1767 {
1768 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1769 if (list_item)
1770 {
1771 list_item->next = *list;
1772 list_item->jump = jump;
1773 *list = list_item;
1774 }
1775 }
1776
1777 static void add_stub(compiler_common *common, enum stub_types type, int data, struct sljit_jump *start)
1778 {
1779 DEFINE_COMPILER;
1780 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1781
1782 if (list_item)
1783 {
1784 list_item->type = type;
1785 list_item->data = data;
1786 list_item->start = start;
1787 list_item->quit = LABEL();
1788 list_item->next = common->stubs;
1789 common->stubs = list_item;
1790 }
1791 }
1792
1793 static void flush_stubs(compiler_common *common)
1794 {
1795 DEFINE_COMPILER;
1796 stub_list* list_item = common->stubs;
1797
1798 while (list_item)
1799 {
1800 JUMPHERE(list_item->start);
1801 switch(list_item->type)
1802 {
1803 case stack_alloc:
1804 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1805 break;
1806 }
1807 JUMPTO(SLJIT_JUMP, list_item->quit);
1808 list_item = list_item->next;
1809 }
1810 common->stubs = NULL;
1811 }
1812
1813 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1814 {
1815 DEFINE_COMPILER;
1816
1817 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1818 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1819 }
1820
1821 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1822 {
1823 /* May destroy all locals and registers except TMP2. */
1824 DEFINE_COMPILER;
1825
1826 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1827 #ifdef DESTROY_REGISTERS
1828 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1829 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1830 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1832 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1833 #endif
1834 add_stub(common, stack_alloc, 0, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1835 }
1836
1837 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1838 {
1839 DEFINE_COMPILER;
1840 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1841 }
1842
1843 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1844 {
1845 DEFINE_COMPILER;
1846 struct sljit_label *loop;
1847 int i;
1848 /* At this point we can freely use all temporary registers. */
1849 /* TMP1 returns with begin - 1. */
1850 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1851 if (length < 8)
1852 {
1853 for (i = 0; i < length; i++)
1854 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1855 }
1856 else
1857 {
1858 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START - sizeof(sljit_sw));
1859 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length);
1860 loop = LABEL();
1861 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1862 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1863 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1864 }
1865 }
1866
1867 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1868 {
1869 DEFINE_COMPILER;
1870 struct sljit_label *loop;
1871 struct sljit_jump *early_quit;
1872
1873 /* At this point we can freely use all registers. */
1874 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1875 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1876
1877 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
1878 if (common->mark_ptr != 0)
1879 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1880 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
1881 if (common->mark_ptr != 0)
1882 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
1883 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1884 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1885 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1886 /* Unlikely, but possible */
1887 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
1888 loop = LABEL();
1889 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
1890 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
1891 /* Copy the integer value to the output buffer */
1892 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1893 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1894 #endif
1895 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1896 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1897 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1898 JUMPHERE(early_quit);
1899
1900 /* Calculate the return value, which is the maximum ovector value. */
1901 if (topbracket > 1)
1902 {
1903 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
1904 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
1905
1906 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1907 loop = LABEL();
1908 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
1909 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1910 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1911 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
1912 }
1913 else
1914 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1915 }
1916
1917 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1918 {
1919 DEFINE_COMPILER;
1920
1921 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1922 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1923
1924 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
1925 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1926 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offset_count));
1927 CMPTO(SLJIT_C_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
1928
1929 /* Store match begin and end. */
1930 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1931 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1932 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1933 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1934 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1935 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1936 #endif
1937 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
1938
1939 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
1940 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1941 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
1942 #endif
1943 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
1944
1945 JUMPTO(SLJIT_JUMP, quit);
1946 }
1947
1948 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
1949 {
1950 /* May destroy TMP1. */
1951 DEFINE_COMPILER;
1952 struct sljit_jump *jump;
1953
1954 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
1955 {
1956 /* The value of -1 must be kept for start_used_ptr! */
1957 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
1958 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
1959 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
1960 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
1961 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1962 JUMPHERE(jump);
1963 }
1964 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
1965 {
1966 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1967 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
1968 JUMPHERE(jump);
1969 }
1970 }
1971
1972 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
1973 {
1974 /* Detects if the character has an othercase. */
1975 unsigned int c;
1976
1977 #ifdef SUPPORT_UTF
1978 if (common->utf)
1979 {
1980 GETCHAR(c, cc);
1981 if (c > 127)
1982 {
1983 #ifdef SUPPORT_UCP
1984 return c != UCD_OTHERCASE(c);
1985 #else
1986 return FALSE;
1987 #endif
1988 }
1989 #ifndef COMPILE_PCRE8
1990 return common->fcc[c] != c;
1991 #endif
1992 }
1993 else
1994 #endif
1995 c = *cc;
1996 return MAX_255(c) ? common->fcc[c] != c : FALSE;
1997 }
1998
1999 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2000 {
2001 /* Returns with the othercase. */
2002 #ifdef SUPPORT_UTF
2003 if (common->utf && c > 127)
2004 {
2005 #ifdef SUPPORT_UCP
2006 return UCD_OTHERCASE(c);
2007 #else
2008 return c;
2009 #endif
2010 }
2011 #endif
2012 return TABLE_GET(c, common->fcc, c);
2013 }
2014
2015 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2016 {
2017 /* Detects if the character and its othercase has only 1 bit difference. */
2018 unsigned int c, oc, bit;
2019 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2020 int n;
2021 #endif
2022
2023 #ifdef SUPPORT_UTF
2024 if (common->utf)
2025 {
2026 GETCHAR(c, cc);
2027 if (c <= 127)
2028 oc = common->fcc[c];
2029 else
2030 {
2031 #ifdef SUPPORT_UCP
2032 oc = UCD_OTHERCASE(c);
2033 #else
2034 oc = c;
2035 #endif
2036 }
2037 }
2038 else
2039 {
2040 c = *cc;
2041 oc = TABLE_GET(c, common->fcc, c);
2042 }
2043 #else
2044 c = *cc;
2045 oc = TABLE_GET(c, common->fcc, c);
2046 #endif
2047
2048 SLJIT_ASSERT(c != oc);
2049
2050 bit = c ^ oc;
2051 /* Optimized for English alphabet. */
2052 if (c <= 127 && bit == 0x20)
2053 return (0 << 8) | 0x20;
2054
2055 /* Since c != oc, they must have at least 1 bit difference. */
2056 if (!is_powerof2(bit))
2057 return 0;
2058
2059 #if defined COMPILE_PCRE8
2060
2061 #ifdef SUPPORT_UTF
2062 if (common->utf && c > 127)
2063 {
2064 n = GET_EXTRALEN(*cc);
2065 while ((bit & 0x3f) == 0)
2066 {
2067 n--;
2068 bit >>= 6;
2069 }
2070 return (n << 8) | bit;
2071 }
2072 #endif /* SUPPORT_UTF */
2073 return (0 << 8) | bit;
2074
2075 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2076
2077 #ifdef SUPPORT_UTF
2078 if (common->utf && c > 65535)
2079 {
2080 if (bit >= (1 << 10))
2081 bit >>= 10;
2082 else
2083 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2084 }
2085 #endif /* SUPPORT_UTF */
2086 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2087
2088 #endif /* COMPILE_PCRE[8|16|32] */
2089 }
2090
2091 static void check_partial(compiler_common *common, BOOL force)
2092 {
2093 /* Checks whether a partial matching is occured. Does not modify registers. */
2094 DEFINE_COMPILER;
2095 struct sljit_jump *jump = NULL;
2096
2097 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2098
2099 if (common->mode == JIT_COMPILE)
2100 return;
2101
2102 if (!force)
2103 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2104 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2105 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2106
2107 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2108 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2109 else
2110 {
2111 if (common->partialmatchlabel != NULL)
2112 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2113 else
2114 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2115 }
2116
2117 if (jump != NULL)
2118 JUMPHERE(jump);
2119 }
2120
2121 static struct sljit_jump *check_str_end(compiler_common *common)
2122 {
2123 /* Does not affect registers. Usually used in a tight spot. */
2124 DEFINE_COMPILER;
2125 struct sljit_jump *jump;
2126 struct sljit_jump *nohit;
2127 struct sljit_jump *return_value;
2128
2129 if (common->mode == JIT_COMPILE)
2130 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2131
2132 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2133 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2134 {
2135 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2136 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2137 JUMPHERE(nohit);
2138 return_value = JUMP(SLJIT_JUMP);
2139 }
2140 else
2141 {
2142 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2143 if (common->partialmatchlabel != NULL)
2144 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2145 else
2146 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2147 }
2148 JUMPHERE(jump);
2149 return return_value;
2150 }
2151
2152 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2153 {
2154 DEFINE_COMPILER;
2155 struct sljit_jump *jump;
2156
2157 if (common->mode == JIT_COMPILE)
2158 {
2159 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2160 return;
2161 }
2162
2163 /* Partial matching mode. */
2164 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2165 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2166 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2167 {
2168 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2169 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2170 }
2171 else
2172 {
2173 if (common->partialmatchlabel != NULL)
2174 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2175 else
2176 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2177 }
2178 JUMPHERE(jump);
2179 }
2180
2181 static void read_char(compiler_common *common)
2182 {
2183 /* Reads the character into TMP1, updates STR_PTR.
2184 Does not check STR_END. TMP2 Destroyed. */
2185 DEFINE_COMPILER;
2186 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2187 struct sljit_jump *jump;
2188 #endif
2189
2190 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2191 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2192 if (common->utf)
2193 {
2194 #if defined COMPILE_PCRE8
2195 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2196 #elif defined COMPILE_PCRE16
2197 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2198 #endif /* COMPILE_PCRE[8|16] */
2199 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2200 JUMPHERE(jump);
2201 }
2202 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2203 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2204 }
2205
2206 static void peek_char(compiler_common *common)
2207 {
2208 /* Reads the character into TMP1, keeps STR_PTR.
2209 Does not check STR_END. TMP2 Destroyed. */
2210 DEFINE_COMPILER;
2211 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2212 struct sljit_jump *jump;
2213 #endif
2214
2215 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2216 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2217 if (common->utf)
2218 {
2219 #if defined COMPILE_PCRE8
2220 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2221 #elif defined COMPILE_PCRE16
2222 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2223 #endif /* COMPILE_PCRE[8|16] */
2224 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2225 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2226 JUMPHERE(jump);
2227 }
2228 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2229 }
2230
2231 static void read_char8_type(compiler_common *common)
2232 {
2233 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2234 DEFINE_COMPILER;
2235 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2236 struct sljit_jump *jump;
2237 #endif
2238
2239 #ifdef SUPPORT_UTF
2240 if (common->utf)
2241 {
2242 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2243 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2244 #if defined COMPILE_PCRE8
2245 /* This can be an extra read in some situations, but hopefully
2246 it is needed in most cases. */
2247 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2248 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2249 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2250 JUMPHERE(jump);
2251 #elif defined COMPILE_PCRE16
2252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2253 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2254 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2255 JUMPHERE(jump);
2256 /* Skip low surrogate if necessary. */
2257 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2258 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2259 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2260 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2261 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2262 #elif defined COMPILE_PCRE32
2263 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2264 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2265 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2266 JUMPHERE(jump);
2267 #endif /* COMPILE_PCRE[8|16|32] */
2268 return;
2269 }
2270 #endif /* SUPPORT_UTF */
2271 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2272 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2273 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2274 /* The ctypes array contains only 256 values. */
2275 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2276 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2277 #endif
2278 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2279 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2280 JUMPHERE(jump);
2281 #endif
2282 }
2283
2284 static void skip_char_back(compiler_common *common)
2285 {
2286 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2287 DEFINE_COMPILER;
2288 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2289 #if defined COMPILE_PCRE8
2290 struct sljit_label *label;
2291
2292 if (common->utf)
2293 {
2294 label = LABEL();
2295 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2296 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2297 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2298 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2299 return;
2300 }
2301 #elif defined COMPILE_PCRE16
2302 if (common->utf)
2303 {
2304 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2305 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2306 /* Skip low surrogate if necessary. */
2307 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2308 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2309 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2310 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2311 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2312 return;
2313 }
2314 #endif /* COMPILE_PCRE[8|16] */
2315 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2316 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2317 }
2318
2319 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2320 {
2321 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2322 DEFINE_COMPILER;
2323
2324 if (nltype == NLTYPE_ANY)
2325 {
2326 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2327 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2328 }
2329 else if (nltype == NLTYPE_ANYCRLF)
2330 {
2331 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2332 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2333 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2334 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2335 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2336 }
2337 else
2338 {
2339 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2340 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2341 }
2342 }
2343
2344 #ifdef SUPPORT_UTF
2345
2346 #if defined COMPILE_PCRE8
2347 static void do_utfreadchar(compiler_common *common)
2348 {
2349 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2350 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2351 DEFINE_COMPILER;
2352 struct sljit_jump *jump;
2353
2354 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2355 /* Searching for the first zero. */
2356 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2357 jump = JUMP(SLJIT_C_NOT_ZERO);
2358 /* Two byte sequence. */
2359 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2360 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2361 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2362 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2363 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2364 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2365 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2366 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2367 JUMPHERE(jump);
2368
2369 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2370 jump = JUMP(SLJIT_C_NOT_ZERO);
2371 /* Three byte sequence. */
2372 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2373 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2374 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2375 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2376 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2377 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2378 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2379 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2380 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2381 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2382 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2383 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2384 JUMPHERE(jump);
2385
2386 /* Four byte sequence. */
2387 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2388 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2389 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2390 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2391 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2392 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2393 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2394 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2395 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2396 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2397 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2398 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2399 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2400 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2401 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2402 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2403 }
2404
2405 static void do_utfreadtype8(compiler_common *common)
2406 {
2407 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2408 of the character (>= 0xc0). Return value in TMP1. */
2409 DEFINE_COMPILER;
2410 struct sljit_jump *jump;
2411 struct sljit_jump *compare;
2412
2413 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2414
2415 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2416 jump = JUMP(SLJIT_C_NOT_ZERO);
2417 /* Two byte sequence. */
2418 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2419 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2420 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2421 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2422 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2423 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2424 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2425 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2426 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2427
2428 JUMPHERE(compare);
2429 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2430 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2431 JUMPHERE(jump);
2432
2433 /* We only have types for characters less than 256. */
2434 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2435 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2436 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2437 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2438 }
2439
2440 #elif defined COMPILE_PCRE16
2441
2442 static void do_utfreadchar(compiler_common *common)
2443 {
2444 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2445 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2446 DEFINE_COMPILER;
2447 struct sljit_jump *jump;
2448
2449 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2450 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2451 /* Do nothing, only return. */
2452 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2453
2454 JUMPHERE(jump);
2455 /* Combine two 16 bit characters. */
2456 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2457 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2458 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2459 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2460 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2461 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2462 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2463 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2464 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2465 }
2466
2467 #endif /* COMPILE_PCRE[8|16] */
2468
2469 #endif /* SUPPORT_UTF */
2470
2471 #ifdef SUPPORT_UCP
2472
2473 /* UCD_BLOCK_SIZE must be 128 (see the assert below). */
2474 #define UCD_BLOCK_MASK 127
2475 #define UCD_BLOCK_SHIFT 7
2476
2477 static void do_getucd(compiler_common *common)
2478 {
2479 /* Search the UCD record for the character comes in TMP1.
2480 Returns chartype in TMP1 and UCD offset in TMP2. */
2481 DEFINE_COMPILER;
2482
2483 SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);
2484
2485 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2486 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2487 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
2488 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
2489 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
2490 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
2491 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
2492 OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
2493 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
2494 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
2495 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2496 }
2497 #endif
2498
2499 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2500 {
/* Emits the entry code of the main matching loop and returns the label
(mainloop) that sits in front of the "advance STR_PTR by one character" code.
The first pass skips that advance: the unconditional `start` jump emitted
before the label is resolved only after the advance code.

When firstline is set, the end of the first line is located first and stored
in the first_line_end local; STR_PTR is kept in TMP3 during that scan.

newlinecheck is enabled when neither hascrorlf nor firstline is set and the
newline type is ANY, ANYCRLF or a fixed value above 255. In that case a unit
equal to the high byte of common->newline is routed to newlinelabel, which
steps over it and, if the next unit equals the low byte, over that one too. */
2501 DEFINE_COMPILER;
2502 struct sljit_label *mainloop;
2503 struct sljit_label *newlinelabel = NULL;
2504 struct sljit_jump *start;
2505 struct sljit_jump *end = NULL;
2506 struct sljit_jump *nl = NULL;
2507 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2508 struct sljit_jump *singlechar;
2509 #endif
2510 jump_list *newline = NULL;
2511 BOOL newlinecheck = FALSE;
2512 BOOL readuchar = FALSE;
2513
2514 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2515 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2516 newlinecheck = TRUE;
2517
2518 if (firstline)
2519 {
2520 /* Search for the end of the first line. */
2521 SLJIT_ASSERT(common->first_line_end != 0);
2522 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2523
2524 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2525 {
/* Two-unit fixed newline: compare each adjacent pair of units with the high
and low byte of common->newline. mainloop is only a scratch label here; it is
reassigned further down. */
2526 mainloop = LABEL();
2527 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2528 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2529 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2530 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2531 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2532 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2533 JUMPHERE(end);
/* STR_PTR is one unit past the first unit of the pair (or at/after STR_END). */
2534 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2535 }
2536 else
2537 {
2538 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2539 mainloop = LABEL();
2540 /* Continual stores does not cause data dependency. */
2541 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2542 read_char(common);
/* Leaves the loop through the `newline` list when the character is a newline;
first_line_end then still holds the position stored before the read. */
2543 check_newlinechar(common, common->nltype, &newline, TRUE);
2544 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2545 JUMPHERE(end);
/* No newline was found: the first line ends where the scan stopped. */
2546 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2547 set_jumps(newline, LABEL());
2548 }
2549
/* Restore the position saved before the scan. */
2550 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2551 }
2552
/* The first pass must not advance STR_PTR, so it jumps over the code below. */
2553 start = JUMP(SLJIT_JUMP);
2554
2555 if (newlinecheck)
2556 {
/* Reached from the main loop when the current unit equals the high byte of
common->newline: step over it, and over the next unit too if that one equals
the low byte (the 0/1 comparison result is scaled to a unit size and added). */
2557 newlinelabel = LABEL();
2558 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2559 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2560 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2561 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2562 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2563 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2564 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2565 #endif
2566 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2567 nl = JUMP(SLJIT_JUMP);
2568 }
2569
/* This is the label returned to the caller. */
2570 mainloop = LABEL();
2571
2572 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2573 #ifdef SUPPORT_UTF
2574 if (common->utf) readuchar = TRUE;
2575 #endif
2576 if (newlinecheck) readuchar = TRUE;
2577
/* The current unit is only loaded when the UTF skip or the newline test below
needs it. */
2578 if (readuchar)
2579 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2580
2581 if (newlinecheck)
2582 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2583
2584 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2585 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2586 #if defined COMPILE_PCRE8
2587 if (common->utf)
2588 {
/* For a first byte >= 0xc0, add the value read from utf8_table4 for it. */
2589 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2590 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2591 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2592 JUMPHERE(singlechar);
2593 }
2594 #elif defined COMPILE_PCRE16
2595 if (common->utf)
2596 {
/* For a high surrogate ((unit & 0xfc00) == 0xd800), skip one more unit. */
2597 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2598 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2599 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2600 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2601 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2602 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2603 JUMPHERE(singlechar);
2604 }
2605 #endif /* COMPILE_PCRE[8|16] */
2606 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
/* Common continuation point: the first pass and both exits of the newline
handler arrive here. */
2607 JUMPHERE(start);
2608
2609 if (newlinecheck)
2610 {
2611 JUMPHERE(end);
2612 JUMPHERE(nl);
2613 }
2614
2615 return mainloop;
2616 }
2617
2618 #define MAX_N_CHARS 3
2619
2620 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2621 {
/* Tries to emit a scanning loop that moves STR_PTR forward to the first
position where the leading two or three literal code units of the pattern
match.

The literal units are collected from the compiled pattern into chars[] as
pairs: chars[2 * i] is the unit to compare with (case bit already ORed in) and
chars[2 * i + 1] is the case bit that is ORed into the subject unit before the
comparison (0 for an exact comparison).

Returns FALSE without emitting anything when the pattern has alternatives,
when fewer than two units could be collected, or when
char_get_othercase_bit() returns 0 for a caseless character. Returns TRUE
after the loop has been emitted. */
2622 DEFINE_COMPILER;
2623 struct sljit_label *start;
2624 struct sljit_jump *quit;
2625 pcre_uint32 chars[MAX_N_CHARS * 2];
2626 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2627 int location = 0;
2628 pcre_int32 len, c, bit, caseless;
2629 int must_stop;
2630
2631 /* We do not support alternatives now. */
2632 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2633 return FALSE;
2634
2635 while (TRUE)
2636 {
/* must_stop: 0 = collect this character and go on, 1 = collect this character
and stop, 2 = stop without collecting anything. */
2637 caseless = 0;
2638 must_stop = 1;
2639 switch(*cc)
2640 {
2641 case OP_CHAR:
2642 must_stop = 0;
2643 cc++;
2644 break;
2645
2646 case OP_CHARI:
2647 caseless = 1;
2648 must_stop = 0;
2649 cc++;
2650 break;
2651
2652 case OP_SOD:
2653 case OP_SOM:
2654 case OP_SET_SOM:
2655 case OP_NOT_WORD_BOUNDARY:
2656 case OP_WORD_BOUNDARY:
2657 case OP_EODN:
2658 case OP_EOD:
2659 case OP_CIRC:
2660 case OP_CIRCM:
2661 case OP_DOLL:
2662 case OP_DOLLM:
2663 /* Zero width assertions. */
2664 cc++;
2665 continue;
2666
/* Repeats: the character is collected once and collecting stops after it
(must_stop stays 1). */
2667 case OP_PLUS:
2668 case OP_MINPLUS:
2669 case OP_POSPLUS:
2670 cc++;
2671 break;
2672
2673 case OP_EXACT:
2674 cc += 1 + IMM2_SIZE;
2675 break;
2676
2677 case OP_PLUSI:
2678 case OP_MINPLUSI:
2679 case OP_POSPLUSI:
2680 caseless = 1;
2681 cc++;
2682 break;
2683
2684 case OP_EXACTI:
2685 caseless = 1;
2686 cc += 1 + IMM2_SIZE;
2687 break;
2688
2689 default:
2690 must_stop = 2;
2691 break;
2692 }
2693
2694 if (must_stop == 2)
2695 break;
2696
/* len is the number of code units of the character at cc. */
2697 len = 1;
2698 #ifdef SUPPORT_UTF
2699 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2700 #endif
2701
2702 if (caseless && char_has_othercase(common, cc))
2703 {
2704 caseless = char_get_othercase_bit(common, cc);
2705 if (caseless == 0)
2706 return FALSE;
/* Repack as (bit << 8) | n, where the unit copied while len == n receives the
bit. NOTE(review): the layout of the char_get_othercase_bit() result (bit in
the low byte, unit position above it) is inferred from the shifts used here;
confirm against that function. */
2707 #ifdef COMPILE_PCRE8
2708 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2709 #else
2710 if ((caseless & 0x100) != 0)
2711 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2712 else
2713 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2714 #endif
2715 }
2716 else
2717 caseless = 0;
2718
/* Copy the units of this character while there is room in chars[]. */
2719 while (len > 0 && location < MAX_N_CHARS * 2)
2720 {
2721 c = *cc;
2722 bit = 0;
2723 if (len == (caseless & 0xff))
2724 {
2725 bit = caseless >> 8;
2726 c |= bit;
2727 }
2728
2729 chars[location] = c;
2730 chars[location + 1] = bit;
2731
2732 len--;
2733 location += 2;
2734 cc++;
2735 }
2736
2737 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2738 break;
2739 }
2740
2741 /* At least two characters are required. */
2742 if (location < 2 * 2)
2743 return FALSE;
2744
/* (location >> 1) units were collected. STR_END is lowered by one less than
that so the loads at offsets 0 and 1 (and the later load of the third unit)
stay below the real end. With firstline the limit is derived from
first_line_end and the real STR_END is parked in TMP3. */
2745 if (firstline)
2746 {
2747 SLJIT_ASSERT(common->first_line_end != 0);
2748 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2749 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2750 }
2751 else
2752 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2753
2754 start = LABEL();
2755 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2756
/* Units 0 and 1 are loaded up front and STR_PTR is advanced immediately, so
every failed comparison can jump straight back to start. */
2757 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2758 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2759 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2760 if (chars[1] != 0)
2761 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2762 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
/* Third unit, if collected: STR_PTR has already moved, so it is at offset 1. */
2763 if (location > 2 * 2)
2764 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2765 if (chars[3] != 0)
2766 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2767 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2768 if (location > 2 * 2)
2769 {
2770 if (chars[5] != 0)
2771 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2772 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2773 }
/* All units matched: move STR_PTR back to the first of them. */
2774 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2775
2776 JUMPHERE(quit);
2777
/* Put the real end of the subject back. */
2778 if (firstline)
2779 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2780 else
2781 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2782 return TRUE;
2783 }
2784
2785 #undef MAX_N_CHARS
2786
2787 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2788 {
2789 DEFINE_COMPILER;
2790 struct sljit_label *start;
2791 struct sljit_jump *quit;
2792 struct sljit_jump *found;
2793 pcre_uchar oc, bit;
2794
2795 if (firstline)
2796 {
2797 SLJIT_ASSERT(common->first_line_end != 0);
2798 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2799 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2800 }
2801
2802 start = LABEL();
2803 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2804 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2805
2806 oc = first_char;
2807 if (caseless)
2808 {
2809 oc = TABLE_GET(first_char, common->fcc, first_char);
2810 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2811 if (first_char > 127 && common->utf)
2812 oc = UCD_OTHERCASE(first_char);
2813 #endif
2814 }
2815 if (first_char == oc)
2816 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2817 else
2818 {
2819 bit = first_char ^ oc;
2820 if (is_powerof2(bit))
2821 {
2822 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2823 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2824 }
2825 else
2826 {
2827 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2828 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2829 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2830 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2831 found = JUMP(SLJIT_C_NOT_ZERO);
2832 }
2833 }
2834
2835 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2836 JUMPTO(SLJIT_JUMP, start);
2837 JUMPHERE(found);
2838 JUMPHERE(quit);
2839
2840 if (firstline)
2841 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2842 }
2843
2844 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2845 {
2846 DEFINE_COMPILER;
2847 struct sljit_label *loop;
2848 struct sljit_jump *lastchar;
2849 struct sljit_jump *firstchar;
2850 struct sljit_jump *quit;
2851 struct sljit_jump *foundcr = NULL;
2852 struct sljit_jump *notfoundnl;
2853 jump_list *newline = NULL;
2854
2855 if (firstline)
2856 {
2857 SLJIT_ASSERT(common->first_line_end != 0);
2858 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2859 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2860 }
2861
2862 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2863 {
2864 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2865 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2866 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2867 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2868 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2869
2870 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2871 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2872 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
2873 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2874 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
2875 #endif
2876 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2877
2878 loop = LABEL();
2879 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2880 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2881 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2882 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2883 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2884 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2885
2886 JUMPHERE(quit);
2887 JUMPHERE(firstchar);
2888 JUMPHERE(lastchar);
2889
2890 if (firstline)
2891 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2892 return;
2893 }
2894
2895 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2896 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2897 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2898 skip_char_back(common);
2899
2900 loop = LABEL();
2901 read_char(common);
2902 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2903 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2904 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2905 check_newlinechar(common, common->nltype, &newline, FALSE);
2906 set_jumps(newline, loop);
2907
2908 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2909 {
2910 quit = JUMP(SLJIT_JUMP);
2911 JUMPHERE(foundcr);
2912 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2913 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2914 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2915 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2916 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2917 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2918 #endif
2919 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2920 JUMPHERE(notfoundnl);
2921 JUMPHERE(quit);
2922 }
2923 JUMPHERE(lastchar);
2924 JUMPHERE(firstchar);
2925
2926 if (firstline)
2927 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2928 }
2929
2930 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
2931
2932 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2933 {
/* Emits a loop that moves STR_PTR forward, character by character, until the
code unit at STR_PTR has its bit set in the start bitmap or STR_END is
reached.

start_bits is the address of the bitmap; 32 bytes of it are read here. With
firstline set the scan is limited to first_line_end. The real STR_END is
parked in RETURN_ADDR rather than TMP3, because TMP3 is needed in UTF mode to
keep a copy of the code unit. */
2934 DEFINE_COMPILER;
2935 struct sljit_label *start;
2936 struct sljit_jump *quit;
2937 struct sljit_jump *found = NULL;
2938 jump_list *matches = NULL;
2939 pcre_uint8 inverted_start_bits[32];
2940 int i;
2941 #ifndef COMPILE_PCRE8
2942 struct sljit_jump *jump;
2943 #endif
2944
/* check_class_ranges() is handed the complement of the bitmap. */
2945 for (i = 0; i < 32; ++i)
2946 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
2947
2948 if (firstline)
2949 {
2950 SLJIT_ASSERT(common->first_line_end != 0);
2951 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
2952 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2953 }
2954
2955 start = LABEL();
2956 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2957 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
/* TMP1 is overwritten by the test below; keep the unit for the UTF skip. */
2958 #ifdef SUPPORT_UTF
2959 if (common->utf)
2960 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
2961 #endif
2962
/* NOTE(review): check_class_ranges() is only declared in this part of the
file. It presumably emits range comparisons and adds the "unit is a start
character" exits to `matches`, returning FALSE when it cannot do so - confirm
against its definition. On FALSE the generic bitmap test is emitted here. */
2963 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
2964 {
2965 #ifndef COMPILE_PCRE8
/* The bitmap covers 0..255 only; larger units are clamped to 255. */
2966 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
2967 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
2968 JUMPHERE(jump);
2969 #endif
/* found when (start_bits[unit >> 3] & (1 << (unit & 7))) != 0. */
2970 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
2971 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
2972 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
2973 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
2974 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
2975 found = JUMP(SLJIT_C_NOT_ZERO);
2976 }
2977
/* Not a start character: restore the unit (UTF mode) and skip the character. */
2978 #ifdef SUPPORT_UTF
2979 if (common->utf)
2980 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
2981 #endif
2982 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2983 #ifdef SUPPORT_UTF
2984 #if defined COMPILE_PCRE8
2985 if (common->utf)
2986 {
/* For a first byte >= 0xc0, add the value read from utf8_table4 for it. */
2987 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
2988 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2989 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2990 }
2991 #elif defined COMPILE_PCRE16
2992 if (common->utf)
2993 {
/* For a high surrogate ((unit & 0xfc00) == 0xd800), skip one more unit. */
2994 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
2995 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2996 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2997 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2998 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2999 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3000 }
3001 #endif /* COMPILE_PCRE[8|16] */
3002 #endif /* SUPPORT_UTF */
3003 JUMPTO(SLJIT_JUMP, start);
/* All exits from the loop meet here. */
3004 if (found != NULL)
3005 JUMPHERE(found);
3006 if (matches != NULL)
3007 set_jumps(matches, LABEL());
3008 JUMPHERE(quit);
3009
3010 if (firstline)
3011 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3012 }
3013
3014 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3015 {
3016 DEFINE_COMPILER;
3017 struct sljit_label *loop;
3018 struct sljit_jump *toolong;
3019 struct sljit_jump *alreadyfound;
3020 struct sljit_jump *found;
3021 struct sljit_jump *foundoc = NULL;
3022 struct sljit_jump *notfound;
3023 pcre_uint32 oc, bit;
3024
3025 SLJIT_ASSERT(common->req_char_ptr != 0);
3026 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3027 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3028 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3029 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3030
3031 if (has_firstchar)
3032 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3033 else
3034 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3035
3036 loop = LABEL();
3037 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3038
3039 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3040 oc = req_char;
3041 if (caseless)
3042 {
3043 oc = TABLE_GET(req_char, common->fcc, req_char);
3044 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3045 if (req_char > 127 && common->utf)
3046 oc = UCD_OTHERCASE(req_char);
3047 #endif
3048 }
3049 if (req_char == oc)
3050 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3051 else
3052 {
3053 bit = req_char ^ oc;
3054 if (is_powerof2(bit))
3055 {
3056 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3057 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3058 }
3059 else
3060 {
3061 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3062 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3063 }
3064 }
3065 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3066 JUMPTO(SLJIT_JUMP, loop);
3067
3068 JUMPHERE(found);
3069 if (foundoc)
3070 JUMPHERE(foundoc);
3071 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3072 JUMPHERE(alreadyfound);
3073 JUMPHERE(toolong);
3074 return notfound;
3075 }
3076
3077 static void do_revertframes(compiler_common *common)
3078 {
/* Fast-call helper that walks the frame entries starting at STACK_TOP and
writes the saved values back into the local variable area.

Each entry starts with a machine word that selects its kind:
  > 0  the word is an offset from the local base; the next two words are
       copied to that location and the one after it (entry size: 3 words);
  < 0  the negated word is an offset from the local base; the next word is
       copied to that location (entry size: 2 words);
  == 0 end of the frame list; the helper returns.

TMP1 is the read cursor (STACK_TOP itself is not changed), TMP2 holds the
current word / destination address and TMP3 the local base address. */
3079 DEFINE_COMPILER;
3080 struct sljit_jump *jump;
3081 struct sljit_label *mainloop;
3082
3083 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3084 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
3085 GET_LOCAL_BASE(TMP3, 0, 0);
3086
3087 /* Drop frames until we reach STACK_TOP. */
3088 mainloop = LABEL();
3089 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Compare the word with 0 using signed flags; the same flags are tested twice:
<= 0 leaves this path, and < 0 then selects the one-word variant. */
3090 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3091 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3092
/* Positive offset: restore two consecutive words. */
3093 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3094 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3095 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3096 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3097 JUMPTO(SLJIT_JUMP, mainloop);
3098
3099 JUMPHERE(jump);
3100 jump = JUMP(SLJIT_C_SIG_LESS);
3101 /* End of dropping frames. */
3102 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3103
3104 JUMPHERE(jump);
/* Negative offset: negate it and restore a single word. */
3105 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3106 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3107 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3108 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3109 JUMPTO(SLJIT_JUMP, mainloop);
3110 }
3111
3112 static void check_wordboundary(compiler_common *common)
3113 {
3114 DEFINE_COMPILER;
3115 struct sljit_jump *skipread;
3116 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3117 struct sljit_jump *jump;
3118 #endif
3119
3120 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3121
3122 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3123 /* Get type of the previous char, and put it to LOCALS1. */
3124 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3125 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3126 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3127 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3128 skip_char_back(common);
3129 check_start_used_ptr(common);
3130 read_char(common);
3131
3132 /* Testing char type. */
3133 #ifdef SUPPORT_UCP
3134 if (common->use_ucp)
3135 {
3136 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3137 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3138 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3139 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3140 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3141 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3142 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3143 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3144 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3145 JUMPHERE(jump);
3146 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3147 }
3148 else
3149 #endif
3150 {
3151 #ifndef COMPILE_PCRE8
3152 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3153 #elif defined SUPPORT_UTF
3154 /* Here LOCALS1 has already been zeroed. */
3155 jump = NULL;
3156 if (common->utf)
3157 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3158 #endif /* COMPILE_PCRE8 */
3159 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3160 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3161 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3162 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3163 #ifndef COMPILE_PCRE8
3164 JUMPHERE(jump);
3165 #elif defined SUPPORT_UTF
3166 if (jump != NULL)
3167 JUMPHERE(jump);
3168 #endif /* COMPILE_PCRE8 */
3169 }
3170 JUMPHERE(skipread);
3171
3172 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3173 skipread = check_str_end(common);
3174 peek_char(common);
3175
3176 /* Testing char type. This is a code duplication. */
3177 #ifdef SUPPORT_UCP
3178 if (common->use_ucp)
3179 {
3180 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3181 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3182 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3183 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3184 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3185 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3186 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3187 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3188 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3189 JUMPHERE(jump);
3190 }
3191 else
3192 #endif
3193 {
3194 #ifndef COMPILE_PCRE8
3195 /* TMP2 may be destroyed by peek_char. */
3196 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3197 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3198 #elif defined SUPPORT_UTF
3199 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3200 jump = NULL;
3201 if (common->utf)
3202 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3203 #endif
3204 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3205 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3206 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3207 #ifndef COMPILE_PCRE8
3208 JUMPHERE(jump);
3209 #elif defined SUPPORT_UTF
3210 if (jump != NULL)
3211 JUMPHERE(jump);
3212 #endif /* COMPILE_PCRE8 */
3213 }
3214 JUMPHERE(skipread);
3215
3216 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3217 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3218 }
3219
/*
range format:

ranges[0] = number of bit changes stored (max MAX_RANGE_SIZE, -1 means invalid range).
ranges[1] = first bit (0 or 1), i.e. the bit that belongs to character 0.
ranges[2] .. ranges[ranges[0] + 1] = positions of the bit changes in increasing order
  (a position is recorded whenever the current bit is not equal to the previous one).
*/
3227
3228 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3229 {
3230 DEFINE_COMPILER;
3231 struct sljit_jump *jump;
3232
3233 if (ranges[0] < 0)
3234 return FALSE;
3235
3236 switch(ranges[0])
3237 {
3238 case 1:
3239 if (readch)
3240 read_char(common);
3241 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3242 return TRUE;
3243
3244 case 2:
3245 if (readch)
3246 read_char(common);
3247 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3248 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3249 return TRUE;
3250
3251 case 4:
3252 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3253 {
3254 if (readch)
3255 read_char(common);
3256 if (ranges[1] != 0)
3257 {
3258 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3259 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3260 }
3261 else
3262 {
3263 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3264 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3265 JUMPHERE(jump);
3266 }
3267 return TRUE;
3268 }
3269 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3270 {
3271 if (readch)
3272 read_char(common);
3273 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3274 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3275 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3276 return TRUE;
3277 }
3278 return FALSE;
3279
3280 default:
3281 return FALSE;
3282 }
3283 }
3284
3285 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3286 {
3287 int i, bit, length;
3288 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3289
3290 bit = ctypes[0] & flag;
3291 ranges[0] = -1;
3292 ranges[1] = bit != 0 ? 1 : 0;
3293 length = 0;
3294
3295 for (i = 1; i < 256; i++)
3296 if ((ctypes[i] & flag) != bit)
3297 {
3298 if (length >= MAX_RANGE_SIZE)
3299 return;
3300 ranges[2 + length] = i;
3301 length++;
3302 bit ^= flag;
3303 }
3304
3305 if (bit != 0)
3306 {
3307 if (length >= MAX_RANGE_SIZE)
3308 return;
3309 ranges[2 + length] = 256;
3310 length++;
3311 }
3312 ranges[0] = length;
3313 }
3314
3315 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3316 {
3317 int ranges[2 + MAX_RANGE_SIZE];
3318 pcre_uint8 bit, cbit, all;
3319 int i, byte, length = 0;
3320
3321 bit = bits[0] & 0x1;
3322 ranges[1] = bit;
3323 /* Can be 0 or 255. */
3324 all = -bit;
3325
3326 for (i = 0; i < 256; )
3327 {
3328 byte = i >> 3;
3329 if ((i & 0x7) == 0 && bits[byte] == all)
3330 i += 8;
3331 else
3332 {
3333 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3334 if (cbit != bit)
3335 {
3336 if (length >= MAX_RANGE_SIZE)
3337 return FALSE;
3338 ranges[2 + length] = i;
3339 length++;
3340 bit = cbit;
3341 all = -cbit;
3342 }
3343 i++;
3344 }
3345 }
3346
3347 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3348 {
3349 if (length >= MAX_RANGE_SIZE)
3350 return FALSE;
3351 ranges[2 + length] = 256;
3352 length++;
3353 }
3354 ranges[0] = length;
3355
3356 return check_ranges(common, ranges, backtracks, FALSE);
3357 }
3358
3359 static void check_anynewline(compiler_common *common)
3360 {
3361 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3362 DEFINE_COMPILER;
3363
3364 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3365
3366 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3367 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3368 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3369 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3370 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3371 #ifdef COMPILE_PCRE8
3372 if (common->utf)
3373 {
3374 #endif
3375 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3376 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3377 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3378 #ifdef COMPILE_PCRE8
3379 }
3380 #endif
3381 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3382 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3383 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3384 }
3385
3386 static void check_hspace(compiler_common *common)
3387 {
3388 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3389 DEFINE_COMPILER;
3390
3391 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3392
3393 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3394 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3395 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3396 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3397 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3398 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3399 #ifdef COMPILE_PCRE8
3400 if (common->utf)
3401 {
3402 #endif
3403 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3404 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3405 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3406 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3407 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3408 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3409 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3410 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3411 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3412 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3413 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3414 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3415 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3416 #ifdef COMPILE_PCRE8
3417 }
3418 #endif
3419 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3420 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3421
3422 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3423 }
3424
3425 static void check_vspace(compiler_common *common)
3426 {
3427 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
3428 DEFINE_COMPILER;
3429
3430 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3431
3432 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3433 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3434 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3435 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3436 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3437 #ifdef COMPILE_PCRE8
3438 if (common->utf)
3439 {
3440 #endif
3441 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3442 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3443 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3444 #ifdef COMPILE_PCRE8
3445 }
3446 #endif
3447 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
3448 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3449
3450 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3451 }
3452
3453 #define CHAR1 STR_END
3454 #define CHAR2 STACK_TOP
3455
3456 static void do_casefulcmp(compiler_common *common)
3457 {
3458 DEFINE_COMPILER;
3459 struct sljit_jump *jump;
3460 struct sljit_label *label;
3461
3462 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3463 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3464 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3465 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3466 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3467 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3468
3469 label = LABEL();
3470 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3471 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3472 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3473 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3474 JUMPTO(SLJIT_C_NOT_ZERO, label);
3475
3476 JUMPHERE(jump);
3477 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3478 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3479 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3480 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3481 }
3482
3483 #define LCC_TABLE STACK_LIMIT
3484
3485 static void do_caselesscmp(compiler_common *common)
3486 {
3487 DEFINE_COMPILER;
3488 struct sljit_jump *jump;
3489 struct sljit_label *label;
3490
3491 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3492 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3493
3494 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3495 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3496 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3497 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3498 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3499 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3500
3501 label = LABEL();
3502 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3503 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3504 #ifndef COMPILE_PCRE8
3505 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3506 #endif
3507 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3508 #ifndef COMPILE_PCRE8
3509 JUMPHERE(jump);
3510 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3511 #endif
3512 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3513 #ifndef COMPILE_PCRE8
3514 JUMPHERE(jump);
3515 #endif
3516 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3517 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3518 JUMPTO(SLJIT_C_NOT_ZERO, label);
3519
3520 JUMPHERE(jump);
3521 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3522 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3523 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3524 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3525 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3526 }
3527
3528 #undef LCC_TABLE
3529 #undef CHAR1
3530 #undef CHAR2
3531
3532 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3533
3534 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3535 {
3536 /* This function would be ineffective to do in JIT level. */
3537 pcre_uint32 c1, c2;
3538 const pcre_uchar *src2 = args->uchar_ptr;
3539 const pcre_uchar *end2 = args->end;
3540 const ucd_record *ur;
3541 const pcre_uint32 *pp;
3542
3543 while (src1 < end1)
3544 {
3545 if (src2 >= end2)
3546 return (pcre_uchar*)1;
3547 GETCHARINC(c1, src1);
3548 GETCHARINC(c2, src2);
3549 ur = GET_UCD(c2);
3550 if (c1 != c2 && c1 != c2 + ur->other_case)
3551 {
3552 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3553 for (;;)
3554 {
3555 if (c1 < *pp) return NULL;
3556 if (c1 == *pp++) break;
3557 }
3558 }
3559 }
3560 return src2;
3561 }
3562
3563 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3564
3565 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3566 compare_context* context, jump_list **backtracks)
3567 {
3568 DEFINE_COMPILER;
3569 unsigned int othercasebit = 0;
3570 pcre_uchar *othercasechar = NULL;
3571 #ifdef SUPPORT_UTF
3572 int utflength;
3573 #endif
3574
3575 if (caseless && char_has_othercase(common, cc))
3576 {
3577 othercasebit = char_get_othercase_bit(common, cc);
3578 SLJIT_ASSERT(othercasebit);
3579 /* Extracting bit difference info. */
3580 #if defined COMPILE_PCRE8
3581 othercasechar = cc + (othercasebit >> 8);
3582 othercasebit &= 0xff;
3583 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3584 /* Note that this code only handles characters in the BMP. If there
3585 ever are characters outside the BMP whose othercase differs in only one
3586 bit from itself (there currently are none), this code will need to be
3587 revised for COMPILE_PCRE32. */
3588 othercasechar = cc + (othercasebit >> 9);
3589 if ((othercasebit & 0x100) != 0)
3590 othercasebit = (othercasebit & 0xff) << 8;
3591 else
3592 othercasebit &= 0xff;
3593 #endif /* COMPILE_PCRE[8|16|32] */
3594 }
3595
3596 if (context->sourcereg == -1)
3597 {
3598 #if defined COMPILE_PCRE8
3599 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3600 if (context->length >= 4)
3601 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3602 else if (context->length >= 2)
3603 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3604 else
3605 #endif
3606 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3607 #elif defined COMPILE_PCRE16
3608 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3609 if (context->length >= 4)
3610 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3611 else
3612 #endif
3613 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3614 #elif defined COMPILE_PCRE32
3615 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3616 #endif /* COMPILE_PCRE[8|16|32] */
3617 context->sourcereg = TMP2;
3618 }
3619
3620 #ifdef SUPPORT_UTF
3621 utflength = 1;
3622 if (common->utf && HAS_EXTRALEN(*cc))
3623 utflength += GET_EXTRALEN(*cc);
3624
3625 do
3626 {
3627 #endif
3628
3629 context->length -= IN_UCHARS(1);
3630 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3631
3632 /* Unaligned read is supported. */
3633 if (othercasebit != 0 && othercasechar == cc)
3634 {
3635 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3636 context->oc.asuchars[context->ucharptr] = othercasebit;
3637 }
3638 else
3639 {
3640 context->c.asuchars[context->ucharptr] = *cc;
3641 context->oc.asuchars[context->ucharptr] = 0;
3642 }
3643 context->ucharptr++;
3644
3645 #if defined COMPILE_PCRE8
3646 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3647 #else
3648 if (context->ucharptr >= 2 || context->length == 0)
3649 #endif
3650 {
3651 if (context->length >= 4)
3652 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3653 else if (context->length >= 2)
3654 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3655 #if defined COMPILE_PCRE8
3656 else if (context->length >= 1)
3657 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3658 #endif /* COMPILE_PCRE8 */
3659 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3660
3661 switch(context->ucharptr)
3662 {
3663 case 4 / sizeof(pcre_uchar):
3664 if (context->oc.asint != 0)
3665 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3666 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3667 break;
3668
3669 case 2 / sizeof(pcre_uchar):
3670 if (context->oc.asushort != 0)
3671 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3672 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3673 break;
3674
3675 #ifdef COMPILE_PCRE8
3676 case 1:
3677 if (context->oc.asbyte != 0)
3678 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3679 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3680 break;
3681 #endif
3682
3683 default:
3684 SLJIT_ASSERT_STOP();
3685 break;
3686 }
3687 context->ucharptr = 0;
3688 }
3689
3690 #else
3691
3692 /* Unaligned read is unsupported or in 32 bit mode. */
3693 if (context->length >= 1)
3694 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3695
3696 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3697
3698 if (othercasebit != 0 && othercasechar == cc)
3699 {
3700 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3701 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3702 }
3703 else
3704 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3705
3706 #endif
3707
3708 cc++;
3709 #ifdef SUPPORT_UTF
3710 utflength--;
3711 }
3712 while (utflength > 0);
3713 #endif
3714
3715 return cc;
3716 }
3717
3718 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3719
/* Makes the value held in typereg relative to (value) instead of the current
typeoffset by emitting one SUB or ADD (nothing when they are equal), then
records (value) as the new typeoffset. Expects OP2, typereg and typeoffset to
be available at the point of use. Wrapped in do/while(0) so that it behaves
as a single statement; (value) is evaluated more than once. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) > typeoffset) \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      else \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      } \
    typeoffset = (value); \
    } \
  while (0)
3729
/* Makes the character held in TMP1 relative to (value) instead of the current
charoffset by emitting one SUB or ADD (nothing when they are equal), then
records (value) as the new charoffset. Expects OP2, TMP1 and charoffset to be
available at the point of use. Wrapped in do/while(0) so that it behaves as a
single statement; (value) is evaluated more than once. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) > charoffset) \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
      else \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
      } \
    charoffset = (value); \
    } \
  while (0)
3739
3740 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3741 {
3742 DEFINE_COMPILER;
3743 jump_list *found = NULL;
3744 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3745 pcre_int32 c, charoffset;
3746 const pcre_uint32 *other_cases;
3747 struct sljit_jump *jump = NULL;
3748 pcre_uchar *ccbegin;
3749 int compares, invertcmp, numberofcmps;
3750 #ifdef SUPPORT_UCP
3751 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3752 BOOL charsaved = FALSE;
3753 int typereg = TMP1, scriptreg = TMP1;
3754 pcre_int32 typeoffset;
3755 #endif
3756
3757 /* Although SUPPORT_UTF must be defined, we are
3758 not necessary in utf mode even in 8 bit mode. */
3759 detect_partial_match(common, backtracks);
3760 read_char(common);
3761
3762 if ((*cc++ & XCL_MAP) != 0)
3763 {
3764 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3765 #ifndef COMPILE_PCRE8
3766 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3767 #elif defined SUPPORT_UTF
3768 if (common->utf)
3769 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3770 #endif
3771
3772 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3773 {
3774 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3775 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3776 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3777 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3778 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3779 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3780 }
3781
3782 #ifndef COMPILE_PCRE8
3783 JUMPHERE(jump);
3784 #elif defined SUPPORT_UTF
3785 if (common->utf)
3786 JUMPHERE(jump);
3787 #endif
3788 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3789 #ifdef SUPPORT_UCP
3790 charsaved = TRUE;
3791 #endif
3792 cc += 32 / sizeof(pcre_uchar);
3793 }
3794
3795 /* Scanning the necessary info. */
3796 ccbegin = cc;
3797 compares = 0;
3798 while (*cc != XCL_END)
3799 {
3800 compares++;
3801 if (*cc == XCL_SINGLE)
3802 {
3803 cc += 2;
3804 #ifdef SUPPORT_UTF
3805 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3806 #endif
3807 #ifdef SUPPORT_UCP
3808 needschar = TRUE;
3809 #endif
3810 }
3811 else if (*cc == XCL_RANGE)
3812 {
3813 cc += 2;
3814 #ifdef SUPPORT_UTF
3815 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3816 #endif
3817 cc++;
3818 #ifdef SUPPORT_UTF
3819 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3820 #endif
3821 #ifdef SUPPORT_UCP
3822 needschar = TRUE;
3823 #endif
3824 }
3825 #ifdef SUPPORT_UCP
3826 else
3827 {
3828 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3829 cc++;
3830 switch(*cc)
3831 {
3832 case PT_ANY:
3833 break;
3834
3835 case PT_LAMP:
3836 case PT_GC:
3837 case PT_PC:
3838 case PT_ALNUM:
3839 needstype = TRUE;
3840 break;
3841
3842 case PT_SC:
3843 needsscript = TRUE;
3844 break;
3845
3846 case PT_SPACE:
3847 case PT_PXSPACE:
3848 case PT_WORD:
3849 needstype = TRUE;
3850 needschar = TRUE;
3851 break;
3852
3853 case PT_CLIST:
3854 needschar = TRUE;
3855 break;
3856
3857 default:
3858 SLJIT_ASSERT_STOP();
3859 break;
3860 }
3861 cc += 2;
3862 }
3863 #endif
3864 }
3865
3866 #ifdef SUPPORT_UCP
3867 /* Simple register allocation. TMP1 is preferred if possible. */
3868 if (needstype || needsscript)
3869 {
3870 if (needschar && !charsaved)
3871 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3872 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3873 if (needschar)
3874 {
3875 if (needstype)
3876 {
3877 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3878 typereg = RETURN_ADDR;
3879 }
3880
3881 if (needsscript)
3882 scriptreg = TMP3;
3883 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3884 }
3885 else if (needstype && needsscript)
3886 scriptreg = TMP3;
3887 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3888
3889 if (needsscript)
3890 {
3891 if (scriptreg == TMP1)
3892 {
3893 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3894 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3895 }
3896 else
3897 {
3898 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3899 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3900 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3901 }
3902 }
3903 }
3904 #endif
3905
3906 /* Generating code. */
3907 cc = ccbegin;
3908 charoffset = 0;
3909 numberofcmps = 0;
3910 #ifdef SUPPORT_UCP
3911 typeoffset = 0;
3912 #endif
3913
3914 while (*cc != XCL_END)
3915 {
3916 compares--;
3917 invertcmp = (compares == 0 && list != backtracks);
3918 jump = NULL;
3919
3920 if (*cc == XCL_SINGLE)
3921 {
3922 cc ++;
3923 #ifdef SUPPORT_UTF
3924 if (common->utf)
3925 {
3926 GETCHARINC(c, cc);
3927 }
3928 else
3929 #endif
3930 c = *cc++;
3931
3932 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3933 {
3934 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3935 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
3936 numberofcmps++;
3937 }
3938 else if (numberofcmps > 0)
3939 {
3940 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3941 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3942 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3943 numberofcmps = 0;
3944 }
3945 else
3946 {
3947 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3948 numberofcmps = 0;
3949 }
3950 }
3951 else if (*cc == XCL_RANGE)
3952 {
3953 cc ++;
3954 #ifdef SUPPORT_UTF
3955 if (common->utf)
3956 {
3957 GETCHARINC(c, cc);
3958 }
3959 else
3960 #endif
3961 c = *cc++;
3962 SET_CHAR_OFFSET(c);
3963 #ifdef SUPPORT_UTF
3964 if (common->utf)
3965 {
3966 GETCHARINC(c, cc);
3967 }
3968 else
3969 #endif
3970 c = *cc++;
3971 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3972 {
3973 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3974 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
3975 numberofcmps++;
3976 }
3977 else if (numberofcmps > 0)
3978 {
3979 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
3980 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3981 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
3982 numberofcmps = 0;
3983 }
3984 else
3985 {
3986 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
3987 numberofcmps = 0;
3988 }
3989 }
3990 #ifdef SUPPORT_UCP
3991 else
3992 {
3993 if (*cc == XCL_NOTPROP)
3994 invertcmp ^= 0x1;
3995 cc++;
3996 switch(*cc)
3997 {
3998 case PT_ANY:
3999 if (list != backtracks)
4000 {
4001 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4002 continue;
4003 }
4004 else if (cc[-1] == XCL_NOTPROP)
4005 continue;
4006 jump = JUMP(SLJIT_JUMP);
4007 break;
4008
4009 case PT_LAMP:
4010 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4011 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4012 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4013 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4014 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4015 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4016 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4017 break;
4018
4019 case PT_GC:
4020 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4021 SET_TYPE_OFFSET(c);
4022 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4023 break;
4024
4025 case PT_PC:
4026 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4027 break;
4028
4029 case PT_SC:
4030 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4031 break;
4032
4033 case PT_SPACE:
4034 case PT_PXSPACE:
4035 if (*cc == PT_SPACE)
4036 {
4037 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4038 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4039 }
4040 SET_CHAR_OFFSET(9);
4041 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4042 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4043 if (*cc == PT_SPACE)
4044 JUMPHERE(jump);
4045
4046 SET_TYPE_OFFSET(ucp_Zl);
4047 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4048 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4049 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4050 break;
4051
4052 case PT_WORD:
4053 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4054 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4055 /* ... fall through */
4056
4057 case PT_ALNUM:
4058 SET_TYPE_OFFSET(ucp_Ll);
4059 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4060 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4061 SET_TYPE_OFFSET(ucp_Nd);
4062 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4063 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4064 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4065 break;
4066
4067 case PT_CLIST:
4068 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4069
4070 /* At least three characters are required.
4071 Otherwise this case would be handled by the normal code path. */
4072 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4073 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4074
4075 /* Optimizing character pairs, if their difference is power of 2. */
4076 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4077 {
4078 if (charoffset == 0)
4079 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4080 else
4081 {
4082 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4083 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4084 }
4085 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4086 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4087 other_cases += 2;
4088 }
4089 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4090 {
4091 if (charoffset == 0)
4092 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4093 else
4094 {
4095 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4096 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4097 }
4098 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4099 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4100
4101 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4102 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4103
4104 other_cases += 3;
4105 }
4106 else
4107 {
4108 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4109 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4110 }
4111
4112 while (*other_cases != NOTACHAR)
4113 {
4114 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4115 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4116 }
4117 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4118 break;
4119 }
4120 cc += 2;
4121 }
4122 #endif
4123
4124 if (jump != NULL)
4125 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4126 }
4127
4128 if (found != NULL)
4129 set_jumps(found, LABEL());
4130 }
4131
4132 #undef SET_TYPE_OFFSET
4133 #undef SET_CHAR_OFFSET
4134
4135 #endif
4136
4137 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4138 {
4139 DEFINE_COMPILER;
4140 int length;
4141 unsigned int c, oc, bit;
4142 compare_context context;
4143 struct sljit_jump *jump[4];
4144 #ifdef SUPPORT_UTF
4145 struct sljit_label *label;
4146 #ifdef SUPPORT_UCP
4147 pcre_uchar propdata[5];
4148 #endif
4149 #endif
4150
4151 switch(type)
4152 {
4153 case OP_SOD:
4154 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4155 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4156 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4157 return cc;
4158
4159 case OP_SOM:
4160 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4161 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4162 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4163 return cc;
4164
4165 case OP_NOT_WORD_BOUNDARY:
4166 case OP_WORD_BOUNDARY:
4167 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4168 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4169 return cc;
4170
4171 case OP_NOT_DIGIT:
4172 case OP_DIGIT:
4173 /* Digits are usually 0-9, so it is worth to optimize them. */
4174 if (common->digits[0] == -2)
4175 get_ctype_ranges(common, ctype_digit, common->digits);
4176 detect_partial_match(common, backtracks);
4177 /* Flip the starting bit in the negative case. */
4178 if (type == OP_NOT_DIGIT)
4179 common->digits[1] ^= 1;
4180 if (!check_ranges(common, common->digits, backtracks, TRUE))
4181 {
4182 read_char8_type(common);
4183 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4184 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4185 }
4186 if (type == OP_NOT_DIGIT)
4187 common->digits[1] ^= 1;
4188 return cc;
4189
4190 case OP_NOT_WHITESPACE:
4191 case OP_WHITESPACE:
4192 detect_partial_match(common, backtracks);
4193 read_char8_type(common);
4194 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4195 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4196 return cc;
4197
4198 case OP_NOT_WORDCHAR:
4199 case OP_WORDCHAR:
4200 detect_partial_match(common, backtracks);
4201 read_char8_type(common);
4202 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4203 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4204 return cc;
4205
4206 case OP_ANY:
4207 detect_partial_match(common, backtracks);
4208 read_char(common);
4209 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4210 {
4211 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4212 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4213 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4214 else
4215 jump[1] = check_str_end(common);
4216
4217 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4218 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
4219 if (jump[1] != NULL)
4220 JUMPHERE(jump[1]);
4221 JUMPHERE(jump[0]);
4222 }
4223 else
4224 check_newlinechar(common, common->nltype, backtracks, TRUE);
4225 return cc;
4226
4227 case OP_ALLANY:
4228 detect_partial_match(common, backtracks);
4229 #ifdef SUPPORT_UTF
4230 if (common->utf)
4231 {
4232 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4233 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4234 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4235 #if defined COMPILE_PCRE8
4236 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4237 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4238 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4239 #elif defined COMPILE_PCRE16
4240 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4241 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4242 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4243 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4244 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4245 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4246 #endif
4247 JUMPHERE(jump[0]);
4248 #endif /* COMPILE_PCRE[8|16] */
4249 return cc;
4250 }
4251 #endif
4252 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4253 return cc;
4254
4255 case OP_ANYBYTE:
4256 detect_partial_match(common, backtracks);
4257 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4258 return cc;
4259
4260 #ifdef SUPPORT_UTF
4261 #ifdef SUPPORT_UCP
4262 case OP_NOTPROP:
4263 case OP_PROP:
4264 propdata[0] = 0;
4265 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4266 propdata[2] = cc[0];
4267 propdata[3] = cc[1];
4268 propdata[4] = XCL_END;
4269 compile_xclass_matchingpath(common, propdata, backtracks);
4270 return cc + 2;
4271 #endif
4272 #endif
4273
4274 case OP_ANYNL:
4275 detect_partial_match(common, backtracks);
4276 read_char(common);
4277 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4278 /* We don't need to handle soft partial matching case. */
4279 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4280 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4281 else
4282 jump[1] = check_str_end(common);
4283 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4284 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4285 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4286 jump[3] = JUMP(SLJIT_JUMP);
4287 JUMPHERE(jump[0]);
4288 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4289 JUMPHERE(jump[1]);
4290 JUMPHERE(jump[2]);
4291 JUMPHERE(jump[3]);
4292 return cc;
4293
4294 case OP_NOT_HSPACE:
4295 case OP_HSPACE:
4296 detect_partial_match(common, backtracks);
4297 read_char(common);
4298 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4299 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4300 return cc;
4301
4302 case OP_NOT_VSPACE:
4303 case OP_VSPACE:
4304 detect_partial_match(common, backtracks);
4305 read_char(common);
4306 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4307 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4308 return cc;
4309
4310 #ifdef SUPPORT_UCP
4311 case OP_EXTUNI:
4312 detect_partial_match(common, backtracks);
4313 read_char(common);
4314 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4315 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4316 /* Optimize register allocation: use a real register. */
4317 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4318 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4319
4320 label = LABEL();
4321 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4322 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4323 read_char(common);
4324 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4325 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4326 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4327
4328 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4329 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4330 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4331 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4332 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4333 JUMPTO(SLJIT_C_NOT_ZERO, label);
4334
4335 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4336 JUMPHERE(jump[0]);
4337 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4338
4339 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4340 {
4341 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4342 /* Since we successfully read a char above, partial matching must occure. */
4343 check_partial(common, TRUE);
4344 JUMPHERE(jump[0]);
4345 }
4346 return cc;
4347 #endif
4348
4349 case OP_EODN:
4350 /* Requires rather complex checks. */
4351 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4352 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4353 {
4354 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4355 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4356 if (common->mode == JIT_COMPILE)
4357 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4358 else
4359 {
4360 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4361 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4362 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4363 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4364 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4365 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4366 check_partial(common, TRUE);
4367 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4368 JUMPHERE(jump[1]);
4369 }
4370 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4371 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4372 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4373 }
4374 else if (common->nltype == NLTYPE_FIXED)
4375 {
4376 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4377 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4378 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4379 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4380 }
4381 else
4382 {
4383 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4384 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4385 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4386 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4387 jump[2] = JUMP(SLJIT_C_GREATER);
4388 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4389 /* Equal. */
4390 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4391 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4392 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4393
4394 JUMPHERE(jump[1]);
4395 if (common->nltype == NLTYPE_ANYCRLF)
4396 {
4397 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4398 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4399 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4400 }
4401 else
4402 {
4403 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4404 read_char(common);
4405 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4406 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4407 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4408 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4409 }
4410 JUMPHERE(jump[2]);
4411 JUMPHERE(jump[3]);
4412 }
4413 JUMPHERE(jump[0]);
4414 check_partial(common, FALSE);
4415 return cc;
4416
4417 case OP_EOD:
4418 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4419 check_partial(common, FALSE);
4420 return cc;
4421
4422 case OP_CIRC:
4423 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4424 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4425 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4426 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4427 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4428 return cc;
4429
4430 case OP_CIRCM:
4431 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4432 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4433 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4434 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4435 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4436 jump[0] = JUMP(SLJIT_JUMP);
4437 JUMPHERE(jump[1]);
4438
4439 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4440 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4441 {
4442 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4443 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4444 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4445 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4446 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4447 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4448 }
4449 else
4450 {
4451 skip_char_back(common);
4452 read_char(common);
4453 check_newlinechar(common, common->nltype, backtracks, FALSE);
4454 }
4455 JUMPHERE(jump[0]);
4456 return cc;
4457
4458 case OP_DOLL:
4459 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4460 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4461 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4462
4463 if (!common->endonly)
4464 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4465 else
4466 {
4467 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4468 check_partial(common, FALSE);
4469 }
4470 return cc;
4471
4472 case OP_DOLLM:
4473 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4474 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4475 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4476 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4477 check_partial(common, FALSE);
4478 jump[0] = JUMP(SLJIT_JUMP);
4479 JUMPHERE(jump[1]);
4480
4481 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4482 {
4483 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4484 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4485 if (common->mode == JIT_COMPILE)
4486 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4487 else
4488 {
4489 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4490 /* STR_PTR = STR_END - IN_UCHARS(1) */
4491 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4492 check_partial(common, TRUE);
4493 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4494 JUMPHERE(jump[1]);
4495 }
4496
4497 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4498 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4499 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4500 }
4501 else
4502 {
4503 peek_char(common);
4504 check_newlinechar(common, common->nltype, backtracks, FALSE);
4505 }
4506 JUMPHERE(jump[0]);
4507 return cc;
4508
4509 case OP_CHAR:
4510 case OP_CHARI:
4511 length = 1;
4512 #ifdef SUPPORT_UTF
4513 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4514 #endif
4515 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4516 {
4517 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4518 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4519
4520 context.length = IN_UCHARS(length);
4521 context.sourcereg = -1;
4522 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4523 context.ucharptr = 0;
4524 #endif
4525 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4526 }
4527 detect_partial_match(common, backtracks);
4528 read_char(common);
4529 #ifdef SUPPORT_UTF
4530 if (common->utf)
4531 {
4532 GETCHAR(c, cc);
4533 }
4534 else
4535 #endif
4536 c = *cc;
4537 if (type == OP_CHAR || !char_has_othercase(common, cc))
4538 {
4539 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4540 return cc + length;
4541 }
4542 oc = char_othercase(common, c);
4543 bit = c ^ oc;
4544 if (is_powerof2(bit))
4545 {
4546 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4547 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4548 return cc + length;
4549 }
4550 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4551 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4552 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4553 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4554 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4555 return cc + length;
4556
4557 case OP_NOT:
4558 case OP_NOTI:
4559 detect_partial_match(common, backtracks);
4560 length = 1;
4561 #ifdef SUPPORT_UTF
4562 if (common->utf)
4563 {
4564 #ifdef COMPILE_PCRE8
4565 c = *cc;
4566 if (c < 128)
4567 {
4568 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4569 if (type == OP_NOT || !char_has_othercase(common, cc))
4570 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4571 else
4572 {
4573 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4574 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4575 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4576 }
4577 /* Skip the variable-length character. */
4578 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4579 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4580 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4581 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4582 JUMPHERE(jump[0]);
4583 return cc + 1;
4584 }
4585 else
4586 #endif /* COMPILE_PCRE8 */
4587 {
4588 GETCHARLEN(c, cc, length);
4589 read_char(common);
4590 }
4591 }
4592 else
4593 #endif /* SUPPORT_UTF */
4594 {
4595 read_char(common);
4596 c = *cc;
4597 }
4598
4599 if (type == OP_NOT || !char_has_othercase(common, cc))
4600 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4601 else
4602 {
4603 oc = char_othercase(common, c);
4604 bit = c ^ oc;
4605 if (is_powerof2(bit))
4606 {
4607 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4608 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4609 }
4610 else
4611 {
4612 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4613 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4614 }
4615 }
4616 return cc + length;
4617
4618 case OP_CLASS:
4619 case OP_NCLASS:
4620 detect_partial_match(common, backtracks);
4621 read_char(common);
4622 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4623 return cc + 32 / sizeof(pcre_uchar);
4624
4625 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4626 jump[0] = NULL;
4627 #ifdef COMPILE_PCRE8
4628 /* This check only affects 8 bit mode. In other modes, we
4629 always need to compare the value with 255. */
4630 if (common->utf)
4631 #endif /* COMPILE_PCRE8 */
4632 {
4633 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4634 if (type == OP_CLASS)
4635 {
4636 add_jump(compiler, backtracks, jump[0]);
4637 jump[0] = NULL;
4638 }
4639 }
4640 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4641 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4642 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4643 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4644 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4645 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4646 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4647 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4648 if (jump[0] != NULL)
4649 JUMPHERE(jump[0]);
4650 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4651 return cc + 32 / sizeof(pcre_uchar);
4652
4653 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4654 case OP_XCLASS:
4655 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4656 return cc + GET(cc, 0) - 1;
4657 #endif
4658
4659 case OP_REVERSE:
4660 length = GET(cc, 0);
4661 if (length == 0)
4662 return cc + LINK_SIZE;
4663 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4664 #ifdef SUPPORT_UTF
4665 if (common->utf)
4666 {
4667 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4668 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4669 label = LABEL();
4670 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4671 skip_char_back(common);
4672 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4673 JUMPTO(SLJIT_C_NOT_ZERO, label);
4674 }
4675 else
4676 #endif
4677 {
4678 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4679 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4680 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4681 }
4682 check_start_used_ptr(common);
4683 return cc + LINK_SIZE;
4684 }
4685 SLJIT_ASSERT_STOP();
4686 return cc;
4687 }
4688
4689 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4690 {
4691 /* This function consumes at least one input character. */
4692 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4693 DEFINE_COMPILER;
4694 pcre_uchar *ccbegin = cc;
4695 compare_context context;
4696 int size;
4697
4698 context.length = 0;
4699 do
4700 {
4701 if (cc >= ccend)
4702 break;
4703
4704 if (*cc == OP_CHAR)
4705 {
4706 size = 1;
4707 #ifdef SUPPORT_UTF
4708 if (common->utf && HAS_EXTRALEN(cc[1]))
4709 size += GET_EXTRALEN(cc[1]);
4710 #endif
4711 }
4712 else if (*cc == OP_CHARI)
4713 {
4714 size = 1;
4715 #ifdef SUPPORT_UTF
4716 if (common->utf)
4717 {
4718 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4719 size = 0;
4720 else if (HAS_EXTRALEN(cc[1]))
4721 size += GET_EXTRALEN(cc[1]);
4722 }
4723 else
4724 #endif
4725 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4726 size = 0;
4727 }
4728 else
4729 size = 0;
4730
4731 cc += 1 + size;
4732 context.length += IN_UCHARS(size);
4733 }
4734 while (size > 0 && context.length <= 128);
4735
4736 cc = ccbegin;
4737 if (context.length > 0)
4738 {
4739 /* We have a fixed-length byte sequence. */
4740 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4741 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4742
4743 context.sourcereg = -1;
4744 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4745 context.ucharptr = 0;
4746 #endif
4747 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4748 return cc;
4749 }
4750
4751 /* A non-fixed length character will be checked if length == 0. */
4752 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4753 }
4754
4755 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4756 {
4757 DEFINE_COMPILER;
4758 int offset = GET2(cc, 1) << 1;
4759
4760 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4761 if (!common->jscript_compat)
4762 {
4763 if (backtracks == NULL)
4764 {
4765 /* OVECTOR(1) contains the "string begin - 1" constant. */
4766 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4767 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4768 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4769 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4770 return JUMP(SLJIT_C_NOT_ZERO);
4771 }
4772 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4773 }
4774 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4775 }
4776
4777 /* Forward definitions. */
4778 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4779 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4780
/* Allocates a zero-filled backtrack record of `size` bytes from the
   compiler's memory, remembers `ccstart` in it and pushes it on top of
   parent's list. Expects `compiler`, `backtrack` and `parent` to be in
   scope at the point of use. If the compiler reports an error after the
   allocation, the enclosing function returns `error`. */
#define PUSH_BACKTRACK(size, ccstart, error)                      \
  do                                                              \
  {                                                               \
    backtrack = sljit_alloc_memory(compiler, (size));             \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))       \
      return error;                                               \
    memset(backtrack, 0, (size));                                 \
    backtrack->prev = parent->top;                                \
    backtrack->cc = (ccstart);                                    \
    parent->top = backtrack;                                      \
  }                                                               \
  while (0)

/* Same as PUSH_BACKTRACK, for enclosing functions that return void. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart)                     \
  do                                                              \
  {                                                               \
    backtrack = sljit_alloc_memory(compiler, (size));             \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))       \
      return;                                                     \
    memset(backtrack, 0, (size));                                 \
    backtrack->prev = parent->top;                                \
    backtrack->cc = (ccstart);                                    \
    parent->top = backtrack;                                      \
  }                                                               \
  while (0)

/* Views the current `backtrack` record as a pointer to `type`. */
#define BACKTRACK_AS(type) ((type *)backtrack)
4808
4809 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4810 {
4811 DEFINE_COMPILER;
4812 int offset = GET2(cc, 1) << 1;
4813 struct sljit_jump *jump = NULL;
4814 struct sljit_jump *partial;
4815 struct sljit_jump *nopartial;
4816
4817 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4818 /* OVECTOR(1) contains the "string begin - 1" constant. */
4819 if (withchecks && !common->jscript_compat)
4820 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4821
4822 #if defined SUPPORT_UTF && defined SUPPORT_UCP
4823 if (common->utf && *cc == OP_REFI)
4824 {
4825 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
4826 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4827 if (withchecks)
4828 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4829
4830 /* Needed to save important temporary registers. */
4831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4832 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
4833 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4834 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4835 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4836 if (common->mode == JIT_COMPILE)
4837 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4838 else
4839 {
4840 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4841 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4842 check_partial(common, FALSE);
4843 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4844 JUMPHERE(nopartial);
4845 }
4846 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4847 }
4848 else
4849 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4850 {
4851 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4852 if (withchecks)
4853 jump = JUMP(SLJIT_C_ZERO);
4854
4855 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4856 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4857 if (common->mode == JIT_COMPILE)
4858 add_jump(compiler, backtracks, partial);
4859
4860 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4861 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4862
4863 if (common->mode != JIT_COMPILE)
4864 {
4865 nopartial = JUMP(SLJIT_JUMP);
4866 JUMPHERE(partial);
4867 /* TMP2 -= STR_END - STR_PTR */
4868 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4869 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
4870 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4871 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4872 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4873 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4874 JUMPHERE(partial);
4875 check_partial(common, FALSE);
4876 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4877 JUMPHERE(nopartial);
4878 }
4879 }
4880
4881 if (jump != NULL)
4882 {
4883 if (emptyfail)
4884 add_jump(compiler, backtracks, jump);
4885 else
4886 JUMPHERE(jump);
4887 }
4888 return cc + 1 + IMM2_SIZE;
4889 }
4890
/* Emits the matching path for a reference opcode that is immediately followed
   by a repeat opcode (OP_CRSTAR ... OP_CRMINRANGE).

   common - shared compiler state (also provides the sljit compiler).
   cc     - points at the reference opcode; the repeat opcode is read from
            cc[1 + IMM2_SIZE].
   parent - not referenced by name in this body; presumably consumed by the
            PUSH_BACKTRACK macro - TODO confirm.

   Returns the address just past the repeat opcode and its operands.

   In this function max == 0 means "no upper limit": the greedy path then
   loops back unconditionally and the minimizing path emits no limit check. */
4891 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4892 {
4893 DEFINE_COMPILER;
4894 backtrack_common *backtrack;
4895 pcre_uchar type;
4896 struct sljit_label *label;
4897 struct sljit_jump *zerolength;
4898 struct sljit_jump *jump = NULL;
4899 pcre_uchar *ccbegin = cc;
4900 int min = 0, max = 0;
4901 BOOL minimize;
4902
/* NOTE(review): PUSH_BACKTRACK presumably allocates an iterator_backtrack and
   assigns it to 'backtrack' - confirm against the macro definition. */
4903 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4904
/* The repeat opcode follows the reference opcode and its IMM2_SIZE operand. */
4905 type = cc[1 + IMM2_SIZE];
/* The low bit of the repeat opcode selects the minimizing code path below
   (each case label pairs an opcode with its OP_CRMIN* counterpart). */
4906 minimize = (type & 0x1) != 0;
/* Decode min / max and advance cc past the repeat opcode and its operands. */
4907 switch(type)
4908 {
4909 case OP_CRSTAR:
4910 case OP_CRMINSTAR:
4911 min = 0;
4912 max = 0;
4913 cc += 1 + IMM2_SIZE + 1;
4914 break;
4915 case OP_CRPLUS:
4916 case OP_CRMINPLUS:
4917 min = 1;
4918 max = 0;
4919 cc += 1 + IMM2_SIZE + 1;
4920 break;
4921 case OP_CRQUERY:
4922 case OP_CRMINQUERY:
4923 min = 0;
4924 max = 1;
4925 cc += 1 + IMM2_SIZE + 1;
4926 break;
4927 case OP_CRRANGE:
4928 case OP_CRMINRANGE:
/* Two IMM2 operands follow the repeat opcode: min first, then max. */
4929 min = GET2(cc, 1 + IMM2_SIZE + 1);
4930 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4931 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4932 break;
4933 default:
4934 SLJIT_ASSERT_STOP();
4935 break;
4936 }
4937
/* Greedy variant. */
4938 if (!minimize)
4939 {
4940 if (min == 0)
4941 {
/* Zero iterations are acceptable: push STR_PTR and a 0 before the checks.
   compile_ref_checks is called with a NULL backtrack list here. */
4942 allocate_stack(common, 2);
4943 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4944 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
4945 /* Temporary release of STR_PTR. */
4946 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
4947 zerolength = compile_ref_checks(common, ccbegin, NULL);
4948 /* Restore if not zero length. */
4949 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
4950 }
4951 else
4952 {
/* At least one iteration is required: push a single 0 and let failed checks
   go to this item's backtrack list. */
4953 allocate_stack(common, 1);
4954 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4955 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
4956 }
4957
/* An iteration counter (kept in the POSSESSIVE0 local) is only emitted when a
   bound greater than one has to be compared against. */
4958 if (min > 1 || max > 1)
4959 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
4960
/* Head of the iteration loop. */
4961 label = LABEL();
4962 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
4963
4964 if (min > 1 || max > 1)
4965 {
/* Increment the counter and write it back. */
4966 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
4967 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4968 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
/* Below the minimum: iterate again without pushing STR_PTR. */
4969 if (min > 1)
4970 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
4971 if (max > 1)
4972 {
/* Stop looping once max is reached; otherwise push STR_PTR and iterate. */
4973 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
4974 allocate_stack(common, 1);
4975 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4976 JUMPTO(SLJIT_JUMP, label);
4977 JUMPHERE(jump);
4978 }
4979 }
4980
4981 if (max == 0)
4982 {
4983 /* Includes min > 1 case as well. */
/* No upper limit: push STR_PTR and loop back unconditionally. */
4984 allocate_stack(common, 1);
4985 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
4986 JUMPTO(SLJIT_JUMP, label);
4987 }
4988
/* The jump returned by compile_ref_checks lands here, after the loop. */
4989 JUMPHERE(zerolength);
4990 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
4991
4992 decrease_call_count(common);
4993 return cc;
4994 }
4995
/* Minimizing variant. Two stack slots are used: STACK(0) is set to 0 here and
   receives STR_PTR after each iteration; STACK(1) is the iteration counter.
   For OP_CRMINSTAR (min == 0, max == 0) the counter is never read in this
   function, so it is left uninitialized. */
4996 allocate_stack(common, 2);
4997 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
4998 if (type != OP_CRMINSTAR)
4999 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5000
5001 if (min == 0)
5002 {
/* Zero iterations allowed: store STR_PTR and jump over the first iteration. */
5003 zerolength = compile_ref_checks(common, ccbegin, NULL);
5004 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5005 jump = JUMP(SLJIT_JUMP);
5006 }
5007 else
5008 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5009
/* Label recorded as the matching path; the min > 1 loop below also jumps here. */
5010 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
/* Give up (backtrack) when the counter has already reached max. */
5011 if (max > 0)
5012 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5013
5014 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5015 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5016
5017 if (min > 1)
5018 {
/* Count the iteration and repeat until the minimum is reached. */
5019 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5020 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5021 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5022 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5023 }
5024 else if (max > 0)
/* Only the upper limit matters: increment the counter in place. */
5025 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5026
5027 if (jump != NULL)
5028 JUMPHERE(jump);
5029 JUMPHERE(zerolength);
5030
5031 decrease_call_count(common);
5032 return cc;
5033 }
5034
/* Emits the matching path for an opcode that calls a recursion entry.

   The target is identified by start = GET(cc, 1). The list common->entries is
   searched for a recurse_entry with that start; if none exists a new one is
   allocated and appended. The emitted code optionally saves OVECTOR(0) and/or
   the mark pointer on the stack, fast-calls the entry, and backtracks when
   TMP1 is 0 after the call.

   Returns cc + 1 + LINK_SIZE, or NULL when the sljit compiler reports an error
   after the allocation attempt. */
5035 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5036 {
5037 DEFINE_COMPILER;
5038 backtrack_common *backtrack;
5039 recurse_entry *entry = common->entries;
5040 recurse_entry *prev = NULL;
5041 int start = GET(cc, 1);
5042
5043 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
/* Look for an existing entry with the same start; 'prev' tracks the list tail
   so that a new entry can be appended. */
5044 while (entry != NULL)
5045 {
5046 if (entry->start == start)
5047 break;
5048 prev = entry;
5049 entry = entry->next;
5050 }
5051
5052 if (entry == NULL)
5053 {
/* Not seen before: create a fresh entry with no label and no pending calls. */
5054 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5055 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5056 return NULL;
5057 entry->next = NULL;
5058 entry->entry = NULL;
5059 entry->calls = NULL;
5060 entry->start = start;
5061
5062 if (prev != NULL)
5063 prev->next = entry;
5064 else
5065 common->entries = entry;
5066 }
5067
/* Save state before the call: two stack slots when both has_set_som and
   mark_ptr are in use (OVECTOR(0) in STACK(0), mark in STACK(1)), one slot
   when only one of them is, none otherwise. */
5068 if (common->has_set_som && common->mark_ptr != 0)
5069 {
5070 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5071 allocate_stack(common, 2);
5072 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5073 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5074 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5075 }
5076 else if (common->has_set_som || common->mark_ptr != 0)
5077 {
5078 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5079 allocate_stack(common, 1);
5080 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5081 }
5082
/* If the entry has no label yet, queue the call on entry->calls (presumably
   resolved once the entry is generated - TODO confirm); otherwise call the
   known label directly. */
5083 if (entry->entry == NULL)
5084 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5085 else
5086 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5087 /* Leave if the match is failed. */
/* The called code is expected to leave 0 in TMP1 on failure. */
5088 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5089 return cc + 1 + LINK_SIZE;
5090 }
5091
5092 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5093 {
5094 const pcre_uchar *begin = arguments->begin;
5095 int *offset_vector = arguments->offsets;
5096 int offset_count = arguments->offset_count;
5097 int i;
5098
5099 if (PUBL(callout) == NULL)
5100 return 0;
5101
5102 callout_block->version = 2;
5103 callout_block->callout_data = arguments->callout_data;
5104
5105 /* Offsets in subject. */
5106 callout_block->subject_length = arguments->end - arguments->begin;
5107 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5108 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5109 #if defined COMPILE_PCRE8
5110 callout_block->subject = (PCRE_SPTR)begin;
5111 #elif defined COMPILE_PCRE16
5112 callout_block->subject = (PCRE_SPTR16)begin;
5113 #elif defined COMPILE_PCRE32
5114 callout_block->subject = (PCRE_SPTR32)begin;
5115 #endif
5116
5117 /* Convert and copy the JIT offset vector to the offset_vector array. */
5118 callout_block->capture_top = 0;
5119 callout_block->offset_vector = offset_vector;
5120 for (i = 2; i < offset_count; i += 2)
5121 {
5122 offset_vector[i] = jit_ovector[i] - begin;
5123 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5124 if (jit_ovector[i] >= begin)
5125 callout_block->capture_top = i;
5126 }
5127
5128 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5129 if (offset_count > 0)
5130 offset_vector[0] = -1;
5131 if (offset_count > 1)
5132 offset_vector[1] = -1;
5133 return (*PUBL(callout))(callout_block);
5134 }
5135
5136 /* Aligning to 8 byte. */
5137 #define CALLOUT_ARG_SIZE \
5138 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5139
5140 #define CALLOUT_ARG_OFFSET(arg) \
5141 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5142
5143 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5144 {
5145 DEFINE_COMPILER;
5146 backtrack_common *backtrack;
5147
5148 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5149
5150 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5151
5152 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5153 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5154 SLJIT_ASSERT(common->capture_last_ptr != 0);
5155 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5156 OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5157
5158 /* These pointer sized fields temporarly stores internal variables. */
5159 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5160 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5161 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5162
5163 if (common->mark_ptr != 0)
5164 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5165 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5166 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5167 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5168
5169 /* Needed to save important temporary registers. */
5170 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5171 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5172 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5173 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5174 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5175 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5176 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5177
5178 /* Check return value. */
5179 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5180 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5181 if (common->forced_quit_label == NULL)
5182 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5183 else
5184 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5185 return cc + 2 + 2 * LINK_SIZE;
5186 }
5187
5188 #undef CALLOUT_ARG_SIZE
5189 #undef CALLOUT_ARG_OFFSET
5190
/* Emits the code of an assertion group. The opcode (after an optional
   OP_BRAZERO / OP_BRAMINZERO prefix) must lie between OP_ASSERT and
   OP_ASSERTBACK_NOT, as asserted below.

   common      - shared compiler state.
   cc          - points at the assertion opcode or at its BRAZERO/BRAMINZERO
                 prefix.
   backtrack   - assert_backtrack record of this assertion; framesize,
                 private_data_ptr and (for BRAZERO) matchingpath are filled in.
   conditional - when set, failures are collected on backtrack->condfailed
                 instead of backtrack->common.topbacktracks; a BRAZERO /
                 BRAMINZERO prefix is not allowed in that case.

   Every alternative is compiled with its own temporary backtrack_common.
   common->quit, quit_label, accept and accept_label are saved on entry and
   restored before every return.

   Returns the address past the closing opcode (cc + 1 + LINK_SIZE), or NULL
   if the sljit compiler reports an error. */
5191 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5192 {
5193 DEFINE_COMPILER;
5194 int framesize;
5195 int private_data_ptr;
5196 backtrack_common altbacktrack;
5197 pcre_uchar *ccbegin;
5198 pcre_uchar opcode;
5199 pcre_uchar bra = OP_BRA;
5200 jump_list *tmp = NULL;
5201 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5202 jump_list **found;
5203 /* Saving previous accept variables. */
5204 struct sljit_label *save_quit_label = common->quit_label;
5205 struct sljit_label *save_accept_label = common->accept_label;
5206 jump_list *save_quit = common->quit;
5207 jump_list *save_accept = common->accept;
5208 struct sljit_jump *jump;
5209 struct sljit_jump *brajump = NULL;
5210
/* Remember an optional BRAZERO / BRAMINZERO prefix and step over it. */
5211 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5212 {
5213 SLJIT_ASSERT(!conditional);
5214 bra = *cc;
5215 cc++;
5216 }
5217 private_data_ptr = PRIVATE_DATA(cc);
5218 SLJIT_ASSERT(private_data_ptr != 0);
5219 framesize = get_framesize(common, cc, FALSE);
5220 backtrack->framesize = framesize;
5221 backtrack->private_data_ptr = private_data_ptr;
5222 opcode = *cc;
5223 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* Where a matching alternative jumps to: for positive assertions the local
   'tmp' list (resolved at "Assert is successful" below); for negative ones a
   matching alternative means the assertion failed, so it goes to 'target'. */
5224 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5225 ccbegin = cc;
/* Advance cc to the end of the first alternative. */
5226 cc += GET(cc, 1);
5227
5228 if (bra == OP_BRAMINZERO)
5229 {
5230 /* This is a braminzero backtrack path. */
5231 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5232 free_stack(common, 1);
5233 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5234 }
5235
5236 if (framesize < 0)
5237 {
/* No frame is needed: remember STACK_TOP in the private data slot and push
   the current STR_PTR. */
5238 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5239 allocate_stack(common, 1);
5240 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5241 }
5242 else
5243 {
/* A frame is needed: STACK(0) holds STR_PTR, STACK(1) the previous private
   data value, and init_frame fills the remaining framesize slots. */
5244 allocate_stack(common, framesize + 2);
5245 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5246 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5247 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5248 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5249 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5250 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5251 }
5252
/* The assertion gets its own quit list; accept state is reset for every
   alternative inside the loop. */
5253 memset(&altbacktrack, 0, sizeof(backtrack_common));
5254 common->quit_label = NULL;
5255 common->quit = NULL;
/* One iteration per alternative. */
5256 while (1)
5257 {
5258 common->accept_label = NULL;
5259 common->accept = NULL;
5260 altbacktrack.top = NULL;
5261 altbacktrack.topbacktracks = NULL;
5262
/* Alternatives after the first restart from the STR_PTR saved at STACK(0). */
5263 if (*ccbegin == OP_ALT)
5264 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5265
5266 altbacktrack.cc = ccbegin;
5267 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5268 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5269 {
5270 common->quit_label = save_quit_label;
5271 common->accept_label = save_accept_label;
5272 common->quit = save_quit;
5273 common->accept = save_accept;
5274 return NULL;
5275 }
/* The alternative matched: accept jumps collected inside it land here. */
5276 common->accept_label = LABEL();
5277 if (common->accept != NULL)
5278 set_jumps(common->accept, common->accept_label);
5279
5280 /* Reset stack. */
5281 if (framesize < 0)
5282 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5283 else {
5284 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5285 {
5286 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5287 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5288 }
5289 else
5290 {
/* Non-conditional negative assertion: restore STACK_TOP from the private
   data slot and call the shared revertframes routine. */
5291 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5292 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5293 }
5294 }
5295
5296 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5297 {
5298 /* We know that STR_PTR was stored on the top of the stack. */
5299 if (conditional)
5300 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5301 else if (bra == OP_BRAZERO)
5302 {
5303 if (framesize < 0)
5304 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5305 else
5306 {
5307 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5308 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5309 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5310 }
/* Leave a single 0 on the top of the stack. */
5311 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5312 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5313 }
5314 else if (framesize >= 0)
5315 {
5316 /* For OP_BRA and OP_BRAMINZERO. */
5317 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5318 }
5319 }
/* Leave the assertion through the 'found' list chosen above. */
5320 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5321
/* Backtracking code of this alternative; its failures continue below, at the
   next alternative or at the "none of them matched" code. */
5322 compile_backtrackingpath(common, altbacktrack.top);
5323 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5324 {
5325 common->quit_label = save_quit_label;
5326 common->accept_label = save_accept_label;
5327 common->quit = save_quit;
5328 common->accept = save_accept;
5329 return NULL;
5330 }
5331 set_jumps(altbacktrack.topbacktracks, LABEL());
5332
5333 if (*cc != OP_ALT)
5334 break;
5335
/* Move on to the next alternative. */
5336 ccbegin = cc;
5337 cc += GET(cc, 1);
5338 }
5339 /* None of them matched. */
5340 if (common->quit != NULL)
5341 set_jumps(common->quit, LABEL());
5342
5343 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5344 {
5345 /* Assert is failed. */
5346 if (conditional || bra == OP_BRAZERO)
5347 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5348
5349 if (framesize < 0)
5350 {
5351 /* The topmost item should be 0. */
5352 if (bra == OP_BRAZERO)
5353 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5354 else
5355 free_stack(common, 1);
5356 }
5357 else
5358 {
/* Fetch the saved previous private data value before the slots are freed;
   it is written back after the stack adjustment. */
5359 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5360 /* The topmost item should be 0. */
5361 if (bra == OP_BRAZERO)
5362 {
5363 free_stack(common, framesize + 1);
5364 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5365 }
5366 else
5367 free_stack(common, framesize + 2);
5368 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5369 }
/* With BRAZERO a failed assertion is not a failure of the item: the jump is
   bound to the matching path label further below instead of 'target'. */
5370 jump = JUMP(SLJIT_JUMP);
5371 if (bra != OP_BRAZERO)
5372 add_jump(compiler, target, jump);
5373
5374 /* Assert is successful. */
5375 set_jumps(tmp, LABEL());
5376 if (framesize < 0)
5377 {
5378 /* We know that STR_PTR was stored on the top of the stack. */
5379 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5380 /* Keep the STR_PTR on the top of the stack. */
5381 if (bra == OP_BRAZERO)
5382 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5383 else if (bra == OP_BRAMINZERO)
5384 {
5385 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5386 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5387 }
5388 }
5389 else
5390 {
5391 if (bra == OP_BRA)
5392 {
5393 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5394 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5395 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5396 }
5397 else
5398 {
5399 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5400 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5401 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* The top slot becomes STR_PTR for BRAZERO and 0 for BRAMINZERO. */
5402 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5403 }
5404 }
5405
5406 if (bra == OP_BRAZERO)
5407 {
/* Both the successful and the failed assertion continue from here. */
5408 backtrack->matchingpath = LABEL();
5409 SET_LABEL(jump, backtrack->matchingpath);
5410 }
5411 else if (bra == OP_BRAMINZERO)
5412 {
/* NOTE(review): backtrack->matchingpath is not assigned in this function for
   BRAMINZERO; presumably the caller sets it beforehand - confirm. */
5413 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5414 JUMPHERE(brajump);
5415 if (framesize >= 0)
5416 {
5417 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5418 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5419 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5420 }
5421 set_jumps(backtrack->common.topbacktracks, LABEL());
5422 }
5423 }
5424 else
5425 {
5426 /* AssertNot is successful. */
5427 if (framesize < 0)
5428 {
5429 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* With a BRAZERO / BRAMINZERO prefix the slot is kept and set to 0;
   otherwise it is released. */
5430 if (bra != OP_BRA)
5431 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5432 else
5433 free_stack(common, 1);
5434 }
5435 else
5436 {
5437 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5438 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5439 /* The topmost item should be 0. */
5440 if (bra != OP_BRA)
5441 {
5442 free_stack(common, framesize + 1);
5443 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5444 }
5445 else
5446 free_stack(common, framesize + 2);
5447 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5448 }
5449
5450 if (bra == OP_BRAZERO)
5451 backtrack->matchingpath = LABEL();
5452 else if (bra == OP_BRAMINZERO)
5453 {
5454 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5455 JUMPHERE(brajump);
5456 }
5457
5458 if (bra != OP_BRA)
5459 {
/* With a zero prefix a failed negative assertion is not fatal: bind the
   collected jumps here and clear the list. */
5460 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5461 set_jumps(backtrack->common.topbacktracks, LABEL());
5462 backtrack->common.topbacktracks = NULL;
5463 }
5464 }
5465
/* Restore the saved quit / accept state. */
5466 common->quit_label = save_quit_label;
5467 common->accept_label = save_accept_label;
5468 common->quit = save_quit;
5469 common->accept = save_accept;
5470 return cc + 1 + LINK_SIZE;
5471 }
5472
5473 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5474 {
5475 int condition = FALSE;
5476 pcre_uchar *slotA = name_table;
5477 pcre_uchar *slotB;
5478 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5479 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5480 sljit_sw no_capture;
5481 int i;
5482
5483 locals += refno & 0xff;
5484 refno >>= 8;
5485 no_capture = locals[1];
5486
5487 for (i = 0; i < name_count; i++)
5488 {
5489 if (GET2(slotA, 0) == refno) break;
5490 slotA += name_entry_size;
5491 }
5492
5493 if (i < name_count)
5494 {
5495 /* Found a name for the number - there can be only one; duplicate names
5496 for different numbers are allowed, but not vice versa. First scan down
5497 for duplicates. */
5498
5499 slotB = slotA;
5500 while (slotB > name_table)
5501 {
5502 slotB -= name_entry_size;
5503 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5504 {
5505 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5506 if (condition) break;
5507 }
5508 else break;
5509 }
5510
5511 /* Scan up for duplicates */
5512 if (!condition)
5513 {
5514 slotB = slotA;
5515 for (i++; i < name_count; i++)
5516 {
5517 slotB += name_entry_size;
5518 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5519 {
5520 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5521 if (condition) break;
5522 }
5523 else break;
5524 }
5525 }
5526 }
5527 return condition;
5528 }
5529
5530 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5531 {
5532 int condition = FALSE;
5533 pcre_uchar *slotA = name_table;
5534 pcre_uchar *slotB;
5535 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5536 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5537 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5538 sljit_uw i;
5539
5540 for (i = 0; i < name_count; i++)
5541 {
5542 if (GET2(slotA, 0) == recno) break;
5543 slotA += name_entry_size;
5544 }
5545
5546 if (i < name_count)
5547 {
5548 /* Found a name for the number - there can be only one; duplicate
5549 names for different numbers are allowed, but not vice versa. First
5550 scan down for duplicates. */
5551
5552 slotB = slotA;
5553 while (slotB > name_table)
5554 {
5555 slotB -= name_entry_size;
5556 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5557 {
5558 condition = GET2(slotB, 0) == group_num;
5559 if (condition) break;
5560 }
5561 else break;
5562 }
5563
5564 /* Scan up for duplicates */
5565 if (!condition)
5566 {
5567 slotB = slotA;
5568 for (i++; i < name_count; i++)
5569 {
5570 slotB += name_entry_size;
5571 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5572 {
5573 condition = GET2(slotB, 0) == group_num;
5574 if (condition) break;
5575 }
5576 else break;
5577 }
5578 }
5579 }
5580 return condition;
5581 }
5582
5583 /*
5584 Handling bracketed expressions is probably the most complex part.
5585
5586 Stack layout naming characters:
5587 S - Push the current STR_PTR
5588 0 - Push a 0 (NULL)
5589 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5590 before the next alternative. Not pushed if there are no alternatives.
5591 M - Any values pushed by the current alternative. Can be empty, or anything.
5592 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5593 L - Push the previous local (pointed by localptr) to the stack
5594 () - optional values stored on the stack
5595 ()* - optional, can be stored multiple times
5596
5597 The following list shows the regular expression templates, their PCRE byte codes
5598 and stack layout supported by pcre-sljit.
5599
5600 (?:) OP_BRA | OP_KET A M
5601 () OP_CBRA | OP_KET C M
5602 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5603 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5604 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5605 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5606 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5607 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5608 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5609 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5610 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5611 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5612 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5613 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5614 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5615 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5616 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5617 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5618 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5619 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5620 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5621 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5622
5623
5624 Stack layout naming characters:
5625 A - Push the alternative index (starting from 0) on the stack.
5626 Not pushed if there are no alternatives.
5627 M - Any values pushed by the current alternative. Can be empty, or anything.
5628
5629 The next list shows the possible content of a bracket:
5630 (|) OP_*BRA | OP_ALT ... M A
5631 (?()|) OP_*COND | OP_ALT M A
5632 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5633 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5634 Or nothing, if trace is unnecessary
5635 */
5636
5637 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5638 {
5639 DEFINE_COMPILER;
5640 backtrack_common *backtrack;
5641 pcre_uchar opcode;
5642 int private_data_ptr = 0;
5643 int offset = 0;
5644 int stacksize;
5645 pcre_uchar *ccbegin;
5646 pcre_uchar *matchingpath;
5647 pcre_uchar bra = OP_BRA;
5648 pcre_uchar ket;
5649 assert_backtrack *assert;
5650 BOOL has_alternatives;
5651 struct sljit_jump *jump;
5652 struct sljit_jump *skip;
5653 struct sljit_label *rmaxlabel = NULL;
5654 struct sljit_jump *braminzerojump = NULL;
5655
5656 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5657
5658 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5659 {
5660 bra = *cc;
5661 cc++;
5662 opcode = *cc;
5663 }
5664
5665 opcode = *cc;
5666 ccbegin = cc;
5667 matchingpath = ccbegin + 1 + LINK_SIZE;
5668
5669 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5670 {
5671 /* Drop this bracket_backtrack. */
5672 parent->top = backtrack->prev;
5673 return bracketend(cc);
5674 }
5675
5676 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5677 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5678 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5679 cc += GET(cc, 1);
5680
5681 has_alternatives = *cc == OP_ALT;
5682 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5683 {
5684 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5685 if (*matchingpath == OP_NRREF)
5686 {
5687 stacksize = GET2(matchingpath, 1);
5688 if (common->currententry == NULL || stacksize == RREF_ANY)
5689 has_alternatives = FALSE;
5690 else if (common->currententry->start == 0)
5691 has_alternatives = stacksize != 0;
5692 else
5693 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5694 }
5695 }
5696
5697 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5698 opcode = OP_SCOND;
5699 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5700 opcode = OP_ONCE;
5701
5702 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5703 {
5704 /* Capturing brackets has a pre-allocated space. */
5705 offset = GET2(ccbegin, 1 + LINK_SIZE);
5706 if (common->optimized_cbracket[offset] == 0)
5707 {
5708 private_data_ptr = OVECTOR_PRIV(offset);
5709 offset <<= 1;
5710 }
5711 else
5712 {
5713 offset <<= 1;
5714 private_data_ptr = OVECTOR(offset);
5715 }
5716 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5717 matchingpath += IMM2_SIZE;
5718 }
5719 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5720 {
5721 /* Other brackets simply allocate the next entry. */
5722 private_data_ptr = PRIVATE_DATA(ccbegin);
5723 SLJIT_ASSERT(private_data_ptr != 0);
5724 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5725 if (opcode == OP_ONCE)
5726 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5727 }
5728
5729 /* Instructions before the first alternative. */
5730 stacksize = 0;
5731 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5732 stacksize++;
5733 if (bra == OP_BRAZERO)
5734 stacksize++;
5735
5736 if (stacksize > 0)
5737 allocate_stack(common, stacksize);
5738
5739 stacksize = 0;
5740 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5741 {
5742 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5743 stacksize++;
5744 }
5745
5746 if (bra == OP_BRAZERO)
5747 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5748
5749 if (bra == OP_BRAMINZERO)
5750 {
5751 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5752 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5753 if (ket != OP_KETRMIN)
5754 {
5755 free_stack(common, 1);
5756 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5757 }
5758 else
5759 {
5760 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5761 {
5762 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5763 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5764 /* Nothing stored during the first run. */
5765 skip = JUMP(SLJIT_JUMP);
5766 JUMPHERE(jump);
5767 /* Checking zero-length iteration. */
5768 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5769 {
5770 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5771 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5772 }
5773 else
5774 {
5775 /* Except when the whole stack frame must be saved. */
5776 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5777 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
5778 }
5779 JUMPHERE(skip);
5780 }
5781 else
5782 {
5783 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5784 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5785 JUMPHERE(jump);
5786 }
5787 }
5788 }
5789
5790 if (ket == OP_KETRMIN)
5791 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5792
5793 if (ket == OP_KETRMAX)
5794 {
5795 rmaxlabel = LABEL();
5796 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5797 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5798 }
5799
5800 /* Handling capturing brackets and alternatives. */
5801 if (opcode == OP_ONCE)
5802 {
5803 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5804 {
5805 /* Neither capturing brackets nor recursions are not found in the block. */
5806 if (ket == OP_KETRMIN)
5807 {
5808 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5809 allocate_stack(common, 2);
5810 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5811 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5812 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5813 }
5814 else if (ket == OP_KETRMAX || has_alternatives)
5815 {
5816 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5817 allocate_stack(common, 1);
5818 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5819 }
5820 else
5821 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5822 }
5823 else
5824 {
5825 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5826 {
5827 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5828 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5829 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5830 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5831 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5832 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5833 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5834 }
5835 else
5836 {
5837 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5838 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5839 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5840 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5841 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5842 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5843 }
5844 }
5845 }
5846 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5847 {
5848 /* Saving the previous values. */
5849 if (common->optimized_cbracket[offset >> 1] != 0)
5850 {
5851 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5852 allocate_stack(common, 2);
5853 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5854 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
5855 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5856 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5857 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5858 }
5859 else
5860 {
5861 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5862 allocate_stack(common, 1);
5863 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5864 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5865 }
5866 }
5867 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5868 {
5869 /* Saving the previous value. */
5870 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5871 allocate_stack(common, 1);
5872 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5873 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5874 }
5875 else if (has_alternatives)
5876 {
5877 /* Pushing the starting string pointer. */
5878 allocate_stack(common, 1);
5879 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5880 }
5881
5882 /* Generating code for the first alternative. */
5883 if (opcode == OP_COND || opcode == OP_SCOND)
5884 {
5885 if (*matchingpath == OP_CREF)
5886 {
5887 SLJIT_ASSERT(has_alternatives);
5888 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5889 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5890 matchingpath += 1 + IMM2_SIZE;
5891 }
5892 else if (*matchingpath == OP_NCREF)
5893 {
5894 SLJIT_ASSERT(has_alternatives);
5895 stacksize = GET2(matchingpath, 1);
5896 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5897
5898 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5899 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5900 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5901 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
5902 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5903 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
5904 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5905 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5906 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
5907
5908 JUMPHERE(jump);
5909 matchingpath += 1 + IMM2_SIZE;
5910 }
5911 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
5912 {
5913 /* Never has other case. */
5914 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5915
5916 stacksize = GET2(matchingpath, 1);
5917 if (common->currententry == NULL)
5918 stacksize = 0;
5919 else if (stacksize == RREF_ANY)
5920 stacksize = 1;
5921 else if (common->currententry->start == 0)
5922 stacksize = stacksize == 0;
5923 else
5924 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5925
5926 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
5927 {
5928 SLJIT_ASSERT(!has_alternatives);
5929 if (stacksize != 0)
5930 matchingpath += 1 + IMM2_SIZE;
5931 else
5932 {
5933 if (*cc == OP_ALT)
5934 {
5935 matchingpath = cc + 1 + LINK_SIZE;
5936 cc += GET(cc, 1);
5937 }
5938 else
5939 matchingpath = cc;
5940 }
5941 }
5942 else
5943 {
5944 SLJIT_ASSERT(has_alternatives);
5945
5946 stacksize = GET2(matchingpath, 1);
5947 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5948 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5949 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5950 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
5951 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
5952 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5953 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
5954 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
5955 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5956 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
5957 matchingpath += 1 + IMM2_SIZE;
5958 }
5959 }
5960 else
5961 {
5962 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
5963 /* Similar code as PUSH_BACKTRACK macro. */
5964 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
5965 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5966 return NULL;
5967 memset(assert, 0, sizeof(assert_backtrack));
5968 assert->common.cc = matchingpath;
5969 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
5970 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
5971 }
5972 }
5973
5974 compile_matchingpath(common, matchingpath, cc, backtrack);
5975 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5976 return NULL;
5977
5978 if (opcode == OP_ONCE)
5979 {
5980 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5981 {
5982 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5983 /* TMP2 which is set here used by OP_KETRMAX below. */
5984 if (ket == OP_KETRMAX)
5985 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
5986 else if (ket == OP_KETRMIN)
5987 {
5988 /* Move the STR_PTR to the private_data_ptr. */
5989 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
5990 }
5991 }
5992 else
5993 {
5994 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
5995 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw));
5996 if (ket == OP_KETRMAX)
5997 {
5998 /* TMP2 which is set here used by OP_KETRMAX below. */
5999 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6000 }
6001 }
6002 }
6003
6004 stacksize = 0;
6005 if (ket != OP_KET || bra != OP_BRA)
6006 stacksize++;
6007 if (offset != 0)
6008 {
6009 if (common->capture_last_ptr != 0)
6010 stacksize++;
6011 if (common->optimized_cbracket[offset >> 1] == 0)
6012 stacksize += 2;
6013 }
6014 if (has_alternatives && opcode != OP_ONCE)
6015 stacksize++;
6016
6017 if (stacksize > 0)
6018 allocate_stack(common, stacksize);
6019
6020 stacksize = 0;
6021 if (ket != OP_KET || bra != OP_BRA)
6022 {
6023 if (ket != OP_KET)
6024 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6025 else
6026 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6027 stacksize++;
6028 }
6029
6030 if (offset != 0)
6031 {
6032 if (common->capture_last_ptr != 0)
6033 {
6034 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6035 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6036 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0);
6037 stacksize++;
6038 }
6039 if (common->optimized_cbracket[offset >> 1] == 0)
6040 {
6041 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6042 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6043 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6044 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6045 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6047 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6048 stacksize += 2;
6049 }
6050 }
6051
6052 if (has_alternatives)
6053 {
6054 if (opcode != OP_ONCE)
6055 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6056 if (ket != OP_KETRMAX)
6057 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6058 }
6059
6060 /* Must be after the matchingpath label. */
6061 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6062 {
6063 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6064 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6065 }
6066
6067 if (ket == OP_KETRMAX)
6068 {
6069 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6070 {
6071 if (has_alternatives)
6072 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6073 /* Checking zero-length iteration. */
6074 if (opcode != OP_ONCE)
6075 {
6076 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6077 /* Drop STR_PTR for greedy plus quantifier. */
6078 if (bra != OP_BRAZERO)
6079 free_stack(common, 1);
6080 }
6081 else
6082 /* TMP2 must contain the starting STR_PTR. */
6083 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6084 }
6085 else
6086 JUMPTO(SLJIT_JUMP, rmaxlabel);
6087 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6088 }
6089
6090 if (bra == OP_BRAZERO)
6091 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6092
6093 if (bra == OP_BRAMINZERO)
6094 {
6095 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6096 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6097 if (braminzerojump != NULL)
6098 {
6099 JUMPHERE(braminzerojump);
6100 /* We need to release the end pointer to perform the
6101 backtrack for the zero-length iteration. When
6102 framesize is < 0, OP_ONCE will do the release itself. */
6103 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6104 {
6105 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6106 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6107 }
6108 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6109 free_stack(common, 1);
6110 }
6111 /* Continue to the normal backtrack. */
6112 }
6113
6114 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6115 decrease_call_count(common);
6116
6117 /* Skip the other alternatives. */
6118 while (*cc == OP_ALT)
6119 cc += GET(cc, 1);
6120 cc += 1 + LINK_SIZE;
6121 return cc;
6122 }
6123
6124 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6125 {
6126 DEFINE_COMPILER;
6127 backtrack_common *backtrack;
6128 pcre_uchar opcode;
6129 int private_data_ptr;
6130 int cbraprivptr = 0;
6131 int framesize;
6132 int stacksize;
6133 int offset = 0;
6134 BOOL zero = FALSE;
6135 pcre_uchar *ccbegin = NULL;
6136 int stack;
6137 struct sljit_label *loop = NULL;
6138 struct jump_list *emptymatch = NULL;
6139
6140 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
6141 if (*cc == OP_BRAPOSZERO)
6142 {
6143 zero = TRUE;
6144 cc++;
6145 }
6146
6147 opcode = *cc;
6148 private_data_ptr = PRIVATE_DATA(cc);
6149 SLJIT_ASSERT(private_data_ptr != 0);
6150 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
6151 switch(opcode)
6152 {
6153 case OP_BRAPOS:
6154 case OP_SBRAPOS:
6155 ccbegin = cc + 1 + LINK_SIZE;
6156 break;
6157
6158 case OP_CBRAPOS:
6159 case OP_SCBRAPOS:
6160 offset = GET2(cc, 1 + LINK_SIZE);
6161 /* This case cannot be optimized in the same was as
6162 normal capturing brackets. */
6163 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6164 cbraprivptr = OVECTOR_PRIV(offset);
6165 offset <<= 1;
6166 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6167 break;
6168
6169 default:
6170 SLJIT_ASSERT_STOP();
6171 break;
6172 }
6173
6174 framesize = get_framesize(common, cc, FALSE);
6175 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6176 if (framesize < 0)
6177 {
6178 if (offset != 0)
6179 {
6180 stacksize = 2;
6181 if (common->capture_last_ptr != 0)
6182 stacksize++;
6183 }
6184 else
6185 stacksize = 1;
6186
6187 if (!zero)
6188 stacksize++;
6189
6190 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6191 allocate_stack(common, stacksize);
6192 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6193
6194 if (offset != 0)
6195 {
6196 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6197 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6198 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6199 if (common->capture_last_ptr != 0)
6200 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6201 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6202 if (common->capture_last_ptr != 0)
6203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6204 }
6205 else
6206 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6207
6208 if (!zero)
6209 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6210 }
6211 else
6212 {
6213 stacksize = framesize + 1;
6214 if (!zero)
6215 stacksize++;
6216 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6217 stacksize++;
6218 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6219
6220 allocate_stack(common, stacksize);
6221 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6222 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6223 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6224
6225 stack = 0;
6226 if (!zero)
6227 {
6228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6229 stack++;
6230 }
6231 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6232 {
6233 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6234 stack++;
6235 }
6236 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6237 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6238 }
6239
6240 if (offset != 0)
6241 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6242
6243 loop = LABEL();
6244 while (*cc != OP_KETRPOS)
6245 {
6246 backtrack->top = NULL;
6247 backtrack->topbacktracks = NULL;
6248 cc += GET(cc, 1);
6249
6250 compile_matchingpath(common, ccbegin, cc, backtrack);
6251 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6252 return NULL;
6253
6254 if (framesize < 0)
6255 {
6256 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6257
6258 if (offset != 0)
6259 {
6260 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6261 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6262 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6263 if (common->capture_last_ptr != 0)
6264 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6265 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6266 }
6267 else
6268 {
6269 if (opcode == OP_SBRAPOS)
6270 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6271 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6272 }
6273
6274 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6275 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6276
6277 if (!zero)
6278 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6279 }
6280 else
6281 {
6282 if (offset != 0)
6283 {
6284 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6285 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6286 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6287 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6288 if (common->capture_last_ptr != 0)
6289 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6290 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6291 }
6292 else
6293 {
6294 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6295 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6296 if (opcode == OP_SBRAPOS)
6297 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6298 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6299 }
6300
6301 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6302 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6303
6304 if (!zero)
6305 {
6306 if (framesize < 0)
6307 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6308 else
6309 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6310 }
6311 }
6312 JUMPTO(SLJIT_JUMP, loop);
6313 flush_stubs(common);
6314
6315 compile_backtrackingpath(common, backtrack->top);
6316 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6317 return NULL;
6318 set_jumps(backtrack->topbacktracks, LABEL());
6319
6320 if (framesize < 0)
6321 {
6322 if (offset != 0)
6323 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6324 else
6325 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6326 }
6327 else
6328 {
6329 if (offset != 0)
6330 {
6331 /* Last alternative. */
6332 if (*cc == OP_KETRPOS)
6333 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6334 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6335 }
6336 else
6337 {
6338 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6339 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6340 }
6341 }
6342
6343 if (*cc == OP_KETRPOS)
6344 break;
6345 ccbegin = cc + 1 + LINK_SIZE;
6346 }
6347
6348 backtrack->topbacktracks = NULL;
6349 if (!zero)
6350 {
6351 if (framesize < 0)
6352 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6353 else /* TMP2 is set to [private_data_ptr] above. */
6354 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
6355 }
6356
6357 /* None of them matched. */
6358 set_jumps(emptymatch, LABEL());
6359 decrease_call_count(common);
6360 return cc + 1 + LINK_SIZE;
6361 }
6362
6363 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6364 {
6365 int class_len;
6366
6367 *opcode = *cc;
6368 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6369 {
6370 cc++;
6371 *type = OP_CHAR;
6372 }
6373 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6374 {
6375 cc++;
6376 *type = OP_CHARI;
6377 *opcode -= OP_STARI - OP_STAR;
6378 }
6379 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6380 {
6381 cc++;
6382 *type = OP_NOT;
6383 *opcode -= OP_NOTSTAR - OP_STAR;
6384 }
6385 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6386 {
6387 cc++;
6388 *type = OP_NOTI;
6389 *opcode -= OP_NOTSTARI - OP_STAR;
6390 }
6391 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6392 {
6393 cc++;
6394 *opcode -= OP_TYPESTAR - OP_STAR;
6395 *type = 0;
6396 }
6397 else
6398 {
6399 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6400 *type = *opcode;
6401 cc++;
6402 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6403 *opcode = cc[class_len - 1];
6404 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6405 {
6406 *opcode -= OP_CRSTAR - OP_STAR;
6407 if (end != NULL)
6408 *end = cc + class_len;
6409 }
6410 else
6411 {
6412 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6413 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6414 *arg2 = GET2(cc, class_len);
6415
6416 if (*arg2 == 0)
6417 {
6418 SLJIT_ASSERT(*arg1 != 0);
6419 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6420 }
6421 if (*arg1 == *arg2)
6422 *opcode = OP_EXACT;
6423
6424 if (end != NULL)
6425 *end = cc + class_len + 2 * IMM2_SIZE;
6426 }
6427 return cc;
6428 }
6429
6430 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6431 {
6432 *arg1 = GET2(cc, 0);
6433 cc += IMM2_SIZE;
6434 }
6435
6436 if (*type == 0)
6437 {
6438 *type = *cc;
6439 if (end != NULL)
6440 *end = next_opcode(common, cc);
6441 cc++;
6442 return cc;
6443 }
6444
6445 if (end != NULL)
6446 {
6447 *end = cc + 1;
6448 #ifdef SUPPORT_UTF
6449 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6450 #endif
6451 }
6452 return cc;
6453 }
6454
6455 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6456 {
6457 DEFINE_COMPILER;
6458 backtrack_common *backtrack;
6459 pcre_uchar opcode;
6460 pcre_uchar type;
6461 int arg1 = -1, arg2 = -1;
6462 pcre_uchar* end;
6463 jump_list *nomatch = NULL;
6464 struct sljit_jump *jump = NULL;
6465 struct sljit_label *label;
6466 int private_data_ptr = PRIVATE_DATA(cc);
6467 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6468 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6469 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
6470 int tmp_base, tmp_offset;
6471
6472 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6473
6474 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6475
6476 switch(type)
6477 {
6478 case OP_NOT_DIGIT:
6479 case OP_DIGIT:
6480 case OP_NOT_WHITESPACE:
6481 case OP_WHITESPACE:
6482 case OP_NOT_WORDCHAR:
6483 case OP_WORDCHAR:
6484 case OP_ANY:
6485 case OP_ALLANY:
6486 case OP_ANYBYTE:
6487 case OP_ANYNL:
6488 case OP_NOT_HSPACE:
6489 case OP_HSPACE:
6490 case OP_NOT_VSPACE:
6491 case OP_VSPACE:
6492 case OP_CHAR:
6493 case OP_CHARI:
6494 case OP_NOT:
6495 case OP_NOTI:
6496 case OP_CLASS:
6497 case OP_NCLASS:
6498 tmp_base = TMP3;
6499 tmp_offset = 0;
6500 break;
6501
6502 default:
6503 SLJIT_ASSERT_STOP();
6504 /* Fall through. */
6505
6506 case OP_EXTUNI:
6507 case OP_XCLASS:
6508 case OP_NOTPROP:
6509 case OP_PROP:
6510 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6511 tmp_offset = POSSESSIVE0;
6512 break;
6513 }
6514
6515 switch(opcode)
6516 {
6517 case OP_STAR:
6518 case OP_PLUS:
6519 case OP_UPTO:
6520 case OP_CRRANGE:
6521 if (type == OP_ANYNL || type == OP_EXTUNI)
6522 {
6523 SLJIT_ASSERT(private_data_ptr == 0);
6524 if (opcode == OP_STAR || opcode == OP_UPTO)
6525 {
6526 allocate_stack(common, 2);
6527 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6528 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6529 }
6530 else
6531 {
6532 allocate_stack(common, 1);
6533 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6534 }
6535
6536 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6537 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6538
6539 label = LABEL();
6540 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6541 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6542 {
6543 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6544 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6545 if (opcode == OP_CRRANGE && arg2 > 0)
6546 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
6547 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6548 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6549 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6550 }
6551
6552 /* We cannot use TMP3 because of this allocate_stack. */
6553 allocate_stack(common, 1);
6554 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6555 JUMPTO(SLJIT_JUMP, label);
6556 if (jump != NULL)
6557 JUMPHERE(jump);
6558 }
6559 else
6560 {
6561 if (opcode == OP_PLUS)
6562 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6563 if (private_data_ptr == 0)
6564 allocate_stack(common, 2);
6565 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6566 if (opcode <= OP_PLUS)
6567 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6568 else
6569 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6570 label = LABEL();
6571 compile_char1_matchingpath(common, type, cc, &nomatch);
6572 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6573 if (opcode <= OP_PLUS)
6574 JUMPTO(SLJIT_JUMP, label);
6575 else if (opcode == OP_CRRANGE && arg1 == 0)
6576 {
6577 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6578 JUMPTO(SLJIT_JUMP, label);
6579 }
6580 else
6581 {
6582 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6583 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6584 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6585 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6586 }
6587 set_jumps(nomatch, LABEL());
6588 if (opcode == OP_CRRANGE)
6589 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
6590 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6591 }
6592 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6593 break;
6594
6595 case OP_MINSTAR:
6596 case OP_MINPLUS:
6597 if (opcode == OP_MINPLUS)
6598 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6599 if (private_data_ptr == 0)
6600 allocate_stack(common, 1);
6601 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6602 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6603 break;
6604
6605 case OP_MINUPTO:
6606 case OP_CRMINRANGE:
6607 if (private_data_ptr == 0)
6608 allocate_stack(common, 2);
6609 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6610 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6611 if (opcode == OP_CRMINRANGE)
6612 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6613 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6614 break;
6615
6616 case OP_QUERY:
6617 case OP_MINQUERY:
6618 if (private_data_ptr == 0)
6619 allocate_stack(common, 1);
6620 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6621 if (opcode == OP_QUERY)
6622 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6623 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6624 break;
6625
6626 case OP_EXACT:
6627 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6628 label = LABEL();
6629 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6630 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6631 JUMPTO(SLJIT_C_NOT_ZERO, label);
6632 break;
6633
6634 case OP_POSSTAR:
6635 case OP_POSPLUS:
6636 case OP_POSUPTO:
6637 if (opcode == OP_POSPLUS)
6638 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6639 if (opcode == OP_POSUPTO)
6640 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6641 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6642 label = LABEL();
6643 compile_char1_matchingpath(common, type, cc, &nomatch);
6644 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6645 if (opcode != OP_POSUPTO)
6646 JUMPTO(SLJIT_JUMP, label);
6647 else
6648 {
6649 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6650 JUMPTO(SLJIT_C_NOT_ZERO, label);
6651 }
6652 set_jumps(nomatch, LABEL());
6653 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6654 break;
6655
6656 case OP_POSQUERY:
6657 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6658 compile_char1_matchingpath(common, type, cc, &nomatch);
6659 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6660 set_jumps(nomatch, LABEL());
6661 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6662 break;
6663
6664 default:
6665 SLJIT_ASSERT_STOP();
6666 break;
6667 }
6668
6669 decrease_call_count(common);
6670 return end;
6671 }
6672
6673 static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6674 {
6675 DEFINE_COMPILER;
6676 backtrack_common *backtrack;
6677
6678 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
6679
6680 if (*cc == OP_FAIL)
6681 {
6682 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6683 return cc + 1;
6684 }
6685
6686 if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL)
6687 {
6688 /* No need to check notempty conditions. */
6689 if (common->accept_label == NULL)
6690 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
6691 else
6692 JUMPTO(SLJIT_JUMP, common->accept_label);
6693 return cc + 1;
6694 }
6695
6696 if (common->accept_label == NULL)
6697 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0)));
6698 else
6699 CMPTO(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0), common->accept_label);
6700 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6701 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
6702 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6703 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
6704 if (common->accept_label == NULL)
6705 add_jump(compiler, &common->accept, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0));
6706 else
6707 CMPTO(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
6708 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6709 if (common->accept_label == NULL)
6710 add_jump(compiler, &common->accept, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
6711 else
6712 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
6713 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6714 return cc + 1;
6715 }
6716
6717 static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
6718 {
6719 DEFINE_COMPILER;
6720 int offset = GET2(cc, 1);
6721 BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
6722
6723 /* Data will be discarded anyway... */
6724 if (common->currententry != NULL)
6725 return cc + 1 + IMM2_SIZE;
6726
6727 if (!optimized_cbracket)
6728 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR_PRIV(offset));
6729 offset <<= 1;
6730 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6731 if (!optimized_cbracket)
6732 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6733 return cc + 1 + IMM2_SIZE;
6734 }
6735
6736 static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
6737 {
6738 DEFINE_COMPILER;
6739 backtrack_common *backtrack;
6740
6741 while (cc < ccend)
6742 {
6743 switch(*cc)
6744 {
6745 case OP_SOD:
6746 case OP_SOM:
6747 case OP_NOT_WORD_BOUNDARY: