/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_compile.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1259 - (show annotations)
Mon Feb 25 08:25:19 2013 UTC (6 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 277772 byte(s)
Minor optimization of soft-partial matching in JIT.
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
           Copyright (c) 1997-2012 University of Cambridge

  The machine code generator part (this module) was written by Zoltan Herczeg
                      Copyright (c) 2010-2012

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include "pcre_internal.h"
48
49 #if defined SUPPORT_JIT
50
51 /* All-in-one: Since we use the JIT compiler only from here,
52 we just include it. This way we don't need to touch the build
53 system files. */
54
55 #define SLJIT_MALLOC(size) (PUBL(malloc))(size)
56 #define SLJIT_FREE(ptr) (PUBL(free))(ptr)
57 #define SLJIT_CONFIG_AUTO 1
58 #define SLJIT_CONFIG_STATIC 1
59 #define SLJIT_VERBOSE 0
60 #define SLJIT_DEBUG 0
61
62 #include "sljit/sljitLir.c"
63
64 #if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
65 #error Unsupported architecture
66 #endif
67
68 /* Defines for debugging purposes. */
69
70 /* 1 - Use unoptimized capturing brackets.
71 2 - Enable capture_last_ptr (includes option 1). */
72 /* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
73
74 /* Allocate memory for the regex stack on the real machine stack.
75 Fast, but limited size. */
76 #define MACHINE_STACK_SIZE 32768
77
78 /* Growth rate for stack allocated by the OS. Should be the multiply
79 of page size. */
80 #define STACK_GROWTH_RATE 8192
81
82 /* Enable to check that the allocation could destroy temporaries. */
83 #if defined SLJIT_DEBUG && SLJIT_DEBUG
84 #define DESTROY_REGISTERS 1
85 #endif
86
/*
Short summary of the backtracking mechanism employed by the JIT code generator:

The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).

  'ab' - 'a' and 'b' regexps are concatenated
  'a+' - 'a' is the sub-expression of the '+' operator

The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.

 Greedy star operator (*) :
   Matching path: match happens.
   Backtrack path: match failed.
 Non-greedy star operator (*?) :
   Matching path: no need to perform a match.
   Backtrack path: match is required.

The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:

    A(B|C)D

The generated code will be the following:

 A matching path
 '(' matching path (pushing arguments to the stack)
 B matching path
 ')' matching path (pushing arguments to the stack)
 D matching path
 return with successful match

 D backtrack path
 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
 B backtrack path
 C expected path
 jump to D matching path
 C backtrack path
 A backtrack path

 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
*/
142
/*
Saved stack frames:

Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.

The stack frames are stored in a chained list, and have the following format:
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]

Thus we can restore the private data to a particular point in the stack.
*/
156
157 typedef struct jit_arguments {
158 /* Pointers first. */
159 struct sljit_stack *stack;
160 const pcre_uchar *str;
161 const pcre_uchar *begin;
162 const pcre_uchar *end;
163 int *offsets;
164 pcre_uchar *uchar_ptr;
165 pcre_uchar *mark_ptr;
166 void *callout_data;
167 /* Everything else after. */
168 int real_offset_count;
169 int offset_count;
170 int call_limit;
171 pcre_uint8 notbol;
172 pcre_uint8 noteol;
173 pcre_uint8 notempty;
174 pcre_uint8 notempty_atstart;
175 } jit_arguments;
176
177 typedef struct executable_functions {
178 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
179 PUBL(jit_callback) callback;
180 void *userdata;
181 pcre_uint32 top_bracket;
182 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
183 } executable_functions;
184
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
189
/* Singly linked list node pairing a jump (start) with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
195
196 enum frame_types { no_frame = -1, no_stack = -2 };
197
198 typedef int (SLJIT_CALL *jit_function)(jit_arguments *args);
199
200 /* The following structure is the key data type for the recursive
201 code generator. It is allocated by compile_matchingpath, and contains
202 the aguments for compile_backtrackingpath. Must be the first member
203 of its descendants. */
204 typedef struct backtrack_common {
205 /* Concatenation stack. */
206 struct backtrack_common *prev;
207 jump_list *nextbacktracks;
208 /* Internal stack (for component operators). */
209 struct backtrack_common *top;
210 jump_list *topbacktracks;
211 /* Opcode pointer. */
212 pcre_uchar *cc;
213 } backtrack_common;
214
215 typedef struct assert_backtrack {
216 backtrack_common common;
217 jump_list *condfailed;
218 /* Less than 0 (-1) if a frame is not needed. */
219 int framesize;
220 /* Points to our private memory word on the stack. */
221 int private_data_ptr;
222 /* For iterators. */
223 struct sljit_label *matchingpath;
224 } assert_backtrack;
225
226 typedef struct bracket_backtrack {
227 backtrack_common common;
228 /* Where to coninue if an alternative is successfully matched. */
229 struct sljit_label *alternative_matchingpath;
230 /* For rmin and rmax iterators. */
231 struct sljit_label *recursive_matchingpath;
232 /* For greedy ? operator. */
233 struct sljit_label *zero_matchingpath;
234 /* Contains the branches of a failed condition. */
235 union {
236 /* Both for OP_COND, OP_SCOND. */
237 jump_list *condfailed;
238 assert_backtrack *assert;
239 /* For OP_ONCE. -1 if not needed. */
240 int framesize;
241 } u;
242 /* Points to our private memory word on the stack. */
243 int private_data_ptr;
244 } bracket_backtrack;
245
246 typedef struct bracketpos_backtrack {
247 backtrack_common common;
248 /* Points to our private memory word on the stack. */
249 int private_data_ptr;
250 /* Reverting stack is needed. */
251 int framesize;
252 /* Allocated stack size. */
253 int stacksize;
254 } bracketpos_backtrack;
255
256 typedef struct braminzero_backtrack {
257 backtrack_common common;
258 struct sljit_label *matchingpath;
259 } braminzero_backtrack;
260
261 typedef struct iterator_backtrack {
262 backtrack_common common;
263 /* Next iteration. */
264 struct sljit_label *matchingpath;
265 } iterator_backtrack;
266
267 typedef struct recurse_entry {
268 struct recurse_entry *next;
269 /* Contains the function entry. */
270 struct sljit_label *entry;
271 /* Collects the calls until the function is not created. */
272 jump_list *calls;
273 /* Points to the starting opcode. */
274 int start;
275 } recurse_entry;
276
277 typedef struct recurse_backtrack {
278 backtrack_common common;
279 BOOL inlined_pattern;
280 } recurse_backtrack;
281
282 #define MAX_RANGE_SIZE 6
283
284 typedef struct compiler_common {
285 struct sljit_compiler *compiler;
286 pcre_uchar *start;
287
288 /* Maps private data offset to each opcode. */
289 int *private_data_ptrs;
290 /* Tells whether the capturing bracket is optimized. */
291 pcre_uint8 *optimized_cbracket;
292 /* Starting offset of private data for capturing brackets. */
293 int cbraptr;
294 /* OVector starting point. Must be divisible by 2. */
295 int ovector_start;
296 /* Last known position of the requested byte. */
297 int req_char_ptr;
298 /* Head of the last recursion. */
299 int recursive_head_ptr;
300 /* First inspected character for partial matching. */
301 int start_used_ptr;
302 /* Starting pointer for partial soft matches. */
303 int hit_start;
304 /* End pointer of the first line. */
305 int first_line_end;
306 /* Points to the marked string. */
307 int mark_ptr;
308 /* Points to the last matched capture block index. */
309 int capture_last_ptr;
310
311 /* Flipped and lower case tables. */
312 const pcre_uint8 *fcc;
313 sljit_sw lcc;
314 /* Mode can be PCRE_STUDY_JIT_COMPILE and others. */
315 int mode;
316 /* Newline control. */
317 int nltype;
318 int newline;
319 int bsr_nltype;
320 /* Dollar endonly. */
321 int endonly;
322 BOOL has_set_som;
323 /* Tables. */
324 sljit_sw ctypes;
325 int digits[2 + MAX_RANGE_SIZE];
326 /* Named capturing brackets. */
327 sljit_uw name_table;
328 sljit_sw name_count;
329 sljit_sw name_entry_size;
330
331 /* Labels and jump lists. */
332 struct sljit_label *partialmatchlabel;
333 struct sljit_label *quit_label;
334 struct sljit_label *forced_quit_label;
335 struct sljit_label *accept_label;
336 stub_list *stubs;
337 recurse_entry *entries;
338 recurse_entry *currententry;
339 jump_list *partialmatch;
340 jump_list *quit;
341 jump_list *forced_quit;
342 jump_list *accept;
343 jump_list *calllimit;
344 jump_list *stackalloc;
345 jump_list *revertframes;
346 jump_list *wordboundary;
347 jump_list *anynewline;
348 jump_list *hspace;
349 jump_list *vspace;
350 jump_list *casefulcmp;
351 jump_list *caselesscmp;
352 BOOL jscript_compat;
353 #ifdef SUPPORT_UTF
354 BOOL utf;
355 #ifdef SUPPORT_UCP
356 BOOL use_ucp;
357 #endif
358 #ifndef COMPILE_PCRE32
359 jump_list *utfreadchar;
360 #endif
361 #ifdef COMPILE_PCRE8
362 jump_list *utfreadtype8;
363 #endif
364 #endif /* SUPPORT_UTF */
365 #ifdef SUPPORT_UCP
366 jump_list *getucd;
367 #endif
368 } compiler_common;
369
/* For byte_sequence_compare. When SLJIT_UNALIGNED is set the context
also carries two unions (c and oc) that overlay an int, a short and an
array of pattern code units of the current PCRE width. */

typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } c;
  union {
    sljit_si asint;
    sljit_uh asushort;
#if defined COMPILE_PCRE8
    sljit_ub asbyte;
    sljit_ub asuchars[4];
#elif defined COMPILE_PCRE16
    sljit_uh asuchars[2];
#elif defined COMPILE_PCRE32
    sljit_ui asuchars[1];
#endif
  } oc;
#endif
} compare_context;
403
/* Undefine sljit macros (CMP is redefined among the shortcuts below). */
#undef CMP

/* Used for accessing the elements of the stack: element i lives at a
negative byte offset of (i + 1) machine words. */
#define STACK(i)      ((-(i) - 1) * (int)sizeof(sljit_sw))

/* Register aliases used by the code generator. */
#define TMP1          SLJIT_SCRATCH_REG1
#define TMP2          SLJIT_SCRATCH_REG3
#define TMP3          SLJIT_TEMPORARY_EREG2
#define STR_PTR       SLJIT_SAVED_REG1
#define STR_END       SLJIT_SAVED_REG2
#define STACK_TOP     SLJIT_SCRATCH_REG2
#define STACK_LIMIT   SLJIT_SAVED_REG3
#define ARGUMENTS     SLJIT_SAVED_EREG1
#define CALL_COUNT    SLJIT_SAVED_EREG2
#define RETURN_ADDR   SLJIT_TEMPORARY_EREG1
420
421 /* Local space layout. */
422 /* These two locals can be used by the current opcode. */
423 #define LOCALS0 (0 * sizeof(sljit_sw))
424 #define LOCALS1 (1 * sizeof(sljit_sw))
425 /* Two local variables for possessive quantifiers (char1 cannot use them). */
426 #define POSSESSIVE0 (2 * sizeof(sljit_sw))
427 #define POSSESSIVE1 (3 * sizeof(sljit_sw))
428 /* Max limit of recursions. */
429 #define CALL_LIMIT (4 * sizeof(sljit_sw))
430 /* The output vector is stored on the stack, and contains pointers
431 to characters. The vector data is divided into two groups: the first
432 group contains the start / end character pointers, and the second is
433 the start pointers when the end of the capturing group has not yet reached. */
434 #define OVECTOR_START (common->ovector_start)
435 #define OVECTOR(i) (OVECTOR_START + (i) * sizeof(sljit_sw))
436 #define OVECTOR_PRIV(i) (common->cbraptr + (i) * sizeof(sljit_sw))
437 #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
438
439 #if defined COMPILE_PCRE8
440 #define MOV_UCHAR SLJIT_MOV_UB
441 #define MOVU_UCHAR SLJIT_MOVU_UB
442 #elif defined COMPILE_PCRE16
443 #define MOV_UCHAR SLJIT_MOV_UH
444 #define MOVU_UCHAR SLJIT_MOVU_UH
445 #elif defined COMPILE_PCRE32
446 #define MOV_UCHAR SLJIT_MOV_UI
447 #define MOVU_UCHAR SLJIT_MOVU_UI
448 #else
449 #error Unsupported compiling mode
450 #endif
451
/* Shortcuts. Every macro except DEFINE_COMPILER expects a local
variable named "compiler" in scope; DEFINE_COMPILER declares it from
common->compiler. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler

/* Emit a one operand / two operand instruction. */
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))

/* Emit a label or a jump. */
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))

/* Emit a jump bound to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))

/* Bind an existing jump to a freshly emitted label / to a given label. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))

/* Emit a compare; the TO variant also binds it to a label. */
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))

#define OP_FLAGS(op, dst, dstw, src, srcw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (src), (srcw), (type))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
477
478 static pcre_uchar* bracketend(pcre_uchar* cc)
479 {
480 SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
481 do cc += GET(cc, 1); while (*cc == OP_ALT);
482 SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
483 cc += 1 + LINK_SIZE;
484 return cc;
485 }
486
/* Functions which might need modification for each newly supported opcode:
  next_opcode
  get_private_data_length
  set_private_data_ptrs
  get_framesize
  init_frame
  get_private_data_length_for_copy
  copy_private_data
  compile_matchingpath
  compile_backtrackingpath
*/
498
499 static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
500 {
501 SLJIT_UNUSED_ARG(common);
502 switch(*cc)
503 {
504 case OP_SOD:
505 case OP_SOM:
506 case OP_SET_SOM:
507 case OP_NOT_WORD_BOUNDARY:
508 case OP_WORD_BOUNDARY:
509 case OP_NOT_DIGIT:
510 case OP_DIGIT:
511 case OP_NOT_WHITESPACE:
512 case OP_WHITESPACE:
513 case OP_NOT_WORDCHAR:
514 case OP_WORDCHAR:
515 case OP_ANY:
516 case OP_ALLANY:
517 case OP_NOTPROP:
518 case OP_PROP:
519 case OP_ANYNL:
520 case OP_NOT_HSPACE:
521 case OP_HSPACE:
522 case OP_NOT_VSPACE:
523 case OP_VSPACE:
524 case OP_EXTUNI:
525 case OP_EODN:
526 case OP_EOD:
527 case OP_CIRC:
528 case OP_CIRCM:
529 case OP_DOLL:
530 case OP_DOLLM:
531 case OP_CRSTAR:
532 case OP_CRMINSTAR:
533 case OP_CRPLUS:
534 case OP_CRMINPLUS:
535 case OP_CRQUERY:
536 case OP_CRMINQUERY:
537 case OP_CRRANGE:
538 case OP_CRMINRANGE:
539 case OP_CLASS:
540 case OP_NCLASS:
541 case OP_REF:
542 case OP_REFI:
543 case OP_RECURSE:
544 case OP_CALLOUT:
545 case OP_ALT:
546 case OP_KET:
547 case OP_KETRMAX:
548 case OP_KETRMIN:
549 case OP_KETRPOS:
550 case OP_REVERSE:
551 case OP_ASSERT:
552 case OP_ASSERT_NOT:
553 case OP_ASSERTBACK:
554 case OP_ASSERTBACK_NOT:
555 case OP_ONCE:
556 case OP_ONCE_NC:
557 case OP_BRA:
558 case OP_BRAPOS:
559 case OP_CBRA:
560 case OP_CBRAPOS:
561 case OP_COND:
562 case OP_SBRA:
563 case OP_SBRAPOS:
564 case OP_SCBRA:
565 case OP_SCBRAPOS:
566 case OP_SCOND:
567 case OP_CREF:
568 case OP_NCREF:
569 case OP_RREF:
570 case OP_NRREF:
571 case OP_DEF:
572 case OP_BRAZERO:
573 case OP_BRAMINZERO:
574 case OP_BRAPOSZERO:
575 case OP_COMMIT:
576 case OP_FAIL:
577 case OP_ACCEPT:
578 case OP_ASSERT_ACCEPT:
579 case OP_CLOSE:
580 case OP_SKIPZERO:
581 return cc + PRIV(OP_lengths)[*cc];
582
583 case OP_CHAR:
584 case OP_CHARI:
585 case OP_NOT:
586 case OP_NOTI:
587 case OP_STAR:
588 case OP_MINSTAR:
589 case OP_PLUS:
590 case OP_MINPLUS:
591 case OP_QUERY:
592 case OP_MINQUERY:
593 case OP_UPTO:
594 case OP_MINUPTO:
595 case OP_EXACT:
596 case OP_POSSTAR:
597 case OP_POSPLUS:
598 case OP_POSQUERY:
599 case OP_POSUPTO:
600 case OP_STARI:
601 case OP_MINSTARI:
602 case OP_PLUSI:
603 case OP_MINPLUSI:
604 case OP_QUERYI:
605 case OP_MINQUERYI:
606 case OP_UPTOI:
607 case OP_MINUPTOI:
608 case OP_EXACTI:
609 case OP_POSSTARI:
610 case OP_POSPLUSI:
611 case OP_POSQUERYI:
612 case OP_POSUPTOI:
613 case OP_NOTSTAR:
614 case OP_NOTMINSTAR:
615 case OP_NOTPLUS:
616 case OP_NOTMINPLUS:
617 case OP_NOTQUERY:
618 case OP_NOTMINQUERY:
619 case OP_NOTUPTO:
620 case OP_NOTMINUPTO:
621 case OP_NOTEXACT:
622 case OP_NOTPOSSTAR:
623 case OP_NOTPOSPLUS:
624 case OP_NOTPOSQUERY:
625 case OP_NOTPOSUPTO:
626 case OP_NOTSTARI:
627 case OP_NOTMINSTARI:
628 case OP_NOTPLUSI:
629 case OP_NOTMINPLUSI:
630 case OP_NOTQUERYI:
631 case OP_NOTMINQUERYI:
632 case OP_NOTUPTOI:
633 case OP_NOTMINUPTOI:
634 case OP_NOTEXACTI:
635 case OP_NOTPOSSTARI:
636 case OP_NOTPOSPLUSI:
637 case OP_NOTPOSQUERYI:
638 case OP_NOTPOSUPTOI:
639 cc += PRIV(OP_lengths)[*cc];
640 #ifdef SUPPORT_UTF
641 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
642 #endif
643 return cc;
644
645 /* Special cases. */
646 case OP_TYPESTAR:
647 case OP_TYPEMINSTAR:
648 case OP_TYPEPLUS:
649 case OP_TYPEMINPLUS:
650 case OP_TYPEQUERY:
651 case OP_TYPEMINQUERY:
652 case OP_TYPEUPTO:
653 case OP_TYPEMINUPTO:
654 case OP_TYPEEXACT:
655 case OP_TYPEPOSSTAR:
656 case OP_TYPEPOSPLUS:
657 case OP_TYPEPOSQUERY:
658 case OP_TYPEPOSUPTO:
659 return cc + PRIV(OP_lengths)[*cc] - 1;
660
661 case OP_ANYBYTE:
662 #ifdef SUPPORT_UTF
663 if (common->utf) return NULL;
664 #endif
665 return cc + 1;
666
667 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
668 case OP_XCLASS:
669 return cc + GET(cc, 1);
670 #endif
671
672 case OP_MARK:
673 return cc + 1 + 2 + cc[1];
674
675 default:
676 return NULL;
677 }
678 }
679
/* Case label groups shared by the private data scanners. Both
get_private_data_length and set_private_data_ptrs use one private word
for the *_1 groups and two words for the *_2A / *_2B groups; for the
TYPE *_2A / *_2B groups the two words are skipped when the repeated
type is OP_ANYNL or OP_EXTUNI. The *_2B groups are the opcodes that
carry an extra IMM2_SIZE operand. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR:     case OP_MINPLUS: \
    case OP_QUERY:       case OP_MINQUERY: \
    case OP_MINSTARI:    case OP_MINPLUSI: \
    case OP_QUERYI:      case OP_MINQUERYI: \
    case OP_NOTMINSTAR:  case OP_NOTMINPLUS: \
    case OP_NOTQUERY:    case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI:   case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR:     case OP_PLUS: \
    case OP_STARI:    case OP_PLUSI: \
    case OP_NOTSTAR:  case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO:     case OP_MINUPTO: \
    case OP_UPTOI:    case OP_MINUPTOI: \
    case OP_NOTUPTO:  case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY:   case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
731
732 static int get_class_iterator_size(pcre_uchar *cc)
733 {
734 switch(*cc)
735 {
736 case OP_CRSTAR:
737 case OP_CRPLUS:
738 return 2;
739
740 case OP_CRMINSTAR:
741 case OP_CRMINPLUS:
742 case OP_CRQUERY:
743 case OP_CRMINQUERY:
744 return 1;
745
746 case OP_CRRANGE:
747 case OP_CRMINRANGE:
748 if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
749 return 0;
750 return 2;
751
752 default:
753 return 0;
754 }
755 }
756
757 static int get_private_data_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
758 {
759 int private_data_length = 0;
760 pcre_uchar *alternative;
761 pcre_uchar *name;
762 pcre_uchar *end = NULL;
763 int space, size, i;
764 pcre_uint32 bracketlen;
765
766 /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
767 while (cc < ccend)
768 {
769 space = 0;
770 size = 0;
771 bracketlen = 0;
772 switch(*cc)
773 {
774 case OP_SET_SOM:
775 common->has_set_som = TRUE;
776 cc += 1;
777 break;
778
779 case OP_REF:
780 case OP_REFI:
781 common->optimized_cbracket[GET2(cc, 1)] = 0;
782 cc += 1 + IMM2_SIZE;
783 break;
784
785 case OP_ASSERT:
786 case OP_ASSERT_NOT:
787 case OP_ASSERTBACK:
788 case OP_ASSERTBACK_NOT:
789 case OP_ONCE:
790 case OP_ONCE_NC:
791 case OP_BRAPOS:
792 case OP_SBRA:
793 case OP_SBRAPOS:
794 private_data_length += sizeof(sljit_sw);
795 bracketlen = 1 + LINK_SIZE;
796 break;
797
798 case OP_CBRAPOS:
799 case OP_SCBRAPOS:
800 private_data_length += sizeof(sljit_sw);
801 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
802 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
803 break;
804
805 case OP_COND:
806 case OP_SCOND:
807 /* Only AUTO_CALLOUT can insert this opcode. We do
808 not intend to support this case. */
809 if (cc[1 + LINK_SIZE] == OP_CALLOUT)
810 return -1;
811
812 if (*cc == OP_COND)
813 {
814 /* Might be a hidden SCOND. */
815 alternative = cc + GET(cc, 1);
816 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
817 private_data_length += sizeof(sljit_sw);
818 }
819 else
820 private_data_length += sizeof(sljit_sw);
821 bracketlen = 1 + LINK_SIZE;
822 break;
823
824 case OP_CREF:
825 i = GET2(cc, 1);
826 common->optimized_cbracket[i] = 0;
827 cc += 1 + IMM2_SIZE;
828 break;
829
830 case OP_NCREF:
831 bracketlen = GET2(cc, 1);
832 name = (pcre_uchar *)common->name_table;
833 alternative = name;
834 for (i = 0; i < common->name_count; i++)
835 {
836 if (GET2(name, 0) == bracketlen) break;
837 name += common->name_entry_size;
838 }
839 SLJIT_ASSERT(i != common->name_count);
840
841 for (i = 0; i < common->name_count; i++)
842 {
843 if (STRCMP_UC_UC(alternative + IMM2_SIZE, name + IMM2_SIZE) == 0)
844 common->optimized_cbracket[GET2(alternative, 0)] = 0;
845 alternative += common->name_entry_size;
846 }
847 bracketlen = 0;
848 cc += 1 + IMM2_SIZE;
849 break;
850
851 case OP_BRA:
852 bracketlen = 1 + LINK_SIZE;
853 break;
854
855 case OP_CBRA:
856 case OP_SCBRA:
857 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
858 break;
859
860 CASE_ITERATOR_PRIVATE_DATA_1
861 space = 1;
862 size = -2;
863 break;
864
865 CASE_ITERATOR_PRIVATE_DATA_2A
866 space = 2;
867 size = -2;
868 break;
869
870 CASE_ITERATOR_PRIVATE_DATA_2B
871 space = 2;
872 size = -(2 + IMM2_SIZE);
873 break;
874
875 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
876 space = 1;
877 size = 1;
878 break;
879
880 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
881 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
882 space = 2;
883 size = 1;
884 break;
885
886 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
887 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
888 space = 2;
889 size = 1 + IMM2_SIZE;
890 break;
891
892 case OP_CLASS:
893 case OP_NCLASS:
894 size += 1 + 32 / sizeof(pcre_uchar);
895 space = get_class_iterator_size(cc + size);
896 break;
897
898 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
899 case OP_XCLASS:
900 size = GET(cc, 1);
901 space = get_class_iterator_size(cc + size);
902 break;
903 #endif
904
905 case OP_RECURSE:
906 /* Set its value only once. */
907 if (common->recursive_head_ptr == 0)
908 {
909 common->recursive_head_ptr = common->ovector_start;
910 common->ovector_start += sizeof(sljit_sw);
911 }
912 cc += 1 + LINK_SIZE;
913 break;
914
915 case OP_CALLOUT:
916 if (common->capture_last_ptr == 0)
917 {
918 common->capture_last_ptr = common->ovector_start;
919 common->ovector_start += sizeof(sljit_sw);
920 }
921 cc += 2 + 2 * LINK_SIZE;
922 break;
923
924 case OP_MARK:
925 if (common->mark_ptr == 0)
926 {
927 common->mark_ptr = common->ovector_start;
928 common->ovector_start += sizeof(sljit_sw);
929 }
930 cc += 1 + 2 + cc[1];
931 break;
932
933 default:
934 cc = next_opcode(common, cc);
935 if (cc == NULL)
936 return -1;
937 break;
938 }
939
940 if (space > 0 && cc >= end)
941 private_data_length += sizeof(sljit_sw) * space;
942
943 if (size != 0)
944 {
945 if (size < 0)
946 {
947 cc += -size;
948 #ifdef SUPPORT_UTF
949 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
950 #endif
951 }
952 else
953 cc += size;
954 }
955
956 if (bracketlen != 0)
957 {
958 if (cc >= end)
959 {
960 end = bracketend(cc);
961 if (end[-1 - LINK_SIZE] == OP_KET)
962 end = NULL;
963 }
964 cc += bracketlen;
965 }
966 }
967 return private_data_length;
968 }
969
970 static void set_private_data_ptrs(compiler_common *common, int private_data_ptr, pcre_uchar *ccend)
971 {
972 pcre_uchar *cc = common->start;
973 pcre_uchar *alternative;
974 pcre_uchar *end = NULL;
975 int space, size, bracketlen;
976
977 while (cc < ccend)
978 {
979 space = 0;
980 size = 0;
981 bracketlen = 0;
982 switch(*cc)
983 {
984 case OP_ASSERT:
985 case OP_ASSERT_NOT:
986 case OP_ASSERTBACK:
987 case OP_ASSERTBACK_NOT:
988 case OP_ONCE:
989 case OP_ONCE_NC:
990 case OP_BRAPOS:
991 case OP_SBRA:
992 case OP_SBRAPOS:
993 case OP_SCOND:
994 common->private_data_ptrs[cc - common->start] = private_data_ptr;
995 private_data_ptr += sizeof(sljit_sw);
996 bracketlen = 1 + LINK_SIZE;
997 break;
998
999 case OP_CBRAPOS:
1000 case OP_SCBRAPOS:
1001 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1002 private_data_ptr += sizeof(sljit_sw);
1003 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1004 break;
1005
1006 case OP_COND:
1007 /* Might be a hidden SCOND. */
1008 alternative = cc + GET(cc, 1);
1009 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1010 {
1011 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1012 private_data_ptr += sizeof(sljit_sw);
1013 }
1014 bracketlen = 1 + LINK_SIZE;
1015 break;
1016
1017 case OP_BRA:
1018 bracketlen = 1 + LINK_SIZE;
1019 break;
1020
1021 case OP_CBRA:
1022 case OP_SCBRA:
1023 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1024 break;
1025
1026 CASE_ITERATOR_PRIVATE_DATA_1
1027 space = 1;
1028 size = -2;
1029 break;
1030
1031 CASE_ITERATOR_PRIVATE_DATA_2A
1032 space = 2;
1033 size = -2;
1034 break;
1035
1036 CASE_ITERATOR_PRIVATE_DATA_2B
1037 space = 2;
1038 size = -(2 + IMM2_SIZE);
1039 break;
1040
1041 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1042 space = 1;
1043 size = 1;
1044 break;
1045
1046 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1047 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1048 space = 2;
1049 size = 1;
1050 break;
1051
1052 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1053 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1054 space = 2;
1055 size = 1 + IMM2_SIZE;
1056 break;
1057
1058 case OP_CLASS:
1059 case OP_NCLASS:
1060 size += 1 + 32 / sizeof(pcre_uchar);
1061 space = get_class_iterator_size(cc + size);
1062 break;
1063
1064 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1065 case OP_XCLASS:
1066 size = GET(cc, 1);
1067 space = get_class_iterator_size(cc + size);
1068 break;
1069 #endif
1070
1071 default:
1072 cc = next_opcode(common, cc);
1073 SLJIT_ASSERT(cc != NULL);
1074 break;
1075 }
1076
1077 if (space > 0 && cc >= end)
1078 {
1079 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1080 private_data_ptr += sizeof(sljit_sw) * space;
1081 }
1082
1083 if (size != 0)
1084 {
1085 if (size < 0)
1086 {
1087 cc += -size;
1088 #ifdef SUPPORT_UTF
1089 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1090 #endif
1091 }
1092 else
1093 cc += size;
1094 }
1095
1096 if (bracketlen > 0)
1097 {
1098 if (cc >= end)
1099 {
1100 end = bracketend(cc);
1101 if (end[-1 - LINK_SIZE] == OP_KET)
1102 end = NULL;
1103 }
1104 cc += bracketlen;
1105 }
1106 }
1107 }
1108
1109 /* Returns with a frame_types (always < 0) if no need for frame. */
1110 static int get_framesize(compiler_common *common, pcre_uchar *cc, BOOL recursive)
1111 {
1112 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1113 int length = 0;
1114 int possessive = 0;
1115 BOOL stack_restore = FALSE;
1116 BOOL setsom_found = recursive;
1117 BOOL setmark_found = recursive;
1118 /* The last capture is a local variable even for recursions. */
1119 BOOL capture_last_found = FALSE;
1120
1121 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1122 {
1123 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1124 /* This is correct regardless of common->capture_last_ptr. */
1125 capture_last_found = TRUE;
1126 }
1127
1128 cc = next_opcode(common, cc);
1129 SLJIT_ASSERT(cc != NULL);
1130 while (cc < ccend)
1131 switch(*cc)
1132 {
1133 case OP_SET_SOM:
1134 SLJIT_ASSERT(common->has_set_som);
1135 stack_restore = TRUE;
1136 if (!setsom_found)
1137 {
1138 length += 2;
1139 setsom_found = TRUE;
1140 }
1141 cc += 1;
1142 break;
1143
1144 case OP_MARK:
1145 SLJIT_ASSERT(common->mark_ptr != 0);
1146 stack_restore = TRUE;
1147 if (!setmark_found)
1148 {
1149 length += 2;
1150 setmark_found = TRUE;
1151 }
1152 cc += 1 + 2 + cc[1];
1153 break;
1154
1155 case OP_RECURSE:
1156 stack_restore = TRUE;
1157 if (common->has_set_som && !setsom_found)
1158 {
1159 length += 2;
1160 setsom_found = TRUE;
1161 }
1162 if (common->mark_ptr != 0 && !setmark_found)
1163 {
1164 length += 2;
1165 setmark_found = TRUE;
1166 }
1167 if (common->capture_last_ptr != 0 && !capture_last_found)
1168 {
1169 length += 2;
1170 capture_last_found = TRUE;
1171 }
1172 cc += 1 + LINK_SIZE;
1173 break;
1174
1175 case OP_CBRA:
1176 case OP_CBRAPOS:
1177 case OP_SCBRA:
1178 case OP_SCBRAPOS:
1179 stack_restore = TRUE;
1180 if (common->capture_last_ptr != 0 && !capture_last_found)
1181 {
1182 length += 2;
1183 capture_last_found = TRUE;
1184 }
1185 length += 3;
1186 cc += 1 + LINK_SIZE + IMM2_SIZE;
1187 break;
1188
1189 default:
1190 stack_restore = TRUE;
1191 /* Fall through. */
1192
1193 case OP_NOT_WORD_BOUNDARY:
1194 case OP_WORD_BOUNDARY:
1195 case OP_NOT_DIGIT:
1196 case OP_DIGIT:
1197 case OP_NOT_WHITESPACE:
1198 case OP_WHITESPACE:
1199 case OP_NOT_WORDCHAR:
1200 case OP_WORDCHAR:
1201 case OP_ANY:
1202 case OP_ALLANY:
1203 case OP_ANYBYTE:
1204 case OP_NOTPROP:
1205 case OP_PROP:
1206 case OP_ANYNL:
1207 case OP_NOT_HSPACE:
1208 case OP_HSPACE:
1209 case OP_NOT_VSPACE:
1210 case OP_VSPACE:
1211 case OP_EXTUNI:
1212 case OP_EODN:
1213 case OP_EOD:
1214 case OP_CIRC:
1215 case OP_CIRCM:
1216 case OP_DOLL:
1217 case OP_DOLLM:
1218 case OP_CHAR:
1219 case OP_CHARI:
1220 case OP_NOT:
1221 case OP_NOTI:
1222
1223 case OP_EXACT:
1224 case OP_POSSTAR:
1225 case OP_POSPLUS:
1226 case OP_POSQUERY:
1227 case OP_POSUPTO:
1228
1229 case OP_EXACTI:
1230 case OP_POSSTARI:
1231 case OP_POSPLUSI:
1232 case OP_POSQUERYI:
1233 case OP_POSUPTOI:
1234
1235 case OP_NOTEXACT:
1236 case OP_NOTPOSSTAR:
1237 case OP_NOTPOSPLUS:
1238 case OP_NOTPOSQUERY:
1239 case OP_NOTPOSUPTO:
1240
1241 case OP_NOTEXACTI:
1242 case OP_NOTPOSSTARI:
1243 case OP_NOTPOSPLUSI:
1244 case OP_NOTPOSQUERYI:
1245 case OP_NOTPOSUPTOI:
1246
1247 case OP_TYPEEXACT:
1248 case OP_TYPEPOSSTAR:
1249 case OP_TYPEPOSPLUS:
1250 case OP_TYPEPOSQUERY:
1251 case OP_TYPEPOSUPTO:
1252
1253 case OP_CLASS:
1254 case OP_NCLASS:
1255 case OP_XCLASS:
1256
1257 cc = next_opcode(common, cc);
1258 SLJIT_ASSERT(cc != NULL);
1259 break;
1260 }
1261
1262 /* Possessive quantifiers can use a special case. */
1263 if (SLJIT_UNLIKELY(possessive == length))
1264 return stack_restore ? no_frame : no_stack;
1265
1266 if (length > 0)
1267 return length + 1;
1268 return stack_restore ? no_frame : no_stack;
1269 }
1270
1271 static void init_frame(compiler_common *common, pcre_uchar *cc, int stackpos, int stacktop, BOOL recursive)
1272 {
1273 DEFINE_COMPILER;
1274 pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
1275 BOOL setsom_found = recursive;
1276 BOOL setmark_found = recursive;
1277 /* The last capture is a local variable even for recursions. */
1278 BOOL capture_last_found = FALSE;
1279 int offset;
1280
1281 /* >= 1 + shortest item size (2) */
1282 SLJIT_UNUSED_ARG(stacktop);
1283 SLJIT_ASSERT(stackpos >= stacktop + 2);
1284
1285 stackpos = STACK(stackpos);
1286 if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
1287 cc = next_opcode(common, cc);
1288 SLJIT_ASSERT(cc != NULL);
1289 while (cc < ccend)
1290 switch(*cc)
1291 {
1292 case OP_SET_SOM:
1293 SLJIT_ASSERT(common->has_set_som);
1294 if (!setsom_found)
1295 {
1296 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1297 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1298 stackpos += (int)sizeof(sljit_sw);
1299 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1300 stackpos += (int)sizeof(sljit_sw);
1301 setsom_found = TRUE;
1302 }
1303 cc += 1;
1304 break;
1305
1306 case OP_MARK:
1307 SLJIT_ASSERT(common->mark_ptr != 0);
1308 if (!setmark_found)
1309 {
1310 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1311 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1312 stackpos += (int)sizeof(sljit_sw);
1313 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1314 stackpos += (int)sizeof(sljit_sw);
1315 setmark_found = TRUE;
1316 }
1317 cc += 1 + 2 + cc[1];
1318 break;
1319
1320 case OP_RECURSE:
1321 if (common->has_set_som && !setsom_found)
1322 {
1323 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
1324 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
1325 stackpos += (int)sizeof(sljit_sw);
1326 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1327 stackpos += (int)sizeof(sljit_sw);
1328 setsom_found = TRUE;
1329 }
1330 if (common->mark_ptr != 0 && !setmark_found)
1331 {
1332 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1333 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
1334 stackpos += (int)sizeof(sljit_sw);
1335 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1336 stackpos += (int)sizeof(sljit_sw);
1337 setmark_found = TRUE;
1338 }
1339 if (common->capture_last_ptr != 0 && !capture_last_found)
1340 {
1341 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1342 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1343 stackpos += (int)sizeof(sljit_sw);
1344 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1345 stackpos += (int)sizeof(sljit_sw);
1346 capture_last_found = TRUE;
1347 }
1348 cc += 1 + LINK_SIZE;
1349 break;
1350
1351 case OP_CBRA:
1352 case OP_CBRAPOS:
1353 case OP_SCBRA:
1354 case OP_SCBRAPOS:
1355 if (common->capture_last_ptr != 0 && !capture_last_found)
1356 {
1357 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
1358 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
1359 stackpos += (int)sizeof(sljit_sw);
1360 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1361 stackpos += (int)sizeof(sljit_sw);
1362 capture_last_found = TRUE;
1363 }
1364 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
1365 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
1366 stackpos += (int)sizeof(sljit_sw);
1367 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
1368 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
1369 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
1370 stackpos += (int)sizeof(sljit_sw);
1371 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
1372 stackpos += (int)sizeof(sljit_sw);
1373
1374 cc += 1 + LINK_SIZE + IMM2_SIZE;
1375 break;
1376
1377 default:
1378 cc = next_opcode(common, cc);
1379 SLJIT_ASSERT(cc != NULL);
1380 break;
1381 }
1382
1383 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
1384 SLJIT_ASSERT(stackpos == STACK(stacktop));
1385 }
1386
1387 static SLJIT_INLINE int get_private_data_length_for_copy(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
1388 {
1389 int private_data_length = 2;
1390 int size;
1391 pcre_uchar *alternative;
1392 /* Calculate the sum of the private machine words. */
1393 while (cc < ccend)
1394 {
1395 size = 0;
1396 switch(*cc)
1397 {
1398 case OP_ASSERT:
1399 case OP_ASSERT_NOT:
1400 case OP_ASSERTBACK:
1401 case OP_ASSERTBACK_NOT:
1402 case OP_ONCE:
1403 case OP_ONCE_NC:
1404 case OP_BRAPOS:
1405 case OP_SBRA:
1406 case OP_SBRAPOS:
1407 case OP_SCOND:
1408 private_data_length++;
1409 cc += 1 + LINK_SIZE;
1410 break;
1411
1412 case OP_CBRA:
1413 case OP_SCBRA:
1414 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1415 private_data_length++;
1416 cc += 1 + LINK_SIZE + IMM2_SIZE;
1417 break;
1418
1419 case OP_CBRAPOS:
1420 case OP_SCBRAPOS:
1421 private_data_length += 2;
1422 cc += 1 + LINK_SIZE + IMM2_SIZE;
1423 break;
1424
1425 case OP_COND:
1426 /* Might be a hidden SCOND. */
1427 alternative = cc + GET(cc, 1);
1428 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1429 private_data_length++;
1430 cc += 1 + LINK_SIZE;
1431 break;
1432
1433 CASE_ITERATOR_PRIVATE_DATA_1
1434 if (PRIVATE_DATA(cc))
1435 private_data_length++;
1436 cc += 2;
1437 #ifdef SUPPORT_UTF
1438 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1439 #endif
1440 break;
1441
1442 CASE_ITERATOR_PRIVATE_DATA_2A
1443 if (PRIVATE_DATA(cc))
1444 private_data_length += 2;
1445 cc += 2;
1446 #ifdef SUPPORT_UTF
1447 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1448 #endif
1449 break;
1450
1451 CASE_ITERATOR_PRIVATE_DATA_2B
1452 if (PRIVATE_DATA(cc))
1453 private_data_length += 2;
1454 cc += 2 + IMM2_SIZE;
1455 #ifdef SUPPORT_UTF
1456 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1457 #endif
1458 break;
1459
1460 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1461 if (PRIVATE_DATA(cc))
1462 private_data_length++;
1463 cc += 1;
1464 break;
1465
1466 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1467 if (PRIVATE_DATA(cc))
1468 private_data_length += 2;
1469 cc += 1;
1470 break;
1471
1472 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1473 if (PRIVATE_DATA(cc))
1474 private_data_length += 2;
1475 cc += 1 + IMM2_SIZE;
1476 break;
1477
1478 case OP_CLASS:
1479 case OP_NCLASS:
1480 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1481 case OP_XCLASS:
1482 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1483 #else
1484 size = 1 + 32 / (int)sizeof(pcre_uchar);
1485 #endif
1486 if (PRIVATE_DATA(cc))
1487 private_data_length += get_class_iterator_size(cc + size);
1488 cc += size;
1489 break;
1490
1491 default:
1492 cc = next_opcode(common, cc);
1493 SLJIT_ASSERT(cc != NULL);
1494 break;
1495 }
1496 }
1497 SLJIT_ASSERT(cc == ccend);
1498 return private_data_length;
1499 }
1500
1501 static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
1502 BOOL save, int stackptr, int stacktop)
1503 {
1504 DEFINE_COMPILER;
1505 int srcw[2];
1506 int count, size;
1507 BOOL tmp1next = TRUE;
1508 BOOL tmp1empty = TRUE;
1509 BOOL tmp2empty = TRUE;
1510 pcre_uchar *alternative;
1511 enum {
1512 start,
1513 loop,
1514 end
1515 } status;
1516
1517 status = save ? start : loop;
1518 stackptr = STACK(stackptr - 2);
1519 stacktop = STACK(stacktop - 1);
1520
1521 if (!save)
1522 {
1523 stackptr += sizeof(sljit_sw);
1524 if (stackptr < stacktop)
1525 {
1526 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1527 stackptr += sizeof(sljit_sw);
1528 tmp1empty = FALSE;
1529 }
1530 if (stackptr < stacktop)
1531 {
1532 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1533 stackptr += sizeof(sljit_sw);
1534 tmp2empty = FALSE;
1535 }
1536 /* The tmp1next must be TRUE in either way. */
1537 }
1538
1539 while (status != end)
1540 {
1541 count = 0;
1542 switch(status)
1543 {
1544 case start:
1545 SLJIT_ASSERT(save && common->recursive_head_ptr != 0);
1546 count = 1;
1547 srcw[0] = common->recursive_head_ptr;
1548 status = loop;
1549 break;
1550
1551 case loop:
1552 if (cc >= ccend)
1553 {
1554 status = end;
1555 break;
1556 }
1557
1558 switch(*cc)
1559 {
1560 case OP_ASSERT:
1561 case OP_ASSERT_NOT:
1562 case OP_ASSERTBACK:
1563 case OP_ASSERTBACK_NOT:
1564 case OP_ONCE:
1565 case OP_ONCE_NC:
1566 case OP_BRAPOS:
1567 case OP_SBRA:
1568 case OP_SBRAPOS:
1569 case OP_SCOND:
1570 count = 1;
1571 srcw[0] = PRIVATE_DATA(cc);
1572 SLJIT_ASSERT(srcw[0] != 0);
1573 cc += 1 + LINK_SIZE;
1574 break;
1575
1576 case OP_CBRA:
1577 case OP_SCBRA:
1578 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
1579 {
1580 count = 1;
1581 srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1582 }
1583 cc += 1 + LINK_SIZE + IMM2_SIZE;
1584 break;
1585
1586 case OP_CBRAPOS:
1587 case OP_SCBRAPOS:
1588 count = 2;
1589 srcw[0] = PRIVATE_DATA(cc);
1590 srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
1591 SLJIT_ASSERT(srcw[0] != 0 && srcw[1] != 0);
1592 cc += 1 + LINK_SIZE + IMM2_SIZE;
1593 break;
1594
1595 case OP_COND:
1596 /* Might be a hidden SCOND. */
1597 alternative = cc + GET(cc, 1);
1598 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1599 {
1600 count = 1;
1601 srcw[0] = PRIVATE_DATA(cc);
1602 SLJIT_ASSERT(srcw[0] != 0);
1603 }
1604 cc += 1 + LINK_SIZE;
1605 break;
1606
1607 CASE_ITERATOR_PRIVATE_DATA_1
1608 if (PRIVATE_DATA(cc))
1609 {
1610 count = 1;
1611 srcw[0] = PRIVATE_DATA(cc);
1612 }
1613 cc += 2;
1614 #ifdef SUPPORT_UTF
1615 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1616 #endif
1617 break;
1618
1619 CASE_ITERATOR_PRIVATE_DATA_2A
1620 if (PRIVATE_DATA(cc))
1621 {
1622 count = 2;
1623 srcw[0] = PRIVATE_DATA(cc);
1624 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1625 }
1626 cc += 2;
1627 #ifdef SUPPORT_UTF
1628 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1629 #endif
1630 break;
1631
1632 CASE_ITERATOR_PRIVATE_DATA_2B
1633 if (PRIVATE_DATA(cc))
1634 {
1635 count = 2;
1636 srcw[0] = PRIVATE_DATA(cc);
1637 srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
1638 }
1639 cc += 2 + IMM2_SIZE;
1640 #ifdef SUPPORT_UTF
1641 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1642 #endif
1643 break;
1644
1645 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1646 if (PRIVATE_DATA(cc))
1647 {
1648 count = 1;
1649 srcw[0] = PRIVATE_DATA(cc);
1650 }
1651 cc += 1;
1652 break;
1653
1654 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1655 if (PRIVATE_DATA(cc))
1656 {
1657 count = 2;
1658 srcw[0] = PRIVATE_DATA(cc);
1659 srcw[1] = srcw[0] + sizeof(sljit_sw);
1660 }
1661 cc += 1;
1662 break;
1663
1664 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1665 if (PRIVATE_DATA(cc))
1666 {
1667 count = 2;
1668 srcw[0] = PRIVATE_DATA(cc);
1669 srcw[1] = srcw[0] + sizeof(sljit_sw);
1670 }
1671 cc += 1 + IMM2_SIZE;
1672 break;
1673
1674 case OP_CLASS:
1675 case OP_NCLASS:
1676 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1677 case OP_XCLASS:
1678 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
1679 #else
1680 size = 1 + 32 / (int)sizeof(pcre_uchar);
1681 #endif
1682 if (PRIVATE_DATA(cc))
1683 switch(get_class_iterator_size(cc + size))
1684 {
1685 case 1:
1686 count = 1;
1687 srcw[0] = PRIVATE_DATA(cc);
1688 break;
1689
1690 case 2:
1691 count = 2;
1692 srcw[0] = PRIVATE_DATA(cc);
1693 srcw[1] = srcw[0] + sizeof(sljit_sw);
1694 break;
1695
1696 default:
1697 SLJIT_ASSERT_STOP();
1698 break;
1699 }
1700 cc += size;
1701 break;
1702
1703 default:
1704 cc = next_opcode(common, cc);
1705 SLJIT_ASSERT(cc != NULL);
1706 break;
1707 }
1708 break;
1709
1710 case end:
1711 SLJIT_ASSERT_STOP();
1712 break;
1713 }
1714
1715 while (count > 0)
1716 {
1717 count--;
1718 if (save)
1719 {
1720 if (tmp1next)
1721 {
1722 if (!tmp1empty)
1723 {
1724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1725 stackptr += sizeof(sljit_sw);
1726 }
1727 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1728 tmp1empty = FALSE;
1729 tmp1next = FALSE;
1730 }
1731 else
1732 {
1733 if (!tmp2empty)
1734 {
1735 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1736 stackptr += sizeof(sljit_sw);
1737 }
1738 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count]);
1739 tmp2empty = FALSE;
1740 tmp1next = TRUE;
1741 }
1742 }
1743 else
1744 {
1745 if (tmp1next)
1746 {
1747 SLJIT_ASSERT(!tmp1empty);
1748 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP1, 0);
1749 tmp1empty = stackptr >= stacktop;
1750 if (!tmp1empty)
1751 {
1752 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1753 stackptr += sizeof(sljit_sw);
1754 }
1755 tmp1next = FALSE;
1756 }
1757 else
1758 {
1759 SLJIT_ASSERT(!tmp2empty);
1760 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), srcw[count], TMP2, 0);
1761 tmp2empty = stackptr >= stacktop;
1762 if (!tmp2empty)
1763 {
1764 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
1765 stackptr += sizeof(sljit_sw);
1766 }
1767 tmp1next = TRUE;
1768 }
1769 }
1770 }
1771 }
1772
1773 if (save)
1774 {
1775 if (tmp1next)
1776 {
1777 if (!tmp1empty)
1778 {
1779 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1780 stackptr += sizeof(sljit_sw);
1781 }
1782 if (!tmp2empty)
1783 {
1784 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1785 stackptr += sizeof(sljit_sw);
1786 }
1787 }
1788 else
1789 {
1790 if (!tmp2empty)
1791 {
1792 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
1793 stackptr += sizeof(sljit_sw);
1794 }
1795 if (!tmp1empty)
1796 {
1797 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
1798 stackptr += sizeof(sljit_sw);
1799 }
1800 }
1801 }
1802 SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1empty && tmp2empty)));
1803 }
1804
1805 #undef CASE_ITERATOR_PRIVATE_DATA_1
1806 #undef CASE_ITERATOR_PRIVATE_DATA_2A
1807 #undef CASE_ITERATOR_PRIVATE_DATA_2B
1808 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1809 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1810 #undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
1811
1812 static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
1813 {
1814 return (value & (value - 1)) == 0;
1815 }
1816
1817 static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
1818 {
1819 while (list)
1820 {
1821 /* sljit_set_label is clever enough to do nothing
1822 if either the jump or the label is NULL. */
1823 SET_LABEL(list->jump, label);
1824 list = list->next;
1825 }
1826 }
1827
1828 static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump* jump)
1829 {
1830 jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
1831 if (list_item)
1832 {
1833 list_item->next = *list;
1834 list_item->jump = jump;
1835 *list = list_item;
1836 }
1837 }
1838
1839 static void add_stub(compiler_common *common, struct sljit_jump *start)
1840 {
1841 DEFINE_COMPILER;
1842 stub_list* list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
1843
1844 if (list_item)
1845 {
1846 list_item->start = start;
1847 list_item->quit = LABEL();
1848 list_item->next = common->stubs;
1849 common->stubs = list_item;
1850 }
1851 }
1852
1853 static void flush_stubs(compiler_common *common)
1854 {
1855 DEFINE_COMPILER;
1856 stub_list* list_item = common->stubs;
1857
1858 while (list_item)
1859 {
1860 JUMPHERE(list_item->start);
1861 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
1862 JUMPTO(SLJIT_JUMP, list_item->quit);
1863 list_item = list_item->next;
1864 }
1865 common->stubs = NULL;
1866 }
1867
1868 static SLJIT_INLINE void decrease_call_count(compiler_common *common)
1869 {
1870 DEFINE_COMPILER;
1871
1872 OP2(SLJIT_SUB | SLJIT_SET_E, CALL_COUNT, 0, CALL_COUNT, 0, SLJIT_IMM, 1);
1873 add_jump(compiler, &common->calllimit, JUMP(SLJIT_C_ZERO));
1874 }
1875
1876 static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
1877 {
1878 /* May destroy all locals and registers except TMP2. */
1879 DEFINE_COMPILER;
1880
1881 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1882 #ifdef DESTROY_REGISTERS
1883 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
1884 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
1885 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
1886 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, TMP1, 0);
1887 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
1888 #endif
1889 add_stub(common, CMP(SLJIT_C_GREATER, STACK_TOP, 0, STACK_LIMIT, 0));
1890 }
1891
1892 static SLJIT_INLINE void free_stack(compiler_common *common, int size)
1893 {
1894 DEFINE_COMPILER;
1895 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
1896 }
1897
1898 static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
1899 {
1900 DEFINE_COMPILER;
1901 struct sljit_label *loop;
1902 int i;
1903 /* At this point we can freely use all temporary registers. */
1904 /* TMP1 returns with begin - 1. */
1905 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
1906 if (length < 8)
1907 {
1908 for (i = 0; i < length; i++)
1909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(i), SLJIT_SCRATCH_REG1, 0);
1910 }
1911 else
1912 {
1913 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, OVECTOR_START - sizeof(sljit_sw));
1914 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, length);
1915 loop = LABEL();
1916 OP1(SLJIT_MOVU, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(sljit_sw), SLJIT_SCRATCH_REG1, 0);
1917 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 1);
1918 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1919 }
1920 }
1921
1922 static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
1923 {
1924 DEFINE_COMPILER;
1925 struct sljit_label *loop;
1926 struct sljit_jump *early_quit;
1927
1928 /* At this point we can freely use all registers. */
1929 OP1(SLJIT_MOV, SLJIT_SAVED_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
1930 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1), STR_PTR, 0);
1931
1932 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, ARGUMENTS, 0);
1933 if (common->mark_ptr != 0)
1934 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
1935 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offset_count));
1936 if (common->mark_ptr != 0)
1937 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_SCRATCH_REG3, 0);
1938 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
1939 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), SLJIT_OFFSETOF(jit_arguments, begin));
1940 GET_LOCAL_BASE(SLJIT_SAVED_REG1, 0, OVECTOR_START);
1941 /* Unlikely, but possible */
1942 early_quit = CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 0);
1943 loop = LABEL();
1944 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, SLJIT_MEM1(SLJIT_SAVED_REG1), 0, SLJIT_SCRATCH_REG1, 0);
1945 OP2(SLJIT_ADD, SLJIT_SAVED_REG1, 0, SLJIT_SAVED_REG1, 0, SLJIT_IMM, sizeof(sljit_sw));
1946 /* Copy the integer value to the output buffer */
1947 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1948 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
1949 #endif
1950 OP1(SLJIT_MOVU_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG3), sizeof(int), SLJIT_SAVED_REG2, 0);
1951 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1952 JUMPTO(SLJIT_C_NOT_ZERO, loop);
1953 JUMPHERE(early_quit);
1954
1955 /* Calculate the return value, which is the maximum ovector value. */
1956 if (topbracket > 1)
1957 {
1958 GET_LOCAL_BASE(SLJIT_SCRATCH_REG1, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
1959 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, topbracket + 1);
1960
1961 /* OVECTOR(0) is never equal to SLJIT_SAVED_REG3. */
1962 loop = LABEL();
1963 OP1(SLJIT_MOVU, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG1), -(2 * (sljit_sw)sizeof(sljit_sw)));
1964 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, SLJIT_SCRATCH_REG2, 0, SLJIT_IMM, 1);
1965 CMPTO(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG3, 0, loop);
1966 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_SCRATCH_REG2, 0);
1967 }
1968 else
1969 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
1970 }
1971
1972 static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
1973 {
1974 DEFINE_COMPILER;
1975 struct sljit_jump *jump;
1976
1977 SLJIT_COMPILE_ASSERT(STR_END == SLJIT_SAVED_REG2, str_end_must_be_saved_reg2);
1978 SLJIT_ASSERT(common->start_used_ptr != 0 && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));
1979
1980 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
1981 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
1982 OP1(SLJIT_MOV_SI, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
1983 CMPTO(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 2, quit);
1984
1985 /* Store match begin and end. */
1986 OP1(SLJIT_MOV, SLJIT_SAVED_REG1, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, begin));
1987 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, offsets));
1988
1989 jump = CMP(SLJIT_C_SIG_LESS, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, 3);
1990 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr + sizeof(sljit_sw), SLJIT_SAVED_REG1, 0);
1991 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1992 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
1993 #endif
1994 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 2 * sizeof(int), SLJIT_SCRATCH_REG3, 0);
1995 JUMPHERE(jump);
1996
1997 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mode == JIT_PARTIAL_HARD_COMPILE ? common->start_used_ptr : common->hit_start);
1998 OP2(SLJIT_SUB, SLJIT_SAVED_REG2, 0, STR_END, 0, SLJIT_SAVED_REG1, 0);
1999 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2000 OP2(SLJIT_ASHR, SLJIT_SAVED_REG2, 0, SLJIT_SAVED_REG2, 0, SLJIT_IMM, UCHAR_SHIFT);
2001 #endif
2002 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), sizeof(int), SLJIT_SAVED_REG2, 0);
2003
2004 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_SAVED_REG1, 0);
2005 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2006 OP2(SLJIT_ASHR, SLJIT_SCRATCH_REG3, 0, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, UCHAR_SHIFT);
2007 #endif
2008 OP1(SLJIT_MOV_SI, SLJIT_MEM1(SLJIT_SCRATCH_REG2), 0, SLJIT_SCRATCH_REG3, 0);
2009
2010 JUMPTO(SLJIT_JUMP, quit);
2011 }
2012
2013 static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
2014 {
2015 /* May destroy TMP1. */
2016 DEFINE_COMPILER;
2017 struct sljit_jump *jump;
2018
2019 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2020 {
2021 /* The value of -1 must be kept for start_used_ptr! */
2022 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, 1);
2023 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
2024 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
2025 jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
2026 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2027 JUMPHERE(jump);
2028 }
2029 else if (common->mode == JIT_PARTIAL_HARD_COMPILE)
2030 {
2031 jump = CMP(SLJIT_C_LESS_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2032 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2033 JUMPHERE(jump);
2034 }
2035 }
2036
2037 static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar* cc)
2038 {
2039 /* Detects if the character has an othercase. */
2040 unsigned int c;
2041
2042 #ifdef SUPPORT_UTF
2043 if (common->utf)
2044 {
2045 GETCHAR(c, cc);
2046 if (c > 127)
2047 {
2048 #ifdef SUPPORT_UCP
2049 return c != UCD_OTHERCASE(c);
2050 #else
2051 return FALSE;
2052 #endif
2053 }
2054 #ifndef COMPILE_PCRE8
2055 return common->fcc[c] != c;
2056 #endif
2057 }
2058 else
2059 #endif
2060 c = *cc;
2061 return MAX_255(c) ? common->fcc[c] != c : FALSE;
2062 }
2063
2064 static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
2065 {
2066 /* Returns with the othercase. */
2067 #ifdef SUPPORT_UTF
2068 if (common->utf && c > 127)
2069 {
2070 #ifdef SUPPORT_UCP
2071 return UCD_OTHERCASE(c);
2072 #else
2073 return c;
2074 #endif
2075 }
2076 #endif
2077 return TABLE_GET(c, common->fcc, c);
2078 }
2079
2080 static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar* cc)
2081 {
2082 /* Detects if the character and its othercase has only 1 bit difference. */
2083 unsigned int c, oc, bit;
2084 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
2085 int n;
2086 #endif
2087
2088 #ifdef SUPPORT_UTF
2089 if (common->utf)
2090 {
2091 GETCHAR(c, cc);
2092 if (c <= 127)
2093 oc = common->fcc[c];
2094 else
2095 {
2096 #ifdef SUPPORT_UCP
2097 oc = UCD_OTHERCASE(c);
2098 #else
2099 oc = c;
2100 #endif
2101 }
2102 }
2103 else
2104 {
2105 c = *cc;
2106 oc = TABLE_GET(c, common->fcc, c);
2107 }
2108 #else
2109 c = *cc;
2110 oc = TABLE_GET(c, common->fcc, c);
2111 #endif
2112
2113 SLJIT_ASSERT(c != oc);
2114
2115 bit = c ^ oc;
2116 /* Optimized for English alphabet. */
2117 if (c <= 127 && bit == 0x20)
2118 return (0 << 8) | 0x20;
2119
2120 /* Since c != oc, they must have at least 1 bit difference. */
2121 if (!is_powerof2(bit))
2122 return 0;
2123
2124 #if defined COMPILE_PCRE8
2125
2126 #ifdef SUPPORT_UTF
2127 if (common->utf && c > 127)
2128 {
2129 n = GET_EXTRALEN(*cc);
2130 while ((bit & 0x3f) == 0)
2131 {
2132 n--;
2133 bit >>= 6;
2134 }
2135 return (n << 8) | bit;
2136 }
2137 #endif /* SUPPORT_UTF */
2138 return (0 << 8) | bit;
2139
2140 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2141
2142 #ifdef SUPPORT_UTF
2143 if (common->utf && c > 65535)
2144 {
2145 if (bit >= (1 << 10))
2146 bit >>= 10;
2147 else
2148 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
2149 }
2150 #endif /* SUPPORT_UTF */
2151 return (bit < 256) ? ((0 << 8) | bit) : ((1 << 8) | (bit >> 8));
2152
2153 #endif /* COMPILE_PCRE[8|16|32] */
2154 }
2155
2156 static void check_partial(compiler_common *common, BOOL force)
2157 {
2158 /* Checks whether a partial matching is occured. Does not modify registers. */
2159 DEFINE_COMPILER;
2160 struct sljit_jump *jump = NULL;
2161
2162 SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);
2163
2164 if (common->mode == JIT_COMPILE)
2165 return;
2166
2167 if (!force)
2168 jump = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2169 else if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2170 jump = CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, SLJIT_IMM, -1);
2171
2172 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2173 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2174 else
2175 {
2176 if (common->partialmatchlabel != NULL)
2177 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2178 else
2179 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2180 }
2181
2182 if (jump != NULL)
2183 JUMPHERE(jump);
2184 }
2185
2186 static struct sljit_jump *check_str_end(compiler_common *common)
2187 {
2188 /* Does not affect registers. Usually used in a tight spot. */
2189 DEFINE_COMPILER;
2190 struct sljit_jump *jump;
2191 struct sljit_jump *nohit;
2192 struct sljit_jump *return_value;
2193
2194 if (common->mode == JIT_COMPILE)
2195 return CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2196
2197 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2198 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2199 {
2200 nohit = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2201 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2202 JUMPHERE(nohit);
2203 return_value = JUMP(SLJIT_JUMP);
2204 }
2205 else
2206 {
2207 return_value = CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0);
2208 if (common->partialmatchlabel != NULL)
2209 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2210 else
2211 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2212 }
2213 JUMPHERE(jump);
2214 return return_value;
2215 }
2216
2217 static void detect_partial_match(compiler_common *common, jump_list **backtracks)
2218 {
2219 DEFINE_COMPILER;
2220 struct sljit_jump *jump;
2221
2222 if (common->mode == JIT_COMPILE)
2223 {
2224 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
2225 return;
2226 }
2227
2228 /* Partial matching mode. */
2229 jump = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
2230 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), common->start_used_ptr, STR_PTR, 0));
2231 if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
2232 {
2233 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->hit_start, SLJIT_IMM, -1);
2234 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
2235 }
2236 else
2237 {
2238 if (common->partialmatchlabel != NULL)
2239 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
2240 else
2241 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
2242 }
2243 JUMPHERE(jump);
2244 }
2245
2246 static void read_char(compiler_common *common)
2247 {
2248 /* Reads the character into TMP1, updates STR_PTR.
2249 Does not check STR_END. TMP2 Destroyed. */
2250 DEFINE_COMPILER;
2251 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2252 struct sljit_jump *jump;
2253 #endif
2254
2255 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2256 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2257 if (common->utf)
2258 {
2259 #if defined COMPILE_PCRE8
2260 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2261 #elif defined COMPILE_PCRE16
2262 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2263 #endif /* COMPILE_PCRE[8|16] */
2264 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2265 JUMPHERE(jump);
2266 }
2267 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2268 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2269 }
2270
2271 static void peek_char(compiler_common *common)
2272 {
2273 /* Reads the character into TMP1, keeps STR_PTR.
2274 Does not check STR_END. TMP2 Destroyed. */
2275 DEFINE_COMPILER;
2276 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2277 struct sljit_jump *jump;
2278 #endif
2279
2280 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2281 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2282 if (common->utf)
2283 {
2284 #if defined COMPILE_PCRE8
2285 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2286 #elif defined COMPILE_PCRE16
2287 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2288 #endif /* COMPILE_PCRE[8|16] */
2289 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
2290 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2291 JUMPHERE(jump);
2292 }
2293 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2294 }
2295
2296 static void read_char8_type(compiler_common *common)
2297 {
2298 /* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
2299 DEFINE_COMPILER;
2300 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2301 struct sljit_jump *jump;
2302 #endif
2303
2304 #ifdef SUPPORT_UTF
2305 if (common->utf)
2306 {
2307 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2308 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2309 #if defined COMPILE_PCRE8
2310 /* This can be an extra read in some situations, but hopefully
2311 it is needed in most cases. */
2312 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2313 jump = CMP(SLJIT_C_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
2314 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
2315 JUMPHERE(jump);
2316 #elif defined COMPILE_PCRE16
2317 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2318 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2319 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2320 JUMPHERE(jump);
2321 /* Skip low surrogate if necessary. */
2322 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xfc00);
2323 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xd800);
2324 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2325 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
2326 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2327 #elif defined COMPILE_PCRE32
2328 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2329 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2330 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2331 JUMPHERE(jump);
2332 #endif /* COMPILE_PCRE[8|16|32] */
2333 return;
2334 }
2335 #endif /* SUPPORT_UTF */
2336 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
2337 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2338 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2339 /* The ctypes array contains only 256 values. */
2340 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2341 jump = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2342 #endif
2343 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2344 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2345 JUMPHERE(jump);
2346 #endif
2347 }
2348
2349 static void skip_char_back(compiler_common *common)
2350 {
2351 /* Goes one character back. Affects STR_PTR and TMP1. Does not check begin. */
2352 DEFINE_COMPILER;
2353 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2354 #if defined COMPILE_PCRE8
2355 struct sljit_label *label;
2356
2357 if (common->utf)
2358 {
2359 label = LABEL();
2360 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2361 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2362 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
2363 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
2364 return;
2365 }
2366 #elif defined COMPILE_PCRE16
2367 if (common->utf)
2368 {
2369 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
2370 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2371 /* Skip low surrogate if necessary. */
2372 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2373 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
2374 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2375 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2376 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2377 return;
2378 }
2379 #endif /* COMPILE_PCRE[8|16] */
2380 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2381 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2382 }
2383
2384 static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpiftrue)
2385 {
2386 /* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
2387 DEFINE_COMPILER;
2388
2389 if (nltype == NLTYPE_ANY)
2390 {
2391 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
2392 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2393 }
2394 else if (nltype == NLTYPE_ANYCRLF)
2395 {
2396 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_CR);
2397 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2398 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2399 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2400 add_jump(compiler, backtracks, JUMP(jumpiftrue ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
2401 }
2402 else
2403 {
2404 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
2405 add_jump(compiler, backtracks, CMP(jumpiftrue ? SLJIT_C_EQUAL : SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
2406 }
2407 }
2408
2409 #ifdef SUPPORT_UTF
2410
2411 #if defined COMPILE_PCRE8
2412 static void do_utfreadchar(compiler_common *common)
2413 {
2414 /* Fast decoding a UTF-8 character. TMP1 contains the first byte
2415 of the character (>= 0xc0). Return char value in TMP1, length - 1 in TMP2. */
2416 DEFINE_COMPILER;
2417 struct sljit_jump *jump;
2418
2419 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2420 /* Searching for the first zero. */
2421 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
2422 jump = JUMP(SLJIT_C_NOT_ZERO);
2423 /* Two byte sequence. */
2424 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2425 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2426 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1f);
2427 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
2428 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2429 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2430 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2431 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2432 JUMPHERE(jump);
2433
2434 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10);
2435 jump = JUMP(SLJIT_C_NOT_ZERO);
2436 /* Three byte sequence. */
2437 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2438 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0f);
2439 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 12);
2440 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2441 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2442 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2443 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2444 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
2445 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2446 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2447 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
2448 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2449 JUMPHERE(jump);
2450
2451 /* Four byte sequence. */
2452 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2453 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x07);
2454 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 18);
2455 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2456 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
2457 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2458 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
2459 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2460 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2461 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2462 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(3));
2463 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
2464 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
2465 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2466 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
2467 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2468 }
2469
2470 static void do_utfreadtype8(compiler_common *common)
2471 {
2472 /* Fast decoding a UTF-8 character type. TMP2 contains the first byte
2473 of the character (>= 0xc0). Return value in TMP1. */
2474 DEFINE_COMPILER;
2475 struct sljit_jump *jump;
2476 struct sljit_jump *compare;
2477
2478 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2479
2480 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
2481 jump = JUMP(SLJIT_C_NOT_ZERO);
2482 /* Two byte sequence. */
2483 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2484 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2485 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
2486 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
2487 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
2488 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
2489 compare = CMP(SLJIT_C_GREATER, TMP2, 0, SLJIT_IMM, 255);
2490 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
2491 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2492
2493 JUMPHERE(compare);
2494 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2495 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2496 JUMPHERE(jump);
2497
2498 /* We only have types for characters less than 256. */
2499 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2500 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2501 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
2502 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2503 }
2504
2505 #elif defined COMPILE_PCRE16
2506
2507 static void do_utfreadchar(compiler_common *common)
2508 {
2509 /* Fast decoding a UTF-16 character. TMP1 contains the first 16 bit char
2510 of the character (>= 0xd800). Return char value in TMP1, length - 1 in TMP2. */
2511 DEFINE_COMPILER;
2512 struct sljit_jump *jump;
2513
2514 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
2515 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
2516 /* Do nothing, only return. */
2517 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2518
2519 JUMPHERE(jump);
2520 /* Combine two 16 bit characters. */
2521 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2522 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2523 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
2524 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
2525 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3ff);
2526 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
2527 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
2528 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
2529 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
2530 }
2531
2532 #endif /* COMPILE_PCRE[8|16] */
2533
2534 #endif /* SUPPORT_UTF */
2535
#ifdef SUPPORT_UCP

/* UCD_BLOCK_SIZE must be 128 (see the assert below). */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7

static void do_getucd(compiler_common *common)
{
/* Emits the shared fast-call routine that looks up the UCD record of the
character passed in TMP1. Returns the chartype in TMP1 and the UCD offset
(the stage 2 table entry) in TMP2. */
DEFINE_COMPILER;

/* The shifts and scale factors below hard-code these two sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT] (byte entry). */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (TMP1 & UCD_BLOCK_MASK) + (TMP2 << UCD_BLOCK_SHIFT). */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1] (16 bit entries, hence the index shift of 1). */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_UH, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = ucd_records[TMP2].chartype (8 byte records, index shift of 3). */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
#endif
2563
2564 static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf, BOOL firstline)
2565 {
2566 DEFINE_COMPILER;
2567 struct sljit_label *mainloop;
2568 struct sljit_label *newlinelabel = NULL;
2569 struct sljit_jump *start;
2570 struct sljit_jump *end = NULL;
2571 struct sljit_jump *nl = NULL;
2572 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2573 struct sljit_jump *singlechar;
2574 #endif
2575 jump_list *newline = NULL;
2576 BOOL newlinecheck = FALSE;
2577 BOOL readuchar = FALSE;
2578
2579 if (!(hascrorlf || firstline) && (common->nltype == NLTYPE_ANY ||
2580 common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
2581 newlinecheck = TRUE;
2582
2583 if (firstline)
2584 {
2585 /* Search for the end of the first line. */
2586 SLJIT_ASSERT(common->first_line_end != 0);
2587 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
2588
2589 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2590 {
2591 mainloop = LABEL();
2592 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2593 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2594 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2595 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2596 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
2597 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
2598 JUMPHERE(end);
2599 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2600 }
2601 else
2602 {
2603 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2604 mainloop = LABEL();
2605 /* Continual stores does not cause data dependency. */
2606 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2607 read_char(common);
2608 check_newlinechar(common, common->nltype, &newline, TRUE);
2609 CMPTO(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0, mainloop);
2610 JUMPHERE(end);
2611 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, STR_PTR, 0);
2612 set_jumps(newline, LABEL());
2613 }
2614
2615 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
2616 }
2617
2618 start = JUMP(SLJIT_JUMP);
2619
2620 if (newlinecheck)
2621 {
2622 newlinelabel = LABEL();
2623 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2624 end = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2625 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2626 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
2627 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2628 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2629 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2630 #endif
2631 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2632 nl = JUMP(SLJIT_JUMP);
2633 }
2634
2635 mainloop = LABEL();
2636
2637 /* Increasing the STR_PTR here requires one less jump in the most common case. */
2638 #ifdef SUPPORT_UTF
2639 if (common->utf) readuchar = TRUE;
2640 #endif
2641 if (newlinecheck) readuchar = TRUE;
2642
2643 if (readuchar)
2644 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2645
2646 if (newlinecheck)
2647 CMPTO(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
2648
2649 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2650 #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2651 #if defined COMPILE_PCRE8
2652 if (common->utf)
2653 {
2654 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
2655 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
2656 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2657 JUMPHERE(singlechar);
2658 }
2659 #elif defined COMPILE_PCRE16
2660 if (common->utf)
2661 {
2662 singlechar = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
2663 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
2664 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
2665 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2666 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
2667 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2668 JUMPHERE(singlechar);
2669 }
2670 #endif /* COMPILE_PCRE[8|16] */
2671 #endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
2672 JUMPHERE(start);
2673
2674 if (newlinecheck)
2675 {
2676 JUMPHERE(end);
2677 JUMPHERE(nl);
2678 }
2679
2680 return mainloop;
2681 }
2682
2683 #define MAX_N_CHARS 3
2684
2685 static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common, BOOL firstline)
2686 {
2687 DEFINE_COMPILER;
2688 struct sljit_label *start;
2689 struct sljit_jump *quit;
2690 pcre_uint32 chars[MAX_N_CHARS * 2];
2691 pcre_uchar *cc = common->start + 1 + LINK_SIZE;
2692 int location = 0;
2693 pcre_int32 len, c, bit, caseless;
2694 int must_stop;
2695
2696 /* We do not support alternatives now. */
2697 if (*(common->start + GET(common->start, 1)) == OP_ALT)
2698 return FALSE;
2699
2700 while (TRUE)
2701 {
2702 caseless = 0;
2703 must_stop = 1;
2704 switch(*cc)
2705 {
2706 case OP_CHAR:
2707 must_stop = 0;
2708 cc++;
2709 break;
2710
2711 case OP_CHARI:
2712 caseless = 1;
2713 must_stop = 0;
2714 cc++;
2715 break;
2716
2717 case OP_SOD:
2718 case OP_SOM:
2719 case OP_SET_SOM:
2720 case OP_NOT_WORD_BOUNDARY:
2721 case OP_WORD_BOUNDARY:
2722 case OP_EODN:
2723 case OP_EOD:
2724 case OP_CIRC:
2725 case OP_CIRCM:
2726 case OP_DOLL:
2727 case OP_DOLLM:
2728 /* Zero width assertions. */
2729 cc++;
2730 continue;
2731
2732 case OP_PLUS:
2733 case OP_MINPLUS:
2734 case OP_POSPLUS:
2735 cc++;
2736 break;
2737
2738 case OP_EXACT:
2739 cc += 1 + IMM2_SIZE;
2740 break;
2741
2742 case OP_PLUSI:
2743 case OP_MINPLUSI:
2744 case OP_POSPLUSI:
2745 caseless = 1;
2746 cc++;
2747 break;
2748
2749 case OP_EXACTI:
2750 caseless = 1;
2751 cc += 1 + IMM2_SIZE;
2752 break;
2753
2754 default:
2755 must_stop = 2;
2756 break;
2757 }
2758
2759 if (must_stop == 2)
2760 break;
2761
2762 len = 1;
2763 #ifdef SUPPORT_UTF
2764 if (common->utf && HAS_EXTRALEN(cc[0])) len += GET_EXTRALEN(cc[0]);
2765 #endif
2766
2767 if (caseless && char_has_othercase(common, cc))
2768 {
2769 caseless = char_get_othercase_bit(common, cc);
2770 if (caseless == 0)
2771 return FALSE;
2772 #ifdef COMPILE_PCRE8
2773 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 8));
2774 #else
2775 if ((caseless & 0x100) != 0)
2776 caseless = ((caseless & 0xff) << 16) | (len - (caseless >> 9));
2777 else
2778 caseless = ((caseless & 0xff) << 8) | (len - (caseless >> 9));
2779 #endif
2780 }
2781 else
2782 caseless = 0;
2783
2784 while (len > 0 && location < MAX_N_CHARS * 2)
2785 {
2786 c = *cc;
2787 bit = 0;
2788 if (len == (caseless & 0xff))
2789 {
2790 bit = caseless >> 8;
2791 c |= bit;
2792 }
2793
2794 chars[location] = c;
2795 chars[location + 1] = bit;
2796
2797 len--;
2798 location += 2;
2799 cc++;
2800 }
2801
2802 if (location >= MAX_N_CHARS * 2 || must_stop != 0)
2803 break;
2804 }
2805
2806 /* At least two characters are required. */
2807 if (location < 2 * 2)
2808 return FALSE;
2809
2810 if (firstline)
2811 {
2812 SLJIT_ASSERT(common->first_line_end != 0);
2813 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2814 OP2(SLJIT_SUB, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2815 }
2816 else
2817 OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2818
2819 start = LABEL();
2820 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2821
2822 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
2823 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2824 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2825 if (chars[1] != 0)
2826 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[1]);
2827 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[0], start);
2828 if (location > 2 * 2)
2829 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
2830 if (chars[3] != 0)
2831 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, chars[3]);
2832 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, chars[2], start);
2833 if (location > 2 * 2)
2834 {
2835 if (chars[5] != 0)
2836 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, chars[5]);
2837 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[4], start);
2838 }
2839 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2840
2841 JUMPHERE(quit);
2842
2843 if (firstline)
2844 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2845 else
2846 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS((location >> 1) - 1));
2847 return TRUE;
2848 }
2849
2850 #undef MAX_N_CHARS
2851
2852 static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless, BOOL firstline)
2853 {
2854 DEFINE_COMPILER;
2855 struct sljit_label *start;
2856 struct sljit_jump *quit;
2857 struct sljit_jump *found;
2858 pcre_uchar oc, bit;
2859
2860 if (firstline)
2861 {
2862 SLJIT_ASSERT(common->first_line_end != 0);
2863 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2864 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2865 }
2866
2867 start = LABEL();
2868 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2869 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2870
2871 oc = first_char;
2872 if (caseless)
2873 {
2874 oc = TABLE_GET(first_char, common->fcc, first_char);
2875 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
2876 if (first_char > 127 && common->utf)
2877 oc = UCD_OTHERCASE(first_char);
2878 #endif
2879 }
2880 if (first_char == oc)
2881 found = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, first_char);
2882 else
2883 {
2884 bit = first_char ^ oc;
2885 if (is_powerof2(bit))
2886 {
2887 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, bit);
2888 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, first_char | bit);
2889 }
2890 else
2891 {
2892 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, first_char);
2893 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2894 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
2895 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
2896 found = JUMP(SLJIT_C_NOT_ZERO);
2897 }
2898 }
2899
2900 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2901 JUMPTO(SLJIT_JUMP, start);
2902 JUMPHERE(found);
2903 JUMPHERE(quit);
2904
2905 if (firstline)
2906 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2907 }
2908
2909 static SLJIT_INLINE void fast_forward_newline(compiler_common *common, BOOL firstline)
2910 {
2911 DEFINE_COMPILER;
2912 struct sljit_label *loop;
2913 struct sljit_jump *lastchar;
2914 struct sljit_jump *firstchar;
2915 struct sljit_jump *quit;
2916 struct sljit_jump *foundcr = NULL;
2917 struct sljit_jump *notfoundnl;
2918 jump_list *newline = NULL;
2919
2920 if (firstline)
2921 {
2922 SLJIT_ASSERT(common->first_line_end != 0);
2923 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
2924 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
2925 }
2926
2927 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
2928 {
2929 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2930 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2931 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2932 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
2933 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2934
2935 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
2936 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
2937 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_GREATER_EQUAL);
2938 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2939 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
2940 #endif
2941 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
2942
2943 loop = LABEL();
2944 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
2945 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2946 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
2947 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
2948 CMPTO(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
2949 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
2950
2951 JUMPHERE(quit);
2952 JUMPHERE(firstchar);
2953 JUMPHERE(lastchar);
2954
2955 if (firstline)
2956 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
2957 return;
2958 }
2959
2960 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
2961 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
2962 firstchar = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
2963 skip_char_back(common);
2964
2965 loop = LABEL();
2966 read_char(common);
2967 lastchar = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2968 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2969 foundcr = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
2970 check_newlinechar(common, common->nltype, &newline, FALSE);
2971 set_jumps(newline, loop);
2972
2973 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
2974 {
2975 quit = JUMP(SLJIT_JUMP);
2976 JUMPHERE(foundcr);
2977 notfoundnl = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
2978 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
2979 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
2980 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
2981 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
2982 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
2983 #endif
2984 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
2985 JUMPHERE(notfoundnl);
2986 JUMPHERE(quit);
2987 }
2988 JUMPHERE(lastchar);
2989 JUMPHERE(firstchar);
2990
2991 if (firstline)
2992 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
2993 }
2994
2995 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks);
2996
2997 static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, sljit_uw start_bits, BOOL firstline)
2998 {
2999 DEFINE_COMPILER;
3000 struct sljit_label *start;
3001 struct sljit_jump *quit;
3002 struct sljit_jump *found = NULL;
3003 jump_list *matches = NULL;
3004 pcre_uint8 inverted_start_bits[32];
3005 int i;
3006 #ifndef COMPILE_PCRE8
3007 struct sljit_jump *jump;
3008 #endif
3009
3010 for (i = 0; i < 32; ++i)
3011 inverted_start_bits[i] = ~(((pcre_uint8*)start_bits)[i]);
3012
3013 if (firstline)
3014 {
3015 SLJIT_ASSERT(common->first_line_end != 0);
3016 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
3017 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->first_line_end);
3018 }
3019
3020 start = LABEL();
3021 quit = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
3022 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
3023 #ifdef SUPPORT_UTF
3024 if (common->utf)
3025 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3026 #endif
3027
3028 if (!check_class_ranges(common, inverted_start_bits, (inverted_start_bits[31] & 0x80) != 0, &matches))
3029 {
3030 #ifndef COMPILE_PCRE8
3031 jump = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 255);
3032 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
3033 JUMPHERE(jump);
3034 #endif
3035 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3036 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3037 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), start_bits);
3038 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3039 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3040 found = JUMP(SLJIT_C_NOT_ZERO);
3041 }
3042
3043 #ifdef SUPPORT_UTF
3044 if (common->utf)
3045 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3046 #endif
3047 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3048 #ifdef SUPPORT_UTF
3049 #if defined COMPILE_PCRE8
3050 if (common->utf)
3051 {
3052 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0, start);
3053 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3054 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3055 }
3056 #elif defined COMPILE_PCRE16
3057 if (common->utf)
3058 {
3059 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800, start);
3060 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
3061 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3062 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3063 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3064 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
3065 }
3066 #endif /* COMPILE_PCRE[8|16] */
3067 #endif /* SUPPORT_UTF */
3068 JUMPTO(SLJIT_JUMP, start);
3069 if (found != NULL)
3070 JUMPHERE(found);
3071 if (matches != NULL)
3072 set_jumps(matches, LABEL());
3073 JUMPHERE(quit);
3074
3075 if (firstline)
3076 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
3077 }
3078
3079 static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
3080 {
3081 DEFINE_COMPILER;
3082 struct sljit_label *loop;
3083 struct sljit_jump *toolong;
3084 struct sljit_jump *alreadyfound;
3085 struct sljit_jump *found;
3086 struct sljit_jump *foundoc = NULL;
3087 struct sljit_jump *notfound;
3088 pcre_uint32 oc, bit;
3089
3090 SLJIT_ASSERT(common->req_char_ptr != 0);
3091 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr);
3092 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
3093 toolong = CMP(SLJIT_C_LESS, TMP1, 0, STR_END, 0);
3094 alreadyfound = CMP(SLJIT_C_LESS, STR_PTR, 0, TMP2, 0);
3095
3096 if (has_firstchar)
3097 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3098 else
3099 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
3100
3101 loop = LABEL();
3102 notfound = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, STR_END, 0);
3103
3104 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
3105 oc = req_char;
3106 if (caseless)
3107 {
3108 oc = TABLE_GET(req_char, common->fcc, req_char);
3109 #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3110 if (req_char > 127 && common->utf)
3111 oc = UCD_OTHERCASE(req_char);
3112 #endif
3113 }
3114 if (req_char == oc)
3115 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3116 else
3117 {
3118 bit = req_char ^ oc;
3119 if (is_powerof2(bit))
3120 {
3121 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
3122 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
3123 }
3124 else
3125 {
3126 found = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
3127 foundoc = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, oc);
3128 }
3129 }
3130 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3131 JUMPTO(SLJIT_JUMP, loop);
3132
3133 JUMPHERE(found);
3134 if (foundoc)
3135 JUMPHERE(foundoc);
3136 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->req_char_ptr, TMP1, 0);
3137 JUMPHERE(alreadyfound);
3138 JUMPHERE(toolong);
3139 return notfound;
3140 }
3141
/* Emits the shared "revert frames" routine. It is entered with a fast call
(return address in RETURN_ADDR) and walks a list of records starting at
STACK_TOP, using TMP1 as the cursor. The first word of each record selects
what is done:
  > 0 : the word is an offset from the local base held in TMP3; the next two
        words of the record are copied to that location, and the cursor
        advances by three words.
  = 0 : end of the list, the routine returns.
  < 0 : the negated word is an offset from the local base; the next single
        word of the record is copied there, and the cursor advances by two
        words.
TMP1, TMP2 and TMP3 are overwritten by the emitted code. */
3142 static void do_revertframes(compiler_common *common)
3143 {
3144 DEFINE_COMPILER;
3145 struct sljit_jump *jump;
3146 struct sljit_label *mainloop;
3147
3148 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3149 OP1(SLJIT_MOV, TMP1, 0, STACK_TOP, 0);
/* NOTE(review): presumably loads the base address of the locals area into
TMP3 - confirm against the GET_LOCAL_BASE definition. */
3150 GET_LOCAL_BASE(TMP3, 0, 0);
3151
3152 /* Drop frames until we reach STACK_TOP. */
3153 mainloop = LABEL();
3154 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), 0);
/* Compare the record header with zero (signed); the flags are used by both
conditional jumps below. */
3155 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0);
3156 jump = JUMP(SLJIT_C_SIG_LESS_EQUAL);
3157
/* Positive header: restore two consecutive words. */
3158 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3159 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3160 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(TMP1), 2 * sizeof(sljit_sw));
3161 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3162 JUMPTO(SLJIT_JUMP, mainloop);
3163
3164 JUMPHERE(jump);
/* Header <= 0: strictly negative goes to the single word case, zero falls
through to the return. */
3165 jump = JUMP(SLJIT_C_SIG_LESS);
3166 /* End of dropping frames. */
3167 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3168
3169 JUMPHERE(jump);
/* Negative header: restore a single word. */
3170 OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
3171 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP3, 0);
3172 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(TMP1), sizeof(sljit_sw));
3173 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
3174 JUMPTO(SLJIT_JUMP, mainloop);
3175 }
3176
/* Emits the shared word boundary test. The routine is entered with a fast
call and keeps its return address in LOCALS0.

A "word character" bit (0 or 1) is computed twice:
  - for the character before STR_PTR, stored in LOCALS1 (it stays 0 when
    STR_PTR is not above arguments->begin);
  - for the character at STR_PTR, left in TMP2 (it stays 0 when the
    check_str_end() jump is taken).
The final instruction XORs the two bits with SLJIT_SET_E, so the caller gets
the answer in the flags: the result is non-zero when exactly one of the two
bits is set. TMP1 and TMP2 are overwritten. */
3177 static void check_wordboundary(compiler_common *common)
3178 {
3179 DEFINE_COMPILER;
3180 struct sljit_jump *skipread;
3181 #if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
3182 struct sljit_jump *jump;
3183 #endif
3184
/* The non-UCP paths below shift the ctype byte right by 4 to isolate this
flag, so its value must be exactly 0x10. */
3185 SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
3186
3187 sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3188 /* Get type of the previous char, and put it to LOCALS1. */
3189 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
3190 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
3191 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, 0);
3192 skipread = CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
3193 skip_char_back(common);
3194 check_start_used_ptr(common);
3195 read_char(common);
3196
3197 /* Testing char type. */
3198 #ifdef SUPPORT_UCP
3199 if (common->use_ucp)
3200 {
/* Underscore counts as a word character directly. Otherwise the value left
in TMP1 by the getucd routine (presumably the Unicode character type - confirm
against that routine) is accepted when it lies in ucp_Ll..ucp_Lu or in
ucp_Nd..ucp_No. */
3201 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3202 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3203 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3204 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3205 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3206 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3207 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3208 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3209 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3210 JUMPHERE(jump);
3211 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP2, 0);
3212 }
3213 else
3214 #endif
3215 {
/* Table lookup: characters above 255 skip the lookup and keep the zero that
was stored in LOCALS1 above. */
3216 #ifndef COMPILE_PCRE8
3217 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3218 #elif defined SUPPORT_UTF
3219 /* Here LOCALS1 has already been zeroed. */
3220 jump = NULL;
3221 if (common->utf)
3222 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3223 #endif /* COMPILE_PCRE8 */
3224 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
3225 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
3226 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
3227 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, TMP1, 0);
3228 #ifndef COMPILE_PCRE8
3229 JUMPHERE(jump);
3230 #elif defined SUPPORT_UTF
3231 if (jump != NULL)
3232 JUMPHERE(jump);
3233 #endif /* COMPILE_PCRE8 */
3234 }
3235 JUMPHERE(skipread);
3236
/* Second half: the same classification for the character at STR_PTR, with
the result kept in TMP2 instead of LOCALS1. */
3237 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3238 skipread = check_str_end(common);
3239 peek_char(common);
3240
3241 /* Testing char type. This is a code duplication. */
3242 #ifdef SUPPORT_UCP
3243 if (common->use_ucp)
3244 {
3245 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
3246 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
3247 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3248 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
3249 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
3250 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3251 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
3252 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
3253 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3254 JUMPHERE(jump);
3255 }
3256 else
3257 #endif
3258 {
3259 #ifndef COMPILE_PCRE8
3260 /* TMP2 may be destroyed by peek_char. */
3261 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3262 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3263 #elif defined SUPPORT_UTF
3264 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
3265 jump = NULL;
3266 if (common->utf)
3267 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3268 #endif
3269 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
3270 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
3271 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
3272 #ifndef COMPILE_PCRE8
3273 JUMPHERE(jump);
3274 #elif defined SUPPORT_UTF
3275 if (jump != NULL)
3276 JUMPHERE(jump);
3277 #endif /* COMPILE_PCRE8 */
3278 }
3279 JUMPHERE(skipread);
3280
/* Flags only: non-zero when the two word bits differ. */
3281 OP2(SLJIT_XOR | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3282 sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3283 }
3284
3285 /*
3286 range format:
3287
3288 ranges[0] = length of the range (max MAX_RANGE_SIZE, -1 means invalid range).
3289 ranges[1] = first bit (0 or 1)
3290 ranges[2-length] = position of the bit change (when the current bit is not equal to the previous)
3291 */
3292
3293 static BOOL check_ranges(compiler_common *common, int *ranges, jump_list **backtracks, BOOL readch)
3294 {
3295 DEFINE_COMPILER;
3296 struct sljit_jump *jump;
3297
3298 if (ranges[0] < 0)
3299 return FALSE;
3300
3301 switch(ranges[0])
3302 {
3303 case 1:
3304 if (readch)
3305 read_char(common);
3306 add_jump(compiler, backtracks, CMP(ranges[1] == 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3307 return TRUE;
3308
3309 case 2:
3310 if (readch)
3311 read_char(common);
3312 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
3313 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
3314 return TRUE;
3315
3316 case 4:
3317 if (ranges[2] + 1 == ranges[3] && ranges[4] + 1 == ranges[5])
3318 {
3319 if (readch)
3320 read_char(common);
3321 if (ranges[1] != 0)
3322 {
3323 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
3324 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3325 }
3326 else
3327 {
3328 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]);
3329 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[4]));
3330 JUMPHERE(jump);
3331 }
3332 return TRUE;
3333 }
3334 if ((ranges[3] - ranges[2]) == (ranges[5] - ranges[4]) && is_powerof2(ranges[4] - ranges[2]))
3335 {
3336 if (readch)
3337 read_char(common);
3338 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4] - ranges[2]);
3339 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[4]);
3340 add_jump(compiler, backtracks, CMP(ranges[1] != 0 ? SLJIT_C_LESS : SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[5] - ranges[4]));
3341 return TRUE;
3342 }
3343 return FALSE;
3344
3345 default:
3346 return FALSE;
3347 }
3348 }
3349
3350 static void get_ctype_ranges(compiler_common *common, int flag, int *ranges)
3351 {
3352 int i, bit, length;
3353 const pcre_uint8 *ctypes = (const pcre_uint8*)common->ctypes;
3354
3355 bit = ctypes[0] & flag;
3356 ranges[0] = -1;
3357 ranges[1] = bit != 0 ? 1 : 0;
3358 length = 0;
3359
3360 for (i = 1; i < 256; i++)
3361 if ((ctypes[i] & flag) != bit)
3362 {
3363 if (length >= MAX_RANGE_SIZE)
3364 return;
3365 ranges[2 + length] = i;
3366 length++;
3367 bit ^= flag;
3368 }
3369
3370 if (bit != 0)
3371 {
3372 if (length >= MAX_RANGE_SIZE)
3373 return;
3374 ranges[2 + length] = 256;
3375 length++;
3376 }
3377 ranges[0] = length;
3378 }
3379
3380 static BOOL check_class_ranges(compiler_common *common, const pcre_uint8 *bits, BOOL nclass, jump_list **backtracks)
3381 {
3382 int ranges[2 + MAX_RANGE_SIZE];
3383 pcre_uint8 bit, cbit, all;
3384 int i, byte, length = 0;
3385
3386 bit = bits[0] & 0x1;
3387 ranges[1] = bit;
3388 /* Can be 0 or 255. */
3389 all = -bit;
3390
3391 for (i = 0; i < 256; )
3392 {
3393 byte = i >> 3;
3394 if ((i & 0x7) == 0 && bits[byte] == all)
3395 i += 8;
3396 else
3397 {
3398 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
3399 if (cbit != bit)
3400 {
3401 if (length >= MAX_RANGE_SIZE)
3402 return FALSE;
3403 ranges[2 + length] = i;
3404 length++;
3405 bit = cbit;
3406 all = -cbit;
3407 }
3408 i++;
3409 }
3410 }
3411
3412 if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
3413 {
3414 if (length >= MAX_RANGE_SIZE)
3415 return FALSE;
3416 ranges[2 + length] = 256;
3417 length++;
3418 }
3419 ranges[0] = length;
3420
3421 return check_ranges(common, ranges, backtracks, FALSE);
3422 }
3423
/* Emits the shared "any newline" test (fast-call routine, return address in
RETURN_ADDR). The values accepted are 0x0a..0x0d and 0x85; in the wide /
UTF configurations also 0x2028 and 0x2029. The result is accumulated in TMP2
and the final OP_FLAGS also sets the flags (SLJIT_SET_E), so the result is
non-zero when the character is one of these newlines. */
3424 static void check_anynewline(compiler_common *common)
3425 {
3426 /* Check whether TMP1 contains a newline character. TMP2 destroyed. */
/* TMP1 is modified as well: 0x0a is subtracted from it (and its lowest bit
may be set) while testing. */
3427 DEFINE_COMPILER;
3428
3429 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3430
3431 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3432 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3433 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3434 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3435 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3436 #ifdef COMPILE_PCRE8
3437 if (common->utf)
3438 {
3439 #endif
3440 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029 (both already offset by
0x0a), so one comparison covers both. */
3441 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3442 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3443 #ifdef COMPILE_PCRE8
3444 }
3445 #endif
3446 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of the last comparison emitted above, whichever branch
was compiled. */
3447 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3448 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3449 }
3450
/* Emits the shared horizontal whitespace test (fast-call routine, return
address in RETURN_ADDR). The values accepted are 0x09, 0x20 and 0xa0; in the
wide / UTF configurations also 0x1680, 0x180e, 0x2000..0x200A, 0x202f, 0x205f
and 0x3000. The result is accumulated in TMP2 and the final OP_FLAGS also sets
the flags (SLJIT_SET_E), so the result is non-zero on a match. */
3451 static void check_hspace(compiler_common *common)
3452 {
3453 /* Check whether TMP1 contains a horizontal whitespace character. TMP2 destroyed. */
/* In the wide / UTF path TMP1 is modified as well (0x2000 is subtracted). */
3454 DEFINE_COMPILER;
3455
3456 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3457
3458 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
3459 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
3460 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
3461 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3462 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
3463 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3464 #ifdef COMPILE_PCRE8
3465 if (common->utf)
3466 {
3467 #endif
3468 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3469 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
3470 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3471 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
3472 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* From here on TMP1 is offset by 0x2000, so the remaining constants are
written relative to that base. */
3473 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
3474 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
3475 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
3476 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
3477 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3478 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
3479 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3480 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
3481 #ifdef COMPILE_PCRE8
3482 }
3483 #endif
3484 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of the last comparison emitted above, whichever branch
was compiled. */
3485 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3486
3487 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3488 }
3489
/* Emits the shared vertical whitespace test (fast-call routine, return
address in RETURN_ADDR). The values accepted are 0x0a..0x0d and 0x85; in the
wide / UTF configurations also 0x2028 and 0x2029. The result is accumulated
in TMP2 and the final OP_FLAGS also sets the flags (SLJIT_SET_E), so the
result is non-zero on a match. */
3490 static void check_vspace(compiler_common *common)
3491 {
3492 /* Check whether TMP1 contains a vertical whitespace character. TMP2 destroyed. */
/* TMP1 is modified as well: 0x0a is subtracted from it (and its lowest bit
may be set) while testing. */
3493 DEFINE_COMPILER;
3494
3495 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3496
3497 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
3498 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
3499 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
3500 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
3501 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3502 #ifdef COMPILE_PCRE8
3503 if (common->utf)
3504 {
3505 #endif
3506 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
/* Setting the lowest bit maps 0x2028 onto 0x2029 (both already offset by
0x0a), so one comparison covers both. */
3507 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
3508 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
3509 #ifdef COMPILE_PCRE8
3510 }
3511 #endif
3512 #endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
/* Merges the result of the last comparison emitted above, whichever branch
was compiled. */
3513 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
3514
3515 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3516 }
3517
/* The compare routines below borrow the STR_END and STACK_TOP registers as
character scratch registers. Their values are saved on entry and restored
before the routine returns. */
3518 #define CHAR1 STR_END
3519 #define CHAR2 STACK_TOP
3520
/* Emits the shared case sensitive sequence compare (fast-call routine, return
address in RETURN_ADDR).

As used by the emitted code: TMP1 points to the first sequence, TMP2 holds the
remaining length (it is decreased by IN_UCHARS(1) per character), and the
second sequence starts at STR_PTR - TMP2. The loop stops at the first
differing character or when TMP2 reaches zero. CHAR1 is saved in TMP3 and
CHAR2 in LOCALS0 for the duration of the loop.

NOTE(review): the pointers are moved back by one character before the loop
and STR_PTR is moved forward by one afterwards, which presumes MOVU_UCHAR
updates its base register by the given offset before loading - confirm against
the MOVU_UCHAR definition. */
3521 static void do_casefulcmp(compiler_common *common)
3522 {
3523 DEFINE_COMPILER;
3524 struct sljit_jump *jump;
3525 struct sljit_label *label;
3526
3527 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3528 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3529 OP1(SLJIT_MOV, TMP3, 0, CHAR1, 0);
3530 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR2, 0);
3531 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3532 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3533
3534 label = LABEL();
3535 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3536 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3537 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3538 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3539 JUMPTO(SLJIT_C_NOT_ZERO, label);
3540
/* Common exit for both "mismatch" and "all characters compared". */
3541 JUMPHERE(jump);
3542 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3543 OP1(SLJIT_MOV, CHAR1, 0, TMP3, 0);
3544 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3545 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3546 }
3547
/* The caseless compare borrows the STACK_LIMIT register to hold the address
of the common->lcc table; the register is restored before returning. */
3548 #define LCC_TABLE STACK_LIMIT
3549
/* Emits the shared caseless sequence compare (fast-call routine, return
address in RETURN_ADDR). It has the same structure as do_casefulcmp(), except
that both characters are translated through the common->lcc table before
they are compared. In the non 8 bit builds, characters above 255 are compared
untranslated.

Register usage of the emitted code: LCC_TABLE is saved in TMP3, CHAR1 in
LOCALS0 and CHAR2 in LOCALS1; all three are restored on exit. TMP1 points to
the first sequence, TMP2 holds the remaining length, and the second sequence
starts at STR_PTR - TMP2. */
3550 static void do_caselesscmp(compiler_common *common)
3551 {
3552 DEFINE_COMPILER;
3553 struct sljit_jump *jump;
3554 struct sljit_label *label;
3555
3556 sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
3557 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3558
3559 OP1(SLJIT_MOV, TMP3, 0, LCC_TABLE, 0);
3560 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, CHAR1, 0);
3561 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, CHAR2, 0);
3562 OP1(SLJIT_MOV, LCC_TABLE, 0, SLJIT_IMM, common->lcc);
3563 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
3564 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3565
3566 label = LABEL();
3567 OP1(MOVU_UCHAR, CHAR1, 0, SLJIT_MEM1(TMP1), IN_UCHARS(1));
3568 OP1(MOVU_UCHAR, CHAR2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
/* The table lookup is skipped for characters above 255. */
3569 #ifndef COMPILE_PCRE8
3570 jump = CMP(SLJIT_C_GREATER, CHAR1, 0, SLJIT_IMM, 255);
3571 #endif
3572 OP1(SLJIT_MOV_UB, CHAR1, 0, SLJIT_MEM2(LCC_TABLE, CHAR1), 0);
3573 #ifndef COMPILE_PCRE8
3574 JUMPHERE(jump);
3575 jump = CMP(SLJIT_C_GREATER, CHAR2, 0, SLJIT_IMM, 255);
3576 #endif
3577 OP1(SLJIT_MOV_UB, CHAR2, 0, SLJIT_MEM2(LCC_TABLE, CHAR2), 0);
3578 #ifndef COMPILE_PCRE8
3579 JUMPHERE(jump);
3580 #endif
3581 jump = CMP(SLJIT_C_NOT_EQUAL, CHAR1, 0, CHAR2, 0);
3582 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
3583 JUMPTO(SLJIT_C_NOT_ZERO, label);
3584
/* Common exit for both "mismatch" and "all characters compared". */
3585 JUMPHERE(jump);
3586 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3587 OP1(SLJIT_MOV, LCC_TABLE, 0, TMP3, 0);
3588 OP1(SLJIT_MOV, CHAR1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
3589 OP1(SLJIT_MOV, CHAR2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
3590 sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
3591 }
3592
3593 #undef LCC_TABLE
3594 #undef CHAR1
3595 #undef CHAR2
3596
3597 #if defined SUPPORT_UTF && defined SUPPORT_UCP
3598
3599 static const pcre_uchar * SLJIT_CALL do_utf_caselesscmp(pcre_uchar *src1, jit_arguments *args, pcre_uchar *end1)
3600 {
3601 /* This function would be ineffective to do in JIT level. */
3602 pcre_uint32 c1, c2;
3603 const pcre_uchar *src2 = args->uchar_ptr;
3604 const pcre_uchar *end2 = args->end;
3605 const ucd_record *ur;
3606 const pcre_uint32 *pp;
3607
3608 while (src1 < end1)
3609 {
3610 if (src2 >= end2)
3611 return (pcre_uchar*)1;
3612 GETCHARINC(c1, src1);
3613 GETCHARINC(c2, src2);
3614 ur = GET_UCD(c2);
3615 if (c1 != c2 && c1 != c2 + ur->other_case)
3616 {
3617 pp = PRIV(ucd_caseless_sets) + ur->caseset;
3618 for (;;)
3619 {
3620 if (c1 < *pp) return NULL;
3621 if (c1 == *pp++) break;
3622 }
3623 }
3624 }
3625 return src2;
3626 }
3627
3628 #endif /* SUPPORT_UTF && SUPPORT_UCP */
3629
/* Emits the comparison of one pattern character (all of its code units in
UTF mode) against the subject, and returns cc advanced past that character.

The subject is addressed backwards from STR_PTR: the unit being compared is at
STR_PTR - context->length, and context->length is decreased by IN_UCHARS(1)
for every unit processed. A mismatch jumps to backtracks.

context carries state between successive calls for consecutive characters:
  sourcereg  register holding the most recently loaded subject data
             (-1 before the first call, which triggers the initial load);
  length     distance, in bytes, from STR_PTR back to the next unit;
  ucharptr   number of units gathered so far for a combined compare
             (unaligned read builds only);
  c / oc     the gathered pattern units and the case bits to OR in before
             the combined compare (unaligned read builds only).

When caseless is set and the character has another case, the comparison is
made case insensitive by OR-ing a single bit (othercasebit) into the unit at
othercasechar before comparing. */
3630 static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc,
3631 compare_context* context, jump_list **backtracks)
3632 {
3633 DEFINE_COMPILER;
3634 unsigned int othercasebit = 0;
3635 pcre_uchar *othercasechar = NULL;
3636 #ifdef SUPPORT_UTF
3637 int utflength;
3638 #endif
3639
3640 if (caseless && char_has_othercase(common, cc))
3641 {
3642 othercasebit = char_get_othercase_bit(common, cc);
3643 SLJIT_ASSERT(othercasebit);
3644 /* Extracting bit difference info. */
/* The value returned by char_get_othercase_bit() is unpacked here: the upper
bits give the index of the code unit that differs, the low 8 bits give the
bit mask (in the 16/32 bit builds bit 8 says that the mask applies to the
upper byte of the unit). */
3645 #if defined COMPILE_PCRE8
3646 othercasechar = cc + (othercasebit >> 8);
3647 othercasebit &= 0xff;
3648 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
3649 /* Note that this code only handles characters in the BMP. If there
3650 ever are characters outside the BMP whose othercase differs in only one
3651 bit from itself (there currently are none), this code will need to be
3652 revised for COMPILE_PCRE32. */
3653 othercasechar = cc + (othercasebit >> 9);
3654 if ((othercasebit & 0x100) != 0)
3655 othercasebit = (othercasebit & 0xff) << 8;
3656 else
3657 othercasebit &= 0xff;
3658 #endif /* COMPILE_PCRE[8|16|32] */
3659 }
3660
/* First call for this context: load the first chunk into TMP1. sourcereg is
then set to TMP2, the register that receives the following chunk. */
3661 if (context->sourcereg == -1)
3662 {
3663 #if defined COMPILE_PCRE8
3664 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3665 if (context->length >= 4)
3666 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3667 else if (context->length >= 2)
3668 OP1(SLJIT_MOV_UH, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3669 else
3670 #endif
3671 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3672 #elif defined COMPILE_PCRE16
3673 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
3674 if (context->length >= 4)
3675 OP1(SLJIT_MOV_SI, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3676 else
3677 #endif
3678 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3679 #elif defined COMPILE_PCRE32
3680 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
3681 #endif /* COMPILE_PCRE[8|16|32] */
3682 context->sourcereg = TMP2;
3683 }
3684
/* In UTF mode the loop below runs once per code unit of the character. */
3685 #ifdef SUPPORT_UTF
3686 utflength = 1;
3687 if (common->utf && HAS_EXTRALEN(*cc))
3688 utflength += GET_EXTRALEN(*cc);
3689
3690 do
3691 {
3692 #endif
3693
3694 context->length -= IN_UCHARS(1);
3695 #if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
3696
3697 /* Unaligned read is supported. */
/* Gather the expected unit (with the case bit already set) and the OR mask;
the actual compare is emitted once a full chunk has been gathered. */
3698 if (othercasebit != 0 && othercasechar == cc)
3699 {
3700 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
3701 context->oc.asuchars[context->ucharptr] = othercasebit;
3702 }
3703 else
3704 {
3705 context->c.asuchars[context->ucharptr] = *cc;
3706 context->oc.asuchars[context->ucharptr] = 0;
3707 }
3708 context->ucharptr++;
3709
3710 #if defined COMPILE_PCRE8
3711 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
3712 #else
3713 if (context->ucharptr >= 2 || context->length == 0)
3714 #endif
3715 {
/* Load the next chunk into the idle register first, then switch sourcereg
back to the register that holds the chunk gathered so far and compare it. */
3716 if (context->length >= 4)
3717 OP1(SLJIT_MOV_SI, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3718 else if (context->length >= 2)
3719 OP1(SLJIT_MOV_UH, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3720 #if defined COMPILE_PCRE8
3721 else if (context->length >= 1)
3722 OP1(SLJIT_MOV_UB, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3723 #endif /* COMPILE_PCRE8 */
3724 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3725
/* The case labels are unit counts: 4 or 2 bytes worth of code units, and a
single byte in the 8 bit build. */
3726 switch(context->ucharptr)
3727 {
3728 case 4 / sizeof(pcre_uchar):
3729 if (context->oc.asint != 0)
3730 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
3731 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
3732 break;
3733
3734 case 2 / sizeof(pcre_uchar):
3735 if (context->oc.asushort != 0)
3736 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
3737 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
3738 break;
3739
3740 #ifdef COMPILE_PCRE8
3741 case 1:
3742 if (context->oc.asbyte != 0)
3743 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
3744 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
3745 break;
3746 #endif
3747
3748 default:
3749 SLJIT_ASSERT_STOP();
3750 break;
3751 }
3752 context->ucharptr = 0;
3753 }
3754
3755 #else
3756
3757 /* Unaligned read is unsupported or in 32 bit mode. */
/* One unit at a time: preload the next unit into the idle register, then
compare the unit that was loaded previously. */
3758 if (context->length >= 1)
3759 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
3760
3761 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
3762
3763 if (othercasebit != 0 && othercasechar == cc)
3764 {
3765 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
3766 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
3767 }
3768 else
3769 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
3770
3771 #endif
3772
3773 cc++;
3774 #ifdef SUPPORT_UTF
3775 utflength--;
3776 }
3777 while (utflength > 0);
3778 #endif
3779
3780 return cc;
3781 }
3782
3783 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3784
/* Rebases the register typereg so that it holds (original type - value):
emits a SUB or an ADD of the difference between value and the current
typeoffset, then records value as the new typeoffset. Nothing is emitted when
the offset is unchanged. Uses the typereg and typeoffset variables of the
enclosing function.

Caution: this expands to two statements (an if and an assignment), not to a
do { } while (0) block, and it evaluates value several times. Use it only as
a complete statement inside braces, with an argument free of side effects. */
3785 #define SET_TYPE_OFFSET(value) \
3786 if ((value) != typeoffset) \
3787 { \
3788 if ((value) > typeoffset) \
3789 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
3790 else \
3791 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
3792 } \
3793 typeoffset = (value);
3794
/* Rebases TMP1 so that it holds (original character - value): emits a SUB or
an ADD of the difference between value and the current charoffset, then
records value as the new charoffset. Nothing is emitted when the offset is
unchanged. Uses the charoffset variable of the enclosing function.

Caution: this expands to two statements (an if and an assignment), not to a
do { } while (0) block, and it evaluates value several times. Use it only as
a complete statement inside braces, with an argument free of side effects. */
3795 #define SET_CHAR_OFFSET(value) \
3796 if ((value) != charoffset) \
3797 { \
3798 if ((value) > charoffset) \
3799 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (value) - charoffset); \
3800 else \
3801 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, charoffset - (value)); \
3802 } \
3803 charoffset = (value);
3804
3805 static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
3806 {
3807 DEFINE_COMPILER;
3808 jump_list *found = NULL;
3809 jump_list **list = (*cc & XCL_NOT) == 0 ? &found : backtracks;
3810 pcre_int32 c, charoffset;
3811 const pcre_uint32 *other_cases;
3812 struct sljit_jump *jump = NULL;
3813 pcre_uchar *ccbegin;
3814 int compares, invertcmp, numberofcmps;
3815 #ifdef SUPPORT_UCP
3816 BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
3817 BOOL charsaved = FALSE;
3818 int typereg = TMP1, scriptreg = TMP1;
3819 pcre_int32 typeoffset;
3820 #endif
3821
3822 /* Although SUPPORT_UTF must be defined, we are
3823 not necessary in utf mode even in 8 bit mode. */
3824 detect_partial_match(common, backtracks);
3825 read_char(common);
3826
3827 if ((*cc++ & XCL_MAP) != 0)
3828 {
3829 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3830 #ifndef COMPILE_PCRE8
3831 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3832 #elif defined SUPPORT_UTF
3833 if (common->utf)
3834 jump = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
3835 #endif
3836
3837 if (!check_class_ranges(common, (const pcre_uint8 *)cc, TRUE, list))
3838 {
3839 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
3840 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
3841 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
3842 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
3843 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
3844 add_jump(compiler, list, JUMP(SLJIT_C_NOT_ZERO));
3845 }
3846
3847 #ifndef COMPILE_PCRE8
3848 JUMPHERE(jump);
3849 #elif defined SUPPORT_UTF
3850 if (common->utf)
3851 JUMPHERE(jump);
3852 #endif
3853 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3854 #ifdef SUPPORT_UCP
3855 charsaved = TRUE;
3856 #endif
3857 cc += 32 / sizeof(pcre_uchar);
3858 }
3859
3860 /* Scanning the necessary info. */
3861 ccbegin = cc;
3862 compares = 0;
3863 while (*cc != XCL_END)
3864 {
3865 compares++;
3866 if (*cc == XCL_SINGLE)
3867 {
3868 cc += 2;
3869 #ifdef SUPPORT_UTF
3870 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3871 #endif
3872 #ifdef SUPPORT_UCP
3873 needschar = TRUE;
3874 #endif
3875 }
3876 else if (*cc == XCL_RANGE)
3877 {
3878 cc += 2;
3879 #ifdef SUPPORT_UTF
3880 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3881 #endif
3882 cc++;
3883 #ifdef SUPPORT_UTF
3884 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
3885 #endif
3886 #ifdef SUPPORT_UCP
3887 needschar = TRUE;
3888 #endif
3889 }
3890 #ifdef SUPPORT_UCP
3891 else
3892 {
3893 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
3894 cc++;
3895 switch(*cc)
3896 {
3897 case PT_ANY:
3898 break;
3899
3900 case PT_LAMP:
3901 case PT_GC:
3902 case PT_PC:
3903 case PT_ALNUM:
3904 needstype = TRUE;
3905 break;
3906
3907 case PT_SC:
3908 needsscript = TRUE;
3909 break;
3910
3911 case PT_SPACE:
3912 case PT_PXSPACE:
3913 case PT_WORD:
3914 needstype = TRUE;
3915 needschar = TRUE;
3916 break;
3917
3918 case PT_CLIST:
3919 needschar = TRUE;
3920 break;
3921
3922 default:
3923 SLJIT_ASSERT_STOP();
3924 break;
3925 }
3926 cc += 2;
3927 }
3928 #endif
3929 }
3930
3931 #ifdef SUPPORT_UCP
3932 /* Simple register allocation. TMP1 is preferred if possible. */
3933 if (needstype || needsscript)
3934 {
3935 if (needschar && !charsaved)
3936 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3937 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
3938 if (needschar)
3939 {
3940 if (needstype)
3941 {
3942 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3943 typereg = RETURN_ADDR;
3944 }
3945
3946 if (needsscript)
3947 scriptreg = TMP3;
3948 OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
3949 }
3950 else if (needstype && needsscript)
3951 scriptreg = TMP3;
3952 /* In all other cases only one of them was specified, and that can goes to TMP1. */
3953
3954 if (needsscript)
3955 {
3956 if (scriptreg == TMP1)
3957 {
3958 OP1(SLJIT_MOV, scriptreg, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3959 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM2(scriptreg, TMP2), 3);
3960 }
3961 else
3962 {
3963 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
3964 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
3965 OP1(SLJIT_MOV_UB, scriptreg, 0, SLJIT_MEM1(TMP2), 0);
3966 }
3967 }
3968 }
3969 #endif
3970
3971 /* Generating code. */
3972 cc = ccbegin;
3973 charoffset = 0;
3974 numberofcmps = 0;
3975 #ifdef SUPPORT_UCP
3976 typeoffset = 0;
3977 #endif
3978
3979 while (*cc != XCL_END)
3980 {
3981 compares--;
3982 invertcmp = (compares == 0 && list != backtracks);
3983 jump = NULL;
3984
3985 if (*cc == XCL_SINGLE)
3986 {
3987 cc ++;
3988 #ifdef SUPPORT_UTF
3989 if (common->utf)
3990 {
3991 GETCHARINC(c, cc);
3992 }
3993 else
3994 #endif
3995 c = *cc++;
3996
3997 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
3998 {
3999 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4000 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_EQUAL);
4001 numberofcmps++;
4002 }
4003 else if (numberofcmps > 0)
4004 {
4005 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4006 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4007 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4008 numberofcmps = 0;
4009 }
4010 else
4011 {
4012 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4013 numberofcmps = 0;
4014 }
4015 }
4016 else if (*cc == XCL_RANGE)
4017 {
4018 cc ++;
4019 #ifdef SUPPORT_UTF
4020 if (common->utf)
4021 {
4022 GETCHARINC(c, cc);
4023 }
4024 else
4025 #endif
4026 c = *cc++;
4027 SET_CHAR_OFFSET(c);
4028 #ifdef SUPPORT_UTF
4029 if (common->utf)
4030 {
4031 GETCHARINC(c, cc);
4032 }
4033 else
4034 #endif
4035 c = *cc++;
4036 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
4037 {
4038 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4039 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, numberofcmps == 0 ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4040 numberofcmps++;
4041 }
4042 else if (numberofcmps > 0)
4043 {
4044 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c - charoffset);
4045 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4046 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4047 numberofcmps = 0;
4048 }
4049 else
4050 {
4051 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, c - charoffset);
4052 numberofcmps = 0;
4053 }
4054 }
4055 #ifdef SUPPORT_UCP
4056 else
4057 {
4058 if (*cc == XCL_NOTPROP)
4059 invertcmp ^= 0x1;
4060 cc++;
4061 switch(*cc)
4062 {
4063 case PT_ANY:
4064 if (list != backtracks)
4065 {
4066 if ((cc[-1] == XCL_NOTPROP && compares > 0) || (cc[-1] == XCL_PROP && compares == 0))
4067 continue;
4068 }
4069 else if (cc[-1] == XCL_NOTPROP)
4070 continue;
4071 jump = JUMP(SLJIT_JUMP);
4072 break;
4073
4074 case PT_LAMP:
4075 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
4076 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4077 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
4078 OP_FLAGS(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4079 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
4080 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4081 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4082 break;
4083
4084 case PT_GC:
4085 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
4086 SET_TYPE_OFFSET(c);
4087 jump = CMP(SLJIT_C_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
4088 break;
4089
4090 case PT_PC:
4091 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
4092 break;
4093
4094 case PT_SC:
4095 jump = CMP(SLJIT_C_EQUAL ^ invertcmp, scriptreg, 0, SLJIT_IMM, (int)cc[1]);
4096 break;
4097
4098 case PT_SPACE:
4099 case PT_PXSPACE:
4100 if (*cc == PT_SPACE)
4101 {
4102 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
4103 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);
4104 }
4105 SET_CHAR_OFFSET(9);
4106 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4107 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
4108 if (*cc == PT_SPACE)
4109 JUMPHERE(jump);
4110
4111 SET_TYPE_OFFSET(ucp_Zl);
4112 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
4113 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4114 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4115 break;
4116
4117 case PT_WORD:
4118 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE - charoffset);
4119 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4120 /* Fall through. */
4121
4122 case PT_ALNUM:
4123 SET_TYPE_OFFSET(ucp_Ll);
4124 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
4125 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, (*cc == PT_ALNUM) ? SLJIT_UNUSED : TMP2, 0, SLJIT_C_LESS_EQUAL);
4126 SET_TYPE_OFFSET(ucp_Nd);
4127 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
4128 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_LESS_EQUAL);
4129 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4130 break;
4131
4132 case PT_CLIST:
4133 other_cases = PRIV(ucd_caseless_sets) + cc[1];
4134
4135 /* At least three characters are required.
4136 Otherwise this case would be handled by the normal code path. */
4137 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
4138 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
4139
4140 /* Optimizing character pairs, if their difference is power of 2. */
4141 if (is_powerof2(other_cases[1] ^ other_cases[0]))
4142 {
4143 if (charoffset == 0)
4144 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4145 else
4146 {
4147 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4148 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4149 }
4150 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
4151 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4152 other_cases += 2;
4153 }
4154 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
4155 {
4156 if (charoffset == 0)
4157 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
4158 else
4159 {
4160 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
4161 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
4162 }
4163 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
4164 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4165
4166 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, other_cases[0] - charoffset);
4167 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4168
4169 other_cases += 3;
4170 }
4171 else
4172 {
4173 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4174 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4175 }
4176
4177 while (*other_cases != NOTACHAR)
4178 {
4179 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, *other_cases++ - charoffset);
4180 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_E : 0), TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4181 }
4182 jump = JUMP(SLJIT_C_NOT_ZERO ^ invertcmp);
4183 break;
4184 }
4185 cc += 2;
4186 }
4187 #endif
4188
4189 if (jump != NULL)
4190 add_jump(compiler, compares > 0 ? list : backtracks, jump);
4191 }
4192
4193 if (found != NULL)
4194 set_jumps(found, LABEL());
4195 }
4196
4197 #undef SET_TYPE_OFFSET
4198 #undef SET_CHAR_OFFSET
4199
4200 #endif
4201
/* Generates the matching-path code for one single-item opcode.

   common      compiler state; the sljit compiler is taken from it by
               DEFINE_COMPILER
   type        the opcode to compile (selects the case in the switch below)
   cc          the operand data of the opcode (compile_charn_matchingpath
               passes the address just after the opcode)
   backtracks  list that receives every jump taken when the item fails

   Returns cc advanced past the operand data consumed by the opcode; opcodes
   that read no operand here return cc unchanged. An opcode that is not
   handled by the switch reaches SLJIT_ASSERT_STOP(). */
4202 static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
4203 {
4204 DEFINE_COMPILER;
4205 int length;
4206 unsigned int c, oc, bit;
4207 compare_context context;
/* Scratch slots for forward jumps that are patched later with JUMPHERE. */
4208 struct sljit_jump *jump[4];
4209 #ifdef SUPPORT_UTF
4210 struct sljit_label *label;
4211 #ifdef SUPPORT_UCP
/* Temporary item list handed to compile_xclass_matchingpath for OP_PROP. */
4212 pcre_uchar propdata[5];
4213 #endif
4214 #endif
4215
4216 switch(type)
4217 {
/* Backtrack unless STR_PTR equals jit_arguments->begin. */
4218 case OP_SOD:
4219 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4220 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4221 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4222 return cc;
4223
/* Backtrack unless STR_PTR equals jit_arguments->str. */
4224 case OP_SOM:
4225 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4226 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
4227 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
4228 return cc;
4229
/* Fast-calls the shared wordboundary routine, then backtracks on the
   condition it leaves: not-zero for OP_NOT_WORD_BOUNDARY, zero otherwise. */
4230 case OP_NOT_WORD_BOUNDARY:
4231 case OP_WORD_BOUNDARY:
4232 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
4233 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4234 return cc;
4235
4236 case OP_NOT_DIGIT:
4237 case OP_DIGIT:
4238 /* Digits are usually 0-9, so it is worth to optimize them. */
/* NOTE(review): digits[0] == -2 presumably marks "ranges not computed yet";
   confirm against get_ctype_ranges. */
4239 if (common->digits[0] == -2)
4240 get_ctype_ranges(common, ctype_digit, common->digits);
4241 detect_partial_match(common, backtracks);
4242 /* Flip the starting bit in the negative case. */
4243 if (type == OP_NOT_DIGIT)
4244 common->digits[1] ^= 1;
/* When check_ranges() declines (returns FALSE), fall back to testing the
   ctype_digit bit after read_char8_type(). */
4245 if (!check_ranges(common, common->digits, backtracks, TRUE))
4246 {
4247 read_char8_type(common);
4248 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
4249 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4250 }
/* Undo the flip above so common->digits is left as it was found. */
4251 if (type == OP_NOT_DIGIT)
4252 common->digits[1] ^= 1;
4253 return cc;
4254
/* Tests the ctype_space bit; the jump condition is inverted for the
   negated opcode. */
4255 case OP_NOT_WHITESPACE:
4256 case OP_WHITESPACE:
4257 detect_partial_match(common, backtracks);
4258 read_char8_type(common);
4259 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
4260 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4261 return cc;
4262
/* Tests the ctype_word bit; the jump condition is inverted for the
   negated opcode. */
4263 case OP_NOT_WORDCHAR:
4264 case OP_WORDCHAR:
4265 detect_partial_match(common, backtracks);
4266 read_char8_type(common);
4267 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
4268 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_C_ZERO : SLJIT_C_NOT_ZERO));
4269 return cc;
4270
4271 case OP_ANY:
4272 detect_partial_match(common, backtracks);
4273 read_char(common);
/* Fixed newline with common->newline > 255: the character just read is
   compared with bits 8..15 and the following code unit with bits 0..7.
   The second comparison is skipped when the first one differs (jump[0])
   or when jump[1] is taken. */
4274 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4275 {
4276 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4277 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4278 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4279 else
4280 jump[1] = check_str_end(common);
4281
4282 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4283 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
/* check_str_end() may hand back NULL, hence the guard. */
4284 if (jump[1] != NULL)
4285 JUMPHERE(jump[1]);
4286 JUMPHERE(jump[0]);
4287 }
4288 else
4289 check_newlinechar(common, common->nltype, backtracks, TRUE);
4290 return cc;
4291
4292 case OP_ALLANY:
4293 detect_partial_match(common, backtracks);
4294 #ifdef SUPPORT_UTF
4295 if (common->utf)
4296 {
/* Step over one code unit, then add the extra units of a multi-unit
   character without decoding its value. */
4297 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4298 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4299 #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
4300 #if defined COMPILE_PCRE8
/* Units >= 0xc0: the extra length is loaded from utf8_table4[unit - 0xc0]. */
4301 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4302 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4303 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4304 #elif defined COMPILE_PCRE16
/* Units with (unit & 0xfc00) == 0xd800: TMP1 becomes 1, is shifted left by
   one, and the result is added to STR_PTR. */
4305 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
4306 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4307 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4308 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4309 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
4310 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4311 #endif
4312 JUMPHERE(jump[0]);
4313 #endif /* COMPILE_PCRE[8|16] */
4314 return cc;
4315 }
4316 #endif
/* Not UTF: one code unit is one character. */
4317 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4318 return cc;
4319
/* Always advances by exactly one code unit. */
4320 case OP_ANYBYTE:
4321 detect_partial_match(common, backtracks);
4322 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4323 return cc;
4324
4325 #ifdef SUPPORT_UTF
4326 #ifdef SUPPORT_UCP
/* Packs the two operand units into a temporary list (a leading 0, then
   XCL_PROP or XCL_NOTPROP, cc[0], cc[1], XCL_END) and lets
   compile_xclass_matchingpath generate the test. */
4327 case OP_NOTPROP:
4328 case OP_PROP:
4329 propdata[0] = 0;
4330 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
4331 propdata[2] = cc[0];
4332 propdata[3] = cc[1];
4333 propdata[4] = XCL_END;
4334 compile_xclass_matchingpath(common, propdata, backtracks);
4335 return cc + 2;
4336 #endif
4337 #endif
4338
/* A CR is accepted on its own or together with an immediately following
   NL (which is then consumed too); any other character must pass
   check_newlinechar() for common->bsr_nltype. */
4339 case OP_ANYNL:
4340 detect_partial_match(common, backtracks);
4341 read_char(common);
4342 jump[0] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4343 /* We don't need to handle soft partial matching case. */
4344 if (common->mode != JIT_PARTIAL_HARD_COMPILE)
4345 jump[1] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4346 else
4347 jump[1] = check_str_end(common);
4348 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4349 jump[2] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4350 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4351 jump[3] = JUMP(SLJIT_JUMP);
4352 JUMPHERE(jump[0]);
4353 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
4354 JUMPHERE(jump[1]);
4355 JUMPHERE(jump[2]);
4356 JUMPHERE(jump[3]);
4357 return cc;
4358
/* Reads a character, fast-calls the shared hspace routine and backtracks
   on the condition it leaves (inverted for the negated opcode). */
4359 case OP_NOT_HSPACE:
4360 case OP_HSPACE:
4361 detect_partial_match(common, backtracks);
4362 read_char(common);
4363 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
4364 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4365 return cc;
4366
/* Same scheme as OP_HSPACE, using the shared vspace routine. */
4367 case OP_NOT_VSPACE:
4368 case OP_VSPACE:
4369 detect_partial_match(common, backtracks);
4370 read_char(common);
4371 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
4372 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_C_NOT_ZERO : SLJIT_C_ZERO));
4373 return cc;
4374
4375 #ifdef SUPPORT_UCP
/* Consumes one character and then keeps consuming following characters
   while the ucp_gbtable entry selected by the previous character's gbprop
   has the bit of the next character's gbprop set. */
4376 case OP_EXTUNI:
4377 detect_partial_match(common, backtracks);
4378 read_char(common);
4379 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4380 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4381 /* Optimize register allocation: use a real register. */
/* STACK_TOP is parked in LOCALS0 and borrowed to hold the previous
   character's gbprop; it is reloaded after the loop. */
4382 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4383 OP1(SLJIT_MOV_UB, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4384
4385 label = LABEL();
4386 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
/* TMP3 remembers the position before the candidate character so that it
   can be given back when the loop stops on it. */
4387 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
4388 read_char(common);
4389 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
4390 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
4391 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);
4392
/* TMP1 = 32-bit ucp_gbtable entry at byte offset (previous gbprop << 2);
   the new gbprop becomes the "previous" one; TMP2 = 1 << new gbprop. */
4393 OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
4394 OP1(SLJIT_MOV_UI, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
4395 OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
4396 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4397 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4398 JUMPTO(SLJIT_C_NOT_ZERO, label);
4399
4400 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
4401 JUMPHERE(jump[0]);
4402 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
4403
4404 if (common->mode == JIT_PARTIAL_HARD_COMPILE)
4405 {
4406 jump[0] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4407 /* Since we successfully read a char above, partial matching must occur. */
4408 check_partial(common, TRUE);
4409 JUMPHERE(jump[0]);
4410 }
4411 return cc;
4412 #endif
4413
4414 case OP_EODN:
4415 /* Requires rather complex checks. */
/* jump[0]: STR_PTR is already at or past STR_END, so all newline checks
   below are skipped. */
4416 jump[0] = CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4417 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4418 {
/* Two-unit fixed newline: TMP2 = STR_PTR + 2 units, TMP1 = current unit. */
4419 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4420 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4421 if (common->mode == JIT_COMPILE)
4422 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4423 else
4424 {
/* Partial modes: when exactly two units remain (jump[1]) the full
   comparison below runs. Otherwise backtrack if TMP2 < STR_END or the
   current unit is not the first newline unit; what is left goes through
   check_partial() and then backtracks. */
4425 jump[1] = CMP(SLJIT_C_EQUAL, TMP2, 0, STR_END, 0);
4426 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4427 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS);
4428 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
4429 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_NOT_EQUAL);
4430 add_jump(compiler, backtracks, JUMP(SLJIT_C_NOT_EQUAL));
4431 check_partial(common, TRUE);
4432 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4433 JUMPHERE(jump[1]);
4434 }
4435 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4436 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4437 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4438 }
4439 else if (common->nltype == NLTYPE_FIXED)
4440 {
/* One-unit fixed newline: it must be the last unit of the subject. */
4441 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4442 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4443 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_END, 0));
4444 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4445 }
4446 else
4447 {
/* Current unit is CR: accepted as the last unit (jump[2]) or as CR NL
   ending the subject (jump[3]); backtrack if more than two units remain or
   the second unit is not NL. */
4448 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4449 jump[1] = CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4450 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4451 OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
4452 jump[2] = JUMP(SLJIT_C_GREATER);
4453 add_jump(compiler, backtracks, JUMP(SLJIT_C_LESS));
4454 /* Equal. */
4455 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4456 jump[3] = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
4457 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4458
/* Current unit is not CR. */
4459 JUMPHERE(jump[1]);
4460 if (common->nltype == NLTYPE_ANYCRLF)
4461 {
/* It must be an NL that is the last unit of the subject. */
4462 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4463 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, STR_END, 0));
4464 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4465 }
4466 else
4467 {
/* STR_PTR is saved in LOCALS1 around read_char(); the character read must
   end the subject and pass the shared anynewline routine, after which
   STR_PTR is restored. */
4468 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, STR_PTR, 0);
4469 read_char(common);
4470 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
4471 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4472 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4473 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1);
4474 }
4475 JUMPHERE(jump[2]);
4476 JUMPHERE(jump[3]);
4477 }
4478 JUMPHERE(jump[0]);
4479 check_partial(common, FALSE);
4480 return cc;
4481
/* Backtrack while subject text remains (STR_PTR < STR_END). */
4482 case OP_EOD:
4483 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4484 check_partial(common, FALSE);
4485 return cc;
4486
/* Backtrack when STR_PTR is past jit_arguments->begin or when the notbol
   argument is non-zero. */
4487 case OP_CIRC:
4488 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4489 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4490 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0));
4491 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4492 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4493 return cc;
4494
/* Not past begin: only the notbol test applies (then jump[0] skips the
   rest). Past begin (jump[1]): backtrack at or after STR_END, otherwise the
   text just before STR_PTR must be a newline. */
4495 case OP_CIRCM:
4496 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4497 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
4498 jump[1] = CMP(SLJIT_C_GREATER, STR_PTR, 0, TMP1, 0);
4499 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
4500 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4501 jump[0] = JUMP(SLJIT_JUMP);
4502 JUMPHERE(jump[1]);
4503
4504 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4505 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4506 {
/* Two-unit fixed newline: both units before STR_PTR must lie at or after
   begin (still in TMP1) and equal the two newline bytes. */
4507 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4508 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, TMP2, 0, TMP1, 0));
4509 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4510 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4511 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4512 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4513 }
4514 else
4515 {
/* Step back one character and read it again, then test it as a newline. */
4516 skip_char_back(common);
4517 read_char(common);
4518 check_newlinechar(common, common->nltype, backtracks, FALSE);
4519 }
4520 JUMPHERE(jump[0]);
4521 return cc;
4522
/* Backtrack when the noteol argument is non-zero; then either reuse the
   OP_EODN code (endonly not set) or require the exact end of the subject. */
4523 case OP_DOLL:
4524 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4525 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4526 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4527
4528 if (!common->endonly)
4529 compile_char1_matchingpath(common, OP_EODN, cc, backtracks);
4530 else
4531 {
4532 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0));
4533 check_partial(common, FALSE);
4534 }
4535 return cc;
4536
/* At the end of the subject: the noteol test and check_partial() apply
   (then jump[0] skips the rest). Before the end (jump[1]): the text at
   STR_PTR must be a newline. */
4537 case OP_DOLLM:
4538 jump[1] = CMP(SLJIT_C_LESS, STR_PTR, 0, STR_END, 0);
4539 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
4540 OP1(SLJIT_MOV_UB, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
4541 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4542 check_partial(common, FALSE);
4543 jump[0] = JUMP(SLJIT_JUMP);
4544 JUMPHERE(jump[1]);
4545
4546 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
4547 {
4548 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4549 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4550 if (common->mode == JIT_COMPILE)
4551 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, TMP2, 0, STR_END, 0));
4552 else
4553 {
/* Partial modes: with only one unit left, a unit equal to the first
   newline byte goes through check_partial() and then backtracks. */
4554 jump[1] = CMP(SLJIT_C_LESS_EQUAL, TMP2, 0, STR_END, 0);
4555 /* STR_PTR = STR_END - IN_UCHARS(1) */
4556 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4557 check_partial(common, TRUE);
4558 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4559 JUMPHERE(jump[1]);
4560 }
4561
4562 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4563 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
4564 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
4565 }
4566 else
4567 {
4568 peek_char(common);
4569 check_newlinechar(common, common->nltype, backtracks, FALSE);
4570 }
4571 JUMPHERE(jump[0]);
4572 return cc;
4573
4574 case OP_CHAR:
4575 case OP_CHARI:
/* length = number of code units occupied by the pattern character. */
4576 length = 1;
4577 #ifdef SUPPORT_UTF
4578 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
4579 #endif
/* Non-partial mode only: when the character is caseful, has no other case,
   or char_get_othercase_bit() is non-zero, advance STR_PTR by the whole
   length, bounds-check once and compare via byte_sequence_compare(). */
4580 if (common->mode == JIT_COMPILE && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
4581 {
4582 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4583 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4584
4585 context.length = IN_UCHARS(length);
4586 context.sourcereg = -1;
4587 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4588 context.ucharptr = 0;
4589 #endif
4590 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
4591 }
/* General path: read one subject character and compare it with c. */
4592 detect_partial_match(common, backtracks);
4593 read_char(common);
4594 #ifdef SUPPORT_UTF
4595 if (common->utf)
4596 {
4597 GETCHAR(c, cc);
4598 }
4599 else
4600 #endif
4601 c = *cc;
4602 if (type == OP_CHAR || !char_has_othercase(common, cc))
4603 {
4604 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
4605 return cc + length;
4606 }
/* Caseless: when the two cases differ in a single bit, OR that bit into
   the subject character and compare once. */
4607 oc = char_othercase(common, c);
4608 bit = c ^ oc;
4609 if (is_powerof2(bit))
4610 {
4611 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4612 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4613 return cc + length;
4614 }
/* Otherwise compare against both cases and backtrack if neither matched. */
4615 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, c);
4616 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4617 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
4618 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4619 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4620 return cc + length;
4621
/* Negated single character: backtrack when the subject character equals c
   (or, for OP_NOTI, its other case). */
4622 case OP_NOT:
4623 case OP_NOTI:
4624 detect_partial_match(common, backtracks);
4625 length = 1;
4626 #ifdef SUPPORT_UTF
4627 if (common->utf)
4628 {
4629 #ifdef COMPILE_PCRE8
4630 c = *cc;
/* Pattern character below 128: only the first subject unit is compared;
   the rest of the subject character is then stepped over undecoded. */
4631 if (c < 128)
4632 {
4633 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
4634 if (type == OP_NOT || !char_has_othercase(common, cc))
4635 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4636 else
4637 {
4638 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
4639 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
4640 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
4641 }
4642 /* Skip the variable-length character. */
4643 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4644 jump[0] = CMP(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
4645 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4646 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4647 JUMPHERE(jump[0]);
4648 return cc + 1;
4649 }
4650 else
4651 #endif /* COMPILE_PCRE8 */
4652 {
4653 GETCHARLEN(c, cc, length);
4654 read_char(common);
4655 }
4656 }
4657 else
4658 #endif /* SUPPORT_UTF */
4659 {
4660 read_char(common);
4661 c = *cc;
4662 }
4663
4664 if (type == OP_NOT || !char_has_othercase(common, cc))
4665 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4666 else
4667 {
4668 oc = char_othercase(common, c);
4669 bit = c ^ oc;
4670 if (is_powerof2(bit))
4671 {
4672 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
4673 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
4674 }
4675 else
4676 {
4677 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, c));
4678 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, oc));
4679 }
4680 }
4681 return cc + length;
4682
/* Character class with a 32-byte bitmap at cc. check_class_ranges() is
   tried first; when it returns FALSE the generic bitmap test is emitted. */
4683 case OP_CLASS:
4684 case OP_NCLASS:
4685 detect_partial_match(common, backtracks);
4686 read_char(common);
4687 if (check_class_ranges(common, (const pcre_uint8 *)cc, type == OP_NCLASS, backtracks))
4688 return cc + 32 / sizeof(pcre_uchar);
4689
4690 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4691 jump[0] = NULL;
4692 #ifdef COMPILE_PCRE8
4693 /* This check only affects 8 bit mode. In other modes, we
4694 always need to compare the value with 255. */
4695 if (common->utf)
4696 #endif /* COMPILE_PCRE8 */
4697 {
/* Characters above 255 are outside the bitmap: OP_CLASS backtracks on
   them, OP_NCLASS skips the bitmap test (jump[0] lands after it). */
4698 jump[0] = CMP(SLJIT_C_GREATER, TMP1, 0, SLJIT_IMM, 255);
4699 if (type == OP_CLASS)
4700 {
4701 add_jump(compiler, backtracks, jump[0]);
4702 jump[0] = NULL;
4703 }
4704 }
4705 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
/* Bitmap lookup: byte (c >> 3) of the map, bit (1 << (c & 7)); backtrack
   when the bit is clear. */
4706 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
4707 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
4708 OP1(SLJIT_MOV_UB, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
4709 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
4710 OP2(SLJIT_AND | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
4711 add_jump(compiler, backtracks, JUMP(SLJIT_C_ZERO));
4712 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
4713 if (jump[0] != NULL)
4714 JUMPHERE(jump[0]);
4715 #endif /* SUPPORT_UTF || !COMPILE_PCRE8 */
4716 return cc + 32 / sizeof(pcre_uchar);
4717
4718 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
/* Extended class: the item data starts LINK_SIZE units after cc; the
   return value is derived from the value read with GET(cc, 0). */
4719 case OP_XCLASS:
4720 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
4721 return cc + GET(cc, 0) - 1;
4722 #endif
4723
/* Moves STR_PTR backwards by the amount read with GET(cc, 0); zero emits
   nothing. Backtracks when that would pass jit_arguments->begin. */
4724 case OP_REVERSE:
4725 length = GET(cc, 0);
4726 if (length == 0)
4727 return cc + LINK_SIZE;
4728 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
4729 #ifdef SUPPORT_UTF
4730 if (common->utf)
4731 {
/* UTF: step back one character at a time, TMP2 counting down from length;
   backtrack if STR_PTR is at or before begin ahead of any step. */
4732 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4733 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, length);
4734 label = LABEL();
4735 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
4736 skip_char_back(common);
4737 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
4738 JUMPTO(SLJIT_C_NOT_ZERO, label);
4739 }
4740 else
4741 #endif
4742 {
/* Not UTF: subtract IN_UCHARS(length) in one go, then bounds-check. */
4743 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
4744 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
4745 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS, STR_PTR, 0, TMP1, 0));
4746 }
4747 check_start_used_ptr(common);
4748 return cc + LINK_SIZE;
4749 }
/* Reached only for an opcode that has no case above. */
4750 SLJIT_ASSERT_STOP();
4751 return cc;
4752 }
4753
4754 static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
4755 {
4756 /* This function consumes at least one input character. */
4757 /* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
4758 DEFINE_COMPILER;
4759 pcre_uchar *ccbegin = cc;
4760 compare_context context;
4761 int size;
4762
4763 context.length = 0;
4764 do
4765 {
4766 if (cc >= ccend)
4767 break;
4768
4769 if (*cc == OP_CHAR)
4770 {
4771 size = 1;
4772 #ifdef SUPPORT_UTF
4773 if (common->utf && HAS_EXTRALEN(cc[1]))
4774 size += GET_EXTRALEN(cc[1]);
4775 #endif
4776 }
4777 else if (*cc == OP_CHARI)
4778 {
4779 size = 1;
4780 #ifdef SUPPORT_UTF
4781 if (common->utf)
4782 {
4783 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4784 size = 0;
4785 else if (HAS_EXTRALEN(cc[1]))
4786 size += GET_EXTRALEN(cc[1]);
4787 }
4788 else
4789 #endif
4790 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
4791 size = 0;
4792 }
4793 else
4794 size = 0;
4795
4796 cc += 1 + size;
4797 context.length += IN_UCHARS(size);
4798 }
4799 while (size > 0 && context.length <= 128);
4800
4801 cc = ccbegin;
4802 if (context.length > 0)
4803 {
4804 /* We have a fixed-length byte sequence. */
4805 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
4806 add_jump(compiler, backtracks, CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0));
4807
4808 context.sourcereg = -1;
4809 #if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
4810 context.ucharptr = 0;
4811 #endif
4812 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
4813 return cc;
4814 }
4815
4816 /* A non-fixed length character will be checked if length == 0. */
4817 return compile_char1_matchingpath(common, *cc, cc + 1, backtracks);
4818 }
4819
4820 static struct sljit_jump *compile_ref_checks(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
4821 {
4822 DEFINE_COMPILER;
4823 int offset = GET2(cc, 1) << 1;
4824
4825 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4826 if (!common->jscript_compat)
4827 {
4828 if (backtracks == NULL)
4829 {
4830 /* OVECTOR(1) contains the "string begin - 1" constant. */
4831 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
4832 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_EQUAL);
4833 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4834 OP_FLAGS(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, SLJIT_C_EQUAL);
4835 return JUMP(SLJIT_C_NOT_ZERO);
4836 }
4837 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4838 }
4839 return CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4840 }
4841
4842 /* Forward declarations: the two functions are used before they are defined. */
4843 static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *);
4844 static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
4845
/* Allocates a zero-filled backtrack record of `size` bytes with
   sljit_alloc_memory() and pushes it onto parent->top: the previous top is
   stored in ->prev and `ccstart` in ->cc. If the compiler reports an error
   after the allocation, the ENCLOSING function returns `error`.
   The expansion uses `compiler`, `backtrack` and `parent` from the scope in
   which the macro is invoked. */
4846 #define PUSH_BACKTRACK(size, ccstart, error) \
4847 do \
4848 { \
4849 backtrack = sljit_alloc_memory(compiler, (size)); \
4850 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4851 return error; \
4852 memset(backtrack, 0, size); \
4853 backtrack->prev = parent->top; \
4854 backtrack->cc = (ccstart); \
4855 parent->top = backtrack; \
4856 } \
4857 while (0)
4858
/* Variant of the backtrack-push macro for functions returning void: it
   allocates and zero-fills `size` bytes, links the record onto parent->top
   and stores `ccstart`. On a compiler error the ENCLOSING function returns
   without a value. Uses `compiler`, `backtrack` and `parent` from the
   invoking scope. */
4859 #define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
4860 do \
4861 { \
4862 backtrack = sljit_alloc_memory(compiler, (size)); \
4863 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
4864 return; \
4865 memset(backtrack, 0, size); \
4866 backtrack->prev = parent->top; \
4867 backtrack->cc = (ccstart); \
4868 parent->top = backtrack; \
4869 } \
4870 while (0)
4871
/* Casts the `backtrack` variable of the invoking scope to a pointer to the
   given record type. */
4872 #define BACKTRACK_AS(type) ((type *)backtrack)
4873
/* Generates code that matches the subject against the text of a back
   reference.

   cc          the reference opcode; the code tests *cc against OP_REF and
               OP_REFI, and reads the group number with GET2(cc, 1)
   backtracks  receives the jumps taken when the reference does not match
   withchecks  when TRUE, also emit the test of the group start against
               OVECTOR(1) (unless common->jscript_compat is set) and a jump
               for a captured string of zero length
   emptyfail   what to do with that zero-length jump: TRUE adds it to
               backtracks, FALSE makes it land after the comparison code

   Returns cc + 1 + IMM2_SIZE. */
4874 static pcre_uchar *compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
4875 {
4876 DEFINE_COMPILER;
/* OVECTOR(offset) is read as the start and OVECTOR(offset + 1) as the end
   of the referenced text. */
4877 int offset = GET2(cc, 1) << 1;
/* Zero-length jump; stays NULL when withchecks is FALSE. */
4878 struct sljit_jump *jump = NULL;
4879 struct sljit_jump *partial;
4880 struct sljit_jump *nopartial;
4881
4882 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
4883 /* OVECTOR(1) contains the "string begin - 1" constant. */
/* NOTE(review): a start equal to that constant presumably means the group
   is unset; confirm where the ovector is initialised. */
4884 if (withchecks && !common->jscript_compat)
4885 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
4886
4887 #if defined SUPPORT_UTF && defined SUPPORT_UCP
/* Caseless reference in UTF mode: the comparison is delegated to the C
   helper do_utf_caselesscmp. */
4888 if (common->utf && *cc == OP_REFI)
4889 {
4890 SLJIT_ASSERT(TMP1 == SLJIT_SCRATCH_REG1 && STACK_TOP == SLJIT_SCRATCH_REG2 && TMP2 == SLJIT_SCRATCH_REG3);
4891 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
4892 if (withchecks)
4893 jump = CMP(SLJIT_C_EQUAL, TMP1, 0, TMP2, 0);
4894
4895 /* Needed to save important temporary registers. */
/* STACK_TOP is parked in LOCALS0 because its register (asserted above to be
   SLJIT_SCRATCH_REG2) is loaded with ARGUMENTS for the call; STR_PTR is
   stored in jit_arguments->uchar_ptr before the call. */
4896 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
4897 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG2, 0, ARGUMENTS, 0);
4898 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SCRATCH_REG2), SLJIT_OFFSETOF(jit_arguments, uchar_ptr), STR_PTR, 0);
4899 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
4900 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
/* Return value of the helper: in non-partial mode 0 and 1 both backtrack.
   In partial modes 0 backtracks, 1 goes through check_partial() and then
   backtracks. Any other value becomes the new STR_PTR. */
4901 if (common->mode == JIT_COMPILE)
4902 add_jump(compiler, backtracks, CMP(SLJIT_C_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
4903 else
4904 {
4905 add_jump(compiler, backtracks, CMP(SLJIT_C_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
4906 nopartial = CMP(SLJIT_C_NOT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
4907 check_partial(common, FALSE);
4908 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4909 JUMPHERE(nopartial);
4910 }
4911 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
4912 }
4913 else
4914 #endif /* SUPPORT_UTF && SUPPORT_UCP */
4915 {
/* TMP2 = end - start of the referenced text; zero length is detected from
   the flags of this subtraction. */
4916 OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), TMP1, 0);
4917 if (withchecks)
4918 jump = JUMP(SLJIT_C_ZERO);
4919
/* Advance STR_PTR by the whole length first. `partial` is taken when that
   runs past STR_END; in non-partial mode it is simply a backtrack. */
4920 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4921 partial = CMP(SLJIT_C_GREATER, STR_PTR, 0, STR_END, 0);
4922 if (common->mode == JIT_COMPILE)
4923 add_jump(compiler, backtracks, partial);
4924
/* Fast-call the shared caseful or caseless comparison routine; backtrack
   when TMP2 is non-zero afterwards. */
4925 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4926 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4927
4928 if (common->mode != JIT_COMPILE)
4929 {
/* Partial modes: the referenced text is longer than what is left of the
   subject. Shorten TMP2 to the available amount, move STR_PTR to STR_END
   and compare that part (skipped when nothing is left); if it matches, go
   through check_partial() and then backtrack. */
4930 nopartial = JUMP(SLJIT_JUMP);
4931 JUMPHERE(partial);
4932 /* TMP2 -= STR_END - STR_PTR */
4933 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
4934 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
4935 partial = CMP(SLJIT_C_EQUAL, TMP2, 0, SLJIT_IMM, 0);
4936 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
4937 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
4938 add_jump(compiler, backtracks, CMP(SLJIT_C_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
4939 JUMPHERE(partial);
4940 check_partial(common, FALSE);
4941 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
4942 JUMPHERE(nopartial);
4943 }
4944 }
4945
/* Resolve the zero-length jump: a failure when emptyfail is set, otherwise
   it continues here as a successful match. */
4946 if (jump != NULL)
4947 {
4948 if (emptyfail)
4949 add_jump(compiler, backtracks, jump);
4950 else
4951 JUMPHERE(jump);
4952 }
4953 return cc + 1 + IMM2_SIZE;
4954 }
4955
4956 static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
4957 {
4958 DEFINE_COMPILER;
4959 backtrack_common *backtrack;
4960 pcre_uchar type;
4961 struct sljit_label *label;
4962 struct sljit_jump *zerolength;
4963 struct sljit_jump *jump = NULL;
4964 pcre_uchar *ccbegin = cc;
4965 int min = 0, max = 0;
4966 BOOL minimize;
4967
4968 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
4969
4970 type = cc[1 + IMM2_SIZE];
4971 minimize = (type & 0x1) != 0;
4972 switch(type)
4973 {
4974 case OP_CRSTAR:
4975 case OP_CRMINSTAR:
4976 min = 0;
4977 max = 0;
4978 cc += 1 + IMM2_SIZE + 1;
4979 break;
4980 case OP_CRPLUS:
4981 case OP_CRMINPLUS:
4982 min = 1;
4983 max = 0;
4984 cc += 1 + IMM2_SIZE + 1;
4985 break;
4986 case OP_CRQUERY:
4987 case OP_CRMINQUERY:
4988 min = 0;
4989 max = 1;
4990 cc += 1 + IMM2_SIZE + 1;
4991 break;
4992 case OP_CRRANGE:
4993 case OP_CRMINRANGE:
4994 min = GET2(cc, 1 + IMM2_SIZE + 1);
4995 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
4996 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
4997 break;
4998 default:
4999 SLJIT_ASSERT_STOP();
5000 break;
5001 }
5002
5003 if (!minimize)
5004 {
5005 if (min == 0)
5006 {
5007 allocate_stack(common, 2);
5008 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5009 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5010 /* Temporary release of STR_PTR. */
5011 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5012 zerolength = compile_ref_checks(common, ccbegin, NULL);
5013 /* Restore if not zero length. */
5014 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5015 }
5016 else
5017 {
5018 allocate_stack(common, 1);
5019 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5020 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5021 }
5022
5023 if (min > 1 || max > 1)
5024 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
5025
5026 label = LABEL();
5027 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
5028
5029 if (min > 1 || max > 1)
5030 {
5031 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
5032 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5033 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
5034 if (min > 1)
5035 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
5036 if (max > 1)
5037 {
5038 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
5039 allocate_stack(common, 1);
5040 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5041 JUMPTO(SLJIT_JUMP, label);
5042 JUMPHERE(jump);
5043 }
5044 }
5045
5046 if (max == 0)
5047 {
5048 /* Includes min > 1 case as well. */
5049 allocate_stack(common, 1);
5050 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5051 JUMPTO(SLJIT_JUMP, label);
5052 }
5053
5054 JUMPHERE(zerolength);
5055 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5056
5057 decrease_call_count(common);
5058 return cc;
5059 }
5060
5061 allocate_stack(common, 2);
5062 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5063 if (type != OP_CRMINSTAR)
5064 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
5065
5066 if (min == 0)
5067 {
5068 zerolength = compile_ref_checks(common, ccbegin, NULL);
5069 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5070 jump = JUMP(SLJIT_JUMP);
5071 }
5072 else
5073 zerolength = compile_ref_checks(common, ccbegin, &backtrack->topbacktracks);
5074
5075 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
5076 if (max > 0)
5077 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
5078
5079 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
5080 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5081
5082 if (min > 1)
5083 {
5084 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5085 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
5086 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5087 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(iterator_backtrack)->matchingpath);
5088 }
5089 else if (max > 0)
5090 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
5091
5092 if (jump != NULL)
5093 JUMPHERE(jump);
5094 JUMPHERE(zerolength);
5095
5096 decrease_call_count(common);
5097 return cc;
5098 }
5099
5100 static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5101 {
5102 DEFINE_COMPILER;
5103 backtrack_common *backtrack;
5104 recurse_entry *entry = common->entries;
5105 recurse_entry *prev = NULL;
5106 int start = GET(cc, 1);
5107 pcre_uchar *start_cc;
5108
5109 PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
5110
5111 /* Inlining simple patterns. */
5112 if (get_framesize(common, common->start + start, TRUE) == no_stack)
5113 {
5114 start_cc = common->start + start;
5115 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
5116 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
5117 return cc + 1 + LINK_SIZE;
5118 }
5119
5120 while (entry != NULL)
5121 {
5122 if (entry->start == start)
5123 break;
5124 prev = entry;
5125 entry = entry->next;
5126 }
5127
5128 if (entry == NULL)
5129 {
5130 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
5131 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5132 return NULL;
5133 entry->next = NULL;
5134 entry->entry = NULL;
5135 entry->calls = NULL;
5136 entry->start = start;
5137
5138 if (prev != NULL)
5139 prev->next = entry;
5140 else
5141 common->entries = entry;
5142 }
5143
5144 if (common->has_set_som && common->mark_ptr != 0)
5145 {
5146 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5147 allocate_stack(common, 2);
5148 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->mark_ptr);
5149 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5150 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5151 }
5152 else if (common->has_set_som || common->mark_ptr != 0)
5153 {
5154 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
5155 allocate_stack(common, 1);
5156 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5157 }
5158
5159 if (entry->entry == NULL)
5160 add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));
5161 else
5162 JUMPTO(SLJIT_FAST_CALL, entry->entry);
5163 /* Leave if the match is failed. */
5164 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 0));
5165 return cc + 1 + LINK_SIZE;
5166 }
5167
5168 static int SLJIT_CALL do_callout(struct jit_arguments* arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
5169 {
5170 const pcre_uchar *begin = arguments->begin;
5171 int *offset_vector = arguments->offsets;
5172 int offset_count = arguments->offset_count;
5173 int i;
5174
5175 if (PUBL(callout) == NULL)
5176 return 0;
5177
5178 callout_block->version = 2;
5179 callout_block->callout_data = arguments->callout_data;
5180
5181 /* Offsets in subject. */
5182 callout_block->subject_length = arguments->end - arguments->begin;
5183 callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
5184 callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
5185 #if defined COMPILE_PCRE8
5186 callout_block->subject = (PCRE_SPTR)begin;
5187 #elif defined COMPILE_PCRE16
5188 callout_block->subject = (PCRE_SPTR16)begin;
5189 #elif defined COMPILE_PCRE32
5190 callout_block->subject = (PCRE_SPTR32)begin;
5191 #endif
5192
5193 /* Convert and copy the JIT offset vector to the offset_vector array. */
5194 callout_block->capture_top = 0;
5195 callout_block->offset_vector = offset_vector;
5196 for (i = 2; i < offset_count; i += 2)
5197 {
5198 offset_vector[i] = jit_ovector[i] - begin;
5199 offset_vector[i + 1] = jit_ovector[i + 1] - begin;
5200 if (jit_ovector[i] >= begin)
5201 callout_block->capture_top = i;
5202 }
5203
5204 callout_block->capture_top = (callout_block->capture_top >> 1) + 1;
5205 if (offset_count > 0)
5206 offset_vector[0] = -1;
5207 if (offset_count > 1)
5208 offset_vector[1] = -1;
5209 return (*PUBL(callout))(callout_block);
5210 }
5211
5212 /* Aligning to 8 byte. */
5213 #define CALLOUT_ARG_SIZE \
5214 (((int)sizeof(PUBL(callout_block)) + 7) & ~7)
5215
5216 #define CALLOUT_ARG_OFFSET(arg) \
5217 (-CALLOUT_ARG_SIZE + SLJIT_OFFSETOF(PUBL(callout_block), arg))
5218
5219 static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5220 {
5221 DEFINE_COMPILER;
5222 backtrack_common *backtrack;
5223
5224 PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
5225
5226 allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5227
5228 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
5229 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5230 SLJIT_ASSERT(common->capture_last_ptr != 0);
5231 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]);
5232 OP1(SLJIT_MOV_UI, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
5233
5234 /* These pointer sized fields temporarly stores internal variables. */
5235 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(0));
5236 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
5237 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0);
5238
5239 if (common->mark_ptr != 0)
5240 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
5241 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2));
5242 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE));
5243 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
5244
5245 /* Needed to save important temporary registers. */
5246 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, STACK_TOP, 0);
5247 OP2(SLJIT_SUB, SLJIT_SCRATCH_REG2, 0, STACK_TOP, 0, SLJIT_IMM, CALLOUT_ARG_SIZE);
5248 GET_LOCAL_BASE(SLJIT_SCRATCH_REG3, 0, OVECTOR_START);
5249 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
5250 OP1(SLJIT_MOV_SI, SLJIT_RETURN_REG, 0, SLJIT_RETURN_REG, 0);
5251 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0);
5252 free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw));
5253
5254 /* Check return value. */
5255 OP2(SLJIT_SUB | SLJIT_SET_S, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
5256 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_C_SIG_GREATER));
5257 if (common->forced_quit_label == NULL)
5258 add_jump(compiler, &common->forced_quit, JUMP(SLJIT_C_SIG_LESS));
5259 else
5260 JUMPTO(SLJIT_C_SIG_LESS, common->forced_quit_label);
5261 return cc + 2 + 2 * LINK_SIZE;
5262 }
5263
5264 #undef CALLOUT_ARG_SIZE
5265 #undef CALLOUT_ARG_OFFSET
5266
/* Emits the matching path of an assertion (opcodes OP_ASSERT .. OP_ASSERTBACK_NOT),
which may be preceded by OP_BRAZERO or OP_BRAMINZERO when it is not used as a
condition.

common       the shared compiler state; its quit / accept labels and jump lists
             are replaced while the assertion is compiled and restored before
             returning
cc           points to the assertion opcode (or to the preceding OP_BRAZERO /
             OP_BRAMINZERO)
backtrack    the assert_backtrack record of this assertion; framesize,
             private_data_ptr and (for OP_BRAZERO / OP_BRAMINZERO paths)
             matchingpath are used here
conditional  when set, failure jumps are added to backtrack->condfailed
             instead of backtrack->common.topbacktracks

Every alternative of the assertion is compiled in turn, each with its own
temporary backtrack record (altbacktrack).

Returns the position of the opcode after the closing ket, or NULL if the
compiler reports an error. */
5267 static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
5268 {
5269 DEFINE_COMPILER;
5270 int framesize;
5271 int private_data_ptr;
5272 backtrack_common altbacktrack;
5273 pcre_uchar *ccbegin;
5274 pcre_uchar opcode;
5275 pcre_uchar bra = OP_BRA;
5276 jump_list *tmp = NULL;
5277 jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
5278 jump_list **found;
5279 /* Saving previous accept variables. */
5280 struct sljit_label *save_quit_label = common->quit_label;
5281 struct sljit_label *save_accept_label = common->accept_label;
5282 jump_list *save_quit = common->quit;
5283 jump_list *save_accept = common->accept;
5284 struct sljit_jump *jump;
5285 struct sljit_jump *brajump = NULL;
5286
5287 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5288 {
5289 SLJIT_ASSERT(!conditional);
5290 bra = *cc;
5291 cc++;
5292 }
5293 private_data_ptr = PRIVATE_DATA(cc);
5294 SLJIT_ASSERT(private_data_ptr != 0);
5295 framesize = get_framesize(common, cc, FALSE);
5296 backtrack->framesize = framesize;
5297 backtrack->private_data_ptr = private_data_ptr;
5298 opcode = *cc;
5299 SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* When an alternative matches, positive assertions jump to the local tmp list
(bound at "Assert is successful" below), negative ones jump to the failure
target. */
5300 found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
5301 ccbegin = cc;
5302 cc += GET(cc, 1);
5303
5304 if (bra == OP_BRAMINZERO)
5305 {
5306 /* This is a braminzero backtrack path. */
5307 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5308 free_stack(common, 1);
5309 brajump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5310 }
5311
/* Without a frame only STR_PTR is pushed and private_data_ptr records the
stack top; otherwise STR_PTR, the previous private_data_ptr value and the
frame itself are pushed. */
5312 if (framesize < 0)
5313 {
5314 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5315 allocate_stack(common, 1);
5316 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5317 }
5318 else
5319 {
5320 allocate_stack(common, framesize + 2);
5321 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5322 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(framesize + 1));
5323 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5324 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5325 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5326 init_frame(common, ccbegin, framesize + 1, 2, FALSE);
5327 }
5328
5329 memset(&altbacktrack, 0, sizeof(backtrack_common));
5330 common->quit_label = NULL;
5331 common->quit = NULL;
/* One iteration per alternative; the loop ends when the next opcode is not OP_ALT. */
5332 while (1)
5333 {
5334 common->accept_label = NULL;
5335 common->accept = NULL;
5336 altbacktrack.top = NULL;
5337 altbacktrack.topbacktracks = NULL;
5338
/* Alternatives after the first one restart from the saved STR_PTR. */
5339 if (*ccbegin == OP_ALT)
5340 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5341
5342 altbacktrack.cc = ccbegin;
5343 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
5344 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5345 {
5346 common->quit_label = save_quit_label;
5347 common->accept_label = save_accept_label;
5348 common->quit = save_quit;
5349 common->accept = save_accept;
5350 return NULL;
5351 }
5352 common->accept_label = LABEL();
5353 if (common->accept != NULL)
5354 set_jumps(common->accept, common->accept_label);
5355
5356 /* Reset stack. */
5357 if (framesize < 0)
5358 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5359 else {
5360 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
5361 {
5362 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5363 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5364 }
5365 else
5366 {
5367 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5368 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5369 }
5370 }
5371
5372 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
5373 {
5374 /* We know that STR_PTR was stored on the top of the stack. */
5375 if (conditional)
5376 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5377 else if (bra == OP_BRAZERO)
5378 {
5379 if (framesize < 0)
5380 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5381 else
5382 {
5383 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5384 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), (framesize + 1) * sizeof(sljit_sw));
5385 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5386 }
5387 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5388 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5389 }
5390 else if (framesize >= 0)
5391 {
5392 /* For OP_BRA and OP_BRAMINZERO. */
5393 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5394 }
5395 }
/* The alternative matched: leave through the "found" list. */
5396 add_jump(compiler, found, JUMP(SLJIT_JUMP));
5397
5398 compile_backtrackingpath(common, altbacktrack.top);
5399 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
5400 {
5401 common->quit_label = save_quit_label;
5402 common->accept_label = save_accept_label;
5403 common->quit = save_quit;
5404 common->accept = save_accept;
5405 return NULL;
5406 }
5407 set_jumps(altbacktrack.topbacktracks, LABEL());
5408
5409 if (*cc != OP_ALT)
5410 break;
5411
5412 ccbegin = cc;
5413 cc += GET(cc, 1);
5414 }
5415 /* None of them matched. */
5416 if (common->quit != NULL)
5417 set_jumps(common->quit, LABEL());
5418
5419 if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
5420 {
5421 /* Assert is failed. */
5422 if (conditional || bra == OP_BRAZERO)
5423 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5424
5425 if (framesize < 0)
5426 {
5427 /* The topmost item should be 0. */
5428 if (bra == OP_BRAZERO)
5429 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5430 else
5431 free_stack(common, 1);
5432 }
5433 else
5434 {
5435 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5436 /* The topmost item should be 0. */
5437 if (bra == OP_BRAZERO)
5438 {
5439 free_stack(common, framesize + 1);
5440 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5441 }
5442 else
5443 free_stack(common, framesize + 2);
5444 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5445 }
/* With OP_BRAZERO this jump is bound to the matching path label below;
otherwise it is added to the failure target list. */
5446 jump = JUMP(SLJIT_JUMP);
5447 if (bra != OP_BRAZERO)
5448 add_jump(compiler, target, jump);
5449
5450 /* Assert is successful. */
5451 set_jumps(tmp, LABEL());
5452 if (framesize < 0)
5453 {
5454 /* We know that STR_PTR was stored on the top of the stack. */
5455 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5456 /* Keep the STR_PTR on the top of the stack. */
5457 if (bra == OP_BRAZERO)
5458 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5459 else if (bra == OP_BRAMINZERO)
5460 {
5461 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5462 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5463 }
5464 }
5465 else
5466 {
5467 if (bra == OP_BRA)
5468 {
5469 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5470 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
5471 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
5472 }
5473 else
5474 {
5475 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
5476 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
5477 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5478 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
5479 }
5480 }
5481
5482 if (bra == OP_BRAZERO)
5483 {
5484 backtrack->matchingpath = LABEL();
5485 SET_LABEL(jump, backtrack->matchingpath);
5486 }
5487 else if (bra == OP_BRAMINZERO)
5488 {
5489 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5490 JUMPHERE(brajump);
5491 if (framesize >= 0)
5492 {
5493 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5494 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
5495 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), framesize * sizeof(sljit_sw));
5496 }
5497 set_jumps(backtrack->common.topbacktracks, LABEL());
5498 }
5499 }
5500 else
5501 {
5502 /* AssertNot is successful. */
5503 if (framesize < 0)
5504 {
5505 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5506 if (bra != OP_BRA)
5507 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5508 else
5509 free_stack(common, 1);
5510 }
5511 else
5512 {
5513 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5514 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5515 /* The topmost item should be 0. */
5516 if (bra != OP_BRA)
5517 {
5518 free_stack(common, framesize + 1);
5519 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
5520 }
5521 else
5522 free_stack(common, framesize + 2);
5523 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP1, 0);
5524 }
5525
5526 if (bra == OP_BRAZERO)
5527 backtrack->matchingpath = LABEL();
5528 else if (bra == OP_BRAMINZERO)
5529 {
5530 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
5531 JUMPHERE(brajump);
5532 }
5533
5534 if (bra != OP_BRA)
5535 {
5536 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
5537 set_jumps(backtrack->common.topbacktracks, LABEL());
5538 backtrack->common.topbacktracks = NULL;
5539 }
5540 }
5541
/* Restore the quit / accept state saved on entry. */
5542 common->quit_label = save_quit_label;
5543 common->accept_label = save_accept_label;
5544 common->quit = save_quit;
5545 common->accept = save_accept;
5546 return cc + 1 + LINK_SIZE;
5547 }
5548
5549 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)
5550 {
5551 int condition = FALSE;
5552 pcre_uchar *slotA = name_table;
5553 pcre_uchar *slotB;
5554 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5555 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5556 sljit_sw no_capture;
5557 int i;
5558
5559 locals += refno & 0xff;
5560 refno >>= 8;
5561 no_capture = locals[1];
5562
5563 for (i = 0; i < name_count; i++)
5564 {
5565 if (GET2(slotA, 0) == refno) break;
5566 slotA += name_entry_size;
5567 }
5568
5569 if (i < name_count)
5570 {
5571 /* Found a name for the number - there can be only one; duplicate names
5572 for different numbers are allowed, but not vice versa. First scan down
5573 for duplicates. */
5574
5575 slotB = slotA;
5576 while (slotB > name_table)
5577 {
5578 slotB -= name_entry_size;
5579 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5580 {
5581 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5582 if (condition) break;
5583 }
5584 else break;
5585 }
5586
5587 /* Scan up for duplicates */
5588 if (!condition)
5589 {
5590 slotB = slotA;
5591 for (i++; i < name_count; i++)
5592 {
5593 slotB += name_entry_size;
5594 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5595 {
5596 condition = locals[GET2(slotB, 0) << 1] != no_capture;
5597 if (condition) break;
5598 }
5599 else break;
5600 }
5601 }
5602 }
5603 return condition;
5604 }
5605
5606 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)
5607 {
5608 int condition = FALSE;
5609 pcre_uchar *slotA = name_table;
5610 pcre_uchar *slotB;
5611 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];
5612 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];
5613 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];
5614 sljit_uw i;
5615
5616 for (i = 0; i < name_count; i++)
5617 {
5618 if (GET2(slotA, 0) == recno) break;
5619 slotA += name_entry_size;
5620 }
5621
5622 if (i < name_count)
5623 {
5624 /* Found a name for the number - there can be only one; duplicate
5625 names for different numbers are allowed, but not vice versa. First
5626 scan down for duplicates. */
5627
5628 slotB = slotA;
5629 while (slotB > name_table)
5630 {
5631 slotB -= name_entry_size;
5632 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5633 {
5634 condition = GET2(slotB, 0) == group_num;
5635 if (condition) break;
5636 }
5637 else break;
5638 }
5639
5640 /* Scan up for duplicates */
5641 if (!condition)
5642 {
5643 slotB = slotA;
5644 for (i++; i < name_count; i++)
5645 {
5646 slotB += name_entry_size;
5647 if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)
5648 {
5649 condition = GET2(slotB, 0) == group_num;
5650 if (condition) break;
5651 }
5652 else break;
5653 }
5654 }
5655 }
5656 return condition;
5657 }
5658
5659 /*
5660 Handling bracketed expressions is probably the most complex part.
5661
5662 Stack layout naming characters:
5663 S - Push the current STR_PTR
5664 0 - Push a 0 (NULL)
5665 A - Push the current STR_PTR. Needed for restoring the STR_PTR
5666 before the next alternative. Not pushed if there are no alternatives.
5667 M - Any values pushed by the current alternative. Can be empty, or anything.
5668 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
5669 L - Push the previous local (pointed by localptr) to the stack
5670 () - optional values stored on the stack
5671 ()* - optional, can be stored multiple times
5672
5673 The following list shows the regular expression templates, their PCRE byte codes
5674 and stack layout supported by pcre-sljit.
5675
5676 (?:) OP_BRA | OP_KET A M
5677 () OP_CBRA | OP_KET C M
5678 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
5679 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
5680 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
5681 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
5682 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
5683 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
5684 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
5685 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
5686 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
5687 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
5688 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
5689 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
5690 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
5691 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
5692 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
5693 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
5694 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
5695 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
5696 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
5697 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
5698
5699
5700 Stack layout naming characters:
5701 A - Push the alternative index (starting from 0) on the stack.
5702 Not pushed if there are no alternatives.
5703 M - Any values pushed by the current alternative. Can be empty, or anything.
5704
5705 The next list shows the possible content of a bracket:
5706 (|) OP_*BRA | OP_ALT ... M A
5707 (?()|) OP_*COND | OP_ALT M A
5708 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
5709 (?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A
5710 Or nothing, if trace is unnecessary
5711 */
5712
5713 static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
5714 {
5715 DEFINE_COMPILER;
5716 backtrack_common *backtrack;
5717 pcre_uchar opcode;
5718 int private_data_ptr = 0;
5719 int offset = 0;
5720 int stacksize;
5721 pcre_uchar *ccbegin;
5722 pcre_uchar *matchingpath;
5723 pcre_uchar bra = OP_BRA;
5724 pcre_uchar ket;
5725 assert_backtrack *assert;
5726 BOOL has_alternatives;
5727 struct sljit_jump *jump;
5728 struct sljit_jump *skip;
5729 struct sljit_label *rmaxlabel = NULL;
5730 struct sljit_jump *braminzerojump = NULL;
5731
5732 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
5733
5734 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
5735 {
5736 bra = *cc;
5737 cc++;
5738 opcode = *cc;
5739 }
5740
5741 opcode = *cc;
5742 ccbegin = cc;
5743 matchingpath = ccbegin + 1 + LINK_SIZE;
5744
5745 if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
5746 {
5747 /* Drop this bracket_backtrack. */
5748 parent->top = backtrack->prev;
5749 return bracketend(cc);
5750 }
5751
5752 ket = *(bracketend(cc) - 1 - LINK_SIZE);
5753 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
5754 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
5755 cc += GET(cc, 1);
5756
5757 has_alternatives = *cc == OP_ALT;
5758 if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
5759 {
5760 has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;
5761 if (*matchingpath == OP_NRREF)
5762 {
5763 stacksize = GET2(matchingpath, 1);
5764 if (common->currententry == NULL || stacksize == RREF_ANY)
5765 has_alternatives = FALSE;
5766 else if (common->currententry->start == 0)
5767 has_alternatives = stacksize != 0;
5768 else
5769 has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
5770 }
5771 }
5772
5773 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
5774 opcode = OP_SCOND;
5775 if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
5776 opcode = OP_ONCE;
5777
5778 if (opcode == OP_CBRA || opcode == OP_SCBRA)
5779 {
5780 /* Capturing brackets has a pre-allocated space. */
5781 offset = GET2(ccbegin, 1 + LINK_SIZE);
5782 if (common->optimized_cbracket[offset] == 0)
5783 {
5784 private_data_ptr = OVECTOR_PRIV(offset);
5785 offset <<= 1;
5786 }
5787 else
5788 {
5789 offset <<= 1;
5790 private_data_ptr = OVECTOR(offset);
5791 }
5792 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5793 matchingpath += IMM2_SIZE;
5794 }
5795 else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
5796 {
5797 /* Other brackets simply allocate the next entry. */
5798 private_data_ptr = PRIVATE_DATA(ccbegin);
5799 SLJIT_ASSERT(private_data_ptr != 0);
5800 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
5801 if (opcode == OP_ONCE)
5802 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, FALSE);
5803 }
5804
5805 /* Instructions before the first alternative. */
5806 stacksize = 0;
5807 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5808 stacksize++;
5809 if (bra == OP_BRAZERO)
5810 stacksize++;
5811
5812 if (stacksize > 0)
5813 allocate_stack(common, stacksize);
5814
5815 stacksize = 0;
5816 if ((ket == OP_KETRMAX) || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
5817 {
5818 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
5819 stacksize++;
5820 }
5821
5822 if (bra == OP_BRAZERO)
5823 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
5824
5825 if (bra == OP_BRAMINZERO)
5826 {
5827 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
5828 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
5829 if (ket != OP_KETRMIN)
5830 {
5831 free_stack(common, 1);
5832 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5833 }
5834 else
5835 {
5836 if (opcode == OP_ONCE || opcode >= OP_SBRA)
5837 {
5838 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5839 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5840 /* Nothing stored during the first run. */
5841 skip = JUMP(SLJIT_JUMP);
5842 JUMPHERE(jump);
5843 /* Checking zero-length iteration. */
5844 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5845 {
5846 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
5847 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5848 }
5849 else
5850 {
5851 /* Except when the whole stack frame must be saved. */
5852 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5853 braminzerojump = CMP(SLJIT_C_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), (BACKTRACK_AS(bracket_backtrack)->u.framesize + 1) * sizeof(sljit_sw));
5854 }
5855 JUMPHERE(skip);
5856 }
5857 else
5858 {
5859 jump = CMP(SLJIT_C_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
5860 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
5861 JUMPHERE(jump);
5862 }
5863 }
5864 }
5865
5866 if (ket == OP_KETRMIN)
5867 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
5868
5869 if (ket == OP_KETRMAX)
5870 {
5871 rmaxlabel = LABEL();
5872 if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA)
5873 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmaxlabel;
5874 }
5875
5876 /* Handling capturing brackets and alternatives. */
5877 if (opcode == OP_ONCE)
5878 {
5879 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
5880 {
5881 /* Neither capturing brackets nor recursions are found in the block. */
5882 if (ket == OP_KETRMIN)
5883 {
5884 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5885 allocate_stack(common, 2);
5886 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5887 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5888 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
5889 }
5890 else if (ket == OP_KETRMAX || has_alternatives)
5891 {
5892 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5893 allocate_stack(common, 1);
5894 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5895 }
5896 else
5897 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
5898 }
5899 else
5900 {
5901 if (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives)
5902 {
5903 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 2);
5904 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5905 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize + 1));
5906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5907 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5908 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
5909 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1, 2, FALSE);
5910 }
5911 else
5912 {
5913 allocate_stack(common, BACKTRACK_AS(bracket_backtrack)->u.framesize + 1);
5914 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5915 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(BACKTRACK_AS(bracket_backtrack)->u.framesize));
5916 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
5917 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5918 init_frame(common, ccbegin, BACKTRACK_AS(bracket_backtrack)->u.framesize, 1, FALSE);
5919 }
5920 }
5921 }
5922 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
5923 {
5924 /* Saving the previous values. */
5925 if (common->optimized_cbracket[offset >> 1] != 0)
5926 {
5927 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
5928 allocate_stack(common, 2);
5929 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5930 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr + sizeof(sljit_sw));
5931 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5932 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
5933 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
5934 }
5935 else
5936 {
5937 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5938 allocate_stack(common, 1);
5939 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5940 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5941 }
5942 }
5943 else if (opcode == OP_SBRA || opcode == OP_SCOND)
5944 {
5945 /* Saving the previous value. */
5946 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
5947 allocate_stack(common, 1);
5948 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0);
5949 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
5950 }
5951 else if (has_alternatives)
5952 {
5953 /* Pushing the starting string pointer. */
5954 allocate_stack(common, 1);
5955 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
5956 }
5957
5958 /* Generating code for the first alternative. */
5959 if (opcode == OP_COND || opcode == OP_SCOND)
5960 {
5961 if (*matchingpath == OP_CREF)
5962 {
5963 SLJIT_ASSERT(has_alternatives);
5964 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
5965 CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
5966 matchingpath += 1 + IMM2_SIZE;
5967 }
5968 else if (*matchingpath == OP_NCREF)
5969 {
5970 SLJIT_ASSERT(has_alternatives);
5971 stacksize = GET2(matchingpath, 1);
5972 jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
5973
5974 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
5975 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
5976 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
5977 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));
5978 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
5979 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
5980 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));
5981 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
5982 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
5983
5984 JUMPHERE(jump);
5985 matchingpath += 1 + IMM2_SIZE;
5986 }
5987 else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)
5988 {
5989 /* Never has other case. */
5990 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
5991
5992 stacksize = GET2(matchingpath, 1);
5993 if (common->currententry == NULL)
5994 stacksize = 0;
5995 else if (stacksize == RREF_ANY)
5996 stacksize = 1;
5997 else if (common->currententry->start == 0)
5998 stacksize = stacksize == 0;
5999 else
6000 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6001
6002 if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)
6003 {
6004 SLJIT_ASSERT(!has_alternatives);
6005 if (stacksize != 0)
6006 matchingpath += 1 + IMM2_SIZE;
6007 else
6008 {
6009 if (*cc == OP_ALT)
6010 {
6011 matchingpath = cc + 1 + LINK_SIZE;
6012 cc += GET(cc, 1);
6013 }
6014 else
6015 matchingpath = cc;
6016 }
6017 }
6018 else
6019 {
6020 SLJIT_ASSERT(has_alternatives);
6021
6022 stacksize = GET2(matchingpath, 1);
6023 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);
6024 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);
6025 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);
6026 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));
6027 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);
6028 GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);
6029 OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);
6030 sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));
6031 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);
6032 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));
6033 matchingpath += 1 + IMM2_SIZE;
6034 }
6035 }
6036 else
6037 {
6038 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
6039 /* Similar code as PUSH_BACKTRACK macro. */
6040 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
6041 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6042 return NULL;
6043 memset(assert, 0, sizeof(assert_backtrack));
6044 assert->common.cc = matchingpath;
6045 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
6046 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
6047 }
6048 }
6049
6050 compile_matchingpath(common, matchingpath, cc, backtrack);
6051 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6052 return NULL;
6053
6054 if (opcode == OP_ONCE)
6055 {
6056 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
6057 {
6058 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6059 /* TMP2 which is set here used by OP_KETRMAX below. */
6060 if (ket == OP_KETRMAX)
6061 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), 0);
6062 else if (ket == OP_KETRMIN)
6063 {
6064 /* Move the STR_PTR to the private_data_ptr. */
6065 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_MEM1(STACK_TOP), 0);
6066 }
6067 }
6068 else
6069 {
6070 stacksize = (ket == OP_KETRMIN || ket == OP_KETRMAX || has_alternatives) ? 2 : 1;
6071 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize) * sizeof(sljit_sw));
6072 if (ket == OP_KETRMAX)
6073 {
6074 /* TMP2 which is set here used by OP_KETRMAX below. */
6075 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6076 }
6077 }
6078 }
6079
6080 stacksize = 0;
6081 if (ket != OP_KET || bra != OP_BRA)
6082 stacksize++;
6083 if (offset != 0)
6084 {
6085 if (common->capture_last_ptr != 0)
6086 stacksize++;
6087 if (common->optimized_cbracket[offset >> 1] == 0)
6088 stacksize += 2;
6089 }
6090 if (has_alternatives && opcode != OP_ONCE)
6091 stacksize++;
6092
6093 if (stacksize > 0)
6094 allocate_stack(common, stacksize);
6095
6096 stacksize = 0;
6097 if (ket != OP_KET || bra != OP_BRA)
6098 {
6099 if (ket != OP_KET)
6100 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
6101 else
6102 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6103 stacksize++;
6104 }
6105
6106 if (offset != 0)
6107 {
6108 if (common->capture_last_ptr != 0)
6109 {
6110 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6111 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6112 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0);
6113 stacksize++;
6114 }
6115 if (common->optimized_cbracket[offset >> 1] == 0)
6116 {
6117 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6118 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6119 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
6120 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6121 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
6122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6123 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6124 stacksize += 2;
6125 }
6126 }
6127
6128 if (has_alternatives)
6129 {
6130 if (opcode != OP_ONCE)
6131 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
6132 if (ket != OP_KETRMAX)
6133 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6134 }
6135
6136 /* Must be after the matchingpath label. */
6137 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
6138 {
6139 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
6140 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6141 }
6142
6143 if (ket == OP_KETRMAX)
6144 {
6145 if (opcode == OP_ONCE || opcode >= OP_SBRA)
6146 {
6147 if (has_alternatives)
6148 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
6149 /* Checking zero-length iteration. */
6150 if (opcode != OP_ONCE)
6151 {
6152 CMPTO(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STR_PTR, 0, rmaxlabel);
6153 /* Drop STR_PTR for greedy plus quantifier. */
6154 if (bra != OP_BRAZERO)
6155 free_stack(common, 1);
6156 }
6157 else
6158 /* TMP2 must contain the starting STR_PTR. */
6159 CMPTO(SLJIT_C_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmaxlabel);
6160 }
6161 else
6162 JUMPTO(SLJIT_JUMP, rmaxlabel);
6163 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
6164 }
6165
6166 if (bra == OP_BRAZERO)
6167 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
6168
6169 if (bra == OP_BRAMINZERO)
6170 {
6171 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
6172 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
6173 if (braminzerojump != NULL)
6174 {
6175 JUMPHERE(braminzerojump);
6176 /* We need to release the end pointer to perform the
6177 backtrack for the zero-length iteration. When
6178 framesize is < 0, OP_ONCE will do the release itself. */
6179 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
6180 {
6181 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6182 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
6183 }
6184 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
6185 free_stack(common, 1);
6186 }
6187 /* Continue to the normal backtrack. */
6188 }
6189
6190 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
6191 decrease_call_count(common);
6192
6193 /* Skip the other alternatives. */
6194 while (*cc == OP_ALT)
6195 cc += GET(cc, 1);
6196 cc += 1 + LINK_SIZE;
6197 return cc;
6198 }
6199
/* Emits the matching-path code for a bracket of the OP_BRAPOS family
   (OP_BRAPOS, OP_SBRAPOS, OP_CBRAPOS, OP_SCBRAPOS), optionally preceded by
   OP_BRAPOSZERO and closed by OP_KETRPOS.

   Arguments:
     common   the compiler state
     cc       points to OP_BRAPOSZERO or to the bracket opcode itself
     parent   the enclosing backtrack record; it is not referenced by name in
              this body (presumably consumed by PUSH_BACKTRACK - confirm)

   Returns:   the byte code position following the closing OP_KETRPOS, or
              NULL when the sljit compiler reports an error */
6200 static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6201 {
6202 DEFINE_COMPILER;
6203 backtrack_common *backtrack;
6204 pcre_uchar opcode;
6205 int private_data_ptr;
6206 int cbraprivptr = 0;
6207 int framesize;
6208 int stacksize;
6209 int offset = 0;
6210 BOOL zero = FALSE;
6211 pcre_uchar *ccbegin = NULL;
6212 int stack;
6213 struct sljit_label *loop = NULL;
6214 struct jump_list *emptymatch = NULL;
6215
/* NOTE(review): the NULL argument is presumably the value returned when the
   backtrack record cannot be allocated - confirm against the macro. */
6216 PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
/* A leading OP_BRAPOSZERO is skipped and remembered in 'zero'. When 'zero'
   is FALSE an extra stack word is used below as a flag that stays non-zero
   until one iteration of the group has matched. */
6217 if (*cc == OP_BRAPOSZERO)
6218 {
6219 zero = TRUE;
6220 cc++;
6221 }
6222
6223 opcode = *cc;
6224 private_data_ptr = PRIVATE_DATA(cc);
6225 SLJIT_ASSERT(private_data_ptr != 0);
6226 BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
/* Locate the first alternative (ccbegin) and, for capturing groups, the
   capture related offsets. */
6227 switch(opcode)
6228 {
6229 case OP_BRAPOS:
6230 case OP_SBRAPOS:
6231 ccbegin = cc + 1 + LINK_SIZE;
6232 break;
6233
6234 case OP_CBRAPOS:
6235 case OP_SCBRAPOS:
6236 offset = GET2(cc, 1 + LINK_SIZE);
6237 /* This case cannot be optimized in the same way as
6238 normal capturing brackets. */
6239 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
6240 cbraprivptr = OVECTOR_PRIV(offset);
/* From here on 'offset' is the group number times two; OVECTOR(offset) and
   OVECTOR(offset + 1) are the pair written after each iteration. */
6241 offset <<= 1;
6242 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
6243 break;
6244
6245 default:
6246 SLJIT_ASSERT_STOP();
6247 break;
6248 }
6249
6250 framesize = get_framesize(common, cc, FALSE);
6251 BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
6252 if (framesize < 0)
6253 {
/* No frame is saved. Stack words, starting at STACK(0):
     capturing:     OVECTOR(offset), OVECTOR(offset + 1) and, when
                    capture_last_ptr is in use, its current value
     non-capturing: STR_PTR
   followed by the "nothing matched yet" flag when 'zero' is FALSE. */
6254 if (offset != 0)
6255 {
6256 stacksize = 2;
6257 if (common->capture_last_ptr != 0)
6258 stacksize++;
6259 }
6260 else
6261 stacksize = 1;
6262
6263 if (!zero)
6264 stacksize++;
6265
6266 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6267 allocate_stack(common, stacksize);
/* With no_frame the stack top is remembered so that it can be reloaded
   after each successful iteration (see the loop below). */
6268 if (framesize == no_frame)
6269 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, STACK_TOP, 0);
6270
6271 if (offset != 0)
6272 {
6273 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset));
6274 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1));
6275 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
6276 if (common->capture_last_ptr != 0)
6277 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr);
6278 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
6279 if (common->capture_last_ptr != 0)
6280 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
6281 }
6282 else
6283 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6284
/* The flag occupies the last allocated word and starts as 1. */
6285 if (!zero)
6286 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 1);
6287 }
6288 else
6289 {
/* A frame is saved. Stack words, starting at STACK(0):
     the flag (only when 'zero' is FALSE),
     STR_PTR (only for the non-capturing opcodes),
     the previous content of the private data slot,
     then the words written by init_frame. */
6290 stacksize = framesize + 1;
6291 if (!zero)
6292 stacksize++;
6293 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6294 stacksize++;
6295 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
6296
6297 allocate_stack(common, stacksize);
6298 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
/* TMP2 = STACK_TOP + STACK(stacksize - 1); it becomes the new content of
   the private data slot. */
6299 OP2(SLJIT_SUB, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, -STACK(stacksize - 1));
6300 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, TMP2, 0);
6301
6302 stack = 0;
6303 if (!zero)
6304 {
6305 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
6306 stack++;
6307 }
6308 if (opcode == OP_BRAPOS || opcode == OP_SBRAPOS)
6309 {
6310 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
6311 stack++;
6312 }
6313 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
6314 init_frame(common, cc, stacksize - 1, stacksize - framesize, FALSE);
6315 }
6316
/* For capturing groups the cbraprivptr local holds the subject position
   where the current iteration starts. */
6317 if (offset != 0)
6318 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6319
/* Every alternative that matches jumps back to 'loop' to try another
   iteration of the whole group. */
6320 loop = LABEL();
/* One pass per alternative of the group. */
6321 while (*cc != OP_KETRPOS)
6322 {
/* Each alternative starts with empty backtrack lists. */
6323 backtrack->top = NULL;
6324 backtrack->topbacktracks = NULL;
/* Follow the link stored after the opcode (presumably to the next
   alternative or to the closing OP_KETRPOS - confirm). */
6325 cc += GET(cc, 1);
6326
6327 compile_matchingpath(common, ccbegin, cc, backtrack);
6328 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6329 return NULL;
6330
/* Code executed when the alternative has matched: record the new end
   position, clear the flag and start the next iteration. */
6331 if (framesize < 0)
6332 {
6333 if (framesize == no_frame)
6334 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6335
6336 if (offset != 0)
6337 {
/* OVECTOR(offset) receives the start of this iteration (old cbraprivptr
   value), OVECTOR(offset + 1) and cbraprivptr receive the current STR_PTR. */
6338 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6339 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6340 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6341 if (common->capture_last_ptr != 0)
6342 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6343 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6344 }
6345 else
6346 {
/* STACK(0) tracks the end of the last matched iteration; OP_SBRAPOS keeps
   the previous value in TMP1 for the empty-match test below. */
6347 if (opcode == OP_SBRAPOS)
6348 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6349 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6350 }
6351
/* S-variants leave the loop when the iteration did not move STR_PTR. */
6352 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6353 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6354
/* At least one iteration matched: clear the flag. */
6355 if (!zero)
6356 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6357 }
6358 else
6359 {
6360 if (offset != 0)
6361 {
/* STACK_TOP is recomputed from the private data slot plus stacksize
   words, then the capture pair is updated as in the frameless case. */
6362 OP2(SLJIT_ADD, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6363 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6364 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset + 1), STR_PTR, 0);
6365 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr, STR_PTR, 0);
6366 if (common->capture_last_ptr != 0)
6367 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
6368 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(offset), TMP1, 0);
6369 }
6370 else
6371 {
/* The saved STR_PTR word is addressed relative to the private data value
   (TMP2) at (framesize + 1) words; OP_SBRAPOS reads the old value first. */
6372 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6373 OP2(SLJIT_ADD, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
6374 if (opcode == OP_SBRAPOS)
6375 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6376 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw), STR_PTR, 0);
6377 }
6378
6379 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
6380 add_jump(compiler, &emptymatch, CMP(SLJIT_C_EQUAL, TMP1, 0, STR_PTR, 0));
6381
6382 if (!zero)
6383 {
/* NOTE(review): this is the framesize >= 0 branch, so the test below is
   always false here and only the STACK(0) store is ever emitted. */
6384 if (framesize < 0)
6385 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
6386 else
6387 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6388 }
6389 }
6390 JUMPTO(SLJIT_JUMP, loop);
6391 flush_stubs(common);
6392
/* Backtracking code of this alternative; its failure jumps land on the
   label created right after it. */
6393 compile_backtrackingpath(common, backtrack->top);
6394 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
6395 return NULL;
6396 set_jumps(backtrack->topbacktracks, LABEL());
6397
/* The alternative failed: reload STR_PTR from the place where the end of
   the last matched iteration (or the initial position) was stored. */
6398 if (framesize < 0)
6399 {
6400 if (offset != 0)
6401 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6402 else
6403 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
6404 }
6405 else
6406 {
6407 if (offset != 0)
6408 {
6409 /* Last alternative. */
/* TMP2 is loaded here because the flag test after the loop reads it. */
6410 if (*cc == OP_KETRPOS)
6411 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6412 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), cbraprivptr);
6413 }
6414 else
6415 {
6416 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), private_data_ptr);
6417 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), (framesize + 1) * sizeof(sljit_sw));
6418 }
6419 }
6420
6421 if (*cc == OP_KETRPOS)
6422 break;
/* The next alternative starts after the opcode and its link. */
6423 ccbegin = cc + 1 + LINK_SIZE;
6424 }
6425
/* All alternatives failed for the current iteration. When the group must
   match at least once and the flag is still non-zero, the whole bracket
   fails through topbacktracks. */
6426 backtrack->topbacktracks = NULL;
6427 if (!zero)
6428 {
6429 if (framesize < 0)
6430 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
6431 else /* TMP2 is set to [private_data_ptr] above. */
6432 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(TMP2), (stacksize - 1) * sizeof(sljit_sw), SLJIT_IMM, 0));
6433 }
6434
6435 /* None of them matched. */
/* The empty-iteration exits of the S-variants also continue from here. */
6436 set_jumps(emptymatch, LABEL());
6437 decrease_call_count(common);
6438 return cc + 1 + LINK_SIZE;
6439 }
6440
6440
6441 static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
6442 {
6443 int class_len;
6444
6445 *opcode = *cc;
6446 if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
6447 {
6448 cc++;
6449 *type = OP_CHAR;
6450 }
6451 else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
6452 {
6453 cc++;
6454 *type = OP_CHARI;
6455 *opcode -= OP_STARI - OP_STAR;
6456 }
6457 else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
6458 {
6459 cc++;
6460 *type = OP_NOT;
6461 *opcode -= OP_NOTSTAR - OP_STAR;
6462 }
6463 else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
6464 {
6465 cc++;
6466 *type = OP_NOTI;
6467 *opcode -= OP_NOTSTARI - OP_STAR;
6468 }
6469 else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
6470 {
6471 cc++;
6472 *opcode -= OP_TYPESTAR - OP_STAR;
6473 *type = 0;
6474 }
6475 else
6476 {
6477 SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
6478 *type = *opcode;
6479 cc++;
6480 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
6481 *opcode = cc[class_len - 1];
6482 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
6483 {
6484 *opcode -= OP_CRSTAR - OP_STAR;
6485 if (end != NULL)
6486 *end = cc + class_len;
6487 }
6488 else
6489 {
6490 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
6491 *arg1 = GET2(cc, (class_len + IMM2_SIZE));
6492 *arg2 = GET2(cc, class_len);
6493
6494 if (*arg2 == 0)
6495 {
6496 SLJIT_ASSERT(*arg1 != 0);
6497 *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
6498 }
6499 if (*arg1 == *arg2)
6500 *opcode = OP_EXACT;
6501
6502 if (end != NULL)
6503 *end = cc + class_len + 2 * IMM2_SIZE;
6504 }
6505 return cc;
6506 }
6507
6508 if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
6509 {
6510 *arg1 = GET2(cc, 0);
6511 cc += IMM2_SIZE;
6512 }
6513
6514 if (*type == 0)
6515 {
6516 *type = *cc;
6517 if (end != NULL)
6518 *end = next_opcode(common, cc);
6519 cc++;
6520 return cc;
6521 }
6522
6523 if (end != NULL)
6524 {
6525 *end = cc + 1;
6526 #ifdef SUPPORT_UTF
6527 if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
6528 #endif
6529 }
6530 return cc;
6531 }
6532
/* Emits the matching-path code for a repeated single item (character,
   negated character, character type or class) as decoded by
   get_iterator_parameters.

   Arguments:
     common   the compiler state
     cc       points to the repeat opcode (or the class opcode)
     parent   the enclosing backtrack record; it is not referenced by name in
              this body (presumably consumed by PUSH_BACKTRACK - confirm)

   Returns:   the 'end' position reported by get_iterator_parameters */
6533 static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
6534 {
6535 DEFINE_COMPILER;
6536 backtrack_common *backtrack;
6537 pcre_uchar opcode;
6538 pcre_uchar type;
6539 int arg1 = -1, arg2 = -1;
6540 pcre_uchar* end;
6541 jump_list *nomatch = NULL;
6542 struct sljit_jump *jump = NULL;
6543 struct sljit_label *label;
/* The iterator keeps up to two state words. Without a private data slot
   (PRIVATE_DATA(cc) == 0) they are STACK(0) and STACK(1); otherwise they are
   the locals at private_data_ptr and the word following it. */
6544 int private_data_ptr = PRIVATE_DATA(cc);
6545 int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LOCALS_REG);
6546 int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
6547 int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
6548 int tmp_base, tmp_offset;
6549
6550 PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
6551
6552 cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
6553
/* Select the scratch location used by the OP_EXACT and OP_POS* cases: the
   TMP3 register for the types listed first, the POSSESSIVE0 local otherwise.
   NOTE(review): presumably the second group can overwrite TMP3 while
   matching - confirm. */
6554 switch(type)
6555 {
6556 case OP_NOT_DIGIT:
6557 case OP_DIGIT:
6558 case OP_NOT_WHITESPACE:
6559 case OP_WHITESPACE:
6560 case OP_NOT_WORDCHAR:
6561 case OP_WORDCHAR:
6562 case OP_ANY:
6563 case OP_ALLANY:
6564 case OP_ANYBYTE:
6565 case OP_ANYNL:
6566 case OP_NOT_HSPACE:
6567 case OP_HSPACE:
6568 case OP_NOT_VSPACE:
6569 case OP_VSPACE:
6570 case OP_CHAR:
6571 case OP_CHARI:
6572 case OP_NOT:
6573 case OP_NOTI:
6574 case OP_CLASS:
6575 case OP_NCLASS:
6576 tmp_base = TMP3;
6577 tmp_offset = 0;
6578 break;
6579
6580 default:
6581 SLJIT_ASSERT_STOP();
6582 /* Fall through. */
6583
6584 case OP_EXTUNI:
6585 case OP_XCLASS:
6586 case OP_NOTPROP:
6587 case OP_PROP:
6588 tmp_base = SLJIT_MEM1(SLJIT_LOCALS_REG);
6589 tmp_offset = POSSESSIVE0;
6590 break;
6591 }
6592
6593 switch(opcode)
6594 {
/* Greedy repeats. */
6595 case OP_STAR:
6596 case OP_PLUS:
6597 case OP_UPTO:
6598 case OP_CRRANGE:
6599 if (type == OP_ANYNL || type == OP_EXTUNI)
6600 {
/* For these two types one stack word holding STR_PTR is pushed after every
   matched item. A zero word is pushed first (presumably the end marker for
   the backtracking path - confirm); OP_STAR and OP_UPTO also push the
   starting STR_PTR since they may match nothing. */
6601 SLJIT_ASSERT(private_data_ptr == 0);
6602 if (opcode == OP_STAR || opcode == OP_UPTO)
6603 {
6604 allocate_stack(common, 2);
6605 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6606 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
6607 }
6608 else
6609 {
6610 allocate_stack(common, 1);
6611 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
6612 }
6613
/* Counted variants keep the number of matched items in POSSESSIVE0. */
6614 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6615 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, 0);
6616
6617 label = LABEL();
6618 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6619 if (opcode == OP_UPTO || opcode == OP_CRRANGE)
6620 {
6621 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
6622 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
/* NOTE(review): the jump below returns to 'label' before the incremented
   count is written back to POSSESSIVE0, so the count would not advance while
   it is below arg2. get_iterator_parameters appears to produce OP_CRRANGE
   only for class types, which would make this combination unreachable -
   confirm. */
6623 if (opcode == OP_CRRANGE && arg2 > 0)
6624 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
/* Stop iterating once arg1 items have been matched. */
6625 if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
6626 jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
6627 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
6628 }
6629
6630 /* We cannot use TMP3 because of this allocate_stack. */
6631 allocate_stack(common, 1);
6632 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
6633 JUMPTO(SLJIT_JUMP, label);
6634 if (jump != NULL)
6635 JUMPHERE(jump);
6636 }
6637 else
6638 {
/* Other types: only two state words are kept. offset0 holds the position
   after the last matched item; offset1 holds the starting position for
   OP_STAR / OP_PLUS, or a counter starting at 1 for the counted variants
   (so the counter equals the number of matched items plus one). */
6639 if (opcode == OP_PLUS)
6640 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6641 if (private_data_ptr == 0)
6642 allocate_stack(common, 2);
6643 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6644 if (opcode <= OP_PLUS)
6645 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
6646 else
6647 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6648 label = LABEL();
6649 compile_char1_matchingpath(common, type, cc, &nomatch);
6650 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6651 if (opcode <= OP_PLUS)
6652 JUMPTO(SLJIT_JUMP, label);
6653 else if (opcode == OP_CRRANGE && arg1 == 0)
6654 {
/* arg1 == 0: no upper limit is tested, only the counter is advanced. */
6655 OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
6656 JUMPTO(SLJIT_JUMP, label);
6657 }
6658 else
6659 {
/* Continue while fewer than arg1 items have been matched. */
6660 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
6661 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
6662 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
6663 CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
6664 }
6665 set_jumps(nomatch, LABEL());
/* A class range fails when fewer than arg2 items were matched. */
6666 if (opcode == OP_CRRANGE)
6667 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
/* Continue from the position after the last matched item. */
6668 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
6669 }
6670 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6671 break;
6672
/* Minimizing star / plus: only the current position is saved here (plus the
   one mandatory item for OP_MINPLUS). Further items are presumably matched
   from the backtracking path, which re-enters at 'matchingpath' - confirm. */
6673 case OP_MINSTAR:
6674 case OP_MINPLUS:
6675 if (opcode == OP_MINPLUS)
6676 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6677 if (private_data_ptr == 0)
6678 allocate_stack(common, 1);
6679 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6680 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6681 break;
6682
/* Minimizing counted repeats: the position and a counter starting at 1 are
   saved. OP_CRMINRANGE jumps unconditionally to the topbacktracks list right
   away (presumably so that the backtracking path matches the required
   minimum first - confirm). */
6683 case OP_MINUPTO:
6684 case OP_CRMINRANGE:
6685 if (private_data_ptr == 0)
6686 allocate_stack(common, 2);
6687 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6688 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, 1);
6689 if (opcode == OP_CRMINRANGE)
6690 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
6691 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6692 break;
6693
/* Optional item: the position is saved; the greedy form tries the item
   immediately, the minimizing form does not. */
6694 case OP_QUERY:
6695 case OP_MINQUERY:
6696 if (private_data_ptr == 0)
6697 allocate_stack(common, 1);
6698 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
6699 if (opcode == OP_QUERY)
6700 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6701 BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
6702 break;
6703
/* Exact count: a down-counter in the scratch location starts at arg1; any
   failing item goes straight to topbacktracks. */
6704 case OP_EXACT:
6705 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
6706 label = LABEL();
6707 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6708 OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
6709 JUMPTO(SLJIT_C_NOT_ZERO, label);
6710 break;
6711
/* OP_POS* repeats: no state is stored in the iterator's state words. The
   scratch location holds the position after the last matched item and
   STR_PTR is reloaded from it when an item fails. OP_POSUPTO additionally
   counts down from arg1 in POSSESSIVE1. */
6712 case OP_POSSTAR:
6713 case OP_POSPLUS:
6714 case OP_POSUPTO:
6715 if (opcode == OP_POSPLUS)
6716 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
6717 if (opcode == OP_POSUPTO)
6718 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
6719 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6720 label = LABEL();
6721 compile_char1_matchingpath(common, type, cc, &nomatch);
6722 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6723 if (opcode != OP_POSUPTO)
6724 JUMPTO(SLJIT_JUMP, label);
6725 else
6726 {
6727 OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
6728 JUMPTO(SLJIT_C_NOT_ZERO, label);
6729 }
6730 set_jumps(nomatch, LABEL());
6731 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6732 break;
6733
/* OP_POSQUERY: try the item once; on failure STR_PTR is reloaded from the
   saved starting position. */
6734 case OP_POSQUERY:
6735 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6736 compile_char1_matchingpath(common, type, cc, &nomatch);
6737 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
6738 set_jumps(nomatch, LABEL());
6739 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
6740 break;
6741
6742 default:
6743 SLJIT_ASSERT_STOP();
6744 break;
6745 }
6746
6747 decrease_call_count(common);
6748 return end;
6749 }
6750